The maximum number of rows that a OnePetro query can return is 1000. This means that the user can set up the query to return up to a maximum of 1000 papers. Above that number, the query to OnePetro will return an error.
OnePetro has options to define the number of rows to display at 10, 50 and 100 rows. Additionally, through scripts like these, that number could be raised up to 1,000.
This article describes the process of reading multiple pages, holding thousands of papers, into a single dataframe.
Retrieve the most numerous type of paper
library(petro.One)
# Build the search URL for the query.
my_url <- make_search_url(
  how = "all",
  query = "pressure transient analysis"
)
# Total number of papers found by the query.
get_papers_count(my_url)
## [1] 4049
# Break the total down by document type.
papers_by_type(my_url)
## # A tibble: 7 x 2
## name value
## <chr> <dbl>
## 1 Chapter 1
## 2 Conference paper 3080
## 3 General 60
## 4 Journal paper 895
## 5 Media 5
## 6 Other 1
## 7 Presentation 7
For the time being we will retrieve only conference papers.
# Only "conference-paper" is requested because the other document types
# have a different dataframe structure.
my_url_1 <- make_search_url(
  query = "pressure transient analysis",
  dc_type = "conference-paper",
  how = "all",
  rows = 1000,
  start = 0
)
get_papers_count(my_url_1)
## [1] 3080
# Page 1 (start = 0): download the page, save it as HTML, then parse the
# saved file into a dataframe.
htm_1 <- "pta-01-conference.html"
page_1 <- read_onepetro(my_url_1)
xml2::write_html(page_1, file = htm_1)
onepetro_page_to_dataframe(htm_1)
## # A tibble: 1,000 x 6
## title_data
## <chr>
## 1 Pressure Transient Analysis in SAGD
## 2 Well-head Pressure Transient Analysis
## 3 Automated Pressure Transient Analysis with Smart Technology
## 4 Pressure Transient Analysis of Multifractured Horizontal Wells
## 5 Pressure Transient Analysis in Multilayered Faulted Reservoirs
## 6 Integrating Pressure Transient Analysis in Hydraulic Fracturing
## 7 How Wellbore Dynamics Affect Pressure Transient Analysis
## 8 Software Showcase: Pressure Transient Analysis Programs
## 9 Numerical Solutions for Pressure Transient Analysis
## 10 Pressure-Transient Analysis for Perforated Wells
## # ... with 990 more rows, and 5 more variables: paper_id <chr>,
## # source <chr>, type <chr>, year <int>, author1_data <chr>
# Page 2 (start = 1000): same query, next block of up to 1000 rows.
my_url_2 <- make_search_url(
  query = "pressure transient analysis",
  dc_type = "conference-paper",
  how = "all",
  rows = 1000,
  start = 1000
)
# Download the page, save it as HTML, then parse the saved file.
htm_2 <- "pta-02-conference.html"
page_2 <- read_onepetro(my_url_2)
xml2::write_html(page_2, file = htm_2)
onepetro_page_to_dataframe(htm_2)
## # A tibble: 1,000 x 6
## title_data
## <chr>
## 1 Pressure Transient Behavior of Horizontal Wells Intersecting Multiple Hydra
## 2 Physics-Based Approach for Shale Gas Numerical Simulation: Quintuple Porosi
## 3 An Improved Boundary Element Method for Modeling Fluid Flow through Fractur
## 4 The Value of Transient Temperature Responses in Testing Operations
## 5 Application of Multi-Level and High-Resolution Fracture Modeling in Field-S
## 6 Rate Decline, Power Laws, and Subdiffusion in Fractured Rocks
## 7 A Practical Workflow for Probabilistic History Matching and Forecast Uncert
## 8 Geophysical Monitoring of the Multilayer Reservoir with of Flooding and Ind
## 9 Geophysical Monitoring of the Multilayer Reservoir with of Flooding and Ind
## 10 Low Salinity Flooding Trial at West Salym Field
## # ... with 990 more rows, and 5 more variables: paper_id <chr>,
## # source <chr>, type <chr>, year <int>, author1_data <chr>
# Page 3 (start = 2000): same query, next block of up to 1000 rows.
my_url_3 <- make_search_url(
  query = "pressure transient analysis",
  dc_type = "conference-paper",
  how = "all",
  rows = 1000,
  start = 2000
)
# Download the page, save it as HTML, then parse the saved file.
htm_3 <- "pta-03-conference.html"
page_3 <- read_onepetro(my_url_3)
xml2::write_html(page_3, file = htm_3)
onepetro_page_to_dataframe(htm_3)
## # A tibble: 1,000 x 6
## title_data
## <chr>
## 1 Case History: Production Results From Partial Monolayer Proppant Fracture T
## 2 Development Of Boomerang Hills Area Of Bolivia
## 3 Subzero Hydraulic Fracturing: A Field Case Study, Lisburne Carbonate Reserv
## 4 Slug Testing in Multiple Coal Seams Intersected by a Single, Vertical Fract
## 5 Interpretation of Horizontal Well Performance in Complicated Systems by the
## 6 Numerical Study of the Effects of Lean Zones on SAGD Performance in Periodi
## 7 Integrating Pressure Transient Test Data With Seismic Attribute Analysis to
## 8 Propellant Perforation Breakdown Technique: Eastern Venezuela Field Applica
## 9 Multirate Well Testing to Evaluate the Effectiveness of Frac Packing
## 10 Improving Reservoir Characterization Using Accurate Flow-Rate History
## # ... with 990 more rows, and 5 more variables: paper_id <chr>,
## # source <chr>, type <chr>, year <int>, author1_data <chr>
# Page 4 (start = 3000): the remaining conference papers.
# Request the same page size as the previous pages (rows = 1000) instead of
# a hard-coded 100: only 80 of the 3080 papers are left today, but a smaller
# page size would silently drop papers if the result count grew past 3100.
my_url_4 <- make_search_url(query = "pressure transient analysis",
                            how = "all",
                            dc_type = "conference-paper",
                            start = 3000,
                            rows = 1000)
# Download the page, save it as HTML, then parse the saved file.
page_4 <- read_onepetro(my_url_4)
htm_4 <- "pta-04-conference.html"
xml2::write_html(page_4, file = htm_4)
onepetro_page_to_dataframe(htm_4)
## # A tibble: 80 x 6
## title_data
## <chr>
## 1 From Operations to Desktop Analysis to Field Implementation: Well and ESP O
## 2 Fracture and Wellbore Spacing Optimization in Multistage Fractured Horizont
## 3 Horizontal-Well Productivity Equations With Both Uniform-Flux and Uniform-P
## 4 Adopting North American, Multi-stage Fracturing and Horizontal Completion T
## 5 The Role of Natural Fractures in Shale Gas Production
## 6 Experience from the Use of Automatic Well-Test Analysis
## 7 Using Empirically Developed Rock Tables to Predict and History Match Fractu
## 8 Horizontal Well Pressure Analysis
## 9 Analytical Hindered-Matrix-Fracture Transfer Models For Naturally Fractured
## 10 Semi-Analytical Model for Reservoirs with Forchheimer's Non-Darcy Flow
## # ... with 70 more rows, and 5 more variables: paper_id <chr>,
## # source <chr>, type <chr>, year <int>, author1_data <chr>
# Parse each saved HTML page again, this time keeping the resulting
# dataframes so they can be combined.
p1 <- onepetro_page_to_dataframe(htm_1)
p2 <- onepetro_page_to_dataframe(htm_2)
p3 <- onepetro_page_to_dataframe(htm_3)
p4 <- onepetro_page_to_dataframe(htm_4)
# Stack the four pages, in order, into one dataframe and print it.
papers <- do.call(rbind, list(p1, p2, p3, p4))
papers
## # A tibble: 3,080 x 6
## title_data
## <chr>
## 1 Pressure Transient Analysis in SAGD
## 2 Well-head Pressure Transient Analysis
## 3 Automated Pressure Transient Analysis with Smart Technology
## 4 Pressure Transient Analysis of Multifractured Horizontal Wells
## 5 Pressure Transient Analysis in Multilayered Faulted Reservoirs
## 6 Integrating Pressure Transient Analysis in Hydraulic Fracturing
## 7 How Wellbore Dynamics Affect Pressure Transient Analysis
## 8 Software Showcase: Pressure Transient Analysis Programs
## 9 Numerical Solutions for Pressure Transient Analysis
## 10 Pressure-Transient Analysis for Perforated Wells
## # ... with 3,070 more rows, and 5 more variables: paper_id <chr>,
## # source <chr>, type <chr>, year <int>, author1_data <chr>
# Keep only the papers whose title contains the search phrase
# (case-insensitive match against the title_data column).
pattern <- "pressure transient analysis"
rows <- which(grepl(pattern, papers[["title_data"]], ignore.case = TRUE))
papers[rows, ]
## # A tibble: 163 x 6
## title_data
## <chr>
## 1 Pressure Transient Analysis in SAGD
## 2 Well-head Pressure Transient Analysis
## 3 Automated Pressure Transient Analysis with Smart Technology
## 4 Pressure Transient Analysis of Multifractured Horizontal Wells
## 5 Pressure Transient Analysis in Multilayered Faulted Reservoirs
## 6 Integrating Pressure Transient Analysis in Hydraulic Fracturing
## 7 How Wellbore Dynamics Affect Pressure Transient Analysis
## 8 Software Showcase: Pressure Transient Analysis Programs
## 9 Numerical Solutions for Pressure Transient Analysis
## 10 Generalization of Wellbore Effects in Pressure Transient Analysis
## # ... with 153 more rows, and 5 more variables: paper_id <chr>,
## # source <chr>, type <chr>, year <int>, author1_data <chr>
# Clean up: delete the four HTML files that were written to disk while
# saving the downloaded pages. file.remove() returns one logical per file.
files <- c(htm_1, htm_2, htm_3, htm_4)
file.remove(files)
## [1] TRUE TRUE TRUE TRUE