# Getting a list of 50 species from occurrence data
# Build the vector `species`: the 50 most frequently recorded common names
# among the eBird observations that fall inside the county polygon.
library("magrittr")

# County boundary from OpenStreetMap, requested as an sf polygon.
landkreis_konstanz <- osmdata::getbb(
  "Landkreis Konstanz",
  format_out = "sf_polygon"
)
crs <- sf::st_crs(landkreis_konstanz)

# NOTE(review): the observation coordinates are tagged with the polygon's
# CRS rather than transformed into it; this assumes both are already
# longitude/latitude in the same reference system -- confirm.
f_out_ebd <- "ebird/ebd_lk_konstanz.txt"
ebd <- sf::st_as_sf(
  auk::read_ebd(f_out_ebd),
  coords = c("longitude", "latitude"),
  crs = crs
)

# For each observation, the indices of the polygons that contain it.
in_indices <- sf::st_within(ebd, landkreis_konstanz)

# Keep observations inside the polygon, drop the sf class, then keep only
# approved records observed after 2010.
ebd <- ebd %>%
  dplyr::filter(lengths(in_indices) > 0) %>%
  as.data.frame() %>%
  dplyr::filter(approved, lubridate::year(observation_date) > 2010)

# Rank common names by number of records and keep the first 50.
species <- ebd %>%
  dplyr::count(common_name, sort = TRUE) %>%
  dplyr::pull(common_name) %>%
  head(n = 50)
# Querying the scientific literature
# Search the literature for `species` and return titles and abstracts.
#
# The query is lower-cased and handed to fulltext::ft_search(); the hits are
# fetched and collected, reduced to their "title" and "abstract" chunks,
# tabularized, and row-bound into a single data frame.
#
# species: character query (a species common name in this script).
# Returns: the row-bound result of fulltext::ft_tabularize().
.get_papers <- function(species) {
  query <- tolower(species)
  hits <- fulltext::ft_search(query)
  articles <- fulltext::ft_collect(fulltext::ft_get(hits))
  chunks <- fulltext::ft_chunks(articles, c("title", "abstract"))
  dplyr::bind_rows(fulltext::ft_tabularize(chunks))
}
# Example: titles of the papers found for the most frequently recorded species.
dplyr::pull(.get_papers(species[1]), title)
## [1] "Great spotted cuckoo nestlings have no antipredatory effect on magpie or carrion crow host nests in southern Spain"
## [2] "Donor-Control of Scavenging Food Webs at the Land-Ocean Interface"
## [3] "Formal comment to Soler et al.: Great spotted cuckoo nestlings have no antipredatory effect on magpie or carrion crow host nests in southern Spain"
## [4] "Socially Driven Consistent Behavioural Differences during Development in Common Ravens and Carrion Crows"
## [5] "Behavioral Responses to Inequity in Reward Distribution and Working Effort in Crows and Ravens"
## [6] "Early Duplication of a Single MHC IIB Locus Prior to the Passerine Radiations"
## [7] "Investigating the impact of media on demand for wildlife: A case study of Harry Potter and the UK trade in owls"
## [8] "New Caledonian Crows Rapidly Solve a Collaborative Problem without Cooperative Cognition"
## [9] "Nest Predation Deviates from Nest Predator Abundance in an Ecologically Trapped Bird"
## [10] "Dietary Compositions and Their Seasonal Shifts in Japanese Resident Birds, Estimated from the Analysis of Volunteer Monitoring Data"
# Rate-limited version of .get_papers(): at most 1 call per 2-second period.
get_papers <- ratelimitr::limit_rate(
  .get_papers,
  rate = ratelimitr::rate(n = 1, period = 2)
)
# Query every species through the rate-limited helper and stack the results.
all_papers <- dplyr::bind_rows(lapply(species, get_papers))
nrow(all_papers)
## [1] 521
# The same paper can be returned for several species: keep the first
# occurrence of each distinct row.
all_papers <- all_papers[!duplicated(all_papers), , drop = FALSE]
nrow(all_papers)
## [1] 378
library("tidytext")
library("rcorpora")
# English stop words from the "words/stopwords/en" corpus, used to filter
# tokens before counting.
stopwords <- corpora("words/stopwords/en")[["stopWords"]]
# Word frequencies over paper titles and abstracts.
#
# Each paper's title and abstract are pasted into one `text` string, split
# into one word per row, stripped of stop words, and counted. The result is
# sorted by decreasing count and has columns `word` and `n`.
words <- all_papers %>%
  dplyr::group_by(title, abstract) %>%
  dplyr::summarise(text = paste(title, abstract)) %>%
  dplyr::ungroup() %>%
  unnest_tokens(word, text) %>%
  dplyr::filter(!word %in% stopwords) %>%
  dplyr::count(word, sort = TRUE)
head(words, n = 10)
## # A tibble: 10 x 2
## word n
## <chr> <int>
## 1 species 757
## 2 birds 523
## 3 virus 266
## 4 bird 263
## 5 avian 262
## 6 breeding 249
## 7 study 245
## 8 wild 227
## 9 populations 217
## 10 population 213
library("wordcloud")
# Plot a word cloud of the literature word counts, capped at 100 words.
wordcloud(words$word, words$n, max.words = 100)
# Draw the 100 most frequent words as a cloud shaped by the "bird.png" mask.
bird <- words %>%
  head(n = 100) %>%
  wordcloud2::wordcloud2(figPath = "bird.png",
                         color = "black", size = 1.5)
# Save the widget as HTML with its dependencies kept outside the file.
htmlwidgets::saveWidget(bird,
                        "tmp.html",
                        selfcontained = FALSE)
# NOTE(review): nothing in this script creates "screenshot.png"; presumably
# it is captured from tmp.html by a step not shown here -- confirm.
magick::image_read("screenshot.png")
# Querying scientific open data
# Look up DataONE records whose abstract matches `species`.
#
# Runs a Solr query against the DataONE "PROD" coordinating node asking for
# the id, title and abstract fields, sorted by upload date (descending).
#
# species: character query term (a species common name in this script).
# Returns: NULL when the query yields no rows; otherwise a tibble with
#   columns `species`, `title` and `abstract`, one row per distinct result.
.get_meta <- function(species) {
  node <- dataone::CNode("PROD")
  # NOTE(review): `species` is interpolated without quotes; for multi-word
  # names Solr may apply the `abstract:` field to the first word only --
  # confirm this is the intended query.
  solr_params <- list(
    q = glue::glue("abstract:{species}"),
    fl = "id,title,abstract",
    sort = "dateUploaded+desc"
  )
  hits <- dataone::query(node, solrQuery = solr_params, as = "data.frame")

  if (nrow(hits) == 0) {
    return(NULL)
  }

  # Drop exact duplicate rows before building the output.
  hits <- unique(hits)
  tibble::tibble(
    species = species,
    title = hits$title,
    abstract = hits$abstract
  )
}
# Rate-limited version of .get_meta(): at most 1 call per 2-second period.
get_meta <- ratelimitr::limit_rate(
  .get_meta,
  rate = ratelimitr::rate(n = 1, period = 2)
)
# Query DataONE for every species through the rate-limited helper and stack
# the per-species tibbles (species without results contribute nothing).
all_meta <- dplyr::bind_rows(lapply(species, get_meta))
nrow(all_meta)
## [1] 266
# Number of species that returned at least one record.
length(unique(all_meta$species))
## [1] 35
# Keep one row per distinct title/abstract pair, dropping the species column.
all_meta <- unique(all_meta[c("title", "abstract")])
nrow(all_meta)
## [1] 105
# Word frequencies over dataset titles and abstracts.
#
# Each record's title and abstract are pasted into one `text` string, split
# into one word per row, stripped of stop words, and counted. The result is
# sorted by decreasing count and has columns `word` and `n`.
data_words <- all_meta %>%
  dplyr::group_by(title, abstract) %>%
  dplyr::summarise(text = paste(title, abstract)) %>%
  dplyr::ungroup() %>%
  unnest_tokens(word, text) %>%
  dplyr::filter(!word %in% stopwords) %>%
  dplyr::count(word, sort = TRUE)
head(data_words, n = 10)
## # A tibble: 10 x 2
## word n
## <chr> <int>
## 1 data 154
## 2 species 122
## 3 birds 94
## 4 breeding 89
## 5 feeding 75
## 6 population 66
## 7 genetic 64
## 8 bird 60
## 9 study 58
## 10 effects 56
# Plot a word cloud of the dataset word counts, capped at 100 words.
wordcloud(data_words$word, data_words$n, max.words = 100)
# Scientific literature access
# Same search -> get -> collect -> chunk -> tabularize -> bind pipeline as
# the one defined in .get_papers(); call the helper instead of repeating it.
.get_papers(species)
# Scientific data access… and publication with R
# No more birding? No, your turn!