Getting a list of 50 species from occurrence data

# Polygon of the district, fetched by name and used further down to filter
# the observations spatially
landkreis_konstanz <- osmdata::getbb(
  "Landkreis Konstanz",
  format_out = "sf_polygon"
)

# Coordinate reference system of that polygon, reused when the observation
# coordinates are converted to sf points
crs <- sf::st_crs(landkreis_konstanz)

# Read the eBird extract and turn it into sf points that share the CRS of the
# district polygon
f_out_ebd <- "ebird/ebd_lk_konstanz.txt"

library("magrittr")

ebd <- sf::st_as_sf(
  auk::read_ebd(f_out_ebd),
  coords = c("longitude", "latitude"),
  crs = crs
)

# One element per observation; an element of length zero means the point is
# not within the polygon
in_indices <- sf::st_within(ebd, landkreis_konstanz)

# Keep the points inside the polygon, fall back to a plain data frame, then
# keep approved records observed after 2010
ebd <- ebd %>%
  dplyr::filter(lengths(in_indices) > 0) %>%
  as.data.frame() %>%
  dplyr::filter(approved, lubridate::year(observation_date) > 2010)

# Common names of the 50 most frequently recorded species
species_counts <- dplyr::count(ebd, common_name, sort = TRUE)
species <- dplyr::pull(head(species_counts, n = 50), common_name)

Querying the scientific literature

# Search the literature for a species name and return the titles and
# abstracts of the hits as a single data frame.
#
# species: search term; it is lower-cased before being sent to the search.
.get_papers <- function(species){
  query <- tolower(species)
  hits <- fulltext::ft_search(query)
  fetched <- fulltext::ft_get(hits)
  collected <- fulltext::ft_collect(fetched)
  # only the title and abstract sections are extracted from each article
  sections <- fulltext::ft_chunks(collected, c("title", "abstract"))
  tables <- fulltext::ft_tabularize(sections)
  dplyr::bind_rows(tables)
}

# Titles found for the first species in the list
dplyr::pull(.get_papers(species[1]), title)
##  [1] "Great spotted cuckoo nestlings have no antipredatory effect on magpie or carrion crow host nests in southern Spain"                                
##  [2] "Donor-Control of Scavenging Food Webs at the Land-Ocean Interface"                                                                                 
##  [3] "Formal comment to Soler et al.: Great spotted cuckoo nestlings have no antipredatory effect on magpie or carrion crow host nests in southern Spain"
##  [4] "Socially Driven Consistent Behavioural Differences during Development in Common Ravens and Carrion Crows"                                          
##  [5] "Behavioral Responses to Inequity in Reward Distribution and Working Effort in Crows and Ravens"                                                    
##  [6] "Early Duplication of a Single MHC IIB Locus Prior to the Passerine Radiations"                                                                     
##  [7] "Investigating the impact of media on demand for wildlife: A case study of Harry Potter and the UK trade in owls"                                   
##  [8] "New Caledonian Crows Rapidly Solve a Collaborative Problem without Cooperative Cognition"                                                          
##  [9] "Nest Predation Deviates from Nest Predator Abundance in an Ecologically Trapped Bird"                                                              
## [10] "Dietary Compositions and Their Seasonal Shifts in Japanese Resident Birds, Estimated from the Analysis of Volunteer Monitoring Data"
# Rate-limited version of .get_papers(): rate(n = 1, period = 2)
get_papers <- ratelimitr::limit_rate(
  .get_papers,
  rate = ratelimitr::rate(n = 1, period = 2)
)

# Run the search for every species and stack the results into one data frame
all_papers <- purrr::map_df(.x = species, .f = get_papers)

nrow(all_papers)
## [1] 521
# Remove rows that are exact duplicates of an earlier row
all_papers <- unique(all_papers)

nrow(all_papers)
## [1] 378
library("tidytext")
library("rcorpora")

# English stopword list from rcorpora
stopwords <- corpora("words/stopwords/en")[["stopWords"]]

# One text per paper: title and abstract pasted together
paper_texts <- all_papers %>%
  dplyr::group_by(title, abstract) %>%
  dplyr::summarise(text = paste(title, abstract)) %>%
  dplyr::ungroup()

# Split the texts into words, drop the stopwords and count what remains,
# most frequent first
words <- paper_texts %>%
  unnest_tokens(output = word, input = text) %>%
  dplyr::filter(!(word %in% stopwords)) %>%
  dplyr::count(word, sort = TRUE)

head(words, n = 10)
## # A tibble: 10 x 2
##    word            n
##    <chr>       <int>
##  1 species       757
##  2 birds         523
##  3 virus         266
##  4 bird          263
##  5 avian         262
##  6 breeding      249
##  7 study         245
##  8 wild          227
##  9 populations   217
## 10 population    213
library("wordcloud")

# Word cloud of the paper vocabulary, limited to 100 words
wordcloud(words = words$word, freq = words$n, max.words = 100)
wordcloud of titles and abstracts of scientific papers

wordcloud of titles and abstracts of scientific papers

# Build a wordcloud2 widget from the 100 most frequent words, using bird.png
# as the figure for the cloud
bird <- words %>%
  head(n = 100) %>%
  wordcloud2::wordcloud2(figPath = "bird.png",
                         color = "black", size = 1.5)
# Save the widget as an HTML file; approach taken from
# https://www.r-graph-gallery.com/196-the-wordcloud2-library/
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned
htmlwidgets::saveWidget(bird,
                        "tmp.html",
                        selfcontained = FALSE)
# NOTE(review): nothing in the visible code writes screenshot.png; presumably
# a screenshot of tmp.html is taken in a step not shown here -- confirm
magick::image_read("screenshot.png")
wordcloud shaped as a bird

wordcloud shaped as a bird

Querying scientific open data

# Search the DataONE "PROD" coordinating node for records whose abstract
# matches a species name.
#
# species: search term, interpolated into the query on the abstract field.
#
# Returns a tibble with the columns species, title and abstract, or NULL when
# the query produced no result.
.get_meta <- function(species){

  cn <- dataone::CNode("PROD")
  search <- list(q = glue::glue("abstract:{species}"),
                 fl = "id,title,abstract",
                 sort = "dateUploaded+desc")

  result <- dataone::query(cn, solrQuery = search,
                           as = "data.frame")

  # Check for NULL before calling nrow(): nrow(NULL) is NULL, and
  # `if (NULL == 0)` stops with "argument is of length zero"
  if (is.null(result) || nrow(result) == 0) {
    return(NULL)
  }

  # Remove rows that are exact duplicates.
  # NOTE(review): `id` is one of the fetched fields, so rows that differ only
  # in their id are all kept here; title/abstract duplicates have to be
  # removed by the caller
  result <- unique(result)

  tibble::tibble(species = species,
                 title = result$title,
                 abstract = result$abstract)
}
# Rate-limited version of .get_meta(): rate(n = 1, period = 2)
get_meta <- ratelimitr::limit_rate(
  .get_meta,
  rate = ratelimitr::rate(n = 1, period = 2)
)

# Query the metadata for every species and stack the results
all_meta <- purrr::map_df(.x = species, .f = get_meta)

nrow(all_meta)
## [1] 266
length(unique(all_meta$species))
## [1] 35
# Keep each title/abstract combination once, dropping the species column
all_meta <- unique(all_meta[, c("title", "abstract")])

nrow(all_meta)
## [1] 105
# One text per metadata record: title and abstract pasted together
meta_texts <- all_meta %>%
  dplyr::group_by(title, abstract) %>%
  dplyr::summarise(text = paste(title, abstract)) %>%
  dplyr::ungroup()

# Split the texts into words, drop the stopwords and count what remains,
# most frequent first
data_words <- meta_texts %>%
  unnest_tokens(output = word, input = text) %>%
  dplyr::filter(!(word %in% stopwords)) %>%
  dplyr::count(word, sort = TRUE)

head(data_words, n = 10)
## # A tibble: 10 x 2
##    word           n
##    <chr>      <int>
##  1 data         154
##  2 species      122
##  3 birds         94
##  4 breeding      89
##  5 feeding       75
##  6 population    66
##  7 genetic       64
##  8 bird          60
##  9 study         58
## 10 effects       56
# Word cloud of the metadata vocabulary, limited to 100 words
wordcloud(words = data_words$word, freq = data_words$n, max.words = 100)
wordcloud of titles and abstracts of scientific metadata

wordcloud of titles and abstracts of scientific metadata

Scientific literature access

# The literature pipeline written out one named step at a time
search_terms <- tolower(species)
search_hits <- fulltext::ft_search(search_terms)
full_texts <- fulltext::ft_get(search_hits)
collected_texts <- fulltext::ft_collect(full_texts)
text_chunks <- fulltext::ft_chunks(collected_texts, c("title", "abstract"))
chunk_tables <- fulltext::ft_tabularize(text_chunks)
dplyr::bind_rows(chunk_tables)

Scientific data access… and publication with R

No more birding? No, your turn!