Tidy Tuesday: Numbat observations in Australia

In this post, I will analyse the #TidyTuesday Australia Numbats dataset. The data this week comes from the Atlas of Living Australia. Thanks to Di Cook for preparing this week’s dataset!

Tip

You can find my github code repository here.

1 Load libraries

# For loading Tidy Tuesday data
library(tidytuesdayR)

# EDA
library(tidyverse)
library(DT)

# Plotting
library(highcharter)

2 Load data

numbats <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-03-07/numbats.csv')

numbats %>% 
  head()

# A tibble: 6 × 16
  decimalLat…¹ decim…² eventDate           scien…³ taxon…⁴ recor…⁵ dataR…⁶  year
         <dbl>   <dbl> <dttm>              <chr>   <chr>   <chr>   <chr>   <dbl>
1        -37.6    146. NA                  Myrmec… https:… 738306… Queen …    NA
2        -35.1    150. 2014-06-05 02:00:00 Myrmec… https:… 13287c… ALA sp…  2014
3        -35      118. NA                  Myrmec… https:… 1041c2… Wester…    NA
4        -34.7    118. NA                  Myrmec… https:… c9804b… Wester…    NA
5        -34.6    117. NA                  Myrmec… https:… bc0c87… Wester…    NA
6        -34.6    117. NA                  Myrmec… https:… 2b917c… Wester…    NA
# … with 8 more variables: month <chr>, wday <chr>, hour <dbl>, day <date>,
#   dryandra <lgl>, prcp <dbl>, tmax <dbl>, tmin <dbl>, and abbreviated
#   variable names ¹decimalLatitude, ²decimalLongitude, ³scientificName,
#   ⁴taxonConceptID, ⁵recordID, ⁶dataResourceName

numbats %>% 
  count(year)

# A tibble: 26 × 2
    year     n
   <dbl> <int>
 1  1856     2
 2  1902     1
 3  1906     1
 4  1954     1
 5  1968     4
 6  1969     3
 7  1985     1
 8  2000     1
 9  2006     7
10  2007     2
# … with 16 more rows

numbats_edited <- numbats %>% 
  filter(year >= 2000) %>% 
  count(dataResourceName, year) %>% 
  complete(dataResourceName, year = seq(2000,2020,by=1)) %>% 
  replace_na(list(n=0)) 

# Create the initial column plot data
numbats_column <- numbats_edited %>% 
  group_by(name = dataResourceName) %>% 
  summarise(n = sum(n)) %>% 
  ungroup() %>% 
  mutate(drilldown = tolower(name)) %>% 
  arrange(desc(n)) %>% 
  head(5) %>% 
  tibble()

# Create the yearly drilldown data
numbats_year <- numbats_edited %>% 
  group_by(year) %>% 
  summarise(n = sum(n)) %>% 
  ungroup() 

# Create ids to connect layers of plot
idlist <- numbats_column$name

yearlyview <- idlist %>% map(~ numbats_edited %>% 
                               filter(dataResourceName == .x) %>% select(year, n))

dflist <- yearlyview %>% map(~ list_parse2(.x))

listall <- c(1:5) %>% map(~ list(
  id = tolower(idlist[[.x]]),
  data = dflist[[.x]]
))

numbat_observations <-   highchart() %>%
  hc_title(text = "Numbat observations by Data Resource",
           style = list(fontWeight = "bold", fontSize = "20px"),
           align = "center") %>% 
  hc_subtitle(text = "Top 5 by count from year 2000 onward") %>% 
  hc_caption(text = "Click on bar to drilldown for yearly view") %>% 
  hc_xAxis(type = "category") %>%
  hc_legend(enabled = FALSE) %>%
  hc_plotOptions(
    series = list(
      boderWidth = 0,
      dataLabels = list(
        enabled = TRUE,
        style = list(fontSize = 15, textOutline = "none")
      )
    )
  ) %>%
  hc_add_series(
    data = numbats_column,
    type = "column",
    hcaes(name = name, y = n),
    name = "observations",
    color = "#008080"
  ) %>% 
  hc_drilldown(
    allowPointDrilldown = TRUE,
    series = listall
  )

numbat_observations