Data

# libraries
library(here)
library(glue)
library(stringr)
library(readr)
library(dplyr)
library(purrr)
library(DT)
library(rerddap)
library(sf)
library(mapview)
library(leafsync)
library(leaflet)
library(skimr)
library(lubridate)
library(plotly)
here <- here::here

# paths & variables
erddap_url <- "https://oceanview.pfeg.noaa.gov/erddap/"
d_id       <- "EB_MM_BC"
dir_data   <- here("data")
# manual download from Axiom space:
axiom_url  <- "https://researchworkspace.com/file/5851070/BeachCOMBERS_formatted_2019.csv"
# TODO: compsare BeachCOMBERS_formatted_2019.csv w/ ERDDAP cciea_B_B_MORT
d_bo19_csv <- glue("{dir_data}/{d_id}_bo19.csv")
d_bm16_csv <- glue("{dir_data}/{d_id}_bm16.csv")
d_csv      <- glue("{dir_data}/{d_id}.csv")
segs_geo   <- glue("{dir_data}/{d_id}_segments.geojson")
d_spp_csv  <- glue("{dir_data}/{d_id}_species.csv")
cols_bo19_csv       <- glue("{dir_data}/cols_bo19.csv")
cols_bm16tobo19_csv <- glue("{dir_data}/cols_bm16tobo19.csv")
d_mbnms_csv <- glue("{dir_data}/BeachCOMBERS_MBNMS.csv")

Sys.setenv(RERDDAP_DEFAULT_URL = erddap_url)

if (!file.exists(d_bo19_csv)){
  download.file(axiom_url, d_bo19_csv)
}

if (!file.exists(d_bm16_csv)){
  d_info <- info(d_id)
  d      <- tabledap(d_info)
  write_csv(d, d_bm16_csv)
}

d_bo19 <- read_csv(d_bo19_csv, guess_max = 100000)
d_bm16 <- read_csv(d_bm16_csv, guess_max = 10000)

if (!file.exists(cols_bm16tobo19_csv)){
  tibble(
    bo19_name = names(d_bo19),
    bo19_type = map_chr(d_bo19, ~ class(.) %>% paste(collapse = ","))) %>%
    arrange(bo19_name) %>%
    write_csv(cols_bo19_csv)
  tibble(
    bm16_name = names(d_bm16),
    bm16_type = map_chr(d_bm16, ~ class(.) %>% paste(collapse = ","))) %>%
    arrange(bm16_name) %>%
    write_csv(cols_bm16tobo19_csv)
  stop("NEXT: manually match column name and type for bm16 ({basename(d_bm16tobo19_csv)}) to match bo19 ({basename(d_bo19_csv)}).")
}
cx      <- read_csv(cols_bm16tobo19_csv)
cx_vars <- setNames(cx$bm16_name, cx$bo19_name)

# TODO: update bm16 rows
# table(d_bo19 %>% select(age_class_label, age_class))
# table(d_bo19 %>% select(condition_label, condition))
# table(d_bo19 %>% select(sex_label, sex))
# other bo19 flds missing in bo19: 
# - organization_code
# - organization_label
# - deposition_rate_carcass_per_km
# - inst_code

# setdiff(unique(d_bm16$age_class), unique(d_bo19$age_class))
# unique(d_bo19$age_class_label)

d_bm164bo19 <- d_bm16 %>% 
    filter(bird_or_mammal != "bird") %>% 
    mutate(
      unique_carcass_identifier = as.character(unique_carcass_identifier)) %>% 
    rename(!!cx_vars)
d <- bind_rows(
  d_bo19,
  d_bm164bo19) %>% 
  mutate(
    species = glue(
      "{species_code}: {species_common_name} ({species_sci_name}; ITIS: {species_itis})"))

Of the 148,100 rows in this dataset, let’s look at the first 1,000 records:

d %>% 
  slice(1:100) %>% 
  datatable()
skimr::skim(d)
Data summary
Name d
Number of rows 148100
Number of columns 40
_______________________
Column type frequency:
character 20
logical 3
numeric 16
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
unique_survey_identifier 0 1.00 4 17 0 27650 0
beach_segment_code 0 1.00 1 4 0 163 0
beach_segment_name 0 1.00 5 43 0 153 0
organization_code 7064 0.95 2 3 0 3 0
unique_carcass_identifier 12691 0.91 1 8 0 130699 0
species_sci_name 24844 0.83 6 36 0 218 0
species_common_name 2917 0.98 5 37 0 234 0
species_code 12691 0.91 4 4 0 223 0
sex 2917 0.98 1 1 0 3 0
external_findings 140646 0.05 1 652 0 4806 0
bird_or_mammal 2917 0.98 4 6 0 2 0
photos 2918 0.98 1 1 0 3 0
photo_code 136936 0.08 55 58 0 9760 0
notes 99602 0.33 1 253 0 3432 0
age_class_label 2917 0.98 2 21 0 7 0
sex_label 9981 0.93 4 12 0 3 0
condition_label 9981 0.93 5 32 0 6 0
organization_label 7064 0.95 11 30 0 3 0
geom 7064 0.95 23 37 0 151 0
species 0 1.00 21 85 0 250 0

Variable type: logical

skim_variable n_missing complete_rate mean count
species_genus 148100 0 NaN :
datum 148100 0 NaN :
deposition_rate_carcass_per_km 148100 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
latitude 0 1.00 37.15 0.90 34.05 36.72 36.87 37.83 41.98 ▁▇▅▁▁
longitude 0 1.00 -122.12 0.71 -124.40 -122.54 -121.85 -121.81 -118.92 ▁▆▇▁▁
beach_length_km 0 1.00 3.80 1.49 0.09 2.91 4.30 5.15 8.02 ▅▅▇▇▁
percent_of_beach_surveyed 87491 0.41 97.05 9.70 0.00 100.00 100.00 100.00 100.00 ▁▁▁▁▇
carcass_present 2917 0.98 1.00 0.00 1.00 1.00 1.00 1.00 1.00 ▁▁▇▁▁
species_itis 3032 0.98 194457.59 96101.90 17417.00 174536.00 175170.00 176974.00 824147.00 ▇▁▁▁▁
age_class 118453 0.20 0.86 0.34 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
standard_length_m 99076 0.33 0.00 0.00 0.00 0.00 0.00 0.00 0.70 ▇▁▁▁▁
condition 2917 0.98 3.06 1.12 0.00 3.00 3.00 4.00 9.00 ▁▇▇▁▁
cause_of_death 61715 0.58 3.96 0.38 0.00 4.00 4.00 4.00 7.00 ▁▁▇▁▁
alive 2917 0.98 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ▁▁▇▁▁
inst_code 9981 0.93 2.57 0.49 2.00 2.00 3.00 3.00 3.00 ▆▁▁▁▇
north_latitude 2917 0.98 37.12 0.80 34.04 36.74 36.90 37.80 38.97 ▁▁▇▃▃
north_longitude 2917 0.98 -122.11 0.68 -123.72 -122.51 -121.85 -121.81 -118.93 ▂▇▂▁▁
south_latitude 2917 0.98 37.10 0.81 34.04 36.70 36.85 37.79 38.95 ▁▁▇▃▃
south_longitude 2917 0.98 -122.10 0.68 -123.73 -122.52 -121.85 -121.81 -118.92 ▃▇▃▁▁

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
time 0 1 1993-09-15 2019-05-27 2008-12-01 7637
  • percent_of_beach_surveyed: all NaN
  • cause_of_death: > 0 1 2 3 4 5 6 7 > 818 6 302 21 64182 189 3 8

taxa

if (!file.exists(d_spp_csv)){
  
  flds_spp <- names(d) %>% 
    str_subset("species") %>% 
    c("bird_or_mammal")
  
  d_spp <- d %>% 
    select(!!!flds_spp) %>% 
    group_by_all() %>% 
    summarise(n = n()) %>% 
    arrange(bird_or_mammal, species)
  
  write_csv(d_spp, d_spp_csv)
}
d_spp <- read_csv(d_spp_csv)

table(d_spp$bird_or_mammal)
## 
##   bird mammal 
##    224     25

birds

d_spp %>% 
  filter(bird_or_mammal == "bird") %>% 
  datatable()

mammals

d_spp %>% 
  filter(bird_or_mammal == "mammal") %>% 
  datatable()

spatial

intersect sanctuary with segments

xy2ln <- function(x1, y1, x2, y2){
  if (any(is.na(c(x1, y1, x2, y2)))) return(NA)
  
  st_linestring(c(
    st_point(c(x1, y1)), 
    st_point(c(x2, y2))))
}

if (!file.exists(segs_geo)){
  
  segs <- d %>% 
    filter(
      !is.na(north_longitude), !is.na(north_latitude),
      !is.na(south_longitude), !is.na(south_latitude)) %>% 
    group_by(
      beach_segment_code, 
      #longitude, latitude, 
      north_longitude, north_latitude,
      south_longitude, south_latitude) %>% 
    summarize(
      n_rows = n()) %>% 
    ungroup() %>% 
    mutate(
      geom = pmap(list(
        north_longitude, north_latitude,
        south_longitude, south_latitude), xy2ln) %>% 
        st_sfc(crs = 4326)) %>% 
    st_set_geometry(.$geom)
  
  write_sf(segs, segs_geo, delete_dsn = T)
}
segs <- read_sf(segs_geo)

# TODO: move used fxns into library and utility.R
source("~/github/cinms/scripts/rocky.R")

nms     <- "mbnms"
nms_ply <- get_nms_ply(nms)

nms_segs <- segs %>% 
  filter(
    st_intersects(
      segs, nms_ply, sparse = F))

m1 <- mapview(nms_ply) + 
  mapview(segs, color="red") + 
  mapview(nms_segs, color="green")
m1

NOTE the red segments near San Jose that did not get intersected by the polygon because of spatial coarseness and mismatch.

intersect buffered sanctuary with segments

Buffer sanctuary by 0.005 decimal degress (~ 0.555 km at equator).

nms_ply_buf <- st_buffer(nms_ply, dist = 0.005)

nms_buf_segs <- segs %>% 
  filter(
    st_intersects(
      segs, nms_ply_buf, sparse = F))

d_nms_buf <- d %>% 
  semi_join(
    nms_buf_segs, by = "beach_segment_code")

# write to csv
d_mbnms <- d_nms_buf %>% 
  mutate(
    year = year(time)) %>% 
  group_by(
    year,
    carcass_present, bird_or_mammal, 
    species_code, species_sci_name, species_common_name, species_itis, species, 
    age_class, sex, condition, cause_of_death, alive) %>% 
  summarize(
    n = n()) %>% 
  ungroup()
write_csv(d_mbnms, d_mbnms_csv)

d_mbnms <- read_csv(d_mbnms_csv)

m2 <- mapview(nms_ply_buf) +
  mapview(nms_buf_segs, color="green")

leafsync::sync
## function (..., ncol = 2, sync = "all", sync.cursor = TRUE, no.initial.sync = TRUE) 
## {
##     latticeView(..., ncol = ncol, sync = sync, sync.cursor = sync.cursor, 
##         no.initial.sync = no.initial.sync)
## }
## <bytecode: 0x7f99f603f128>
## <environment: namespace:leafsync>
sync(m1, m2)

Filter by beach_segment_code in MBNMS: 98,503 of 148,100 rows (66.5%).

plots

bird carcasses

static

g_birds <- d_mbnms %>% 
  filter(
    bird_or_mammal == "bird",
    carcass_present == T) %>% 
  group_by(year) %>% 
  summarize(
    n = sum(n)) %>% 
  ungroup() %>% 
  ggplot(aes(x=year, y=n)) + 
  #geom_bar(stat = "identity") #+ 
  geom_col() #+ 
  #title("Bird carcasses over time")
print(g_birds)

interactive, color by species on hover

#plot.new()
g_bird_spp <- d_mbnms %>% 
  filter(
    bird_or_mammal == "bird",
    carcass_present == T) %>% 
  group_by(year, species) %>% 
  summarize(
    n = sum(n)) %>% 
  ungroup() %>% 
  ggplot(aes(x=year, y=n, fill=species)) + 
  #geom_bar(stat = "identity") + 
  geom_col() +
  #title("Bird carcasses over time, by species") + 
  theme(legend.position = "none")

ggplotly(g_bird_spp)

mammal carcasses

static

g_mammal <- d_mbnms %>% 
  filter(
    bird_or_mammal == "mammal",
    carcass_present == T) %>% 
  group_by(year) %>% 
  summarize(
    n = sum(n)) %>% 
  ungroup() %>%  
  ggplot(aes(x=year, y=n)) + 
  #geom_bar(stat = "identity") #+ 
  geom_col() #+ 
  #title("Bird carcasses over time")
print(g_mammal)

interactive, color by species on hover

#plot.new()
g_mammal_spp <- d_mbnms %>% 
  filter(
    bird_or_mammal == "mammal",
    carcass_present == T) %>% 
 group_by(year, species) %>% 
  summarize(
    n = sum(n)) %>% 
  ungroup() %>% 
  ggplot(aes(x=year, y=n, fill=species)) + 
  #geom_bar(stat = "identity") + 
  geom_col() +
  #title("Bird carcasses over time, by species") + 
  theme(legend.position = "none")

ggplotly(g_mammal_spp)