BeachCOMBERS for NMS infographics

Data

bm16: [b]irds + [m]ammals, up to 20[16]
- ERDDAP - BeachCOMBERS Effort-Based Marine Mammal and Seabird Beach Cast Survey - Data Access Form EB_MM_BC, birds + mammals, but only up to 2016
b019: [b]irds [o]nly, up to 20[19]
- ERDDAP - Seabird beached mortality - Data Access Form cciea_B_B_MORT
- researchworkspace.com/…/BeachCOMBERS_formatted_2019.csv, birds only, up to 2019
- TODO: check ERDDAP same as researchworkspace.com

# libraries
library(here)
library(glue)
library(stringr)
library(readr)
library(dplyr)
library(purrr)
library(DT)
library(rerddap)
library(sf)
library(mapview)
library(leafsync)
library(leaflet)
library(skimr)
library(lubridate)
library(plotly)
here <- here::here

# paths & variables
erddap_url <- "https://oceanview.pfeg.noaa.gov/erddap/"
d_id       <- "EB_MM_BC"
dir_data   <- here("data")
# manual download from Axiom space:
axiom_url  <- "https://researchworkspace.com/file/5851070/BeachCOMBERS_formatted_2019.csv"
# TODO: compsare BeachCOMBERS_formatted_2019.csv w/ ERDDAP cciea_B_B_MORT
d_bo19_csv <- glue("{dir_data}/{d_id}_bo19.csv")
d_bm16_csv <- glue("{dir_data}/{d_id}_bm16.csv")
d_csv      <- glue("{dir_data}/{d_id}.csv")
segs_geo   <- glue("{dir_data}/{d_id}_segments.geojson")
d_spp_csv  <- glue("{dir_data}/{d_id}_species.csv")
cols_bo19_csv       <- glue("{dir_data}/cols_bo19.csv")
cols_bm16tobo19_csv <- glue("{dir_data}/cols_bm16tobo19.csv")
d_mbnms_csv <- glue("{dir_data}/BeachCOMBERS_MBNMS.csv")

Sys.setenv(RERDDAP_DEFAULT_URL = erddap_url)

if (!file.exists(d_bo19_csv)){
  download.file(axiom_url, d_bo19_csv)
}

if (!file.exists(d_bm16_csv)){
  d_info <- info(d_id)
  d      <- tabledap(d_info)
  write_csv(d, d_bm16_csv)
}

d_bo19 <- read_csv(d_bo19_csv, guess_max = 100000)
d_bm16 <- read_csv(d_bm16_csv, guess_max = 10000)

if (!file.exists(cols_bm16tobo19_csv)){
  tibble(
    bo19_name = names(d_bo19),
    bo19_type = map_chr(d_bo19, ~ class(.) %>% paste(collapse = ","))) %>%
    arrange(bo19_name) %>%
    write_csv(cols_bo19_csv)
  tibble(
    bm16_name = names(d_bm16),
    bm16_type = map_chr(d_bm16, ~ class(.) %>% paste(collapse = ","))) %>%
    arrange(bm16_name) %>%
    write_csv(cols_bm16tobo19_csv)
  stop("NEXT: manually match column name and type for bm16 ({basename(d_bm16tobo19_csv)}) to match bo19 ({basename(d_bo19_csv)}).")
}
cx      <- read_csv(cols_bm16tobo19_csv)
cx_vars <- setNames(cx$bm16_name, cx$bo19_name)

# TODO: update bm16 rows
# table(d_bo19 %>% select(age_class_label, age_class))
# table(d_bo19 %>% select(condition_label, condition))
# table(d_bo19 %>% select(sex_label, sex))
# other bo19 flds missing in bo19: 
# - organization_code
# - organization_label
# - deposition_rate_carcass_per_km
# - inst_code

# setdiff(unique(d_bm16$age_class), unique(d_bo19$age_class))
# unique(d_bo19$age_class_label)

d_bm164bo19 <- d_bm16 %>% 
    filter(bird_or_mammal != "bird") %>% 
    mutate(
      unique_carcass_identifier = as.character(unique_carcass_identifier)) %>% 
    rename(!!cx_vars)
d <- bind_rows(
  d_bo19,
  d_bm164bo19) %>% 
  mutate(
    species = glue(
      "{species_code}: {species_common_name} ({species_sci_name}; ITIS: {species_itis})"))

Of the 148,100 rows in this dataset, let’s look at the first 1,000 records:

d %>% 
  slice(1:100) %>% 
  datatable()

skimr::skim(d)

Data summary
Name	d
Number of rows	148100
Number of columns	40
_______________________
Column type frequency:
character	20
logical	3
numeric	16
POSIXct	1
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	n_unique
unique_survey_identifier	0	1.00	4	17	27650
beach_segment_code	0	1.00	1	4	163
beach_segment_name	0	1.00	5	43	153
organization_code	7064	0.95	2	3	3
unique_carcass_identifier	12691	0.91	1	8	130699
species_sci_name	24844	0.83	6	36	218
species_common_name	2917	0.98	5	37	234
species_code	12691	0.91	4	4	223
sex	2917	0.98	1	1	3
external_findings	140646	0.05	1	652	4806
bird_or_mammal	2917	0.98	4	6	2
photos	2918	0.98	1	1	3
photo_code	136936	0.08	55	58	9760
notes	99602	0.33	1	253	3432
age_class_label	2917	0.98	2	21	7
sex_label	9981	0.93	4	12	3
condition_label	9981	0.93	5	32	6
organization_label	7064	0.95	11	30	3
geom	7064	0.95	23	37	151
species	0	1.00	21	85	250

Variable type: logical

skim_variable	n_missing	mean	count
species_genus	148100	NaN	:
datum	148100	NaN	:
deposition_rate_carcass_per_km	148100	NaN	:

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
latitude	0	1.00	37.15	0.90	34.05	36.72	36.87	37.83	41.98	▁▇▅▁▁
longitude	0	1.00	-122.12	0.71	-124.40	-122.54	-121.85	-121.81	-118.92	▁▆▇▁▁
beach_length_km	0	1.00	3.80	1.49	0.09	2.91	4.30	5.15	8.02	▅▅▇▇▁
percent_of_beach_surveyed	87491	0.41	97.05	9.70	0.00	100.00	100.00	100.00	100.00	▁▁▁▁▇
carcass_present	2917	0.98	1.00	0.00	1.00	1.00	1.00	1.00	1.00	▁▁▇▁▁
species_itis	3032	0.98	194457.59	96101.90	17417.00	174536.00	175170.00	176974.00	824147.00	▇▁▁▁▁
age_class	118453	0.20	0.86	0.34	0.00	1.00	1.00	1.00	1.00	▁▁▁▁▇
standard_length_m	99076	0.33	0.00	0.00	0.00	0.00	0.00	0.00	0.70	▇▁▁▁▁
condition	2917	0.98	3.06	1.12	0.00	3.00	3.00	4.00	9.00	▁▇▇▁▁
cause_of_death	61715	0.58	3.96	0.38	0.00	4.00	4.00	4.00	7.00	▁▁▇▁▁
alive	2917	0.98	0.00	0.00	0.00	0.00	0.00	0.00	0.00	▁▁▇▁▁
inst_code	9981	0.93	2.57	0.49	2.00	2.00	3.00	3.00	3.00	▆▁▁▁▇
north_latitude	2917	0.98	37.12	0.80	34.04	36.74	36.90	37.80	38.97	▁▁▇▃▃
north_longitude	2917	0.98	-122.11	0.68	-123.72	-122.51	-121.85	-121.81	-118.93	▂▇▂▁▁
south_latitude	2917	0.98	37.10	0.81	34.04	36.70	36.85	37.79	38.95	▁▁▇▃▃
south_longitude	2917	0.98	-122.10	0.68	-123.73	-122.52	-121.85	-121.81	-118.92	▃▇▃▁▁

Variable type: POSIXct

skim_variable	n_missing	complete_rate	min	max	median	n_unique
time	0	1	1993-09-15	2019-05-27	2008-12-01	7637

percent_of_beach_surveyed: all NaN
cause_of_death: > 0 1 2 3 4 5 6 7 > 818 6 302 21 64182 189 3 8

taxa

if (!file.exists(d_spp_csv)){
  
  flds_spp <- names(d) %>% 
    str_subset("species") %>% 
    c("bird_or_mammal")
  
  d_spp <- d %>% 
    select(!!!flds_spp) %>% 
    group_by_all() %>% 
    summarise(n = n()) %>% 
    arrange(bird_or_mammal, species)
  
  write_csv(d_spp, d_spp_csv)
}
d_spp <- read_csv(d_spp_csv)

table(d_spp$bird_or_mammal)

## 
##   bird mammal 
##    224     25

birds

d_spp %>% 
  filter(bird_or_mammal == "bird") %>% 
  datatable()

mammals

d_spp %>% 
  filter(bird_or_mammal == "mammal") %>% 
  datatable()

spatial

intersect sanctuary with segments

xy2ln <- function(x1, y1, x2, y2){
  if (any(is.na(c(x1, y1, x2, y2)))) return(NA)
  
  st_linestring(c(
    st_point(c(x1, y1)), 
    st_point(c(x2, y2))))
}

if (!file.exists(segs_geo)){
  
  segs <- d %>% 
    filter(
      !is.na(north_longitude), !is.na(north_latitude),
      !is.na(south_longitude), !is.na(south_latitude)) %>% 
    group_by(
      beach_segment_code, 
      #longitude, latitude, 
      north_longitude, north_latitude,
      south_longitude, south_latitude) %>% 
    summarize(
      n_rows = n()) %>% 
    ungroup() %>% 
    mutate(
      geom = pmap(list(
        north_longitude, north_latitude,
        south_longitude, south_latitude), xy2ln) %>% 
        st_sfc(crs = 4326)) %>% 
    st_set_geometry(.$geom)
  
  write_sf(segs, segs_geo, delete_dsn = T)
}
segs <- read_sf(segs_geo)

# TODO: move used fxns into library and utility.R
source("~/github/cinms/scripts/rocky.R")

nms     <- "mbnms"
nms_ply <- get_nms_ply(nms)

nms_segs <- segs %>% 
  filter(
    st_intersects(
      segs, nms_ply, sparse = F))

m1 <- mapview(nms_ply) + 
  mapview(segs, color="red") + 
  mapview(nms_segs, color="green")
m1

NOTE the red segments near San Jose that did not get intersected by the polygon because of spatial coarseness and mismatch.

intersect buffered sanctuary with segments

Buffer sanctuary by 0.005 decimal degress (~ 0.555 km at equator).

nms_ply_buf <- st_buffer(nms_ply, dist = 0.005)

nms_buf_segs <- segs %>% 
  filter(
    st_intersects(
      segs, nms_ply_buf, sparse = F))

d_nms_buf <- d %>% 
  semi_join(
    nms_buf_segs, by = "beach_segment_code")

# write to csv
d_mbnms <- d_nms_buf %>% 
  mutate(
    year = year(time)) %>% 
  group_by(
    year,
    carcass_present, bird_or_mammal, 
    species_code, species_sci_name, species_common_name, species_itis, species, 
    age_class, sex, condition, cause_of_death, alive) %>% 
  summarize(
    n = n()) %>% 
  ungroup()
write_csv(d_mbnms, d_mbnms_csv)

d_mbnms <- read_csv(d_mbnms_csv)

m2 <- mapview(nms_ply_buf) +
  mapview(nms_buf_segs, color="green")

leafsync::sync

## function (..., ncol = 2, sync = "all", sync.cursor = TRUE, no.initial.sync = TRUE) 
## {
##     latticeView(..., ncol = ncol, sync = sync, sync.cursor = sync.cursor, 
##         no.initial.sync = no.initial.sync)
## }
## <bytecode: 0x7f99f603f128>
## <environment: namespace:leafsync>

sync(m1, m2)

Filter by beach_segment_code in MBNMS: 98,503 of 148,100 rows (66.5%).

plots

bird carcasses

static

g_birds <- d_mbnms %>% 
  filter(
    bird_or_mammal == "bird",
    carcass_present == T) %>% 
  group_by(year) %>% 
  summarize(
    n = sum(n)) %>% 
  ungroup() %>% 
  ggplot(aes(x=year, y=n)) + 
  #geom_bar(stat = "identity") #+ 
  geom_col() #+ 
  #title("Bird carcasses over time")
print(g_birds)

interactive, color by species on hover

#plot.new()
g_bird_spp <- d_mbnms %>% 
  filter(
    bird_or_mammal == "bird",
    carcass_present == T) %>% 
  group_by(year, species) %>% 
  summarize(
    n = sum(n)) %>% 
  ungroup() %>% 
  ggplot(aes(x=year, y=n, fill=species)) + 
  #geom_bar(stat = "identity") + 
  geom_col() +
  #title("Bird carcasses over time, by species") + 
  theme(legend.position = "none")

ggplotly(g_bird_spp)

mammal carcasses

static

g_mammal <- d_mbnms %>% 
  filter(
    bird_or_mammal == "mammal",
    carcass_present == T) %>% 
  group_by(year) %>% 
  summarize(
    n = sum(n)) %>% 
  ungroup() %>%  
  ggplot(aes(x=year, y=n)) + 
  #geom_bar(stat = "identity") #+ 
  geom_col() #+ 
  #title("Bird carcasses over time")
print(g_mammal)

interactive, color by species on hover

#plot.new()
g_mammal_spp <- d_mbnms %>% 
  filter(
    bird_or_mammal == "mammal",
    carcass_present == T) %>% 
 group_by(year, species) %>% 
  summarize(
    n = sum(n)) %>% 
  ungroup() %>% 
  ggplot(aes(x=year, y=n, fill=species)) + 
  #geom_bar(stat = "identity") + 
  geom_col() +
  #title("Bird carcasses over time, by species") + 
  theme(legend.position = "none")

ggplotly(g_mammal_spp)