bm16
: [b]irds + [m]ammals, up to 20[16]
EB_MM_BC
, birds + mammals, but only up to 2016b019
: [b]irds [o]nly, up to 20[19]
cciea_B_B_MORT
# libraries
library(here)
library(glue)
library(stringr)
library(readr)
library(dplyr)
library(purrr)
library(DT)
library(rerddap)
library(sf)
library(mapview)
library(leafsync)
library(leaflet)
library(skimr)
library(lubridate)
library(plotly)
here <- here::here
# paths & variables
erddap_url <- "https://oceanview.pfeg.noaa.gov/erddap/"
d_id <- "EB_MM_BC"
dir_data <- here("data")
# manual download from Axiom space:
axiom_url <- "https://researchworkspace.com/file/5851070/BeachCOMBERS_formatted_2019.csv"
# TODO: compsare BeachCOMBERS_formatted_2019.csv w/ ERDDAP cciea_B_B_MORT
d_bo19_csv <- glue("{dir_data}/{d_id}_bo19.csv")
d_bm16_csv <- glue("{dir_data}/{d_id}_bm16.csv")
d_csv <- glue("{dir_data}/{d_id}.csv")
segs_geo <- glue("{dir_data}/{d_id}_segments.geojson")
d_spp_csv <- glue("{dir_data}/{d_id}_species.csv")
cols_bo19_csv <- glue("{dir_data}/cols_bo19.csv")
cols_bm16tobo19_csv <- glue("{dir_data}/cols_bm16tobo19.csv")
d_mbnms_csv <- glue("{dir_data}/BeachCOMBERS_MBNMS.csv")
Sys.setenv(RERDDAP_DEFAULT_URL = erddap_url)
if (!file.exists(d_bo19_csv)){
download.file(axiom_url, d_bo19_csv)
}
if (!file.exists(d_bm16_csv)){
d_info <- info(d_id)
d <- tabledap(d_info)
write_csv(d, d_bm16_csv)
}
d_bo19 <- read_csv(d_bo19_csv, guess_max = 100000)
d_bm16 <- read_csv(d_bm16_csv, guess_max = 10000)
if (!file.exists(cols_bm16tobo19_csv)){
tibble(
bo19_name = names(d_bo19),
bo19_type = map_chr(d_bo19, ~ class(.) %>% paste(collapse = ","))) %>%
arrange(bo19_name) %>%
write_csv(cols_bo19_csv)
tibble(
bm16_name = names(d_bm16),
bm16_type = map_chr(d_bm16, ~ class(.) %>% paste(collapse = ","))) %>%
arrange(bm16_name) %>%
write_csv(cols_bm16tobo19_csv)
stop("NEXT: manually match column name and type for bm16 ({basename(d_bm16tobo19_csv)}) to match bo19 ({basename(d_bo19_csv)}).")
}
cx <- read_csv(cols_bm16tobo19_csv)
cx_vars <- setNames(cx$bm16_name, cx$bo19_name)
# TODO: update bm16 rows
# table(d_bo19 %>% select(age_class_label, age_class))
# table(d_bo19 %>% select(condition_label, condition))
# table(d_bo19 %>% select(sex_label, sex))
# other bo19 flds missing in bo19:
# - organization_code
# - organization_label
# - deposition_rate_carcass_per_km
# - inst_code
# setdiff(unique(d_bm16$age_class), unique(d_bo19$age_class))
# unique(d_bo19$age_class_label)
d_bm164bo19 <- d_bm16 %>%
filter(bird_or_mammal != "bird") %>%
mutate(
unique_carcass_identifier = as.character(unique_carcass_identifier)) %>%
rename(!!cx_vars)
d <- bind_rows(
d_bo19,
d_bm164bo19) %>%
mutate(
species = glue(
"{species_code}: {species_common_name} ({species_sci_name}; ITIS: {species_itis})"))
Of the 148,100 rows in this dataset, let’s look at the first 1,000 records:
d %>%
slice(1:100) %>%
datatable()
skimr::skim(d)
Name | d |
Number of rows | 148100 |
Number of columns | 40 |
_______________________ | |
Column type frequency: | |
character | 20 |
logical | 3 |
numeric | 16 |
POSIXct | 1 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
unique_survey_identifier | 0 | 1.00 | 4 | 17 | 0 | 27650 | 0 |
beach_segment_code | 0 | 1.00 | 1 | 4 | 0 | 163 | 0 |
beach_segment_name | 0 | 1.00 | 5 | 43 | 0 | 153 | 0 |
organization_code | 7064 | 0.95 | 2 | 3 | 0 | 3 | 0 |
unique_carcass_identifier | 12691 | 0.91 | 1 | 8 | 0 | 130699 | 0 |
species_sci_name | 24844 | 0.83 | 6 | 36 | 0 | 218 | 0 |
species_common_name | 2917 | 0.98 | 5 | 37 | 0 | 234 | 0 |
species_code | 12691 | 0.91 | 4 | 4 | 0 | 223 | 0 |
sex | 2917 | 0.98 | 1 | 1 | 0 | 3 | 0 |
external_findings | 140646 | 0.05 | 1 | 652 | 0 | 4806 | 0 |
bird_or_mammal | 2917 | 0.98 | 4 | 6 | 0 | 2 | 0 |
photos | 2918 | 0.98 | 1 | 1 | 0 | 3 | 0 |
photo_code | 136936 | 0.08 | 55 | 58 | 0 | 9760 | 0 |
notes | 99602 | 0.33 | 1 | 253 | 0 | 3432 | 0 |
age_class_label | 2917 | 0.98 | 2 | 21 | 0 | 7 | 0 |
sex_label | 9981 | 0.93 | 4 | 12 | 0 | 3 | 0 |
condition_label | 9981 | 0.93 | 5 | 32 | 0 | 6 | 0 |
organization_label | 7064 | 0.95 | 11 | 30 | 0 | 3 | 0 |
geom | 7064 | 0.95 | 23 | 37 | 0 | 151 | 0 |
species | 0 | 1.00 | 21 | 85 | 0 | 250 | 0 |
Variable type: logical
skim_variable | n_missing | complete_rate | mean | count |
---|---|---|---|---|
species_genus | 148100 | 0 | NaN | : |
datum | 148100 | 0 | NaN | : |
deposition_rate_carcass_per_km | 148100 | 0 | NaN | : |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
latitude | 0 | 1.00 | 37.15 | 0.90 | 34.05 | 36.72 | 36.87 | 37.83 | 41.98 | ▁▇▅▁▁ |
longitude | 0 | 1.00 | -122.12 | 0.71 | -124.40 | -122.54 | -121.85 | -121.81 | -118.92 | ▁▆▇▁▁ |
beach_length_km | 0 | 1.00 | 3.80 | 1.49 | 0.09 | 2.91 | 4.30 | 5.15 | 8.02 | ▅▅▇▇▁ |
percent_of_beach_surveyed | 87491 | 0.41 | 97.05 | 9.70 | 0.00 | 100.00 | 100.00 | 100.00 | 100.00 | ▁▁▁▁▇ |
carcass_present | 2917 | 0.98 | 1.00 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▇▁▁ |
species_itis | 3032 | 0.98 | 194457.59 | 96101.90 | 17417.00 | 174536.00 | 175170.00 | 176974.00 | 824147.00 | ▇▁▁▁▁ |
age_class | 118453 | 0.20 | 0.86 | 0.34 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▁▁▇ |
standard_length_m | 99076 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.70 | ▇▁▁▁▁ |
condition | 2917 | 0.98 | 3.06 | 1.12 | 0.00 | 3.00 | 3.00 | 4.00 | 9.00 | ▁▇▇▁▁ |
cause_of_death | 61715 | 0.58 | 3.96 | 0.38 | 0.00 | 4.00 | 4.00 | 4.00 | 7.00 | ▁▁▇▁▁ |
alive | 2917 | 0.98 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ▁▁▇▁▁ |
inst_code | 9981 | 0.93 | 2.57 | 0.49 | 2.00 | 2.00 | 3.00 | 3.00 | 3.00 | ▆▁▁▁▇ |
north_latitude | 2917 | 0.98 | 37.12 | 0.80 | 34.04 | 36.74 | 36.90 | 37.80 | 38.97 | ▁▁▇▃▃ |
north_longitude | 2917 | 0.98 | -122.11 | 0.68 | -123.72 | -122.51 | -121.85 | -121.81 | -118.93 | ▂▇▂▁▁ |
south_latitude | 2917 | 0.98 | 37.10 | 0.81 | 34.04 | 36.70 | 36.85 | 37.79 | 38.95 | ▁▁▇▃▃ |
south_longitude | 2917 | 0.98 | -122.10 | 0.68 | -123.73 | -122.52 | -121.85 | -121.81 | -118.92 | ▃▇▃▁▁ |
Variable type: POSIXct
skim_variable | n_missing | complete_rate | min | max | median | n_unique |
---|---|---|---|---|---|---|
time | 0 | 1 | 1993-09-15 | 2019-05-27 | 2008-12-01 | 7637 |
percent_of_beach_surveyed
: all NaNcause_of_death
: > 0 1 2 3 4 5 6 7 > 818 6 302 21 64182 189 3 8if (!file.exists(d_spp_csv)){
flds_spp <- names(d) %>%
str_subset("species") %>%
c("bird_or_mammal")
d_spp <- d %>%
select(!!!flds_spp) %>%
group_by_all() %>%
summarise(n = n()) %>%
arrange(bird_or_mammal, species)
write_csv(d_spp, d_spp_csv)
}
d_spp <- read_csv(d_spp_csv)
table(d_spp$bird_or_mammal)
##
## bird mammal
## 224 25
d_spp %>%
filter(bird_or_mammal == "bird") %>%
datatable()
d_spp %>%
filter(bird_or_mammal == "mammal") %>%
datatable()
xy2ln <- function(x1, y1, x2, y2){
if (any(is.na(c(x1, y1, x2, y2)))) return(NA)
st_linestring(c(
st_point(c(x1, y1)),
st_point(c(x2, y2))))
}
if (!file.exists(segs_geo)){
segs <- d %>%
filter(
!is.na(north_longitude), !is.na(north_latitude),
!is.na(south_longitude), !is.na(south_latitude)) %>%
group_by(
beach_segment_code,
#longitude, latitude,
north_longitude, north_latitude,
south_longitude, south_latitude) %>%
summarize(
n_rows = n()) %>%
ungroup() %>%
mutate(
geom = pmap(list(
north_longitude, north_latitude,
south_longitude, south_latitude), xy2ln) %>%
st_sfc(crs = 4326)) %>%
st_set_geometry(.$geom)
write_sf(segs, segs_geo, delete_dsn = T)
}
segs <- read_sf(segs_geo)
# TODO: move used fxns into library and utility.R
source("~/github/cinms/scripts/rocky.R")
nms <- "mbnms"
nms_ply <- get_nms_ply(nms)
nms_segs <- segs %>%
filter(
st_intersects(
segs, nms_ply, sparse = F))
m1 <- mapview(nms_ply) +
mapview(segs, color="red") +
mapview(nms_segs, color="green")
m1
NOTE the red segments near San Jose that did not get intersected by the polygon because of spatial coarseness and mismatch.
Buffer sanctuary by 0.005
decimal degress (~ 0.555 km at equator).
nms_ply_buf <- st_buffer(nms_ply, dist = 0.005)
nms_buf_segs <- segs %>%
filter(
st_intersects(
segs, nms_ply_buf, sparse = F))
d_nms_buf <- d %>%
semi_join(
nms_buf_segs, by = "beach_segment_code")
# write to csv
d_mbnms <- d_nms_buf %>%
mutate(
year = year(time)) %>%
group_by(
year,
carcass_present, bird_or_mammal,
species_code, species_sci_name, species_common_name, species_itis, species,
age_class, sex, condition, cause_of_death, alive) %>%
summarize(
n = n()) %>%
ungroup()
write_csv(d_mbnms, d_mbnms_csv)
d_mbnms <- read_csv(d_mbnms_csv)
m2 <- mapview(nms_ply_buf) +
mapview(nms_buf_segs, color="green")
leafsync::sync
## function (..., ncol = 2, sync = "all", sync.cursor = TRUE, no.initial.sync = TRUE)
## {
## latticeView(..., ncol = ncol, sync = sync, sync.cursor = sync.cursor,
## no.initial.sync = no.initial.sync)
## }
## <bytecode: 0x7f99f603f128>
## <environment: namespace:leafsync>
sync(m1, m2)
Filter by beach_segment_code
in MBNMS: 98,503 of 148,100 rows (66.5%).
g_birds <- d_mbnms %>%
filter(
bird_or_mammal == "bird",
carcass_present == T) %>%
group_by(year) %>%
summarize(
n = sum(n)) %>%
ungroup() %>%
ggplot(aes(x=year, y=n)) +
#geom_bar(stat = "identity") #+
geom_col() #+
#title("Bird carcasses over time")
print(g_birds)
#plot.new()
g_bird_spp <- d_mbnms %>%
filter(
bird_or_mammal == "bird",
carcass_present == T) %>%
group_by(year, species) %>%
summarize(
n = sum(n)) %>%
ungroup() %>%
ggplot(aes(x=year, y=n, fill=species)) +
#geom_bar(stat = "identity") +
geom_col() +
#title("Bird carcasses over time, by species") +
theme(legend.position = "none")
ggplotly(g_bird_spp)
g_mammal <- d_mbnms %>%
filter(
bird_or_mammal == "mammal",
carcass_present == T) %>%
group_by(year) %>%
summarize(
n = sum(n)) %>%
ungroup() %>%
ggplot(aes(x=year, y=n)) +
#geom_bar(stat = "identity") #+
geom_col() #+
#title("Bird carcasses over time")
print(g_mammal)
#plot.new()
g_mammal_spp <- d_mbnms %>%
filter(
bird_or_mammal == "mammal",
carcass_present == T) %>%
group_by(year, species) %>%
summarize(
n = sum(n)) %>%
ungroup() %>%
ggplot(aes(x=year, y=n, fill=species)) +
#geom_bar(stat = "identity") +
geom_col() +
#title("Bird carcasses over time, by species") +
theme(legend.position = "none")
ggplotly(g_mammal_spp)