Parse:
from OTU_table_taxa_all.txt: original tab-seperated text file in wide format (OTU rows x site columns)
to otu.csv: comma-seperated file with zeros and other data filtered out (not having site or taxanomic identification) in long format (OTU, site rows)
# setup columns for otu
otu_1 = read_tsv(otu_txt, n_max=1)
otu_cols = c(names(otu_1), 'taxa_v')
# read otu, in wide format with many 0s, and extra taxa vector column named
otu_w = read_tsv(otu_txt, col_names=otu_cols, skip=1)
# gather into long format, sparse 0s removed, column names and values expanded
otu = otu_w %>% # 5,707 rows
gather(sample, count, -DUP_ID, -taxa_v) %>% # 291,057 rows
filter(count > 0) %>% # 18,616 rows
filter(!is.na(taxa_v)) %>% # 2,097 rows
filter(
!sample %in% c('extr_blank_1','extr_blank_2','extr_blank_3','Negative')) %>% # 2,082 rows
rownames_to_column('row_long') %>%
extract(
sample,
c('site','moyr','replicate_ltr','replicate_num'),
'([A-Z]{2})([0-9]{3,4})([a-z]{1})([0-9]{0,3})',
remove=T, convert=T) %>%
mutate(
date = parse_date(sprintf('01%04d', moyr), '%d%m%y')) %>%
select(-moyr) %>%
extract(
taxa_v,
c('kingdom','phylum','class','order','family','genus','species'),
"'k__(.*)','p__(.*)','c__(.*)','o__(.*)','f__(.*)','g__(.*)','s__(.*)'",
remove=T)
write_csv(otu, otu_csv)
datatable(otu)
table(otu$site, useNA='ifany')
##
## LK MR WS
## 646 1013 388
Meanwhile using these coordinates from web search of names …
sites = tribble(
~site_code, ~site_name, ~lon, ~lat,
'MR' , 'Molasses Reef', -81.4317097, 24.5524431,
'LK' , 'Looe Key', -80.3812147, 25.0165408,
'WS' , 'Western Sambo', -81.713333, 24.477778)
write_csv(sites, sites_csv)
datatable(sites)
leaflet(sites) %>%
addProviderTiles('Esri.OceanBasemap') %>%
addMarkers(
~lon, ~lat,
popup = ~sprintf("%s: %s", site_code, site_name))
Eg columns: extr_blank_1, extr_blank_2, extr_blank_3, Negative
Per code above applying filter(!is.na(taxa_v))
goes from 18,616 to 2,097 rows.
table(otu$replicate_ltr, otu$replicate_num, useNA='ifany')
##
## 64 200 500 <NA>
## a 0 0 0 392
## b 0 0 0 327
## c 0 0 0 255
## t 369 419 285 0
Construct phylogenetic tree using Open Tree of Life with R package rotl.
Phylogenetics | CRAN Task View
install.packages("ctv")
library("ctv")
install.views("Phylogenetics")
getting tree distances:
rotl: an R package to interact with the Open Tree of Life data [PeerJ Preprints]
getting taxonomies:
plotting trees:
plot.phylo | RDocumentation
Visualizing Dendrograms in R | RPubs
phytools: Phylogenetic Tools for Comparative Biology
treeman: an R package for efficient and intuitive manipulation of phylogenetic trees (treeman github wiki)
interactive:
phylowidget: An R htmlwidgets package of phylotree.js
summarizing phylogenetic diversity:
vegan::treedive(): Functional Diversity And Community Distances From Species Trees.
Functional diversity is defined as the total branch length in a trait dendrogram connecting all species, but excluding the unnecessary root segments of the tree (Petchey and Gaston 2006). Tree distance is the increase in total branch length when combining two sites.
phylosignal: an R package to measure, test, and explore the phylogenetic signal
Phylogenetic signal is the tendency for closely related species to display similar trait values as a consequence of their phylogenetic proximity… Here, we present a new R package, phylosignal which provides a collection of tools to explore the phylogenetic signal for continuous biological traits. These tools are mainly based on the concept of autocorrelation and have been first developed in the field of spatial statistics. To illustrate the use of the package, we analyze the phylogenetic signal in pollution sensitivity for 17 species of diatoms.
otu = read_csv(otu_csv)
otu = otu %>%
mutate(
genus_species = ifelse(is.na(species), genus, species),
genus_species = str_replace(genus_species, ' sp\\..*',''),
genus_species = str_replace(genus_species, ' \\(Halichoclona\\)',''),
genus_species = str_replace(genus_species, ' environmental sample',''),
search_string = tolower(genus_species))
otu_gs = otu %>%
group_by(genus_species) %>%
summarise(
n = n())
if (!file.exists(otl_csv)){
otl = tnrs_match_names(otu_gs$genus_species)
# otl = read_csv(otl_csv)
otu %>%
left_join(otl, by = 'search_string') %>%
select(DUP_ID, search_string=genus_species, unique_name:number_matches) %>%
write_csv(otl_csv)
} else {
otl = read_csv(otl_csv)
}
otl_notfound = c(5264367, 632176, 621380, 67823, 955367, 588763, 566119, 3634672, 1083518, 2841628)
tr <- tol_induced_subtree(setdiff(unique(otl$ott_id), otl_notfound), label_format = 'name')
##
Progress [-----------------------------------] 0/4 ( 0) ?s
Progress [==================================] 4/4 (100) 0s
#Error: HTTP failure: 400 The following OTT ids were not found:
# [5264367, 632176, 621380, 67823, 955367, 588763, 566119, 3634672, 1083518, 2841628]
#Dropping singleton nodes with labels:
# Amphibalanus ott1042709, Creseis ott671266, Abylopsis ott4718809, Polysiphonia ott674045
plot(tr, 'radial', font = 1, cex = 0.5)
phylowidget(tr)