First we load the R libraries we will need and manually set the working directory.

library(knitr)
#set knitr inline code to round to 2 decimals
knitr::opts_chunk$set(echo = TRUE)
inline_hook <- function (x) {
  if (is.numeric(x)) {
    # ifelse does a vectorized comparison
    # If integer, print without decimal; otherwise print two places
    res <- ifelse(x == round(x),
      sprintf("%d", x),
      sprintf("%.2f", x)
    )
    paste(res, collapse = ", ")
  }
}
knit_hooks$set(inline = inline_hook)

library(vegan)
library(ggplot2)
library(gridExtra)
library(RColorBrewer)
library(plyr)
library(dplyr)
library(reshape2)
library(lattice)
library(scales)
library(DT)

setwd("/Users/duhaimem/Box Sync/manuscripts_in_prep/highschool_WWTP/WWTP_git/")

Figures 1-3. Introducing the study system: three WWTPs and multiple sampling locations therein.

Figure 1. Detroit WWTP

Figure 1. Detroit WWTP

Figure 2. Northfield WWTP

Figure 2. Northfield WWTP

Figure 3. Pilot-scale Anaerobic Membrane Bioreactor

Figure 3. Pilot-scale Anaerobic Membrane Bioreactor

Next, we import the raw data file documenting the numbers and shape classes of microplatsics (MP) found at each unit process sampled.

counts<-read.table("./R_infiles/count_data.tsv", sep="\t", header=TRUE)
supp<-read.table("./R_infiles/Supp_R_data.tsv", sep="\t", header=TRUE)

Now let’s make a few additional columns we will use for our data summary, such as calculating MPs per liter.

counts$cnt_per_liter <- counts$count/counts$vol
#head(counts)
counts_Spr<-subset(counts, Date=="25.Mar.16" | Date=="21.Mar.16")

groupColumns.cnt_by_unit = c("plant","sample","replicate")
dataColumns.cnt_by_unit = c("cnt_per_liter")
cnt_by_unit_Spr<-ddply(counts_Spr, groupColumns.cnt_by_unit, function(x) colSums(x[dataColumns.cnt_by_unit]))
names(cnt_by_unit_Spr)[names(cnt_by_unit_Spr) == 'cnt_per_liter'] <- 'sum_classes_cnt_per_L'
#View(cnt_by_unit_Spr)

cnt_by_unit_Spr_by_rep<-ddply(cnt_by_unit_Spr, c("plant","sample"), summarize, mean = mean(sum_classes_cnt_per_L, na.rm=TRUE), sd = sd(sum_classes_cnt_per_L, na.rm=TRUE), st_er_mean = sd(sum_classes_cnt_per_L)/sqrt(length(sum_classes_cnt_per_L)))
#View(cnt_by_unit_Spr_by_rep)

Figure 4. Percent of total incoming MP removed along WWTP unit processes

With error bars based on standard deviation of replicates.

# manually set order of variables in dataframe to achieve the desired plots with the same order
cnt_by_unit_Spr_by_rep$sample <- factor(cnt_by_unit_Spr_by_rep$sample, levels = c("Raw Wastewater","Prelim effluent","Primary effluent","Sec effluent trickling filter","Sec effluent activated sludge","Final effluent"))
cnt_by_unit_Spr_by_rep$plant <- factor(cnt_by_unit_Spr_by_rep$plant, levels = c("Detroit WWTP","Northfield WWTP","Anaerobic Membrane Bioreactor (AnMBR)"))

# function to manually set tick interval on barplot
number_ticks <- function(n) {function(cnt_by_unit_Spr_by_rep) pretty(cnt_by_unit_Spr_by_rep, n)}

Fig4_sd<-ggplot(cnt_by_unit_Spr_by_rep, aes(x = factor(sample), y=mean)) + 
  theme_bw()+
  geom_bar(stat = "identity") +
  xlab("Unit Process Sampled")+
  ylab("MP count (avg per L) and STD DEV")+
  scale_y_continuous(breaks=number_ticks(10)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=.2, color="darkgray")+
  facet_wrap(~plant)
Fig4_sd

ggsave("./R_outfiles/Fig4_sd.pdf",Fig4_sd,width=10,height=6)

And the error bars based on standard error of replicates, rather than standard deviation.

Now we will calculate percent removal at each stage based on total MPs counted in raw wastewater.

cnt_by_unit_Spr_by_rep_RWW<-subset(cnt_by_unit_Spr_by_rep, sample=="Raw Wastewater")
cnt_by_unit_Spr_by_rep_RWW<-cnt_by_unit_Spr_by_rep_RWW[,-(4:5),drop=FALSE]
cnt_by_unit_Spr_by_rep_RWW<-cnt_by_unit_Spr_by_rep_RWW[,-(2),drop=FALSE]
names(cnt_by_unit_Spr_by_rep_RWW)[names(cnt_by_unit_Spr_by_rep_RWW)=="mean"] <- "mean_in"
#View(cnt_by_unit_Spr_by_rep_RWW)

cnt_by_unit_Spr_by_rep_mean_in <- merge(cnt_by_unit_Spr_by_rep, cnt_by_unit_Spr_by_rep_RWW, by = "plant", all.x = TRUE)
#View(cnt_by_unit_Spr_by_rep_mean_in)

cnt_by_unit_Spr_by_rep_mean_in$pct_rem <- ((cnt_by_unit_Spr_by_rep_mean_in$mean_in - cnt_by_unit_Spr_by_rep_mean_in$mean)/cnt_by_unit_Spr_by_rep_mean_in$mean_in*100)

Data for Tables 1 and 2. Count summaries per unit process per plant.

Replicate count means, standard dev, standard error, mean incoming in raw wastewater for each plant, and percent of total incoming that is removed at each unit process step.

#View(cnt_by_unit_Spr_by_rep_mean_in)
datatable(cnt_by_unit_Spr_by_rep_mean_in, options = list(iDisplayLength = 100)) %>% 
  formatRound(c('mean','sd','st_er_mean','mean_in','pct_rem'), 2)

Data work for Figure 5. Seasonal comparison at Northfield WWTP

counts_Northfield<-subset(counts, plant=="Northfield WWTP")
#head(counts_Northfield)

groupColumns.cnt_by_unit = c("season","sample","replicate")
dataColumns.cnt_by_unit = c("cnt_per_liter")
cnt_by_unit_Northfield<-ddply(counts_Northfield, groupColumns.cnt_by_unit, function(x) colSums(x[dataColumns.cnt_by_unit]))
names(cnt_by_unit_Northfield)[names(cnt_by_unit_Northfield) == 'cnt_per_liter'] <- 'sum_reps_cnt_per_L'
#View(cnt_by_unit_Northfield)

cnt_by_unit_Northfield_by_rep<-ddply(cnt_by_unit_Northfield, c("season","sample"), summarize, mean = mean(sum_reps_cnt_per_L, na.rm=TRUE), sd = sd(sum_reps_cnt_per_L, na.rm=TRUE), st_er_mean = sd(sum_reps_cnt_per_L)/sqrt(length(sum_reps_cnt_per_L)))
#View(cnt_by_unit_Northfield_by_rep)

Figure 5. Seasonal comparison at Northfield WWTP

# manually set order of variables in dataframe to achieve the desired plots with the same order
cnt_by_unit_Northfield_by_rep$sample <- factor(cnt_by_unit_Northfield_by_rep$sample, levels = c("Raw Wastewater","Prelim effluent","Primary effluent","Sec effluent trickling filter","Sec effluent activated sludge","Final effluent"))
cnt_by_unit_Northfield_by_rep$season <- factor(cnt_by_unit_Northfield_by_rep$season, levels = c("Fall","Winter","Spring"))

# function to manually set tick interval on barplot
number_ticks <- function(n) {function(cnt_by_unit_Northfield_by_rep) pretty(cnt_by_unit_Northfield_by_rep, n)}

# define a custom color palette for the seasons
seasonPalette <- c("darkgoldenrod3", "cyan3", "darkolivegreen3")

Fig5_facet<-ggplot(cnt_by_unit_Northfield_by_rep, aes(x = factor(season), y=mean,fill=factor(season)),color=factor(season)) + 
  theme_bw()+
  geom_bar(stat = "identity") +
  scale_fill_manual(values=seasonPalette) +
  xlab("Unit Process Sampled")+
  ylab("MP count (avg per L) and STD DEV")+
  scale_y_continuous(breaks=number_ticks(10)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=.2, color="gray40")+
  facet_grid(~sample)
Fig5_facet

ggsave("./R_outfiles/Fig5_facet.pdf",Fig5_facet,width=14,height=6)

Working up the data to examine MP shape class profiles along the treatment stream for Figure 6.

groupColumns.units_by_class = c("plant","sample","class")
dataColumns.units_by_class = c("cnt_per_liter")
cnt_unit_by_class<-ddply(counts, groupColumns.units_by_class, function(x) colSums(x[dataColumns.units_by_class], na.rm=TRUE))
names(cnt_unit_by_class)[names(cnt_unit_by_class) == 'cnt_per_liter'] <- 'total_cnts_per_L'
#cnt_by_unit_Spr_by_rep_RWW<-subset(cnt_by_unit_Spr_by_rep, sample=="Raw Wastewater")
cnt_unit_by_class_Raw_Final<-subset(cnt_unit_by_class, sample == "Raw Wastewater"|sample == "Final effluent")
#View(cnt_unit_by_class_Raw_Final)

cnt_unit_by_class_Raw_Final<-group_by(cnt_unit_by_class_Raw_Final, plant, sample) %>% mutate(pct_per_sample = round(total_cnts_per_L/sum(total_cnts_per_L)*100,digits=1))

Data for Figure 6. MP shape class profiles along the treatment stream.

datatable(cnt_unit_by_class_Raw_Final, options = list(iDisplayLength = 10)) %>% 
  formatRound('total_cnts_per_L', 2)

Figure 6. Comparison of shape class profiles for MPs in incoming wastewater and outgoing effluent.

# manually set order of variables in dataframe to achieve the desired plots with the same order
cnt_unit_by_class_Raw_Final$sample <- factor(cnt_unit_by_class_Raw_Final$sample, levels = c("Raw Wastewater","Final effluent"))
cnt_unit_by_class_Raw_Final$plant <- factor(cnt_unit_by_class_Raw_Final$plant, levels = c("Detroit WWTP","Northfield WWTP","Anaerobic Membrane Bioreactor (AnMBR)"))

# define a custom color palette for the seasons
classPalette <- c("orange3", "darkolivegreen3","deepskyblue3", "gold1","cadetblue3","deeppink3", "coral1")

Fig6<-ggplot(cnt_unit_by_class_Raw_Final, aes(x = sample, y=total_cnts_per_L,fill = class)) + 
  theme_bw()+
  scale_fill_manual(values=classPalette)+
  geom_bar(stat = "identity", position = "fill") +
  xlab("class")+
  ylab("% of total MP")+
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  scale_y_continuous(labels = percent_format())+
  facet_grid(~plant)
Fig6

ggsave("./R_outfiles/Fig6.pdf",Fig6,width=7,height=6)
supp$cnt_per_liter <- supp$count/supp$vol
#head(supp)

# manually set order of variables in dataframe to achieve the desired plots with the same order
supp$sample <- factor(supp$sample, levels = c("Raw Wastewater","Prelim effluent","Primary effluent","Sec effluent trickling filter","Sec effluent activated sludge","Final effluent"))

SuppFig1<-ggplot(supp, aes(x = factor(size), y=cnt_per_liter,fill=factor(size)),color=factor(size)) + 
  theme_bw()+
  geom_bar(stat = "identity") +
 # scale_fill_manual(values=seasonPalette) +
  xlab("Sieve size (mm)")+
  ylab("MP count (per L)")+
  #scale_y_continuous(breaks=number_ticks(10)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  #geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=.2, color="gray40")+
  facet_grid(~sample)
SuppFig1

ggsave("./R_outfiles/SuppFig1.pdf",SuppFig1,width=14,height=6)

datatable(supp, options = list(iDisplayLength = 10)) %>% 
  formatRound('cnt_per_liter', 2)

Using this data, we were able to calculate the mean of the fiber lenths weigthed by the frequency f fibers found in each size class.

eff_fiber_sizes<-subset(supp, sample=="Final effluent" & class=="Fiber")
weighted_mean_fiber_length<-sum((eff_fiber_sizes$cnt_per_liter/sum(eff_fiber_sizes$cnt_per_liter))*eff_fiber_sizes$size)

The weigthed arithmetic mean of fiber lengths is 0.58 mm. This value was then used to calculate the mass of plastic entering Detroit River each day.

linear_density<-0.002 # mg/mm (see manuscript refs 31-32)
Detroit_fibers_eff<-subset(cnt_unit_by_class_Raw_Final, plant=="Detroit WWTP" & sample=="Final effluent" & class=="Fiber")
Detroit_fibers_in<-subset(cnt_unit_by_class_Raw_Final, plant=="Detroit WWTP" & sample=="Raw Wastewater" & class=="Fiber")
Detroit_gpd<-660000000 #gallons per day, Detroit flow rate per EPA Clean Watersheds Needs Survey 2012 Reports (see references)
lpg<-3.78541 # conversion for liters per gallon
Detroit_lpd<-Detroit_gpd*lpg
mg2kg<-1/1000000 # mg to kg conversion
pos_ID<-0.8
Detroit_fiber_per_day<-Detroit_gpd*lpg*Detroit_fibers_eff$total_cnts_per_L
kg_fibers_Detroit<-Detroit_gpd*lpg*Detroit_fibers_eff$total_cnts_per_L*pos_ID*weighted_mean_fiber_length*linear_density*mg2kg
mg_fibers_Detroit_per_L<-Detroit_fibers_eff$total_cnts_per_L*pos_ID*weighted_mean_fiber_length*linear_density

mg_fibers_Detroit_per_L #mg fibers/L in the Detroit effluent
## [1] 0.003311264
mg_fibers_Detroit_runoff_per_L<-Detroit_fibers_in$total_cnts_per_L*pos_ID*weighted_mean_fiber_length*linear_density

North_fibers_eff<-subset(cnt_unit_by_class_Raw_Final, plant=="Northfield WWTP" & sample=="Final effluent" & class=="Fiber")
North_gpd<-450000 #gallons per day, Northfield flow rate per EPA Clean Watersheds Needs Survey 2012 Reports (see references)
North_lpd<-North_gpd*lpg
North_fiber_per_day<-North_gpd*lpg*(North_fibers_eff$total_cnts_per_L)
kg_fiber_North<-North_gpd*lpg*(North_fibers_eff$total_cnts_per_L)*pos_ID*weighted_mean_fiber_length*linear_density*mg2kg

AnMBR_fibers_fila_per_day<-North_gpd*lpg*(0.47)
kg_fibers_fila_AnMBR<-North_gpd*lpg*(0.47)*pos_ID*weighted_mean_fiber_length*linear_density*mg2kg

There are 8946322665.10 (Detroit) and 8939728.89 (Northfield) fibers released per day. Considering a plastic fiber linear density of 0.002 mg/mm (see manuscript references 31-32), we calculate that 8.27 kg of fibers are released per day to the Detroit River in the Detroit WWTP effluent.

References for textile fiber linear density (dtex):
31. Barnet Europe Polyester Fibers, http://www.barnet-europe.com/en/fibers/staple-fiber/polyester.html, (accessed 25 September 2016).
32. N. A. Kalebek and O. Babaarslan, Fiber Selection for the Production of Nonwovens, Intech, 2016. pp. 1–32.