1 Import libraries

library(dplyr)
library(ggpubr)
library(lme4)
library(lmerTest)
library(tidyr)
library(dygraphs)
library(ggplot2)
library(ggpubr)
library(stringr)
library(data.table)
library(yarrr)
library(lattice)

2 Load data

# Root folder of the project. It must end with "/" because every path below
# is built with paste0().
project_path <- "put in the path of your folder/"
path <- project_path
fig_path <- paste0(project_path, "plots/")
data <- read.csv(
  paste0(project_path, "data/utterance_duration.csv"),
  stringsAsFactors = TRUE
)

# Mean duration per method, multiplied by 1000
# (presumably seconds -> milliseconds; confirm the unit of `duration`).
data_summary <- data %>%
  group_by(method) %>%
  summarise(mean_duration = mean(duration) * 1000)

# Difference of every method's mean to the mean in row 2 (the baseline row).
# Computed in one vectorised step so the values stay numeric throughout
# instead of being round-tripped through character strings.
data_summary$diff <- data_summary$mean_duration - data_summary$mean_duration[2]

data$method <- as.factor(data$method)
data$utterance <- as.factor(data$utterance)

# Per-method mean and standard deviation of the raw durations.
data.sum <- data %>%
  group_by(method) %>%
  summarize(dur = mean(duration), sd = sd(duration))

# Relabel the four method levels by position.
# NOTE(review): the diff computation above uses row 2 as the baseline and the
# Figure 3 code labels row 1 as "AVR-H6", i.e. it treats level 2 as H6, whereas
# this assignment names level 1 'H6' and level 2 'AVR'. One of the two
# orderings looks wrong -- check levels(data$method) before relabeling.
levels(data$method) <- c("H6", "AVR", "Zoom-default", "Zoom-raw")

3 Data analysis: lmer model

# Linear mixed model of utterance duration: recording method as fixed effect,
# random intercepts for repetition, speaker and utterance. Fitted by maximum
# likelihood (REML = FALSE) rather than REML.
m <- lmer(
  duration ~ method + (1 | repetition) + (1 | speaker) + (1 | utterance),
  data = data,
  REML = FALSE
)
summary(m)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: duration ~ method + (1 | repetition) + (1 | speaker) + (1 | utterance)
##    Data: data
## 
##      AIC      BIC   logLik deviance df.resid 
##   -504.2   -470.8    260.1   -520.2      472 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.3940 -0.7635 -0.1444  0.7703  2.6614 
## 
## Random effects:
##  Groups     Name        Variance  Std.Dev.
##  speaker    (Intercept) 0.0521184 0.22829 
##  utterance  (Intercept) 0.1226250 0.35018 
##  repetition (Intercept) 0.0001106 0.01052 
##  Residual               0.0170921 0.13074 
## Number of obs: 480, groups:  speaker, 8; utterance, 5; repetition, 3
## 
## Fixed effects:
##                      Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)          2.018454   0.176689   7.549775  11.424 4.94e-06 ***
## methodAVR            0.002102   0.016878 465.994985   0.125    0.901    
## methodZoom-default  -0.001163   0.016878 465.994985  -0.069    0.945    
## methodZoom-raw      -0.009310   0.016878 465.994985  -0.552    0.581    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) mthAVR mthdZm-d
## methodAVR   -0.048                
## mthdZm-dflt -0.048  0.500         
## methodZm-rw -0.048  0.500  0.500

4 Plots

4.1 Mean difference

4.1.1 Figure 3

## Figure 3: dot chart of the mean duration differences between methods
# Keep rows 1, 3 and 4 of the per-method summary (row 2 is the row the
# differences were computed against) and relabel them as pairwise differences.
df <- data_summary[c(1, 3, 4), ]
names(df)[1] <- "method_difference"
df$method_difference <- c("AVR-H6", "ZoomDefault-H6", "ZoomRaw-H6")

ggdotchart(
  df,
  x = "method_difference",
  y = "diff",
  color = "method_difference",
  palette = c("#E0586F", "#61D04F", "#2398E6"),
  sorting = "descending",
  # Grey segments connecting each dot to the zero line.
  add = "segments",
  add.params = list(color = "lightgray", size = 2),
  dot.size = 6,
  # Print the rounded difference next to each dot.
  label = round(df$diff, 1),
  font.label = list(color = "black", size = 12, hjust = -0.4, vjust = 0.5),
  ylim = c(-12, 12),
  xlab = "",
  ylab = "temporal difference (ms)",
  legend = "right",
  ggtheme = theme_pubr()
) +
  geom_hline(aes(yintercept = 0)) +
  rotate_x_text(angle = 0, hjust = 0.3)

# ggsave(paste0(fig_path, "duration_diff.png"), height = 3, width = 7, dpi = 300)
# ggsave(paste0(fig_path, "duration_diff.pdf"), height = 3, width = 7, dpi = 300)

4.2 Time lag difference throughout the whole file

4.2.1 Read new data

# Load the time-lag measurements and give the three lag columns (7-9)
# readable "method - H6" names.
data <- read.csv(
  paste0(project_path, "data/time_lag.csv"),
  stringsAsFactors = TRUE
)
names(data)[7:9] <- c("AVR-H6", "ZoomDefault-H6", "ZoomRaw-H6")

# Long format: drop columns 4-6 and stack the three lag columns into one
# `time_lag` column, with `method_difference` identifying the source column.
df <- melt(
  setDT(data[-c(4:6)]),
  id.vars = c("file", "time_point", "h6.time"),
  variable.name = "method_difference",
  value.name = "time_lag"
)

# Fix the display order of the time points.
df$time_point <- factor(df$time_point, levels = c("early", "mid", "end"))
# Scale h6.time by 1000 in the long table only; `data$h6.time` is untouched
# (presumably seconds -> milliseconds, matching the "ms" axis labels).
df$h6.time <- df$h6.time * 1000

# Mean and SD of the lag for every method difference x time point cell,
# ordered by method difference first and time point second. Ungrouped so the
# result behaves like a plain table downstream.
df_summary <- df %>%
  group_by(method_difference, time_point) %>%
  summarise(mean_timelag = mean(time_lag), sd_timelag = sd(time_lag)) %>%
  ungroup()

4.2.2 Figure 4

# Draw Figure 4 on the currently open graphics device: a pirate plot of
# time_lag by time point within each method difference, with the cell means
# written next to the bars.
#   lag_data    long table with columns time_lag, time_point, method_difference
#   lag_summary per-cell summary whose first nine `mean_timelag` values are in
#               the same order as the nine bars (method difference, then time
#               point)
draw_time_lag_pirateplot <- function(lag_data, lag_summary) {
  par(mar = c(4, 7, 1, 1))

  pirateplot(
    formula = time_lag ~ time_point + method_difference,
    data = lag_data,
    theme = 2,
    xlab = "time point",
    ylab = "temporal difference (ms)",
    # One colour per method difference, repeated for its three time points.
    pal = rep(c("#E0586F", "#61D04F", "#2398E6"), each = 3),
    inf.f.o = 0,    # inference band fill: invisible
    inf.b.o = 0,    # inference band border: invisible
    point.o = .2,   # raw points: mostly transparent
    bar.f.o = .5,   # bar fill: half opaque
    bar.b.o = 1,    # bar border: fully opaque
    bean.f.o = .4,  # bean fill: light
    bean.b.o = .2,  # bean border: light
    avg.line.o = 1, # average line: fully opaque
    ylim = c(-40, 20),
    yaxt = "n",     # y axis is drawn manually below
    gl = c(10, -10, -20, -30, -40),
    gl.lty = 2,
    point.col = "black"
  )
  abline(h = 0)
  axis(2, at = seq(from = -40, to = 20, by = 10))

  # Label each bar with its mean. x positions skip 4 and 8, the gaps between
  # the three method groups; the vertical offsets are hand-tuned per bar.
  label_x <- c(1:3, 5:7, 9:11)
  label_offset <- c(10, 10, 10, -7, -10, -12, -7, -8, -10)
  cell_means <- lag_summary$mean_timelag[seq_len(9)]
  # Format value by value so no label is padded to a common width.
  cell_labels <- vapply(
    cell_means,
    function(m) format(round(m, 2), nsmall = 2),
    character(1)
  )
  text(x = label_x, y = cell_means + label_offset, labels = cell_labels, cex = 1)

  invisible(NULL)
}

png(paste0(fig_path, "time_lag.png"), height = 3, width = 8, units = "in", res = 300)
draw_time_lag_pirateplot(df, df_summary)
dev.off()
## png 
##   2
pdf(paste0(fig_path, "time_lag.pdf"), height = 3, width = 8)
draw_time_lag_pirateplot(df, df_summary)
dev.off()
## png 
##   2

4.3 Linear regression between time lag and Zoom H6 time

# Is the lag linear? One simple regression per method difference.
# The lag columns have non-syntactic names, so they are referenced with
# backticks and resolved through `data =` (rather than `data$"..."` inside the
# formula), which keeps the fitted models usable with predict(newdata = ...).
# NOTE(review): h6.time is the response and the lag the predictor, while
# Figure 5 plots lag against h6.time; also `data$h6.time` is not multiplied by
# 1000 here (only `df$h6.time` was). Confirm both are intended.
avr.m <- lm(h6.time ~ `AVR-H6`, data = data)
summary(avr.m)
## 
## Call:
## lm(formula = h6.time ~ `AVR-H6`, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -77.778 -46.525   4.367  37.479  79.253 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   92.317      7.784  11.861 1.37e-15 ***
## `AVR-H6`      19.371      4.317   4.487 4.81e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 52.46 on 46 degrees of freedom
## Multiple R-squared:  0.3044, Adjusted R-squared:  0.2893 
## F-statistic: 20.13 on 1 and 46 DF,  p-value: 4.806e-05
zd.m <- lm(h6.time ~ `ZoomDefault-H6`, data = data)
summary(zd.m)
## 
## Call:
## lm(formula = h6.time ~ `ZoomDefault-H6`, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.846  -9.157  -4.538   7.796  31.142 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       10.2729     2.9184    3.52 0.000985 ***
## `ZoomDefault-H6`  -5.7214     0.1538  -37.21  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.28 on 46 degrees of freedom
## Multiple R-squared:  0.9678, Adjusted R-squared:  0.9671 
## F-statistic:  1385 on 1 and 46 DF,  p-value: < 2.2e-16
zr.m <- lm(h6.time ~ `ZoomRaw-H6`, data = data)
summary(zr.m)
## 
## Call:
## lm(formula = h6.time ~ `ZoomRaw-H6`, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.896  -7.536   1.634   5.133  21.293 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    2.2081     2.5306   0.873    0.387    
## `ZoomRaw-H6`  -6.5603     0.1437 -45.660   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.243 on 46 degrees of freedom
## Multiple R-squared:  0.9784, Adjusted R-squared:  0.9779 
## F-statistic:  2085 on 1 and 46 DF,  p-value: < 2.2e-16

4.3.1 Figure 5

# Figure 5: time lag against Zoom H6 time, one colour / symbol / line type per
# method difference, with points ('p') and a per-group regression line ('r').
colors <- c("#E0586F", "#61D04F", "#2398E6")

# Build the lattice object once and render it to both devices below.
# NOTE(review): `level.order` and `key.x` are not documented xyplot()
# arguments and are presumably ignored; kept unchanged -- confirm before
# removing.
time_lag_xyplot <- xyplot(
  time_lag ~ h6.time,
  groups = method_difference,
  data = df,
  type = c("p", "r"),
  col.line = colors,
  lty = c(1, 2, 3),
  pch = c(1, 2, 3),
  lwd = 3,
  cex = 1,
  xlab = "Zoom H6 time (ms)",
  ylab = "temporal difference (ms)",
  col = colors,
  level.order = c(1, 2, 3),
  key = list(
    space = "top",
    text = list(levels(df$method_difference)),
    points = list(pch = c(1, 2, 3), col = colors),
    lines = list(lty = c(1, 2, 3), col = colors),
    padding = 1
  ),
  key.x = 0.5
)

# lattice plots are only drawn when printed. Printing explicitly makes sure
# the files are not empty when this code runs via source() or inside a
# function, where top-level auto-printing does not happen.
png(paste0(fig_path, "time_lag_xyplot.png"),
    height = 5, width = 7, units = "in", res = 300)
print(time_lag_xyplot)
dev.off()
## png 
##   2
pdf(paste0(fig_path, "time_lag_xyplot.pdf"),
    height = 5, width = 7)
print(time_lag_xyplot)
dev.off()
## png 
##   2

5 Save models

# save(avr.m, zd.m, zr.m, file = paste0(project_path, 'temporal_models.RData'))