1 Set up

library(qgam)
library(dplyr)
library(tidyr)
library(stringr)

project_path = 'put in the path of your folder/'
path = project_path
fig_path = paste0(project_path, 'plots/')
dat = read.csv(paste0(project_path, 'data/utt_data.csv'))

source(paste0(path,"function.R"))

2 f0

2.1 check data distribution

datp = dat[!is.na(dat$f0),]


plop(6, 6)
plot(density(datp$f0))
abline(v=tapply(datp$f0, datp$gender, mean))

As the f0’s distribution is difficult for GAMMs to model, we use quantile GAMMS (qgam), which does not have any requirements for the distribution of the response variable. As outliers might be the “wrong behaviors” of the devices, we also keep all extreme values.

datp$gender = as.factor(datp$gender)
datp$speaker = as.factor(datp$speaker)
datp$utt_id = as.factor(datp$utt_id)
datp$device = relevel(as.factor(datp$device), "H6")
datp$deviceOrd = as.ordered(datp$device)
contrasts(datp$deviceOrd) = "contr.treatment"

2.2 final qgam model

# this is the final qgam model for f0

# datp.qgamDiff = qgam(f0 ~ gender + device +
#                  s(measurement.no, k=20) +
#                  s(measurement.no, by=deviceOrd, k=20) +
#                  s(measurement.no, speaker, bs="fs", m=1, k=20) +
#                  s(measurement.no, utt_id, bs="fs", m=1, k=20),
#          data = datp, qu=0.5)

2.3 load qgam model output

Since qgam models takes a long while to run, we saved the model output. In later iterations, we load the model output instead of re-running the models.

load(paste0(path,"qgams/datp.qgamDiff.rda"))

2.4 qgam results

2.4.1 Table 4

summary(datp.qgamDiff)
## 
## Family: elf 
## Link function: identity 
## 
## Formula:
## f0 ~ gender + device + s(measurement.no, k = 20) + s(measurement.no, 
##     by = deviceOrd, k = 20) + s(measurement.no, speaker, bs = "fs", 
##     m = 1, k = 20) + s(measurement.no, utt_id, bs = "fs", m = 1, 
##     k = 20)
## 
## Parametric coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         266.16522   25.96188  10.252   <2e-16 ***
## genderM            -139.09013    9.72814 -14.298   <2e-16 ***
## deviceAVR             0.06214    0.26893   0.231    0.817    
## deviceZoom-default   -0.05097    0.27186  -0.187    0.851    
## deviceZoom-raw       -0.00886    0.27064  -0.033    0.974    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                             edf  Ref.df    Chi.sq p-value    
## s(measurement.no)                        12.802  13.479    24.415  0.0260 *  
## s(measurement.no):deviceOrdAVR            1.077   1.148     0.058  0.8973    
## s(measurement.no):deviceOrdZoom-default   4.747   5.929    12.870  0.0431 *  
## s(measurement.no):deviceOrdZoom-raw       4.880   6.093    10.951  0.0930 .  
## s(measurement.no,speaker)               137.133 158.000 26734.829  <2e-16 ***
## s(measurement.no,utt_id)                 87.625  99.000 76212.693  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =   0.78   Deviance explained = 73.7%
## -REML = 3.8604e+05  Scale est. = 1         n = 79407

2.5 plot average curves

2.5.1 Figure 6

plop(8,5)

# png
png(paste0(fig_path, "utt_f0_ave.png"),width=7,height=5, units="in", res=300)
plot_ave_curves(df=datp, variable="device",
                categories=c("H6", "AVR", "Zoom-default", "Zoom-raw"),
                response="f0", yrange=c(100, 300),
                y_axis_label="F0 (Hz)", 
                x_axis_label="Normalized time")
dev.off()
## png 
##   2
pdf(paste0(fig_path, "utt_f0_ave.pdf"),width=7,height=5)
plot_ave_curves(df=datp, variable="device",
                categories=c("H6", "AVR", "Zoom-default", "Zoom-raw"),
                response="f0", yrange=c(100, 300),
                y_axis_label="F0 (Hz)", 
                x_axis_label="Normalized time")
dev.off()
## png 
##   2

2.6 plot model difference

2.6.1 Figure 7

plop(16,6)

plot_model_diff(datp.qgamDiff, c(-5,5), 
                y_axis_label = "Model estimated F0 (Hz)", 
                x_axis_label = "Proportion of utterance duration", 
                title1 = "H6", title2 = "AVR", title3 = "Zoom-default", title4 = "Zoom-raw")

# png
png(paste0(fig_path, "utt_f0_diff.png"),width=12,height=4, units="in", res=300)
plot_model_diff(datp.qgamDiff, c(-5,5), 
                y_axis_label = "Model estimated F0 (Hz)", 
                x_axis_label = "Proportion of utterance duration", 
                title1 = "H6", title2 = "AVR", title3 = "Zoom-default", title4 = "Zoom-raw")
dev.off()
## png 
##   2
# pdf
pdf(paste0(fig_path, "utt_f0_diff.pdf"),width=12,height=4)
plot_model_diff(datp.qgamDiff, c(-5,5), 
                y_axis_label = "Model estimated F0 (Hz)", 
                x_axis_label = "Proportion of utterance duration", 
                title1 = "H6", title2 = "AVR", title3 = "Zoom-default", title4 = "Zoom-raw")
dev.off()
## png 
##   2

There is no evidence that the three devices are significantly different from H6. We can also see this from the average curves shown below.

3 intensity

3.1 check data distribution

dati = dat[!is.na(dat$intensity),]

plop(6, 6)
plot(density(dati$intensity))
abline(v=tapply(dati$intensity, dati$gender, mean))

dati$gender = as.factor(dati$gender)
dati$speaker = as.factor(dati$speaker)
dati$utt_id = as.factor(dati$utt_id)
dati$device = relevel(as.factor(dati$device), "H6")
dati$deviceOrd = as.ordered(dati$device)
contrasts(dati$deviceOrd) = "contr.treatment"

3.2 final qgam model

# library(qgam)
# dati.qgamDiff = qgam(intensity ~ #device +
#                  s(measurement.no, k=20) +
#                  s(measurement.no, by=deviceOrd, k=20) +
#                  s(measurement.no, speaker, bs="fs", m=1, k=20) +
#                  s(measurement.no, utt_id, bs="fs", m=1, k=20),
#          data = dati, qu=0.5)

3.3 load qgam model output

load(paste0(path,"qgams/dati.qgamDiff.rda"))

3.4 qgam results

This is not included in the final results. Instead, normalised intensity results are presented.

summary(dati.qgamDiff)
## 
## Family: elf 
## Link function: identity 
## 
## Formula:
## intensity ~ device + s(measurement.no, k = 20) + s(measurement.no, 
##     by = deviceOrd, k = 20) + s(measurement.no, speaker, bs = "fs", 
##     m = 1, k = 20) + s(measurement.no, utt_id, bs = "fs", m = 1, 
##     k = 20)
## 
## Parametric coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        76.33336    4.28811   17.80   <2e-16 ***
## deviceAVR          -4.22669    0.04473  -94.49   <2e-16 ***
## deviceZoom-default -2.34299    0.04521  -51.83   <2e-16 ***
## deviceZoom-raw     -1.95206    0.04573  -42.68   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                            edf Ref.df  Chi.sq p-value    
## s(measurement.no)                        16.93  17.27   293.6  <2e-16 ***
## s(measurement.no):deviceOrdAVR           11.80  14.10   200.0  <2e-16 ***
## s(measurement.no):deviceOrdZoom-default  15.67  17.57   269.0  <2e-16 ***
## s(measurement.no):deviceOrdZoom-raw      15.74  17.62   192.2  <2e-16 ***
## s(measurement.no,speaker)               131.48 159.00 10998.6  <2e-16 ***
## s(measurement.no,utt_id)                 86.01  99.00 64583.9  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.632   Deviance explained = 45.3%
## -REML = 2.8761e+05  Scale est. = 1         n = 96714

3.5 plot average curves

3.5.1 Figure 8a

plop(8,5)


png(paste0(fig_path, "utt_intensity_ave.png"),width=7,height=5, units="in", res=300)

plot_ave_curves(df=dati, variable="device",
                categories=c("H6", "AVR", "Zoom-default", "Zoom-raw"),
                response="intensity", yrange=c(50, 80),
                y_axis_label="Intensity (dB)", 
                x_axis_label="Normalized time")
dev.off()
## png 
##   2
pdf(paste0(fig_path, "utt_intensity_ave.pdf"),width=7,height=5)

plot_ave_curves(df=dati, variable="device",
                categories=c("H6", "AVR", "Zoom-default", "Zoom-raw"),
                response="intensity", yrange=c(50, 80),
                y_axis_label="Intensity (dB)", 
                x_axis_label="Normalized time")

dev.off()
## png 
##   2

The plot below is not included. The plot for normalised intensity is used.

# plop(16,6)
# 
# png(paste0(fig_path, "utt_intensity_diff.png"),width=9,height=4, units="in", res=300)

plot_model_diff3(dati.qgamDiff, c(-5,5), 
                y_axis_label = "Model estimated intensity (dB)", 
                x_axis_label = "Proportion of utterance duration", 
                title1 = "AVR", title2 = "Zoom-default", title3 = "Zoom-raw")

dev.off()
## null device 
##           1
pdf(paste0(fig_path, "utt_intensity_diff.pdf"),width=9,height=4)

plot_model_diff3(dati.qgamDiff, c(-5,5), 
                y_axis_label = "Model estimated intensity (dB)", 
                x_axis_label = "Proportion of utterance duration", 
                title1 = "AVR", title2 = "Zoom-default", title3 = "Zoom-raw")
# dev.off()

Overall, AVR has the lowest intensity, and its curve is entirely below the y=0 line. For all three devices, the difference is greatest at the beginning. But the two Zoom devices quickly adjust up. In addition, it seems that intensity differences are smaller in the low intensity range.

4 normalised intensity

4.1 read normalised data

dat.norm <- read.csv(paste0(project_path, 'data/normalised_intensity.csv'))

datiN <- dat.norm %>% tidyr::separate(filename, c("speaker", "device", "utt_id", "repetition" ), sep = "_")
datiN$intensity <- str_replace(datiN$intensity, "--undefined-- ", "")
datiN$intensity <- as.numeric(datiN$intensity)

datiN = datiN[!is.na(datiN$intensity),]
datiN$intensity <- as.numeric(datiN$intensity)
datiN$gender <- ifelse(datiN$speaker=="PM1"|datiN$speaker=="PM2"|datiN$speaker=="PM3"|datiN$speaker=="PM4", "M", "F")

4.2 check data distribution

plop(6, 6)
plot(density(datiN$intensity))
abline(v=tapply(datiN$intensity, datiN$gender, mean))

datiN$gender = as.factor(datiN$gender)
datiN$speaker = as.factor(datiN$speaker)
datiN$utt_id = as.factor(datiN$utt_id)
datiN$device = relevel(as.factor(datiN$device), "H6")
datiN$deviceOrd = as.ordered(datiN$device)
contrasts(datiN$deviceOrd) = "contr.treatment"
datiN$measurement.no <- (datiN$time-datiN$StartTime)/(datiN$EndTime - datiN$StartTime)
datiN$measurement.ms <- round(datiN$time-datiN$StartTime, 2)

4.3 final qgam model

# datiN.qgamDiff = qgam(intensity ~ device +
#                  s(measurement.no, k=20) +
#                  s(measurement.no, by=deviceOrd, k=20) +
#                  s(measurement.no, speaker, bs="fs", m=1, k=20) +
#                  s(measurement.no, utt_id, bs="fs", m=1, k=20),
#          data = datiN, qu=0.5)

4.4 load qgam model output

load(paste0(path,"qgams/datiN.qgamDiff.rda"))

4.5 qgam results

4.5.1 Table 5

summary(datiN.qgamDiff)
## 
## Family: elf 
## Link function: identity 
## 
## Formula:
## intensity ~ device + s(measurement.no, k = 20) + s(measurement.no, 
##     by = deviceOrd, k = 20) + s(measurement.no, speaker, bs = "fs", 
##     m = 1, k = 20) + s(measurement.no, utt_id, bs = "fs", m = 1, 
##     k = 20)
## 
## Parametric coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        78.09171    4.59853  16.982  < 2e-16 ***
## deviceAVR           0.22847    0.03138   7.281 3.32e-13 ***
## deviceZoom-default  0.01382    0.03140   0.440 0.659869    
## deviceZoom-raw     -0.12679    0.03262  -3.887 0.000102 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                            edf Ref.df  Chi.sq p-value    
## s(measurement.no)                        16.84  17.21  1043.6  <2e-16 ***
## s(measurement.no):deviceOrdAVR           12.22  14.56   320.0  <2e-16 ***
## s(measurement.no):deviceOrdZoom-default  15.37  17.37   263.0  <2e-16 ***
## s(measurement.no):deviceOrdZoom-raw      16.80  18.30   255.7  <2e-16 ***
## s(measurement.no,speaker)               135.06 159.00  8928.4  <2e-16 ***
## s(measurement.no,utt_id)                 87.25  99.00 84438.2  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =   0.67   Deviance explained = 49.8%
## -REML = 2.6361e+05  Scale est. = 1         n = 96719

4.6 plot average curves

4.6.1 Figure 8b

plop(8,5)


png(paste0(fig_path, "utt_normintensity_ave.png"),width=7,height=5, units="in", res=300)

plot_ave_curves(df=datiN, variable="device",
                categories=c("H6", "AVR", "Zoom-default", "Zoom-raw"),
                response="intensity", yrange=c(50, 80),
                y_axis_label="Intensity (dB)",
                x_axis_label="Normalized time")
dev.off()
## png 
##   2
pdf(paste0(fig_path, "utt_normintensity_ave.pdf"),width=7,height=5)

plot_ave_curves(df=datiN, variable="device",
                categories=c("H6", "AVR", "Zoom-default", "Zoom-raw"),
                response="intensity", yrange=c(50, 80),
                y_axis_label="Intensity (dB)",
                x_axis_label="Normalized time")

dev.off()
## png 
##   2

4.7 plot model difference

4.7.1 Figure 9

plop(16,6)

png(paste0(fig_path, "utt_normintensity_diff.png"),width=9,height=4, units="in", res=300)

plot_model_diff3(datiN.qgamDiff, c(-5,5),
                y_axis_label = "Model estimated intensity (dB)",
                x_axis_label = "Proportion of utterance duration",
                title1 = "AVR", title2 = "Zoom-default", title3 = "Zoom-raw")
dev.off()
## png 
##   2
pdf(paste0(fig_path, "utt_normintensity_diff.pdf"),width=9,height=4)

plot_model_diff3(datiN.qgamDiff, c(-5,5),
                y_axis_label = "Model estimated intensity (dB)",
                x_axis_label = "Proportion of utterance duration",
                title1 = "AVR", title2 = "Zoom-default", title3 = "Zoom-raw")
dev.off()
## png 
##   2

Overall, AVR has the lowest intensity, and its curve is entirely below the y=0 line. For all three devices, the difference is greatest at the beginning. But the two Zoom devices quickly adjust up. In addition, it seems that intensity differences are smaller in the low intensity range.