1 Set up

library(qgam)
library(itsadug)
library(dplyr)
library(ggplot2)
library(ipa)

project_path = 'put in the path of your folder/'
path = project_path
fig_path = paste0(project_path, 'plots/')
dat = read.csv(paste0(project_path, 'data/vow_data.csv'))
source(paste0(path,"function.R"))
dat$word = gsub("[[:punct:]]", "", dat$word)
dat$word = gsub("\'", "", dat$word)

dat$vw = paste(dat$segment, dat$word, sep=".")
dat$vwr = paste(dat$segment, dat$word, dat$rep, sep=".")
dat$uni_token = paste(dat$speaker, dat$device, dat$vwr, sep=".")

Summarise the distribution of the 11 chosen vowels in different words:

table(dat$word,dat$segment)
##           
##               @   @U    {   A:   aI    e   eI    I   i:   i@   u:
##   Amelia    504    0    0    0    0    0    0    0 1524 4128    0
##   arent       0    0    0 1188    0    0    0    0    0    0    0
##   banana    596    0    0 1428    0    0    0    0    0    0    0
##   bread       0    0    0    0    0 1264    0    0    0    0    0
##   Emmanuel    0    0 1256    0    0    0    0  384    0    0    0
##   Free        0    0    0    0    0    0    0    0  984    0    0
##   goulash     0    0 1720    0    0    0    0    0    0    0 1168
##   inedible    0    0    0    0    0 1076    0  452    0    0    0
##   know        0 2344    0    0    0    0    0    0    0    0    0
##   made        0    0    0    0    0    0 1020    0    0    0    0
##   mangoes     0 2032 1364    0    0    0    0    0    0    0    0
##   mean        0    0    0    0    0    0    0    0  872    0    0
##   my          0    0    0    0 1000    0    0    0    0    0    0
##   My          0    0    0    0 1600    0    0    0    0    0    0
##   noodles     0    0    0    0    0    0    0    0    0    0 1044
##   ramen     592    0    0 1384    0    0    0    0    0    0    0
##   ready       0    0    0    0    0 1060    0    0    0    0    0
##   soup        0    0    0    0    0    0    0    0    0    0  900

2 vowel space by speaker

vowel.data <- dat
f1.1<- vowel.data %>%
  group_by(traj, segment, speaker, device) %>%
  summarise_at(vars(f1), list(f1 = mean))

f2.1<- vowel.data %>%
  group_by(traj, segment, speaker,device) %>%
  summarise_at(vars(f2), list(f2 = mean))

formants <- merge(x=f1.1,y=f2.1, 
             by=c("traj", "segment", "speaker", "device"))


formants$device <- factor(formants$device, levels = c("H6", "AVR", "Zoom-default", "Zoom-raw"))

2.1 Figure 2

# tiff(paste0(fig_path, "F1F2.tiff"), units="mm", width=240, height=100, res=600, compression = 'lzw',type="cairo")

ggplot(subset(formants, segment %in% c("{", "i:", "u:")), aes(x = f2, y = f1)) + 
  geom_point(aes (colour = segment, shape = device)) + 
  scale_x_reverse() + scale_y_reverse() +
  facet_wrap(~speaker, ncol = 4)+
  stat_ellipse(aes(colour = segment))+
  scale_colour_manual(values = c("{" = "#f8ea4e", "i:" = "#ff8c52", "u:" = "#aa5ebc"),
                      labels = c(cmu('AE', to='ipa'), xsampa('i:', to='ipa'), xsampa('u:', to='ipa')))+ 
  scale_shape_manual(values = c(16, 0, 2, 3)) +
  xlab("F2 (Hz)") + 
  ylab("F1 (Hz)")+ 
  labs(colour = "vowel")+
  labs(shape = "method") +
  theme_bw()

# dev.off()

3 f0

datp = dat[!is.na(dat$f0),]
## remove singltons
datp_ls = split(datp, as.factor(datp$uni_token))
datp_nrows = sapply(datp_ls, nrow)
datp = datp[!is.element(datp$uni_token, names(datp_nrows[which(datp_nrows==1)])),]

3.1 check data distribution

plop(6, 6)
plot(density(datp$f0))
abline(v=tapply(datp$f0, datp$gender, mean))

datp$gender = as.factor(datp$gender)
datp$speaker = as.factor(datp$speaker)
datp$vw = as.factor(datp$vw)
datp$device = relevel(as.factor(datp$device), "H6")
datp$deviceOrd = as.ordered(datp$device)
contrasts(datp$deviceOrd) = "contr.treatment"

3.2 final qgam model

Same as the utterance file, since qgam models take a long time to run, we save the full models and load the pre-saved versions below.

# datp.qgamDiff.vow = qgam(f0 ~ gender + device +
#                  s(measurement.no) +
#                  s(measurement.no, by=deviceOrd) +
#                  s(measurement.no, speaker, bs="fs", m=1) +
#                  s(measurement.no, vw, bs="fs", m=1),
#          data = datp, qu=0.5)

3.3 load qgam model output

load(paste0(path,"qgams/datp.qgamDiff.vow.rda"))

3.4 qgam results

3.4.1 Supplementary Material 3: Table 1

summary(datp.qgamDiff.vow)
## 
## Family: elf 
## Link function: identity 
## 
## Formula:
## f0 ~ gender + device + s(measurement.no) + s(measurement.no, 
##     by = deviceOrd) + s(measurement.no, speaker, bs = "fs", m = 1) + 
##     s(measurement.no, vw, bs = "fs", m = 1)
## 
## Parametric coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         269.0501     9.8255  27.383   <2e-16 ***
## genderM            -140.1424     6.9730 -20.098   <2e-16 ***
## deviceAVR             0.1735     0.4856   0.357    0.721    
## deviceZoom-default   -0.6617     0.4908  -1.348    0.178    
## deviceZoom-raw       -0.4513     0.4887  -0.923    0.356    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                             edf  Ref.df    Chi.sq p-value    
## s(measurement.no)                         1.039   1.046     3.141  0.0793 .  
## s(measurement.no):deviceOrdAVR            1.024   1.047     0.236  0.6614    
## s(measurement.no):deviceOrdZoom-default   1.021   1.042     0.029  0.9053    
## s(measurement.no):deviceOrdZoom-raw       1.026   1.051     0.000  0.9997    
## s(measurement.no,speaker)                35.421  70.000  2228.741  <2e-16 ***
## s(measurement.no,vw)                    152.262 233.000 45260.489  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.758   Deviance explained = 72.4%
## -REML = 1.529e+05  Scale est. = 1         n = 30865

Again there’s not really f0 difference

3.5 plot average curves

3.5.1 Supplementary Material 3: Figure 1

plop(6,5)

# png
png(paste0(fig_path, "vow_f0_ave.png"),width=7,height=5, units="in", res=300)
plot_ave_curves(df=datp, variable="device",
                categories=c("H6", "AVR", "Zoom-default", "Zoom-raw"),
                response="f0", yrange=c(180, 260), smoothness=1,
                y_axis_label="F0 (Hz)", 
                x_axis_label="Normalized time")
dev.off()
## png 
##   2
# pdf
pdf(paste0(fig_path, "vow_f0_ave.pdf"),width=7,height=5)
plot_ave_curves(df=datp, variable="device",
                categories=c("H6", "AVR", "Zoom-default", "Zoom-raw"),
                response="f0", yrange=c(180, 260), smoothness=1,
                y_axis_label="F0 (Hz)", 
                x_axis_label="Normalized time")
dev.off()
## png 
##   2

3.6 plot model difference

3.6.1 Supplementary Material 3: Figure 2

plop(16,6)
# png
png(paste0(fig_path, "vow_f0_diff.png"),width=9,height=4, units="in", res=300)
plot_model_diff3(datp.qgamDiff.vow, c(-5,5), 
                y_axis_label = "Model estimated F0 (Hz)", 
                x_axis_label = "Proportion of vowel duration", 
                title1 = "AVR", title2 = "Zoom-default", title3 = "Zoom-raw")
dev.off()
## png 
##   2
# pdf
pdf(paste0(fig_path, "vow_f0_diff.pdf"),width=9,height=4)
plot_model_diff3(datp.qgamDiff.vow, c(-5,5), 
                y_axis_label = "Model estimated F0 (Hz)", 
                x_axis_label = "Proportion of vowel duration", 
                title1 = "AVR", title2 = "Zoom-default", title3 = "Zoom-raw")
dev.off()
## png 
##   2

4 F1

datf1 = dat[!is.na(dat$f1),]
## remove singltons
datf1_ls = split(datf1, as.factor(datf1$uni_token))
datf1_nrows = sapply(datf1_ls, nrow)
datf1 = datf1[!is.element(datf1$uni_token, names(datf1_nrows[which(datf1_nrows==1)])),]

4.1 check data distribution

plop(6, 6)
plot(density(datf1$f1))
abline(v=tapply(datf1$f1, datf1$gender, mean))

datf1$gender = as.factor(datf1$gender)
datf1$speaker = as.factor(datf1$speaker)
datf1$vw = as.factor(datf1$vw)
datf1$device = relevel(as.factor(datf1$device), "H6")
datf1$deviceOrd = as.ordered(datf1$device)
contrasts(datf1$deviceOrd) = "contr.treatment"

4.2 final qgam model

# datf1.qgamDiff.vow = qgam(f1 ~ gender + device +
#                  s(measurement.no) +
#                  s(measurement.no, by=deviceOrd) +
#                  s(measurement.no, speaker, bs="fs", m=1) +
#                  s(measurement.no, vw, bs="fs", m=1),
#          data = datf1, qu=0.5)

4.3 load qgam model output

load(paste0(path,"qgams/datf1.qgamDiff.vow.rda"))

4.4 qgam results

4.4.1 Table 6

summary(datf1.qgamDiff.vow)
## 
## Family: elf 
## Link function: identity 
## 
## Formula:
## f1 ~ gender + device + s(measurement.no) + s(measurement.no, 
##     by = deviceOrd) + s(measurement.no, speaker, bs = "fs", m = 1) + 
##     s(measurement.no, vw, bs = "fs", m = 1)
## 
## Parametric coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         682.777     23.562  28.977  < 2e-16 ***
## genderM            -155.259     16.446  -9.441  < 2e-16 ***
## deviceAVR           -11.054      1.516  -7.290 3.09e-13 ***
## deviceZoom-default  -17.144      1.584 -10.821  < 2e-16 ***
## deviceZoom-raw      -42.937      1.638 -26.220  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                             edf  Ref.df   Chi.sq  p-value    
## s(measurement.no)                         7.009   7.466   160.16  < 2e-16 ***
## s(measurement.no):deviceOrdAVR            2.885   3.588    23.56 8.59e-05 ***
## s(measurement.no):deviceOrdZoom-default   3.629   4.493   197.70  < 2e-16 ***
## s(measurement.no):deviceOrdZoom-raw       2.274   2.835    98.82  < 2e-16 ***
## s(measurement.no,speaker)                48.519  70.000  1997.66  < 2e-16 ***
## s(measurement.no,vw)                    178.676 233.000 47776.86  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.662   Deviance explained =   57%
## -REML = 2.0273e+05  Scale est. = 1         n = 32880

4.5 plot average curves

4.5.1 Figure 10a

plop(7,7)
par(mar=c(4,4,1,7), xpd=TRUE)

# png
png(paste0(fig_path, "vow_f1_ave.png"),width=7,height=5, units="in", res=300)
plot_ave_curves(df=datf1, variable="device",
                categories=c("H6", "AVR", "Zoom-default", "Zoom-raw"),
                response="f1", yrange=c(400, 700), smoothness=1,
                y_axis_label="F1 (Hz)", 
                x_axis_label="Normalized time")
dev.off()
## png 
##   2
# pdf
pdf(paste0(fig_path, "vow_f1_ave.pdf"),width=7,height=5)
plot_ave_curves(df=datf1, variable="device",
                categories=c("H6", "AVR", "Zoom-default", "Zoom-raw"),
                response="f1", yrange=c(400, 700), smoothness=1,
                y_axis_label="F1 (Hz)", 
                x_axis_label="Normalized time")
dev.off()
## png 
##   2

4.6 plot model difference

4.6.1 Figure 11

plop(16,6)
# png
png(paste0(fig_path, "vow_f1_diff.png"),width=9,height=4, units="in", res=300)
plot_model_diff3(datf1.qgamDiff.vow, c(-50,50), 
                y_axis_label = "Model estimated F1 (Hz)", 
                x_axis_label = "Proportion of vowel duration", 
                title1 = "AVR", title2 = "Zoom-default", title3 = "Zoom-raw")
dev.off()
## png 
##   2
# pdf
pdf(paste0(fig_path, "vow_f1_diff.pdf"),width=9,height=4)
plot_model_diff3(datf1.qgamDiff.vow, c(-50,50), 
                y_axis_label = "Model estimated F1 (Hz)", 
                x_axis_label = "Proportion of vowel duration", 
                title1 = "AVR", title2 = "Zoom-default", title3 = "Zoom-raw")
dev.off()
## png 
##   2

5 F2

datf2 = dat[!is.na(dat$f2),]
## remove singltons
datf2_ls = split(datf2, as.factor(datf2$uni_token))
datf2_nrows = sapply(datf2_ls, nrow)
datf2 = datf2[!is.element(datf1$uni_token, names(datf2_nrows[which(datf2_nrows==1)])),]

5.1 check data distribution

plop(6, 6)
plot(density(datf2$f2))
abline(v=tapply(datf2$f2, datf2$gender, mean))

datf2$gender = as.factor(datf2$gender)
datf2$speaker = as.factor(datf2$speaker)
datf2$vw = as.factor(datf2$vw)
datf2$device = relevel(as.factor(datf2$device), "H6")
datf2$deviceOrd = as.ordered(datf2$device)
contrasts(datf2$deviceOrd) = "contr.treatment"

5.2 final qgam model

# datf2.qgamDiff.vow = qgam(f2 ~ gender + device +
#                  s(measurement.no) +
#                  s(measurement.no, by=deviceOrd) +
#                  s(measurement.no, speaker, bs="fs", m=1) +
#                  s(measurement.no, vw, bs="fs", m=1),
#          data = datf2, qu=0.5)

5.3 load qgam model output

load(paste0(path,"qgams/datf2.qgamDiff.vow.rda"))

5.4 qgam results

5.4.1 Table 7

summary(datf2.qgamDiff.vow)
## 
## Family: elf 
## Link function: identity 
## 
## Formula:
## f2 ~ gender + device + s(measurement.no) + s(measurement.no, 
##     by = deviceOrd) + s(measurement.no, speaker, bs = "fs", m = 1) + 
##     s(measurement.no, vw, bs = "fs", m = 1)
## 
## Parametric coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        1851.556     60.490  30.609  < 2e-16 ***
## genderM            -251.629     51.626  -4.874 1.09e-06 ***
## deviceAVR             1.909      3.307   0.577    0.564    
## deviceZoom-default   15.125      3.250   4.654 3.26e-06 ***
## deviceZoom-raw        5.230      3.317   1.577    0.115    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                             edf  Ref.df    Chi.sq p-value    
## s(measurement.no)                         5.833   6.342    20.796 0.00271 ** 
## s(measurement.no):deviceOrdAVR            1.009   1.018     0.894 0.34900    
## s(measurement.no):deviceOrdZoom-default   1.735   2.160    12.931 0.00236 ** 
## s(measurement.no):deviceOrdZoom-raw       1.032   1.062     1.719 0.19354    
## s(measurement.no,speaker)                42.374  70.000  2694.238 < 2e-16 ***
## s(measurement.no,vw)                    189.030 233.000 60533.855 < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =    0.6   Deviance explained = 52.9%
## -REML = 2.2596e+05  Scale est. = 1         n = 32884

5.5 plot average curves

5.5.1 Figure 10b

plop(6, 5)

# png
png(paste0(fig_path, "vow_f2_ave.png"),width=7,height=5, units="in", res=300)
plot_ave_curves(df=datf2, variable="device",
                categories=c("H6", "AVR", "Zoom-default", "Zoom-raw"),
                response="f2", yrange=c(1500, 2000), smoothness=1,
                y_axis_label="F2 (Hz)", 
                x_axis_label="Normalized time")
dev.off()
## png 
##   2
# pdf
pdf(paste0(fig_path, "vow_f2_ave.pdf"),width=7,height=5)
plot_ave_curves(df=datf2, variable="device",
                categories=c("H6", "AVR", "Zoom-default", "Zoom-raw"),
                response="f2", yrange=c(1500, 2000), smoothness=1,
                y_axis_label="F2 (Hz)", 
                x_axis_label="Normalized time")
dev.off()
## png 
##   2

5.6 plot model difference

5.6.1 Figure 12

plop(16,6)
# png
png(paste0(fig_path, "vow_f2_diff.png"),width=9,height=4, units="in", res=300)
plot_model_diff3(datf2.qgamDiff.vow, c(-30, 30), 
                y_axis_label = "Model estimated F2 (Hz)", 
                x_axis_label = "Proportion of vowel duration", 
                title1 = "AVR", title2 = "Zoom-default", title3 = "Zoom-raw")
dev.off()
## png 
##   2
# pdf
pdf(paste0(fig_path, "vow_f2_diff.pdf"),width=9,height=4)
plot_model_diff3(datf2.qgamDiff.vow, c(-30, 30), 
                y_axis_label = "Model estimated F2 (Hz)", 
                x_axis_label = "Proportion of vowel duration", 
                title1 = "AVR", title2 = "Zoom-default", title3 = "Zoom-raw")
dev.off()
## png 
##   2

6 corner vowels

# remove length mark in vowel
dat$segment = gsub("\\{", "a", dat$segment)
dat$segment = gsub("i:", "i", dat$segment)
dat$segment = gsub("u:", "u", dat$segment)


datf1c = dat[!is.na(dat$f1),]
datf1c = datf1c[datf1c$segment%in%c("a", "i", "u"),]
datf2c = dat[!is.na(dat$f2),]
datf2c = datf2c[datf2c$segment%in%c("a", "i", "u"),]
datcv = rbind(datf1c, datf2c)
datcv$formant = c(rep("f1",nrow(datf1c)), rep("f2",nrow(datf2c)))
datcv$formantVow = paste(datcv$formant, datcv$segment, sep=".")
datcv$formantValue = c(datf1c$f1, datf2c$f2)
unique(datcv$formantVow)
## [1] "f1.a" "f1.i" "f1.u" "f2.a" "f2.i" "f2.u"
datcv = code_order(datcv, "AVR", "f1.a")
datcv = code_order(datcv, "AVR", "f1.i")
datcv = code_order(datcv, "AVR", "f1.u")
datcv = code_order(datcv, "AVR", "f2.a")
datcv = code_order(datcv, "AVR", "f2.i")
datcv = code_order(datcv, "AVR", "f2.u")
datcv = code_order(datcv, "Zoom-default", "f1.a")
datcv = code_order(datcv, "Zoom-default", "f1.i")
datcv = code_order(datcv, "Zoom-default", "f1.u")
datcv = code_order(datcv, "Zoom-default", "f2.a")
datcv = code_order(datcv, "Zoom-default", "f2.i")
datcv = code_order(datcv, "Zoom-default", "f2.u")
datcv = code_order(datcv, "Zoom-raw", "f1.a")
datcv = code_order(datcv, "Zoom-raw", "f1.i")
datcv = code_order(datcv, "Zoom-raw", "f1.u")
datcv = code_order(datcv, "Zoom-raw", "f2.a")
datcv = code_order(datcv, "Zoom-raw", "f2.i")
datcv = code_order(datcv, "Zoom-raw", "f2.u")
datcv$formantVow = as.factor(datcv$formantVow)
datcv$word = as.factor(datcv$word)
datcv$speaker = as.factor(datcv$speaker)
datcv$gender = as.factor(datcv$gender)

datcv$deviceFormantVow = interaction(datcv$device, datcv$formantVow)
datcv$deviceFormantVow = relevel(datcv$deviceFormantVow, 'H6.f1.a')

6.1 final qgam model

# datcv.qgamDiff.vow = qgam(formantValue ~ gender + formantVow + s(measurement.no, by=formantVow) +
#                  s(measurement.no, by=IsAVR.f1.a) + IsAVR.f1.a +
#                  s(measurement.no, by=IsAVR.f1.i) + IsAVR.f1.i +
#                  s(measurement.no, by=IsAVR.f1.u) + IsAVR.f1.u +
#                  s(measurement.no, by=IsAVR.f2.a) + IsAVR.f2.a +
#                  s(measurement.no, by=IsAVR.f2.i) + IsAVR.f2.i +
#                  s(measurement.no, by=IsAVR.f2.u) + IsAVR.f2.u +
#                  s(measurement.no, by=IsZoom.default.f1.a) + IsZoom.default.f1.a +
#                  s(measurement.no, by=IsZoom.default.f1.i) + IsZoom.default.f1.i +
#                  s(measurement.no, by=IsZoom.default.f1.u) + IsZoom.default.f1.u +
#                  s(measurement.no, by=IsZoom.default.f2.a) + IsZoom.default.f2.a +
#                  s(measurement.no, by=IsZoom.default.f2.i) + IsZoom.default.f2.i +
#                  s(measurement.no, by=IsZoom.default.f2.u) + IsZoom.default.f2.u +
#                  s(measurement.no, by=IsZoom.raw.f1.a) + IsZoom.raw.f1.a +
#                  s(measurement.no, by=IsZoom.raw.f1.i) + IsZoom.raw.f1.i +
#                  s(measurement.no, by=IsZoom.raw.f1.u) + IsZoom.raw.f1.u +
#                  s(measurement.no, by=IsZoom.raw.f2.a) + IsZoom.raw.f2.a +
#                  s(measurement.no, by=IsZoom.raw.f2.i) + IsZoom.raw.f2.i +
#                  s(measurement.no, by=IsZoom.raw.f2.u) + IsZoom.raw.f2.u +
#                  s(measurement.no, speaker, bs="fs", m=1) +
#                  s(measurement.no, word, bs="fs", m=1),
#          data = datcv, qu=0.5)

6.2 load qgam model output

load(paste0(path,"qgams/datcv.qgamDiff.vow.rda"))

6.3 qgam results

6.3.1 Table 8

summary(datcv.qgamDiff.vow)
## 
## Family: elf 
## Link function: identity 
## 
## Formula:
## formantValue ~ gender + formantVow + s(measurement.no, by = formantVow) + 
##     s(measurement.no, by = IsAVR.f1.a) + IsAVR.f1.a + s(measurement.no, 
##     by = IsAVR.f1.i) + IsAVR.f1.i + s(measurement.no, by = IsAVR.f1.u) + 
##     IsAVR.f1.u + s(measurement.no, by = IsAVR.f2.a) + IsAVR.f2.a + 
##     s(measurement.no, by = IsAVR.f2.i) + IsAVR.f2.i + s(measurement.no, 
##     by = IsAVR.f2.u) + IsAVR.f2.u + s(measurement.no, by = IsZoom.default.f1.a) + 
##     IsZoom.default.f1.a + s(measurement.no, by = IsZoom.default.f1.i) + 
##     IsZoom.default.f1.i + s(measurement.no, by = IsZoom.default.f1.u) + 
##     IsZoom.default.f1.u + s(measurement.no, by = IsZoom.default.f2.a) + 
##     IsZoom.default.f2.a + s(measurement.no, by = IsZoom.default.f2.i) + 
##     IsZoom.default.f2.i + s(measurement.no, by = IsZoom.default.f2.u) + 
##     IsZoom.default.f2.u + s(measurement.no, by = IsZoom.raw.f1.a) + 
##     IsZoom.raw.f1.a + s(measurement.no, by = IsZoom.raw.f1.i) + 
##     IsZoom.raw.f1.i + s(measurement.no, by = IsZoom.raw.f1.u) + 
##     IsZoom.raw.f1.u + s(measurement.no, by = IsZoom.raw.f2.a) + 
##     IsZoom.raw.f2.a + s(measurement.no, by = IsZoom.raw.f2.i) + 
##     IsZoom.raw.f2.i + s(measurement.no, by = IsZoom.raw.f2.u) + 
##     IsZoom.raw.f2.u + s(measurement.no, speaker, bs = "fs", m = 1) + 
##     s(measurement.no, word2, bs = "fs", m = 1)
## 
## Parametric coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           903.151     31.333  28.825  < 2e-16 ***
## genderM              -202.627     28.674  -7.067 1.59e-12 ***
## formantVowf1.i       -352.403     38.660  -9.115  < 2e-16 ***
## formantVowf1.u       -375.441      8.693 -43.188  < 2e-16 ***
## formantVowf2.a        880.827      8.163 107.905  < 2e-16 ***
## formantVowf2.i       1431.460     39.069  36.639  < 2e-16 ***
## formantVowf2.u        611.806     10.978  55.729  < 2e-16 ***
## IsAVR.f1.a1           -21.248      6.665  -3.188 0.001433 ** 
## IsAVR.f1.i1            10.669      7.852   1.359 0.174219    
## IsAVR.f1.u1            -2.913      8.246  -0.353 0.723907    
## IsAVR.f2.a1             4.144      9.306   0.445 0.656085    
## IsAVR.f2.i1             3.596     10.886   0.330 0.741116    
## IsAVR.f2.u1             0.792     13.107   0.060 0.951816    
## IsZoom.default.f1.a1   11.572      7.368   1.570 0.116303    
## IsZoom.default.f1.i1  -55.329      8.289  -6.675 2.47e-11 ***
## IsZoom.default.f1.u1  -35.947      8.664  -4.149 3.34e-05 ***
## IsZoom.default.f2.a1   -3.275      9.377  -0.349 0.726903    
## IsZoom.default.f2.i1    3.698     11.107   0.333 0.739161    
## IsZoom.default.f2.u1   78.956     11.963   6.600 4.12e-11 ***
## IsZoom.raw.f1.a1      -25.915      7.523  -3.445 0.000571 ***
## IsZoom.raw.f1.i1      -55.882      8.095  -6.903 5.09e-12 ***
## IsZoom.raw.f1.u1      -43.226      8.473  -5.102 3.37e-07 ***
## IsZoom.raw.f2.a1        5.805      9.425   0.616 0.537955    
## IsZoom.raw.f2.i1       18.377     11.171   1.645 0.099975 .  
## IsZoom.raw.f2.u1       59.192     13.507   4.382 1.17e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                                           edf Ref.df   Chi.sq  p-value    
## s(measurement.no):formantVowf1.a        5.752  6.778  195.124  < 2e-16 ***
## s(measurement.no):formantVowf1.i        1.011  1.016    6.520   0.0109 *  
## s(measurement.no):formantVowf1.u        2.305  2.727   11.662   0.0119 *  
## s(measurement.no):formantVowf2.a        4.148  5.014   70.934  < 2e-16 ***
## s(measurement.no):formantVowf2.i        6.631  7.710  449.500  < 2e-16 ***
## s(measurement.no):formantVowf2.u        4.421  5.365  100.002  < 2e-16 ***
## s(measurement.no):IsAVR.f1.a1           1.005  1.009    4.962   0.0264 *  
## s(measurement.no):IsAVR.f1.i1           1.006  1.012    1.687   0.1968    
## s(measurement.no):IsAVR.f1.u1           1.009  1.018    0.138   0.7169    
## s(measurement.no):IsAVR.f2.a1           1.006  1.011    0.331   0.5698    
## s(measurement.no):IsAVR.f2.i1           1.025  1.050    1.739   0.2010    
## s(measurement.no):IsAVR.f2.u1           1.462  1.779    1.516   0.3127    
## s(measurement.no):IsZoom.default.f1.a1  3.333  4.134   56.622  < 2e-16 ***
## s(measurement.no):IsZoom.default.f1.i1  1.007  1.013    0.332   0.5690    
## s(measurement.no):IsZoom.default.f1.u1  1.008  1.016    0.047   0.8421    
## s(measurement.no):IsZoom.default.f2.a1  1.130  1.243    1.636   0.3000    
## s(measurement.no):IsZoom.default.f2.i1  1.021  1.042   49.325  < 2e-16 ***
## s(measurement.no):IsZoom.default.f2.u1  1.616  1.994    2.566   0.2586    
## s(measurement.no):IsZoom.raw.f1.a1      2.617  3.257   23.084 6.83e-05 ***
## s(measurement.no):IsZoom.raw.f1.i1      1.049  1.093    0.374   0.5495    
## s(measurement.no):IsZoom.raw.f1.u1      1.010  1.019    0.036   0.8808    
## s(measurement.no):IsZoom.raw.f2.a1      2.063  2.574   10.143   0.0135 *  
## s(measurement.no):IsZoom.raw.f2.i1      2.421  3.017   32.645  < 2e-16 ***
## s(measurement.no):IsZoom.raw.f2.u1      1.002  1.004    2.993   0.0839 .  
## s(measurement.no,speaker)              27.335 70.000  643.185  < 2e-16 ***
## s(measurement.no,word2)                28.461 70.000 1755.168  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.883   Deviance explained = 78.7%
## -REML = 1.4706e+05  Scale est. = 1         n = 21664

6.4 plot average curves

mean formant values (of the raw data) for a, i, u (from left to right)

6.4.1 Figure 13

datcv$deviceFormant = paste(datcv$device, datcv$formant, sep=".")
cats = c('H6.f1', 'AVR.f1', 'Zoom-default.f1', 'Zoom-raw.f1', 'H6.f2', 'AVR.f2', 'Zoom-default.f2', 'Zoom-raw.f2')
colsN = rep(1:4, 2)
ltys = rep(c(1,2), each=4)
plop(16,7)


# png
png(paste0(fig_path, "vow_corner_ave.png"),width=12,height=5.5, units="in", res=300)
par(mfrow=c(1,3))


  #i
  datcvI = droplevels(datcv[datcv$segment=="i",])
  plot_ave_curves(df=datcvI, variable="deviceFormant", categories=cats,
                  cols=colsN, lineTypes=ltys,
                  response="formantValue", yrange=c(200, 2500), print.legend=F,
                  smoothness=1,
                y_axis_label="Frequency (Hz)", 
                x_axis_label="Normalized time")
  
  #a
  datcvA = droplevels(datcv[datcv$segment=="a",])
  plot_ave_curves(df=datcvA, variable="deviceFormant", categories=cats, pos="bottom",
                  cols=colsN, lineTypes=ltys,
                  response="formantValue", yrange=c(200, 2500),
                  smoothness=1,
                y_axis_label="Frequency (Hz)", 
                x_axis_label="Normalized time")
  
  #u
  datcvU = droplevels(datcv[datcv$segment=="u",])
  plot_ave_curves(df=datcvU, variable="deviceFormant", categories=cats,
                  cols=colsN, lineTypes=ltys,
                  response="formantValue", yrange=c(200, 2500), print.legend=F,
                  smoothness=1,
                y_axis_label="Frequency (Hz)", 
                x_axis_label="Normalized time")
dev.off()
## png 
##   2
# # pdf
# pdf(paste0(fig_path, "vow_corner_ave.pdf"),width=12,height=5.5)
# par(mfrow=c(1,3))
# 
#   #i
#   datcvI = droplevels(datcv[datcv$segment=="i",])
#   plot_ave_curves(df=datcvI, variable="deviceFormant", categories=cats,
#                   cols=colsN, lineTypes=ltys,
#                   response="formantValue", yrange=c(200, 2500), print.legend=F,
#                   smoothness=1,
#                 y_axis_label="Frequency (Hz)", 
#                 x_axis_label="Normalized time")
#   
#   #a
#   datcvA = droplevels(datcv[datcv$segment=="a",])
#   plot_ave_curves(df=datcvA, variable="deviceFormant", categories=cats,pos="bottom",
#                   cols=colsN, lineTypes=ltys,
#                   response="formantValue", yrange=c(200, 2500),
#                   smoothness=1,
#                 y_axis_label="Frequency (Hz)", 
#                 x_axis_label="Normalized time")
#   
#   #u
#   datcvU = droplevels(datcv[datcv$segment=="u",])
#   plot_ave_curves(df=datcvU, variable="deviceFormant", categories=cats,
#                   cols=colsN, lineTypes=ltys,
#                   response="formantValue", yrange=c(200, 2500), print.legend=F,
#                   smoothness=1,
#                 y_axis_label="Frequency (Hz)", 
#                 x_axis_label="Normalized time")
# dev.off()

6.5 plot model difference

6.5.1 Figure 14a, 15a: [i]

# i
plop(16,5)


# png
## f1
png(paste0(fig_path, "f1_i.png"),width=12,height=5.5, units="in", res=300)
plot_model_diff3(datcv.qgamDiff.vow, c(-120,120), sels=c(2,8,14,20), 
                y_axis_label = "Model estimated F1 (Hz)", 
                x_axis_label = "Proportion of vowel duration",
                title1 = paste0("/", xsampa('i:', to='ipa'), "/: AVR"), 
                title2 = paste0("/", xsampa('i:', to='ipa'), "/: Zoom-default"), 
                title3 = paste0("/", xsampa('i:', to='ipa'), "/: Zoom-raw")
                )

dev.off()
## png 
##   2
## f2
png(paste0(fig_path, "f2_i.png"),width=12,height=5.5, units="in", res=300)
plot_model_diff3(datcv.qgamDiff.vow, c(-150,200), sels=c(5,11,17,23), 
                y_axis_label = "Model estimated F2 (Hz)", 
                x_axis_label = "Proportion of vowel duration",
                title1 = paste0("/", xsampa('i:', to='ipa'), "/: AVR"), 
                title2 = paste0("/", xsampa('i:', to='ipa'), "/: Zoom-default"), 
                title3 = paste0("/", xsampa('i:', to='ipa'), "/: Zoom-raw")
                )
dev.off()
## png 
##   2
# # pdf
# 
# ## f1
# pdf(paste0(fig_path, "f1_i.pdf"),width=12,height=5.5)
# plot_model_diff3(datcv.qgamDiff.vow, c(-120,120), sels=c(2,8,14,20), 
#                 y_axis_label = "Model estimated F1 (Hz)", 
#                 x_axis_label = "Proportion of vowel duration",
#                 title1 = paste0("/", xsampa('i:', to='ipa'), "/: AVR"), 
#                 title2 = paste0("/", xsampa('i:', to='ipa'), "/: Zoom-default"), 
#                 title3 = paste0("/", xsampa('i:', to='ipa'), "/: Zoom-raw")
#                 )
# dev.off()
# 
# ## f2
# pdf(paste0(fig_path, "f2_i.pdf"),width=12,height=5.5)
# plot_model_diff3(datcv.qgamDiff.vow, c(-150,200), sels=c(5,11,17,23), 
#                 y_axis_label = "Model estimated F2 (Hz)", 
#                 x_axis_label = "Proportion of vowel duration",
#                 title1 = paste0("/", xsampa('i:', to='ipa'), "/: AVR"), 
#                 title2 = paste0("/", xsampa('i:', to='ipa'), "/: Zoom-default"), 
#                 title3 = paste0("/", xsampa('i:', to='ipa'), "/: Zoom-raw")
#                 )
# dev.off()

6.5.2 Figure 14b, 15b: [æ]

# a
plop(16,5)


# png
## f1
png(paste0(fig_path, "f1_a.png"),width=12,height=5.5, units="in", res=300)
plot_model_diff3(datcv.qgamDiff.vow, c(-120,120), sels=c(1,7,13,19), 
                y_axis_label = "Model estimated F1 (Hz)", 
                x_axis_label = "Proportion of vowel duration",
                title1 = paste0("/", cmu('AE', to='ipa'), "/: AVR"), 
                title2 = paste0("/", cmu('AE', to='ipa'), "/: Zoom-default"), 
                title3 = paste0("/", cmu('AE', to='ipa'), "/: Zoom-raw")
                )
dev.off()
## png 
##   2
## f2
png(paste0(fig_path, "f2_a.png"),width=12,height=5.5, units="in", res=300)
plot_model_diff3(datcv.qgamDiff.vow, c(-120,120), sels=c(4,10,16,22), 
                y_axis_label = "Model estimated F2 (Hz)", 
                x_axis_label = "Proportion of vowel duration",
                title1 = paste0("/", cmu('AE', to='ipa'), "/: AVR"), 
                title2 = paste0("/", cmu('AE', to='ipa'), "/: Zoom-default"), 
                title3 = paste0("/", cmu('AE', to='ipa'), "/: Zoom-raw"))
dev.off()
## png 
##   2
# # pdf
# pdf(paste0(fig_path, "f1_a.pdf"),width=12,height=5.5)
# plot_model_diff3(datcv.qgamDiff.vow, c(-120,120), sels=c(1,7,13,19), 
#                 y_axis_label = "Model estimated F1 (Hz)", 
#                 x_axis_label = "Proportion of vowel duration",
#                 title1 = paste0("/", cmu('AE', to='ipa'), "/: AVR"), 
#                 title2 = paste0("/", cmu('AE', to='ipa'), "/: Zoom-default"), 
#                 title3 = paste0("/", cmu('AE', to='ipa'), "/: Zoom-raw"))
# dev.off()
# 
# ## f2
# pdf(paste0(fig_path, "f2_a.pdf"),width=12,height=5.5)
# plot_model_diff3(datcv.qgamDiff.vow, c(-120,120), sels=c(4,10,16,22), 
#                 y_axis_label = "Model estimated F2 (Hz)", 
#                 x_axis_label = "Proportion of vowel duration",
#                 title1 = paste0("/", cmu('AE', to='ipa'), "/: AVR"), 
#                 title2 = paste0("/", cmu('AE', to='ipa'), "/: Zoom-default"), 
#                 title3 = paste0("/", cmu('AE', to='ipa'), "/: Zoom-raw"))
# dev.off()

6.5.3 Figure 14c, 15c: [u]

# u
plop(16,5)


# png
## f1
png(paste0(fig_path, "f1_u.png"),width=12,height=5.5, units="in", res=300)
plot_model_diff3(datcv.qgamDiff.vow, c(-120,150), sels=c(3,9,15,21), 
                y_axis_label = "Model estimated F1 (Hz)", 
                x_axis_label = "Proportion of vowel duration",
                title1 = paste0("/", xsampa('u:', to='ipa'), "/: AVR"), 
                title2 = paste0("/", xsampa('u:', to='ipa'), "/: Zoom-default"), 
                title3 = paste0("/", xsampa('u:', to='ipa'), "/: Zoom-raw")
                )
dev.off()
## png 
##   2
## f2
png(paste0(fig_path, "f2_u.png"),width=12,height=5.5, units="in", res=300)
plot_model_diff3(datcv.qgamDiff.vow, c(-120,150), sels=c(6,12,18,24), 
                y_axis_label = "Model estimated F2 (Hz)", 
                x_axis_label = "Proportion of vowel duration",
                title1 = paste0("/", xsampa('u:', to='ipa'), "/: AVR"), 
                title2 = paste0("/", xsampa('u:', to='ipa'), "/: Zoom-default"), 
                title3 = paste0("/", xsampa('u:', to='ipa'), "/: Zoom-raw")
                )
dev.off()
## png 
##   2
# # pdf
# pdf(paste0(fig_path, "f1_u.pdf"),width=12,height=5.5)
# plot_model_diff3(datcv.qgamDiff.vow, c(-120,150), sels=c(3,9,15,21), 
#                 y_axis_label = "Model estimated F1 (Hz)", 
#                 x_axis_label = "Proportion of vowel duration",
#                 title1 = paste0("/", xsampa('u:', to='ipa'), "/: AVR"), 
#                 title2 = paste0("/", xsampa('u:', to='ipa'), "/: Zoom-default"), 
#                 title3 = paste0("/", xsampa('u:', to='ipa'), "/: Zoom-raw")
#                 )
# dev.off()
# 
# ## f2
# pdf(paste0(fig_path, "f2_u.pdf"),width=12,height=5.5)
# plot_model_diff3(datcv.qgamDiff.vow, c(-120,150), sels=c(6,12,18,24), 
#                 y_axis_label = "Model estimated F2 (Hz)", 
#                 x_axis_label = "Proportion of vowel duration",
#                 title1 = paste0("/", xsampa('u:', to='ipa'), "/: AVR"), 
#                 title2 = paste0("/", xsampa('u:', to='ipa'), "/: Zoom-default"), 
#                 title3 = paste0("/", xsampa('u:', to='ipa'), "/: Zoom-raw")
#                 )
# dev.off()

7 Vowel space

7.1 Figure 16

The following plots are based on raw data. From left to right, time points are 0.1+/-0.02, 0.5+/-0.02, 0.9+/-0.02. The “+/- 0.02” is because that we don’t have time points that are exactly 0.1, 0.5, or 0.9. So we took the average of the datapoints in this range.

# png
png(paste0(fig_path, "vspace_3timepoints.png"),width=9,height=4, units="in", res=300)
plop(16,6)
plot_2D_formants_raw(datcv)
dev.off()
## png 
##   2
#pdf
pdf(paste0(fig_path, "vspace_3timepoints.pdf"),width=9,height=4)
plop(16,6)
plot_2D_formants_raw(datcv)
dev.off()
## png 
##   2

7.2 Figure 17

# png
png(paste0(fig_path, "vspace_FM.png"),width=9,height=6, units="in", res=300)
plop(10,6)
datcvF = datcv[datcv$gender=="F",]
datcvM = datcv[datcv$gender=="M",]
par(mfrow=c(1,2))
plot_2D_formants_all(datcvF, xrange = c(-2600, -1200), yrange = c(-1000,-300))
plot_2D_formants_all(datcvM, xrange = c(-2600, -1200), yrange = c(-1000,-300))
dev.off()
## png 
##   2
# pdf
pdf(paste0(fig_path, "vspace_FM.pdf"),width=9,height=6)
plop(10,6)
datcvF = datcv[datcv$gender=="F",]
datcvM = datcv[datcv$gender=="M",]
par(mfrow=c(1,2))
plot_2D_formants_all(datcvF, xrange = c(-2600, -1200), yrange = c(-1000,-300))
plot_2D_formants_all(datcvM, xrange = c(-2600, -1200), yrange = c(-1000,-300))
dev.off()
## png 
##   2