# Sample heights for two independent groups (boys and girls).
poistepikkused <- c(172, 174, 173)
tydrukutepikkused <- c(167, 169, 171, 170)

# Compare the two group means. With default arguments t.test() runs the
# Welch (unequal-variance) two-sample test, as the recorded output shows.
t.test(x = poistepikkused, y = tydrukutepikkused)
##
## Welch Two Sample t-test
##
## data: poistepikkused and tydrukutepikkused
## t = 3.638, df = 4.8496, p-value = 0.01575
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.075396 6.424604
## sample estimates:
## mean of x mean of y
## 173.00 169.25
# Boys' heights recorded in January and in May. The two vectors are treated
# as repeated measurements of the same subjects, hence paired = TRUE.
poistepikkused_jaanuar <- c(170, 173, 173)
poistepikkused_mai <- c(172, 174, 173)

# Paired t-test on the element-wise differences (May minus January).
t.test(x = poistepikkused_mai, y = poistepikkused_jaanuar, paired = TRUE)
##
## Paired t-test
##
## data: poistepikkused_mai and poistepikkused_jaanuar
## t = 1.7321, df = 2, p-value = 0.2254
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.484138 3.484138
## sample estimates:
## mean of the differences
## 1
# Same two vectors, but analysed as independent samples (Welch test) for
# comparison with the paired result.
t.test(x = poistepikkused_mai, y = poistepikkused_jaanuar)
##
## Welch Two Sample t-test
##
## data: poistepikkused_mai and poistepikkused_jaanuar
## t = 0.86603, df = 3.2, p-value = 0.4465
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.548197 4.548197
## sample estimates:
## mean of x mean of y
## 173 172
# One-sided Welch test: is the May mean greater than the January mean?
t.test(
  x = poistepikkused_mai,
  y = poistepikkused_jaanuar,
  alternative = "greater"
)
##
## Welch Two Sample t-test
##
## data: poistepikkused_mai and poistepikkused_jaanuar
## t = 0.86603, df = 3.2, p-value = 0.2233
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## -1.649569 Inf
## sample estimates:
## mean of x mean of y
## 173 172
# One-sided paired test: is the mean of the May-minus-January differences
# greater than zero?
t.test(
  x = poistepikkused_mai,
  y = poistepikkused_jaanuar,
  paired = TRUE,
  alternative = "greater"
)
##
## Paired t-test
##
## data: poistepikkused_mai and poistepikkused_jaanuar
## t = 1.7321, df = 2, p-value = 0.1127
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## -0.6858545 Inf
## sample estimates:
## mean of the differences
## 1
# Illustreerige naiste ja meeste nimisõnade (S) kasutust tekstides.
# Püüdke t-testiga näidata nende sarnasust või erinevust.
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------- tidyverse 1.2.1 --
## <U+221A> ggplot2 3.0.0 <U+221A> purrr 0.2.5
## <U+221A> tibble 1.4.2 <U+221A> dplyr 0.7.6
## <U+221A> tidyr 0.8.1 <U+221A> stringr 1.3.1
## <U+221A> readr 1.1.1 <U+221A> forcats 0.3.0
## -- Conflicts ------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Download the per-document metadata table; it is joined to the word-class
# counts on `kood` further down.
dokmeta <- read_csv(
  file = "http://www.tlu.ee/~jaagup/andmed/keel/korpus/dokmeta.txt"
)
## Parsed with column specification:
## cols(
## kood = col_character(),
## korpus = col_character(),
## tekstikeel = col_character(),
## tekstityyp = col_character(),
## elukoht = col_character(),
## taust = col_character(),
## vanus = col_character(),
## sugu = col_character(),
## emakeel = col_character(),
## kodukeel = col_character(),
## keeletase = col_character(),
## haridus = col_character(),
## abivahendid = col_character()
## )
# Preview the first rows that have no missing value in any column.
dokmeta %>%
  na.omit() %>%
  head()
## # A tibble: 6 x 13
## kood korpus tekstikeel tekstityyp elukoht taust vanus sugu emakeel
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 doc_~ cFOoR~ eesti essee idaviru op kuni~ naine vene
## 2 doc_~ cFOoR~ eesti muu idaviru op kuni~ naine vene
## 3 doc_~ cFOoR~ eesti essee idaviru op kuni~ naine vene
## 4 doc_~ cFOoR~ eesti muu tallinn ylop kuni~ naine vene
## 5 doc_~ cFOoR~ eesti muu tallinn ylop kuni~ naine vene
## 6 doc_~ cFOoR~ eesti muu tallinn ylop kuni~ naine vene
## # ... with 4 more variables: kodukeel <chr>, keeletase <chr>,
## # haridus <chr>, abivahendid <chr>
# Preview the first rows for which the gender column (`sugu`) is present.
dokmeta %>%
  filter(!is.na(sugu)) %>%
  head()
## # A tibble: 6 x 13
## kood korpus tekstikeel tekstityyp elukoht taust vanus sugu emakeel
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 doc_~ cFOoR~ eesti essee idaviru op kuni~ naine vene
## 2 doc_~ cFOoR~ eesti muu idaviru op kuni~ naine vene
## 3 doc_~ cFOoR~ eesti essee idaviru op kuni~ naine vene
## 4 doc_~ cFOoR~ eesti muu tallinn ylop kuni~ naine vene
## 5 doc_~ cFOoR~ eesti muu tallinn ylop kuni~ naine vene
## 6 doc_~ cFOoR~ eesti muu tallinn ylop kuni~ naine vene
## # ... with 4 more variables: kodukeel <chr>, keeletase <chr>,
## # haridus <chr>, abivahendid <chr>
# Download the per-document table of integer count columns (A ... Z) plus the
# `kokku` column; presumably word counts per part-of-speech tag and their
# total -- confirm against the corpus documentation.
doksonaliigid <- read_csv(
  file = "http://www.tlu.ee/~jaagup/andmed/keel/korpus/doksonaliigid.txt"
)
## Parsed with column specification:
## cols(
## kood = col_character(),
## A = col_integer(),
## C = col_integer(),
## D = col_integer(),
## G = col_integer(),
## H = col_integer(),
## I = col_integer(),
## J = col_integer(),
## K = col_integer(),
## N = col_integer(),
## P = col_integer(),
## S = col_integer(),
## U = col_integer(),
## V = col_integer(),
## X = col_integer(),
## Y = col_integer(),
## Z = col_integer(),
## kokku = col_integer()
## )
# Keep only documents whose metadata is complete in every column, then attach
# the count columns by matching on the document code `kood`.
# NOTE(review): na.omit() also discards rows that are missing only fields the
# analyses below never read; confirm this is intended.
koos <- inner_join(na.omit(dokmeta), doksonaliigid, by = "kood")
head(koos)
## # A tibble: 6 x 30
## kood korpus tekstikeel tekstityyp elukoht taust vanus sugu emakeel
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 doc_~ cFOoR~ eesti essee idaviru op kuni~ naine vene
## 2 doc_~ cFOoR~ eesti muu idaviru op kuni~ naine vene
## 3 doc_~ cFOoR~ eesti essee idaviru op kuni~ naine vene
## 4 doc_~ cFOoR~ eesti muu tallinn ylop kuni~ naine vene
## 5 doc_~ cFOoR~ eesti muu tallinn ylop kuni~ naine vene
## 6 doc_~ cFOoR~ eesti muu tallinn ylop kuni~ naine vene
## # ... with 21 more variables: kodukeel <chr>, keeletase <chr>,
## # haridus <chr>, abivahendid <chr>, A <int>, C <int>, D <int>, G <int>,
## # H <int>, I <int>, J <int>, K <int>, N <int>, P <int>, S <int>,
## # U <int>, V <int>, X <int>, Y <int>, Z <int>, kokku <int>
# Raw noun counts (column S) per text, split by the author's gender.
meestenimisonad <- koos %>%
  filter(sugu == "mees") %>%
  pull(S)
naistenimisonad <- koos %>%
  filter(sugu == "naine") %>%
  pull(S)

# Welch test on the raw counts. These are not normalised by text length; the
# share-based comparison later in the file accounts for that.
t.test(x = meestenimisonad, y = naistenimisonad)
##
## Welch Two Sample t-test
##
## data: meestenimisonad and naistenimisonad
## t = -8.8883, df = 2706.9, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -44.03450 -28.11719
## sample estimates:
## mean of x mean of y
## 53.23359 89.30943
# Boxplot of `kokku` per text by gender, zoomed to the 0-1000 range.
# coord_cartesian() only changes the visible window, so texts above 1000 still
# contribute to the medians and quartiles. The earlier ylim(0, 1000) version
# dropped such rows before the statistics were computed (76 rows were removed
# with a warning in the recorded run).
ggplot(koos, aes(sugu, kokku)) +
  geom_boxplot() +
  coord_cartesian(ylim = c(0, 1000))
# Stacked histogram of `kokku` coloured by gender, restricted to 0-1000.
# NOTE(review): xlim() removes out-of-range rows before binning (see the
# warning below); an explicit binwidth would also silence the stat_bin message.
ggplot(data = koos, mapping = aes(x = kokku, fill = sugu)) +
  geom_histogram() +
  xlim(0, 1000)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 76 rows containing non-finite values (stat_bin).
# Histogram of the noun share (S divided by kokku) per text, one panel per
# gender.
ggplot(data = koos, mapping = aes(x = S / kokku)) +
  geom_histogram() +
  facet_wrap(~sugu)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 4 rows containing non-finite values (stat_bin).
# Noun share per text (S / kokku) together with the author's gender.
koos2 <- koos %>%
  mutate(nimisonaosakaal = S / kokku) %>%
  select(sugu, nimisonaosakaal)

# Welch test comparing the mean noun share of men's and women's texts.
# The argument expressions are kept as written because t.test() echoes them
# in the `data:` line of its output.
t.test(
  koos2[koos2$sugu == "mees", ]$nimisonaosakaal,
  koos2[koos2$sugu == "naine", ]$nimisonaosakaal
)
##
## Welch Two Sample t-test
##
## data: koos2[koos2$sugu == "mees", ]$nimisonaosakaal and koos2[koos2$sugu == "naine", ]$nimisonaosakaal
## t = -8.2892, df = 1335.8, p-value = 2.748e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.02765178 -0.01706819
## sample estimates:
## mean of x mean of y
## 0.2331012 0.2554612
# One-sample test: does the mean noun share in men's texts differ from 0.25?
t.test(
  koos2[koos2$sugu == "mees", ]$nimisonaosakaal,
  mu = 0.25
)
##
## One Sample t-test
##
## data: koos2[koos2$sugu == "mees", ]$nimisonaosakaal
## t = -7.2194, df = 789, p-value = 1.229e-12
## alternative hypothesis: true mean is not equal to 0.25
## 95 percent confidence interval:
## 0.2285064 0.2376960
## sample estimates:
## mean of x
## 0.2331012
# One-sample test for women's texts against 0.25, reporting a 99% confidence
# interval instead of the default 95%.
t.test(
  koos2[koos2$sugu == "naine", ]$nimisonaosakaal,
  mu = 0.25,
  conf.level = 0.99
)
##
## One Sample t-test
##
## data: koos2[koos2$sugu == "naine", ]$nimisonaosakaal
## t = 4.0734, df = 2033, p-value = 4.81e-05
## alternative hypothesis: true mean is not equal to 0.25
## 99 percent confidence interval:
## 0.2520046 0.2589178
## sample estimates:
## mean of x
## 0.2554612
# Boxplot of the noun share (S / kokku) for each language level (`keeletase`).
ggplot(koos, aes(keeletase, S / kokku)) +
  geom_boxplot()
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).
# Noun share per text together with the language level (`keeletase`).
# Note that this overwrites the gender-based `koos2` built earlier.
koos2 <- koos %>%
  mutate(nimisonaosakaal = S / kokku) %>%
  select(keeletase, nimisonaosakaal)

# One-way ANOVA of noun share across language levels. The model is fitted once
# and reused for the printout, the ANOVA table and the Tukey post-hoc test
# instead of being refitted for each call. The formula keeps the `koos2$...`
# form so that the labels in the recorded output stay the same.
anova_keeletase <- aov(koos2$nimisonaosakaal ~ koos2$keeletase)
anova_keeletase
## Call:
## aov(formula = koos2$nimisonaosakaal ~ koos2$keeletase)
##
## Terms:
## koos2$keeletase Residuals
## Sum of Squares 0.558411 10.573804
## Deg. of Freedom 8 2815
##
## Residual standard error: 0.06128814
## Estimated effects may be unbalanced
## 4 observations deleted due to missingness
summary(anova_keeletase)
## Df Sum Sq Mean Sq F value Pr(>F)
## koos2$keeletase 8 0.558 0.06980 18.58 <2e-16 ***
## Residuals 2815 10.574 0.00376
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 4 observations deleted due to missingness
# Pairwise comparisons between levels with family-wise error control.
TukeyHSD(anova_keeletase)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = koos2$nimisonaosakaal ~ koos2$keeletase)
##
## $`koos2$keeletase`
## diff lwr upr p adj
## A1-A 0.0997530619 -0.090585642 0.2900917662 0.7901502
## A2-A 0.0272170012 -0.001147260 0.0555812627 0.0717962
## B-A 0.0191631008 0.010881574 0.0274446278 0.0000000
## B1-A -0.0047398816 -0.019908381 0.0104286175 0.9884950
## B2-A -0.0037594995 -0.025330859 0.0178118605 0.9998209
## C-A 0.0390842856 0.027380794 0.0507877770 0.0000000
## C1-A 0.0058461926 -0.024090559 0.0357829442 0.9995808
## C2-A -0.0195671021 -0.209905806 0.1707716023 0.9999969
## A2-A1 -0.0725360606 -0.264797543 0.1197254218 0.9624843
## B-A1 -0.0805899611 -0.270927897 0.1097479752 0.9273383
## B1-A1 -0.1044929435 -0.295254653 0.0862687656 0.7465281
## B2-A1 -0.1035125613 -0.294889858 0.0878647358 0.7595775
## C-A1 -0.0606687762 -0.251186277 0.1298487243 0.9869842
## C1-A1 -0.0939068693 -0.286406624 0.0985928850 0.8491500
## C2-A1 -0.1193201640 -0.388371774 0.1497314457 0.9067389
## B-A2 -0.0080539005 -0.036413007 0.0203052062 0.9939236
## B1-A2 -0.0319568828 -0.063033218 -0.0008805477 0.0383189
## B2-A2 -0.0309765007 -0.065631642 0.0036786405 0.1233301
## C-A2 0.0118672844 -0.017672975 0.0414075442 0.9458705
## C1-A2 -0.0213708087 -0.061767149 0.0190255320 0.7813449
## C2-A2 -0.0467841033 -0.239045586 0.1454773791 0.9979257
## B1-B -0.0239029824 -0.039061840 -0.0087441247 0.0000365
## B2-B -0.0229226002 -0.044487182 -0.0013580188 0.0273001
## C-B 0.0199211848 0.008230192 0.0316121776 0.0000047
## C1-B -0.0133169082 -0.043248776 0.0166149594 0.9051502
## C2-B -0.0387302029 -0.229068139 0.1516077334 0.9994319
## B2-B1 0.0009803821 -0.024050216 0.0260109804 1.0000000
## C-B1 0.0438241672 0.026556307 0.0610920272 0.0000000
## C1-B1 0.0105860741 -0.021931865 0.0431040134 0.9849859
## C2-B1 -0.0148272205 -0.205588930 0.1759344886 0.9999997
## C-B2 0.0428437851 0.019747915 0.0659396547 0.0000003
## C1-B2 0.0096056920 -0.026347842 0.0455592257 0.9960039
## C2-B2 -0.0158076027 -0.207184900 0.1755696945 0.9999995
## C1-C -0.0332380931 -0.064291349 -0.0021848368 0.0253479
## C2-C -0.0586513877 -0.249168888 0.1318661128 0.9895706
## C2-C1 -0.0254132947 -0.217913049 0.1670864596 0.9999785
# Repeat the analysis for the four levels A2, B1, B2 and C1 only.
# Note that this overwrites the previous `koos2`.
koos2 <- koos %>%
  filter(keeletase %in% c("A2", "B1", "B2", "C1")) %>%
  mutate(nimisonaosakaal = S / kokku) %>%
  select(keeletase, nimisonaosakaal)

# One-way ANOVA on the reduced data. The model is fitted once and reused for
# the printout, the ANOVA table and the Tukey post-hoc test instead of being
# refitted for each call. The formula keeps the `koos2$...` form so that the
# labels in the recorded output stay the same.
anova_valitud_tasemed <- aov(koos2$nimisonaosakaal ~ koos2$keeletase)
anova_valitud_tasemed
## Call:
## aov(formula = koos2$nimisonaosakaal ~ koos2$keeletase)
##
## Terms:
## koos2$keeletase Residuals
## Sum of Squares 0.041287 1.146924
## Deg. of Freedom 3 354
##
## Residual standard error: 0.0569201
## Estimated effects may be unbalanced
## 1 observation deleted due to missingness
summary(anova_valitud_tasemed)
## Df Sum Sq Mean Sq F value Pr(>F)
## koos2$keeletase 3 0.0413 0.01376 4.248 0.00575 **
## Residuals 354 1.1469 0.00324
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness
# Pairwise comparisons between the four levels with family-wise error control.
TukeyHSD(anova_valitud_tasemed)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = koos2$nimisonaosakaal ~ koos2$keeletase)
##
## $`koos2$keeletase`
## diff lwr upr p adj
## B1-A2 -0.0319568828 -0.05595677 -0.007956995 0.0036624
## B2-A2 -0.0309765007 -0.05774026 -0.004212743 0.0158480
## C1-A2 -0.0213708087 -0.05256843 0.009826810 0.2904491
## B2-B1 0.0009803821 -0.01835045 0.020311219 0.9991977
## C1-B1 0.0105860741 -0.01452715 0.035699296 0.6970545
## C1-B2 0.0096056920 -0.01816080 0.037372183 0.8085111