Histogramm

  # Load the Antonovka measurements; the head() output below shows the two
  # numeric columns, `august` and `september`.
  ounad = read.table(
    "http://www.tlu.ee/~jaagup/andmed/muu/ounad/antoonovka2.txt",
    header = TRUE,
    sep = ","
  )
  head(ounad)
##   august september
## 1    6.0       7.9
## 2    4.0       5.7
## 3    5.2       6.6
## 4    4.1       5.4
## 5    5.7       7.9
## 6    4.2       5.9
  # Histogram of the August diameters with a fixed 0..25 count axis,
  # tick marks every 5 and horizontal tick labels (las = 1).
  hist(
    ounad$august,
    main = "Õunte läbimõõtude jaotus",
    xlab = "diameeter",
    ylab = "kogus",
    ylim = c(0, 25),
    yaxp = c(0, 25, 5),
    las = 1
  )
  # Red vertical line at the mean, with a rotated label just to its left.
  abline(col = "red", v = mean(ounad$august))
  text(x = mean(ounad$august) - 0.25, y = 10, labels = "keskmine", srt = 90)
  # Low-level drawing demo: green points at x = 3, 5, 6, 7 (all at height 15)
  # and a line segment from (2, 0) to (5, 20).
  points(c(3, 5:7), rep(15, 4), col = "green")
  segments(x0 = 2, y0 = 0, x1 = 5, y1 = 20)

#  lines(density(ounad$august))

Tihedusgraafik

 # Kernel density estimate of the August diameters; x axis fixed to 0..10
 # so that the September curve added below fits on the same axes.
 plot(density(ounad$august), xlim=c(0, 10))
 # Overlay the September density in blue.
 lines(density(ounad$september), col="blue")

XY

  # Bare scatter plot; the default axis labels come from the argument expressions.
  plot(ounad$august, ounad$september)

  # Add labels to the plot
  plot(
    ounad$august, ounad$september,
    main = "Õunte läbimõõdud",
    xlab = "Diameeter augustis, cm",
    ylab = "Diameeter septembris, cm"
  )

  # Mark in red the apples whose September/August diameter ratio exceeds 1.4
  paljukasvanud = ounad[with(ounad, september / august > 1.4), ]
  with(paljukasvanud, points(august, september, col = "red"))
  # Color blue the apples that grew less than 1.5 cm
  vahekasvanud = ounad[with(ounad, september - august < 1.5), ]
  with(vahekasvanud, points(august, september, col = "blue"))

Jitter

Funktsioon jitter muudab algandmeid vaevumärgatavalt. Praegu kasutame seda selleks, et samade koordinaatidega õunad ei satuks joonisel üksteise peale.

  # Scatter plot of slightly perturbed values. The points are large (cex=3),
  # filled (pch=19) and black with alpha 0.3, so overlapping apples show darker.
  plot(jitter(ounad$august), jitter(ounad$september), 
         col=rgb(0, 0, 0, 0.3), pch=19, cex=3)

Karp ja vurrud

  # Box-and-whisker plot of the August diameters alone.
  boxplot(ounad$august)

  #install.packages("reshape")
  library(reshape)
  # Reshape from wide (one column per month) to long form: one row per
  # measurement, with the month name in `variable` and the diameter in `value`
  # (see the head()/tail() output below).
  pikk=melt(ounad)
## Using  as id variables
  head(pikk)
##   variable value
## 1   august   6.0
## 2   august   4.0
## 3   august   5.2
## 4   august   4.1
## 5   august   5.7
## 6   august   4.2
  tail(pikk)
##      variable value
## 195 september   6.1
## 196 september   6.3
## 197 september   5.1
## 198 september   5.0
## 199 september   7.9
## 200 september   8.4
  # Rename the generic melt() columns to "kuu" (month) and "diameeter" (diameter).
  colnames(pikk)=c("kuu", "diameeter")
  head(pikk)
##      kuu diameeter
## 1 august       6.0
## 2 august       4.0
## 3 august       5.2
## 4 august       4.1
## 5 august       5.7
## 6 august       4.2
  # One box per month, drawn side by side from the long-form data.
  boxplot(pikk$diameeter~pikk$kuu)

  # Draw a histogram and a box-and-whisker plot
  # of the absolute and relative growth distributions
  # Absolute growth: September diameter minus August diameter, per row.
  vahesentimeetrid=ounad$september-ounad$august 
  # Relative growth: September diameter divided by August diameter, per row.
  suhe=ounad$september/ounad$august 
  hist(vahesentimeetrid)

  boxplot(vahesentimeetrid)

  hist(suhe)

  boxplot(suhe)

  head(ounad)
##   august september
## 1    6.0       7.9
## 2    4.0       5.7
## 3    5.2       6.6
## 4    4.1       5.4
## 5    5.7       7.9
## 6    4.2       5.9
  # Quartiles (0%, 25%, 50%, 75%, 100%) of the August diameters.
  quantile(ounad$august)
##    0%   25%   50%   75%  100% 
## 2.100 4.175 4.850 5.500 7.000
  # Plot the five quartile values by position.
  plot(quantile(ounad$august))

  # Same plot with the x axis suppressed and percentage labels written by hand.
  plot(quantile(ounad$august), xaxt="n")
  mtext(c("0%", "25%", "50%", "75%", "100%"),
         side=1, at=1:5)

  # Deciles: the labels come from the names quantile() attaches to its result,
  # and seq_along() supplies their positions (safer than 1:length(), which
  # yields c(1, 0) for an empty vector).
  kvantiilid=quantile(ounad$august, seq(0, 1, 0.1))
  plot(kvantiilid, xaxt="n")
  mtext(names(kvantiilid), side=1, at=seq_along(kvantiilid), cex=0.5)

  plot(ounad$august, ounad$september) # each dot is the same apple in two months

  # Normal Q-Q plot of the August diameters.
  qqnorm(ounad$august)

  # Normal Q-Q plot of the September diameters, with a reference line added.
  qqnorm(ounad$september)
  qqline(ounad$september)

  # Q-Q plot comparing the August and September distributions with each other.
  qqplot(ounad$august, ounad$september)

Kahe õunasordi võrdlemine

  # Replace `ounad` with the 1000-apple data set; the code below uses its
  # `ounasort` (variety) and `diameeter` (diameter) columns.
  ounad=read.table("http://www.tlu.ee/~jaagup/andmed/muu/ounad/ounad1000.txt", header=TRUE, sep=",")
  mean(ounad$diameeter)
## [1] 3.75649
  quantile(ounad$diameeter)
##      0%     25%     50%     75%    100% 
##  0.0100  2.4575  3.5650  4.8225 10.1000
  quantile(ounad$diameeter, seq(0, 1, 0.1))
##     0%    10%    20%    30%    40%    50%    60%    70%    80%    90% 
##  0.010  1.549  2.228  2.727  3.150  3.565  4.024  4.590  5.260  6.222 
##   100% 
## 10.100
  # Plot the deciles of the apple diameters;
  # if possible, add percentage labels as well
  kvantiilid=quantile(ounad$diameeter, seq(0, 1, 0.1))
  plot(kvantiilid, xaxt="n")
  # seq_along() positions one label per decile without the 1:length() pitfall.
  mtext(names(kvantiilid), side=1, at=seq_along(kvantiilid), cex=0.5)

 # Overall mean diameter across both varieties.
 mean(ounad$diameeter)
## [1] 3.75649
 # First rows (all columns) belonging to the variety "Liivi sibul".
 head(ounad[ounad$ounasort=="Liivi sibul", ])
##       ounasort diameeter
## 1  Liivi sibul      2.60
## 2  Liivi sibul      2.43
## 3  Liivi sibul      3.67
## 5  Liivi sibul      3.33
## 6  Liivi sibul      3.72
## 10 Liivi sibul      2.53
 # Only the diameter column for that variety; printed below as a plain vector.
 ounad[ounad$ounasort=="Liivi sibul", "diameeter"]
##   [1] 2.60 2.43 3.67 3.33 3.72 2.53 2.49 2.38 2.78 0.38 3.66 2.71 4.76 3.46
##  [15] 3.16 3.76 4.53 1.46 2.76 4.46 3.20 3.23 3.43 3.72 1.46 2.86 4.88 3.85
##  [29] 3.86 2.98 4.49 3.59 1.77 1.99 3.07 1.17 4.00 3.43 1.91 2.66 1.75 3.97
##  [43] 2.73 3.56 4.21 3.26 0.01 3.03 4.95 2.25 3.53 2.32 2.71 2.19 3.13 3.57
##  [57] 4.50 3.03 3.78 6.60 2.44 3.28 2.93 4.52 3.44 4.41 2.96 2.58 3.07 1.25
##  [71] 2.89 2.31 1.75 1.91 2.41 1.83 3.09 4.04 1.36 4.19 2.67 2.02 2.69 1.81
##  [85] 2.39 2.98 4.75 3.25 1.67 3.43 3.23 3.17 4.21 1.51 3.69 2.45 3.22 4.68
##  [99] 4.60 3.69 4.04 2.82 3.45 1.93 5.02 3.25 1.58 3.99 2.33 3.78 2.69 5.00
## [113] 4.80 1.68 3.22 4.29 1.71 2.33 3.15 3.19 2.85 3.64 1.98 2.50 3.10 3.74
## [127] 3.11 2.06 4.35 2.28 4.75 1.35 3.13 2.98 2.10 2.59 4.47 3.17 3.46 3.46
## [141] 4.22 3.72 3.04 3.11 0.66 3.76 1.75 3.27 4.34 2.96 3.81 1.86 4.64 2.60
## [155] 4.76 4.14 3.01 1.70 1.54 3.79 3.61 3.48 3.18 3.35 0.83 3.10 4.76 2.46
## [169] 3.37 4.99 2.27 4.16 2.08 2.30 1.78 2.80 3.05 3.17 2.26 4.20 2.59 2.96
## [183] 3.21 2.24 2.39 2.46 1.52 2.73 2.21 2.41 3.81 3.50 1.98 3.04 2.33 3.28
## [197] 3.39 2.62 3.34 4.13 3.51 0.53 3.72 4.70 2.96 2.62 4.60 3.97 2.44 3.63
## [211] 3.69 1.65 3.13 3.33 4.37 4.07 2.47 2.29 3.43 3.89 3.61 0.59 2.28 4.67
## [225] 4.55 2.18 2.83 2.71 3.19 3.30 2.09 2.82 3.78 2.62 1.75 5.22 3.07 2.94
## [239] 3.16 2.73 2.42 3.69 0.45 2.11 3.74 3.17 1.20 2.42 1.83 4.80 2.94 2.89
## [253] 3.64 1.32 3.83 2.33 2.44 2.41 3.24 3.76 2.88 1.91 2.27 3.89 2.13 1.73
## [267] 2.57 1.97 2.51 3.23 3.15 4.60 2.67 4.80 1.82 4.50 4.00 1.99 2.61 3.67
## [281] 1.59 2.78 2.50 1.03 2.93 2.98 1.37 2.86 1.76
 # Overall mean, then the mean diameter of each variety selected by hand.
 mean(ounad$diameeter)
## [1] 3.75649
 mean(ounad$diameeter[ounad$ounasort == "Liivi sibul"])
## [1] 3.011038
 mean(ounad$diameeter[ounad$ounasort == "Kuldrenett"])
## [1] 4.059494
 # The same per-variety summaries in one call each: mean, max, min.
 with(ounad, tapply(diameeter, ounasort, mean))
##  Kuldrenett Liivi sibul 
##    4.059494    3.011038
 with(ounad, tapply(diameeter, ounasort, max))
##  Kuldrenett Liivi sibul 
##        10.1         6.6
 with(ounad, tapply(diameeter, ounasort, min))
##  Kuldrenett Liivi sibul 
##        0.01        0.01
 # Spread (max minus min) of the diameters within each variety.
 with(ounad, tapply(diameeter, ounasort, function(d) diff(range(d))))
##  Kuldrenett Liivi sibul 
##       10.09        6.59
 # Bar chart of the per-variety means, first bare and then with a title
 # and a y-axis label.
 barplot(with(ounad, tapply(diameeter, ounasort, mean)))

 barplot(
   with(ounad, tapply(diameeter, ounasort, mean)),
   ylab = "Keskmine diameeter (cm)",
   main = "Diameetrite võrdlus"
 )

 # Box plots of the diameters, one box per variety.
 boxplot(ounad$diameeter ~ ounad$ounasort)

  # One-sample t-test of all diameters. No mu is given, so the null mean is 0
  # (see "alternative hypothesis" in the output); the part used in the text
  # below is the 95 percent confidence interval for the mean.
  t.test(ounad$diameeter)
## 
##  One Sample t-test
## 
## data:  ounad$diameeter
## t = 65.34, df = 999, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  3.643672 3.869308
## sample estimates:
## mean of x 
##   3.75649

Nende õunte diameetrite põhjal võin väita, et 95% tõenäosusega on selle aia õunte diameetrite keskmine vahemikus 3,64 kuni 3,87 cm

Ehk siis: kui korjan korduvalt 1000 õuna, mõõdan diameetrid ja arvutan iga kord samal viisil usaldusvahemiku, siis keskmiselt 95-l juhul sajast jääb aia õunte tegelik keskmine diameeter arvutatud vahemikku

95 percent confidence interval: 3.643672 3.869308

99 percent confidence interval: 3.608118 3.904862

  # Same one-sample test, but reporting a 99 percent confidence interval;
  # the output shows it is wider than the 95 percent one.
  t.test(ounad$diameeter, conf.level = 0.99)
## 
##  One Sample t-test
## 
## data:  ounad$diameeter
## t = 65.34, df = 999, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 99 percent confidence interval:
##  3.608118 3.904862
## sample estimates:
## mean of x 
##   3.75649

Võrdlus eelmise aasta keskmisega (4.0)

  # Test whether the mean diameter differs from the reference value 4.0
  # (two-sided, as the "not equal to 4" line in the output shows).
  t.test(ounad$diameeter, mu=4.0)
## 
##  One Sample t-test
## 
## data:  ounad$diameeter
## t = -4.2356, df = 999, p-value = 2.49e-05
## alternative hypothesis: true mean is not equal to 4
## 95 percent confidence interval:
##  3.643672 3.869308
## sample estimates:
## mean of x 
##   3.75649

t = -4.2356, df = 999, p-value = 2.49e-05. Praegune keskmine (3,76) on võrreldavast keskmisest (4.0) 4,24 standardvea jagu väiksem (Studenti t-statistik). Kui õunte tegelik keskmine oleks 4, siis tõenäosus saada juhuslikult vähemalt nii suur erinevus on 2.49e-05 ehk 0,0000249.

  # Same two-sided test against the reference value 3.8; here the p-value in
  # the output is large (0.4493).
  t.test(ounad$diameeter, mu=3.8)
## 
##  One Sample t-test
## 
## data:  ounad$diameeter
## t = -0.75681, df = 999, p-value = 0.4493
## alternative hypothesis: true mean is not equal to 3.8
## 95 percent confidence interval:
##  3.643672 3.869308
## sample estimates:
## mean of x 
##   3.75649

Kui õunte tegelik keskmine diameeter oleks 3,8, siis tõenäosus saada juhuslikult vähemalt nii suur erinevus on 44,9%

 # Two-sample (Welch) t-test comparing the diameters of "Liivi sibul" (x)
 # with those of "Kuldrenett" (y); the confidence interval in the output is
 # for the difference in means, x minus y.
 t.test(ounad[ounad$ounasort=="Liivi sibul", "diameeter"],
        ounad[ounad$ounasort=="Kuldrenett", "diameeter"])
## 
##  Welch Two Sample t-test
## 
## data:  ounad[ounad$ounasort == "Liivi sibul", "diameeter"] and ounad[ounad$ounasort == "Kuldrenett", "diameeter"]
## t = -11, df = 946.53, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.2355042 -0.8614071
## sample estimates:
## mean of x mean of y 
##  3.011038  4.059494

95 percent confidence interval: -1.2355042 -0.8614071

95% tõenäosusega on Liivi sibulate keskmine diameeter Kuldrenettide keskmisest diameetrist selles aias väiksem 0,86 kuni 1,24 cm võrra

 #hist(ounad$diameeter)
 # Zoom the density plot to +/- 40% around the sample mean so that the mean
 # and its confidence limits can be told apart on the x axis.
 nahtav=0.4
 keskmine=mean(ounad$diameeter)
 xpiirid=c((1-nahtav)*keskmine, (1+nahtav)*keskmine)
 plot(density(ounad$diameeter), xlim=xpiirid)
 # Red dashed line at the sample mean; reuses `keskmine` instead of
 # computing the mean a second time.
 abline(v=keskmine, col="red", lty=2)
 # Blue lines at both ends of the 99% confidence interval for the mean.
 tulemus=t.test(ounad$diameeter, conf.level=0.99)
 abline(v=tulemus$conf.int, col="blue")

  # Reload the August/September data set (this replaces the 1000-apple `ounad`).
  ounad=read.table("http://www.tlu.ee/~jaagup/andmed/muu/ounad/antoonovka2.txt", sep=",", header=TRUE)
  head(ounad)
##   august september
## 1    6.0       7.9
## 2    4.0       5.7
## 3    5.2       6.6
## 4    4.1       5.4
## 5    5.7       7.9
## 6    4.2       5.9
  # Scatter plot; the regression line at the end of this chunk is drawn onto it.
  plot(ounad$august, ounad$september)
  # Pearson correlation between the two months' diameters.
  cor(ounad$august, ounad$september)
## [1] 0.892967
  # Correlation test with the default 95 percent confidence interval.
  cor.test(ounad$august, ounad$september)
## 
##  Pearson's product-moment correlation
## 
## data:  ounad$august and ounad$september
## t = 19.639, df = 98, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8447060 0.9268248
## sample estimates:
##      cor 
## 0.892967
  # Same test with a 90 percent confidence interval (narrower, see output).
  cor.test(ounad$august, ounad$september, conf.level=0.90)
## 
##  Pearson's product-moment correlation
## 
## data:  ounad$august and ounad$september
## t = 19.639, df = 98, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  0.8536274 0.9221775
## sample estimates:
##      cor 
## 0.892967
  # Linear regression of the September diameter on the August diameter.
  lm(ounad$september~ounad$august)
## 
## Call:
## lm(formula = ounad$september ~ ounad$august)
## 
## Coefficients:
##  (Intercept)  ounad$august  
##         1.85          1.01
  # Add the fitted regression line to the current plot.
  abline(lm(ounad$september~ounad$august))

`lm(ounad$september~ounad$august)` Coefficients: (Intercept) = 1.85, `ounad$august` = 1.01

Õunte ennustatav diameeter septembris =

1.85 cm + 1.01 × diameeter augustis