library(tidyverse)
## -- Attaching packages --------------------------------------------- tidyverse 1.2.1 --
## √ ggplot2 2.2.1     √ purrr   0.2.4
## √ tibble  1.4.2     √ dplyr   0.7.4
## √ tidyr   0.7.2     √ stringr 1.2.0
## √ readr   1.1.1     √ forcats 0.2.0
## -- Conflicts ------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Read the two-column data set (`august`, `september`); both columns are
# parsed as doubles, as the column specification printed below shows.
ounad <- read_csv(
  "http://www.tlu.ee/~jaagup/andmed/muu/ounad/antoonovka2.txt"
)
## Parsed with column specification:
## cols(
##   august = col_double(),
##   september = col_double()
## )
# Scatter plot: August value on the x axis, September value on the y axis.
ggplot(data = ounad, mapping = aes(x = august, y = september)) +
  geom_point()

# Correlation matrix for every pair of columns in the data set.
cor(x = ounad)
##             august september
## august    1.000000  0.892967
## september 0.892967  1.000000
# Pearson correlation test between the two columns on all rows (df = 98).
cor.test(x = ounad$august, y = ounad$september)
## 
##  Pearson's product-moment correlation
## 
## data:  ounad$august and ounad$september
## t = 19.639, df = 98, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8447060 0.9268248
## sample estimates:
##      cor 
## 0.892967
# Keep only the first ten rows to see how the test behaves on a small sample.
kymmeouna <- head(ounad, n = 10)

# Same test on the ten-row subset (df = 8); the reported confidence interval
# is wider than for the full data.
cor.test(x = kymmeouna$august, y = kymmeouna$september)
## 
##  Pearson's product-moment correlation
## 
## data:  kymmeouna$august and kymmeouna$september
## t = 9.1606, df = 8, p-value = 1.627e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8179395 0.9897073
## sample estimates:
##       cor 
## 0.9554913
# Scatter plot with a linear-model smoother drawn on top of the points.
ggplot(data = ounad, mapping = aes(x = august, y = september)) +
  geom_point() +
  geom_smooth(method = "lm")

# Linear regression of September on August, written with explicit `ounad$`
# column references; the slope is therefore labelled `ounad$august` below.
lm(formula = ounad$september ~ ounad$august)
## 
## Call:
## lm(formula = ounad$september ~ ounad$august)
## 
## Coefficients:
##  (Intercept)  ounad$august  
##         1.85          1.01
# Detailed summary of the same model: residual quantiles, coefficient
# standard errors and t tests, R-squared and the overall F test.
lm(formula = ounad$september ~ ounad$august) %>%
  summary()
## 
## Call:
## lm(formula = ounad$september ~ ounad$august)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.90315 -0.39738 -0.09633  0.30839  1.20315 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.84966    0.25183   7.345 6.16e-11 ***
## ounad$august  1.01049    0.05145  19.639  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4923 on 98 degrees of freedom
## Multiple R-squared:  0.7974, Adjusted R-squared:  0.7953 
## F-statistic: 385.7 on 1 and 98 DF,  p-value: < 2.2e-16
# Equivalent fit using bare column names resolved through `data`; the
# coefficients are the same and the slope is now labelled plain `august`.
lm(formula = september ~ august, data = ounad)
## 
## Call:
## lm(formula = september ~ august, data = ounad)
## 
## Coefficients:
## (Intercept)       august  
##        1.85         1.01
# New predictor values are handed to predict() as a data frame/tibble whose
# column is named like the predictor in the model formula (`august`).
tibble(august = c(4, 5, 6))
## # A tibble: 3 x 1
##   august
##    <dbl>
## 1   4.00
## 2   5.00
## 3   6.00
# Fit the regression once and reuse the fitted object for every prediction
# below instead of re-fitting the identical model for each call.
mudel <- lm(september ~ august, data = ounad)
# Predicted September values for August values 2, 3 and 4.
predict(mudel, newdata = tibble(august = c(2, 3, 4)))
##        1        2        3 
## 3.870636 4.881123 5.891610
# Store the same predictions next to their inputs so they can be plotted.
uuritavad <- tibble(august = c(2, 3, 4))
uuritavad$september <- predict(mudel, newdata = uuritavad)
uuritavad
## # A tibble: 3 x 2
##   august september
##    <dbl>     <dbl>
## 1   2.00      3.87
## 2   3.00      4.88
## 3   4.00      5.89
# Observed points in gray, predicted points in red; the second layer swaps in
# `uuritavad` as its data while inheriting the x/y mapping from ggplot().
ggplot(data = ounad, mapping = aes(x = august, y = september)) +
  geom_point(color = "gray") +
  geom_point(data = uuritavad, color = "red")

# Read the second data set, which adds a character column `ounasort` next to
# the numeric `august` and `september` columns (see the column spec below).
ounad2 <- read_csv(
  "http://www.tlu.ee/~jaagup/andmed/muu/ounad/liivi_antoonovka_aug_sept_1000.txt"
)
## Parsed with column specification:
## cols(
##   ounasort = col_character(),
##   august = col_double(),
##   september = col_double()
## )
# Scatter plot with the points coloured by `ounasort`.
ggplot(
  data = ounad2,
  mapping = aes(x = august, y = september, color = ounasort)
) +
  geom_point()

# Fit the two-predictor model once and keep it, so that the printed model and
# the predictions below are guaranteed to come from the same fit.
mudel2 <- lm(september ~ august + ounasort, data = ounad2)
mudel2
## 
## Call:
## lm(formula = september ~ august + ounasort, data = ounad2)
## 
## Coefficients:
##         (Intercept)               august  ounasortLiivi sibul  
##              2.0709               0.9928              -0.9405
# Predictions for August values 4 and 5 in each `ounasort` group; within a
# given August value the "Liivi sibul" prediction is lower by the
# `ounasortLiivi sibul` coefficient shown above.
predict(
  mudel2,
  newdata = tibble(
    august = c(4, 4, 5, 5),
    ounasort = c("Antoonovka", "Liivi sibul", "Antoonovka", "Liivi sibul")
  )
)
##        1        2        3        4 
## 6.042083 5.101616 7.034873 6.094406