library(tidyverse)
## -- Attaching packages ------------------------------------------------ tidyverse 1.2.1 --
## √ ggplot2 3.0.0 √ purrr 0.2.5
## √ tibble 1.4.2 √ dplyr 0.7.6
## √ tidyr 0.8.1 √ stringr 1.3.1
## √ readr 1.1.1 √ forcats 0.3.0
## -- Conflicts --------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Download the dokarvud table; readr guesses the column types and reports the
# specification it settled on (a character `kood` column plus numeric columns).
dokarvud <- read_csv(
  file = "http://www.tlu.ee/~jaagup/andmed/keel/korpus/dokarvud.txt"
)
## Parsed with column specification:
## cols(
## kood = col_character(),
## tahti = col_integer(),
## sonu = col_integer(),
## lauseid = col_integer(),
## vigu = col_integer(),
## veatyype = col_integer(),
## kolmetahelistepr = col_double(),
## viietahelistepr = col_double(),
## kymnejarohkemtahelistepr = col_double(),
## kahesonalistepr = col_double(),
## kolmesonalistepr = col_double(),
## kuuekuni9sonalistepr = col_double(),
## kymnekuni20sonalistepr = col_double()
## )
# Pearson correlation (cor()'s default method) between tahti and sonu.
with(dokarvud, cor(tahti, sonu))
## [1] 0.9485238
# Scatter plot with tahti on the x axis and sonu on the y axis.
dokarvud %>%
  ggplot(aes(x = tahti, y = sonu)) +
  geom_point()
# Correlation between the two percentage columns.
with(dokarvud, cor(kolmetahelistepr, kymnejarohkemtahelistepr))
## [1] 0.04020447
# Scatter plot of kymnejarohkemtahelistepr against kolmetahelistepr, keeping
# only the rows where kolmetahelistepr is at least 10.
ggplot(
  filter(dokarvud, kolmetahelistepr >= 10),
  aes(x = kolmetahelistepr, y = kymnejarohkemtahelistepr)
) +
  geom_point()
# Pairwise correlation matrix of every column except the character column kood.
cor(select(dokarvud, -kood))
## tahti sonu lauseid vigu
## tahti 1.00000000 0.948523752 0.76349587 0.11671231
## sonu 0.94852375 1.000000000 0.87509753 0.09399618
## lauseid 0.76349587 0.875097529 1.00000000 0.03921906
## vigu 0.11671231 0.093996181 0.03921906 1.00000000
## veatyype 0.06077898 0.034037878 -0.01433122 0.91320970
## kolmetahelistepr 0.26874036 0.285605263 0.22136377 0.13545784
## viietahelistepr -0.02614740 -0.001154944 0.02417365 0.02773669
## kymnejarohkemtahelistepr 0.34035927 0.299882653 0.23341836 0.07798654
## kahesonalistepr -0.27392780 -0.241113660 -0.13734791 -0.26149534
## kolmesonalistepr -0.25697597 -0.230008481 -0.11005180 -0.12357209
## kuuekuni9sonalistepr -0.10023959 -0.071771981 -0.02069215 0.01784823
## kymnekuni20sonalistepr 0.32848945 0.248616557 0.02539867 0.13799848
## veatyype kolmetahelistepr viietahelistepr
## tahti 0.06077898 0.26874036 -0.026147395
## sonu 0.03403788 0.28560526 -0.001154944
## lauseid -0.01433122 0.22136377 0.024173645
## vigu 0.91320970 0.13545784 0.027736689
## veatyype 1.00000000 0.16312916 0.034766664
## kolmetahelistepr 0.16312916 1.00000000 0.146729210
## viietahelistepr 0.03476666 0.14672921 1.000000000
## kymnejarohkemtahelistepr 0.09011467 0.04020447 -0.084268512
## kahesonalistepr -0.30795975 -0.29144859 0.007988313
## kolmesonalistepr -0.14088647 -0.17852454 0.061309363
## kuuekuni9sonalistepr 0.02714058 0.08658549 0.202496419
## kymnekuni20sonalistepr 0.16369640 0.31855493 0.029144301
## kymnejarohkemtahelistepr kahesonalistepr
## tahti 0.34035927 -0.273927797
## sonu 0.29988265 -0.241113660
## lauseid 0.23341836 -0.137347912
## vigu 0.07798654 -0.261495337
## veatyype 0.09011467 -0.307959749
## kolmetahelistepr 0.04020447 -0.291448594
## viietahelistepr -0.08426851 0.007988313
## kymnejarohkemtahelistepr 1.00000000 -0.173145261
## kahesonalistepr -0.17314526 1.000000000
## kolmesonalistepr -0.16194025 0.307186145
## kuuekuni9sonalistepr -0.04886594 -0.011285771
## kymnekuni20sonalistepr 0.31412171 -0.341191678
## kolmesonalistepr kuuekuni9sonalistepr
## tahti -0.25697597 -0.10023959
## sonu -0.23000848 -0.07177198
## lauseid -0.11005180 -0.02069215
## vigu -0.12357209 0.01784823
## veatyype -0.14088647 0.02714058
## kolmetahelistepr -0.17852454 0.08658549
## viietahelistepr 0.06130936 0.20249642
## kymnejarohkemtahelistepr -0.16194025 -0.04886594
## kahesonalistepr 0.30718614 -0.01128577
## kolmesonalistepr 1.00000000 -0.13223122
## kuuekuni9sonalistepr -0.13223122 1.00000000
## kymnekuni20sonalistepr -0.39380628 -0.28428212
## kymnekuni20sonalistepr
## tahti 0.32848945
## sonu 0.24861656
## lauseid 0.02539867
## vigu 0.13799848
## veatyype 0.16369640
## kolmetahelistepr 0.31855493
## viietahelistepr 0.02914430
## kymnejarohkemtahelistepr 0.31412171
## kahesonalistepr -0.34119168
## kolmesonalistepr -0.39380628
## kuuekuni9sonalistepr -0.28428212
## kymnekuni20sonalistepr 1.00000000
# Scatter-plot matrix of all columns except kood, for the first 100 rows.
pairs(select(head(dokarvud, 100), -kood))
# Test whether the tahti/sonu correlation differs from zero. The options
# spelled out here are cor.test()'s defaults; the `dokarvud$...` expressions
# are kept because cor.test() prints them as the data name.
cor.test(
  x = dokarvud$tahti,
  y = dokarvud$sonu,
  alternative = "two.sided",
  method = "pearson",
  conf.level = 0.95
)
##
## Pearson's product-moment correlation
##
## data: dokarvud$tahti and dokarvud$sonu
## t = 337.81, df = 12722, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9467517 0.9502383
## sample estimates:
## cor
## 0.9485238
# Alternative kept for reference: take the first 100 rows instead of a random
# sample.
# esisada <- head(dokarvud, 100)
# Seed the RNG so the 100-row random sample, and therefore the test result, is
# identical on every run. Output recorded from an earlier unseeded draw will
# not match these numbers exactly.
set.seed(2018)
esisada <- sample_n(dokarvud, 100)
cor.test(esisada$tahti, esisada$sonu)
##
## Pearson's product-moment correlation
##
## data: esisada$tahti and esisada$sonu
## t = 43.183, df = 98, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9625861 0.9829466
## sample estimates:
## cor
## 0.9747154
# Scatter plot with geom_smooth()'s default smoother drawn on top.
ggplot(dokarvud, aes(x = tahti, y = sonu)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Scatter plot with a straight-line (method = "lm") fit drawn on top.
ggplot(dokarvud, aes(x = tahti, y = sonu)) +
  geom_point() +
  geom_smooth(method = "lm")
# Linear regression of tahti on sonu, referencing the columns through `$`.
# Note that the roles are swapped relative to the plot, where sonu is on the
# y axis.
lm(formula = dokarvud$tahti ~ dokarvud$sonu)
##
## Call:
## lm(formula = dokarvud$tahti ~ dokarvud$sonu)
##
## Coefficients:
## (Intercept) dokarvud$sonu
## -161.978 7.241
# Detailed summary (residuals, coefficient table, R-squared) of the regression
# of tahti on sonu.
lm(formula = dokarvud$tahti ~ dokarvud$sonu) %>%
  summary()
##
## Call:
## lm(formula = dokarvud$tahti ~ dokarvud$sonu)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8302.7 -90.6 46.7 140.0 9380.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -161.97760 8.69239 -18.63 <2e-16 ***
## dokarvud$sonu 7.24094 0.02144 337.81 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 722.4 on 12722 degrees of freedom
## Multiple R-squared: 0.8997, Adjusted R-squared: 0.8997
## F-statistic: 1.141e+05 on 1 and 12722 DF, p-value: < 2.2e-16
# Regression of tahti on sonu written with bare column names and a `data`
# argument.
lm(formula = tahti ~ sonu, data = dokarvud)
##
## Call:
## lm(formula = tahti ~ sonu, data = dokarvud)
##
## Coefficients:
## (Intercept) sonu
## -161.978 7.241
# Predicted tahti values for sonu = 10, 100 and 1000 from the fitted line.
lm(formula = tahti ~ sonu, data = dokarvud) %>%
  predict(newdata = tibble(sonu = c(10, 100, 1000)))
## 1 2 3
## -89.56823 562.11608 7078.95917
# Download the weekly sales transactions data set from the UCI repository.
andmed <- read_csv(
  file = "http://archive.ics.uci.edu/ml/machine-learning-databases/00396/Sales_Transactions_Dataset_Weekly.csv"
)
## Parsed with column specification:
## cols(
## .default = col_integer(),
## Product_Code = col_character(),
## `Normalized 0` = col_double(),
## `Normalized 1` = col_double(),
## `Normalized 2` = col_double(),
## `Normalized 3` = col_double(),
## `Normalized 4` = col_double(),
## `Normalized 5` = col_double(),
## `Normalized 6` = col_double(),
## `Normalized 7` = col_double(),
## `Normalized 8` = col_double(),
## `Normalized 9` = col_double(),
## `Normalized 10` = col_double(),
## `Normalized 11` = col_double(),
## `Normalized 12` = col_double(),
## `Normalized 13` = col_double(),
## `Normalized 14` = col_double(),
## `Normalized 15` = col_double(),
## `Normalized 16` = col_double(),
## `Normalized 17` = col_double(),
## `Normalized 18` = col_double()
## # ... with 33 more columns
## )
## See spec(...) for full column specifications.
# List all column names of the sales table.
names(andmed)
## [1] "Product_Code" "W0" "W1" "W2"
## [5] "W3" "W4" "W5" "W6"
## [9] "W7" "W8" "W9" "W10"
## [13] "W11" "W12" "W13" "W14"
## [17] "W15" "W16" "W17" "W18"
## [21] "W19" "W20" "W21" "W22"
## [25] "W23" "W24" "W25" "W26"
## [29] "W27" "W28" "W29" "W30"
## [33] "W31" "W32" "W33" "W34"
## [37] "W35" "W36" "W37" "W38"
## [41] "W39" "W40" "W41" "W42"
## [45] "W43" "W44" "W45" "W46"
## [49] "W47" "W48" "W49" "W50"
## [53] "W51" "MIN" "MAX" "Normalized 0"
## [57] "Normalized 1" "Normalized 2" "Normalized 3" "Normalized 4"
## [61] "Normalized 5" "Normalized 6" "Normalized 7" "Normalized 8"
## [65] "Normalized 9" "Normalized 10" "Normalized 11" "Normalized 12"
## [69] "Normalized 13" "Normalized 14" "Normalized 15" "Normalized 16"
## [73] "Normalized 17" "Normalized 18" "Normalized 19" "Normalized 20"
## [77] "Normalized 21" "Normalized 22" "Normalized 23" "Normalized 24"
## [81] "Normalized 25" "Normalized 26" "Normalized 27" "Normalized 28"
## [85] "Normalized 29" "Normalized 30" "Normalized 31" "Normalized 32"
## [89] "Normalized 33" "Normalized 34" "Normalized 35" "Normalized 36"
## [93] "Normalized 37" "Normalized 38" "Normalized 39" "Normalized 40"
## [97] "Normalized 41" "Normalized 42" "Normalized 43" "Normalized 44"
## [101] "Normalized 45" "Normalized 46" "Normalized 47" "Normalized 48"
## [105] "Normalized 49" "Normalized 50" "Normalized 51"
# Keep only the weekly columns W0..W51.
nadalad <- select(andmed, W0:W51)
# Week-by-week difference between the first and the third row.
nadalad[1, ] - nadalad[3, ]
## W0 W1 W2 W3 W4 W5 W6 W7 W8 W9 W10 W11 W12 W13 W14 W15 W16 W17 W18 W19
## 1 4 1 2 -1 3 4 7 8 -6 8 -3 5 12 2 -3 1 7 -2 -7 5
## W20 W21 W22 W23 W24 W25 W26 W27 W28 W29 W30 W31 W32 W33 W34 W35 W36 W37
## 1 -2 -2 -4 4 -5 1 -7 5 2 -2 -4 1 5 -2 -2 5 4 -3
## W38 W39 W40 W41 W42 W43 W44 W45 W46 W47 W48 W49 W50 W51
## 1 0 8 4 6 -2 -7 3 5 5 -5 -7 -2 -3 3
# TRUE for each week in which the row 1 value exceeds the row 3 value.
(nadalad[1, ] - nadalad[3, ]) > 0
## W0 W1 W2 W3 W4 W5 W6 W7 W8 W9 W10 W11 W12
## [1,] TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE
## W13 W14 W15 W16 W17 W18 W19 W20 W21 W22 W23 W24
## [1,] TRUE FALSE TRUE TRUE FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE
## W25 W26 W27 W28 W29 W30 W31 W32 W33 W34 W35 W36
## [1,] TRUE FALSE TRUE TRUE FALSE FALSE TRUE TRUE FALSE FALSE TRUE TRUE
## W37 W38 W39 W40 W41 W42 W43 W44 W45 W46 W47 W48
## [1,] FALSE FALSE TRUE TRUE TRUE FALSE FALSE TRUE TRUE TRUE FALSE FALSE
## W49 W50 W51
## [1,] FALSE FALSE TRUE
# Names of the weeks in which the row 1 value exceeds the row 3 value.
colnames(nadalad)[(nadalad[1, ] - nadalad[3, ]) > 0]
## [1] "W0" "W1" "W2" "W4" "W5" "W6" "W7" "W9" "W11" "W12" "W13"
## [12] "W15" "W16" "W19" "W23" "W25" "W27" "W28" "W31" "W32" "W35" "W36"
## [23] "W39" "W40" "W41" "W44" "W45" "W46" "W51"
# Same question answered with a dplyr pipeline: in which weeks is the P1 value
# larger than the P3 value?
nadalad <- andmed %>% select(Product_Code, W0:W51)
nadalad %>%
  filter(Product_Code %in% c("P1", "P3")) %>%
  select(-Product_Code) %>%
  # Transpose so that each week becomes a row; as_tibble() names the two
  # unnamed columns V1 and V2 (assumes P1 precedes P3 in the data).
  t() %>%
  as_tibble() %>%
  mutate(
    # Derive the week labels from the column names instead of a hard-coded
    # 2:53 range, so the code keeps working if the number of weeks changes.
    nadal = setdiff(colnames(nadalad), "Product_Code"),
    vahe = V1 - V2
  ) %>%
  filter(vahe > 0) %>%
  pull(nadal)
## [1] "W0" "W1" "W2" "W4" "W5" "W6" "W7" "W9" "W11" "W12" "W13"
## [12] "W15" "W16" "W19" "W23" "W25" "W27" "W28" "W31" "W32" "W35" "W36"
## [23] "W39" "W40" "W41" "W44" "W45" "W46" "W51"
# Keep only the weekly columns again.
nadalad <- select(andmed, W0:W51)
# Correlate every product's weekly series with every other (products become
# columns after t()), take the correlations with the first product, and list
# the product codes from least to most correlated.
nadalad %>%
  t() %>%
  cor() %>%
  .[, 1] %>%
  order() %>%
  andmed$Product_Code[.]
## [1] "P704" "P735" "P389" "P705" "P374" "P372" "P764" "P340" "P362" "P804"
## [11] "P789" "P743" "P248" "P358" "P675" "P221" "P117" "P356" "P339" "P685"
## [21] "P707" "P422" "P347" "P775" "P555" "P490" "P378" "P818" "P800" "P765"
## [31] "P233" "P752" "P576" "P795" "P478" "P689" "P206" "P721" "P217" "P784"
## [41] "P793" "P386" "P780" "P405" "P720" "P692" "P429" "P594" "P694" "P534"
## [51] "P737" "P734" "P455" "P408" "P223" "P409" "P367" "P253" "P382" "P376"
## [61] "P699" "P600" "P674" "P418" "P291" "P470" "P456" "P741" "P637" "P410"
## [71] "P258" "P807" "P354" "P683" "P672" "P686" "P402" "P205" "P706" "P616"
## [81] "P95" "P733" "P597" "P698" "P262" "P671" "P419" "P263" "P779" "P792"
## [91] "P589" "P755" "P801" "P503" "P663" "P806" "P745" "P361" "P732" "P373"
## [101] "P697" "P664" "P420" "P771" "P106" "P346" "P650" "P799" "P703" "P278"
## [111] "P601" "P785" "P202" "P100" "P392" "P480" "P238" "P532" "P769" "P444"
## [121] "P652" "P438" "P599" "P797" "P255" "P242" "P757" "P432" "P277" "P729"
## [131] "P413" "P591" "P393" "P384" "P417" "P287" "P232" "P653" "P368" "P265"
## [141] "P359" "P396" "P447" "P170" "P796" "P371" "P606" "P776" "P216" "P398"
## [151] "P638" "P603" "P773" "P250" "P460" "P459" "P604" "P610" "P551" "P461"
## [161] "P6" "P538" "P611" "P96" "P237" "P753" "P712" "P568" "P586" "P542"
## [171] "P484" "P596" "P742" "P476" "P252" "P590" "P809" "P274" "P512" "P578"
## [181] "P575" "P768" "P257" "P311" "P595" "P295" "P391" "P646" "P760" "P375"
## [191] "P116" "P608" "P366" "P781" "P751" "P640" "P200" "P270" "P94" "P436"
## [201] "P273" "P474" "P293" "P330" "P772" "P472" "P449" "P582" "P536" "P395"
## [211] "P394" "P762" "P673" "P749" "P241" "P169" "P211" "P492" "P251" "P816"
## [221] "P269" "P639" "P39" "P360" "P808" "P324" "P344" "P657" "P592" "P403"
## [231] "P80" "P312" "P711" "P448" "P260" "P759" "P411" "P502" "P783" "P350"
## [241] "P442" "P243" "P256" "P383" "P247" "P814" "P782" "P349" "P504" "P320"
## [251] "P226" "P246" "P16" "P224" "P740" "P285" "P533" "P676" "P156" "P805"
## [261] "P497" "P540" "P634" "P426" "P264" "P412" "P819" "P643" "P651" "P607"
## [271] "P580" "P105" "P724" "P195" "P728" "P598" "P272" "P570" "P397" "P577"
## [281] "P234" "P286" "P118" "P13" "P475" "P261" "P441" "P670" "P351" "P416"
## [291] "P520" "P767" "P514" "P281" "P228" "P230" "P649" "P154" "P556" "P236"
## [301] "P669" "P613" "P377" "P400" "P754" "P526" "P43" "P585" "P462" "P691"
## [311] "P213" "P778" "P121" "P541" "P380" "P662" "P736" "P627" "P584" "P8"
## [321] "P33" "P495" "P296" "P115" "P794" "P443" "P695" "P201" "P343" "P700"
## [331] "P464" "P282" "P326" "P297" "P791" "P189" "P316" "P306" "P731" "P573"
## [341] "P626" "P810" "P177" "P505" "P275" "P58" "P572" "P817" "P655" "P430"
## [351] "P321" "P623" "P268" "P310" "P301" "P748" "P679" "P802" "P37" "P73"
## [361] "P288" "P337" "P207" "P696" "P482" "P702" "P750" "P615" "P249" "P172"
## [371] "P352" "P774" "P583" "P629" "P240" "P208" "P546" "P786" "P587" "P289"
## [381] "P756" "P82" "P25" "P680" "P666" "P159" "P74" "P668" "P53" "P342"
## [391] "P122" "P440" "P510" "P516" "P813" "P235" "P434" "P529" "P458" "P77"
## [401] "P465" "P424" "P559" "P328" "P198" "P642" "P744" "P803" "P284" "P421"
## [411] "P428" "P467" "P468" "P644" "P722" "P763" "P27" "P747" "P563" "P103"
## [421] "P65" "P336" "P746" "P544" "P537" "P129" "P605" "P136" "P677" "P150"
## [431] "P307" "P283" "P525" "P399" "P239" "P28" "P245" "P481" "P290" "P687"
## [441] "P44" "P446" "P633" "P7" "P770" "P518" "P401" "P47" "P660" "P787"
## [451] "P714" "P219" "P355" "P715" "P113" "P335" "P445" "P4" "P515" "P203"
## [461] "P562" "P130" "P209" "P678" "P693" "P654" "P507" "P364" "P423" "P738"
## [471] "P530" "P62" "P204" "P3" "P280" "P473" "P719" "P667" "P535" "P628"
## [481] "P303" "P425" "P450" "P488" "P647" "P34" "P294" "P777" "P107" "P140"
## [491] "P338" "P14" "P758" "P212" "P454" "P153" "P126" "P619" "P499" "P479"
## [501] "P493" "P690" "P174" "P379" "P766" "P407" "P333" "P227" "P199" "P91"
## [511] "P137" "P299" "P466" "P661" "P98" "P279" "P471" "P112" "P487" "P143"
## [521] "P183" "P187" "P111" "P388" "P406" "P548" "P369" "P357" "P489" "P81"
## [531] "P469" "P659" "P60" "P477" "P553" "P726" "P739" "P717" "P636" "P108"
## [541] "P593" "P31" "P370" "P345" "P609" "P483" "P69" "P220" "P266" "P59"
## [551] "P494" "P581" "P225" "P78" "P327" "P500" "P12" "P713" "P579" "P68"
## [561] "P57" "P381" "P656" "P125" "P109" "P558" "P124" "P67" "P348" "P55"
## [571] "P545" "P612" "P135" "P414" "P119" "P92" "P415" "P433" "P665" "P435"
## [581] "P19" "P457" "P63" "P139" "P215" "P254" "P259" "P716" "P761" "P29"
## [591] "P110" "P2" "P131" "P341" "P521" "P267" "P602" "P528" "P727" "P790"
## [601] "P452" "P304" "P565" "P138" "P363" "P708" "P231" "P427" "P64" "P182"
## [611] "P132" "P11" "P164" "P404" "P641" "P45" "P26" "P319" "P513" "P574"
## [621] "P524" "P508" "P86" "P50" "P811" "P5" "P114" "P168" "P682" "P176"
## [631] "P620" "P21" "P23" "P85" "P788" "P437" "P517" "P222" "P120" "P491"
## [641] "P72" "P588" "P178" "P24" "P309" "P155" "P522" "P99" "P175" "P52"
## [651] "P151" "P571" "P35" "P453" "P625" "P632" "P730" "P148" "P815" "P431"
## [661] "P54" "P160" "P127" "P614" "P71" "P439" "P163" "P192" "P315" "P66"
## [671] "P543" "P331" "P688" "P547" "P147" "P552" "P323" "P539" "P557" "P486"
## [681] "P149" "P509" "P180" "P10" "P184" "P152" "P185" "P322" "P46" "P305"
## [691] "P49" "P812" "P630" "P681" "P97" "P38" "P89" "P292" "P387" "P560"
## [701] "P51" "P463" "P18" "P314" "P179" "P41" "P158" "P519" "P101" "P171"
## [711] "P9" "P317" "P329" "P229" "P193" "P70" "P718" "P167" "P554" "P214"
## [721] "P48" "P564" "P145" "P569" "P188" "P498" "P701" "P550" "P313" "P181"
## [731] "P76" "P32" "P190" "P300" "P506" "P128" "P631" "P83" "P390" "P144"
## [741] "P658" "P194" "P523" "P271" "P210" "P798" "P61" "P166" "P567" "P93"
## [751] "P134" "P197" "P622" "P191" "P173" "P501" "P15" "P298" "P308" "P318"
## [761] "P332" "P511" "P20" "P141" "P146" "P88" "P218" "P527" "P186" "P334"
## [771] "P325" "P40" "P42" "P365" "P617" "P549" "P162" "P84" "P22" "P102"
## [781] "P196" "P123" "P79" "P17" "P276" "P90" "P684" "P244" "P56" "P161"
## [791] "P165" "P635" "P531" "P566" "P30" "P618" "P36" "P142" "P496" "P75"
## [801] "P561" "P451" "P104" "P157" "P87" "P485" "P624" "P302" "P133" "P621"
## [811] "P1"