Sõnapaaride andmed

library(RMySQL)
## Loading required package: DBI
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------- tidyverse 1.2.1 --
## <U+221A> ggplot2 3.0.0     <U+221A> purrr   0.2.5
## <U+221A> tibble  1.4.2     <U+221A> dplyr   0.7.6
## <U+221A> tidyr   0.8.1     <U+221A> stringr 1.3.1
## <U+221A> readr   1.1.1     <U+221A> forcats 0.3.0
## -- Conflicts ------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
yhendus<-dbConnect(dbDriver("MySQL"), user="root",
        password="root", host="localhost", dbname='film')

paring<-dbSendQuery(yhendus, "SELECT film_id, keyword_id FROM efis_film_keyword ")
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 0 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 1 imported
## as numeric
andmed1<-fetch(paring, n=-1)
head(andmed1)
##   film_id keyword_id
## 1     628       4921
## 2     628      20304
## 3     628       4995
## 4     628      20330
## 5     628      20331
## 6     628      20332
paarid<-andmed1 %>% inner_join(andmed1, by="film_id") %>% filter(keyword_id.x<keyword_id.y)
head(paarid)
##   film_id keyword_id.x keyword_id.y
## 1     628         4921        20304
## 2     628         4921         4995
## 3     628         4921        20330
## 4     628         4921        20331
## 5     628         4921        20332
## 6     628         4921        20333
paring<-dbSendQuery(yhendus, "SELECT id, production_year FROM efis_film")
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 0 imported
## as numeric
filmid1<-fetch(paring, n=-1)
head(filmid1)
##   id production_year
## 1  1            2001
## 2  2            2006
## 3  5            1976
## 4  7            2004
## 5  8            1979
## 6  9            1980
aastapaarid<-paarid %>% inner_join(filmid1, by=c("film_id"="id"))
aastapaarid<-aastapaarid %>% group_by(keyword_id.x, keyword_id.y) %>% mutate(kogus=n()) %>% 
    ungroup() %>% arrange(desc(kogus))
head(aastapaarid)
## # A tibble: 6 x 5
##   film_id keyword_id.x keyword_id.y production_year kogus
##     <dbl>        <dbl>        <dbl> <chr>           <int>
## 1    9142        36346        82991 1978             2005
## 2    6651        36346        82991 1974             2005
## 3    9178        36346        82991 1978             2005
## 4    6423        36346        82991 1974             2005
## 5    9195        36346        82991 1978             2005
## 6    9198        36346        82991 1978             2005
paarikogused<-aastapaarid %>% group_by(keyword_id.x, keyword_id.y) %>% 
   summarise(kogus=n()) %>%   ungroup() %>% arrange(desc(kogus)) %>% head(5)
paarikogused
## # A tibble: 5 x 3
##   keyword_id.x keyword_id.y kogus
##          <dbl>        <dbl> <int>
## 1        36346        82991  2005
## 2        73815        82991  1358
## 3        29231        82949   816
## 4        36346        73815   669
## 5        71495        82991   493
paarikogused[2, ]$keyword_id.x
## [1] 73815
for(nr in 1:nrow(paarikogused)){
  print(nr)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
paring<-dbSendQuery(yhendus, "SELECT keyword_id, value FROM efis_keyword_translation
                      WHERE language='et'")
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 0 imported
## as numeric
votmesonad<-fetch(paring, n=-1)

for(nr in 1:nrow(paarikogused)){
  kood1=paarikogused[nr, ]$keyword_id.x
  kood2=paarikogused[nr, ]$keyword_id.y
  sona1<-(votmesonad %>% filter(keyword_id==kood1))$value
  sona2<-(votmesonad %>% filter(keyword_id==kood2))$value
  (aastapaarid %>% filter(keyword_id.x==kood1, keyword_id.y==kood2))$production_year %>% 
    as.numeric() %>% hist( main=paste(sona1, " - ", sona2, " paari esinemine aastati"),
                           xlab="aasta", ylab="kogus")
}