Sõnapaaride andmed
library(RMySQL)
## Loading required package: DBI
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------- tidyverse 1.2.1 --
## <U+221A> ggplot2 3.0.0 <U+221A> purrr 0.2.5
## <U+221A> tibble 1.4.2 <U+221A> dplyr 0.7.6
## <U+221A> tidyr 0.8.1 <U+221A> stringr 1.3.1
## <U+221A> readr 1.1.1 <U+221A> forcats 0.3.0
## -- Conflicts ------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
yhendus<-dbConnect(dbDriver("MySQL"), user="root",
password="root", host="localhost", dbname='film')
paring<-dbSendQuery(yhendus, "SELECT film_id, keyword_id FROM efis_film_keyword ")
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 0 imported
## as numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 1 imported
## as numeric
andmed1<-fetch(paring, n=-1)
head(andmed1)
## film_id keyword_id
## 1 628 4921
## 2 628 20304
## 3 628 4995
## 4 628 20330
## 5 628 20331
## 6 628 20332
paarid<-andmed1 %>% inner_join(andmed1, by="film_id") %>% filter(keyword_id.x<keyword_id.y)
head(paarid)
## film_id keyword_id.x keyword_id.y
## 1 628 4921 20304
## 2 628 4921 4995
## 3 628 4921 20330
## 4 628 4921 20331
## 5 628 4921 20332
## 6 628 4921 20333
paring<-dbSendQuery(yhendus, "SELECT id, production_year FROM efis_film")
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 0 imported
## as numeric
filmid1<-fetch(paring, n=-1)
head(filmid1)
## id production_year
## 1 1 2001
## 2 2 2006
## 3 5 1976
## 4 7 2004
## 5 8 1979
## 6 9 1980
aastapaarid<-paarid %>% inner_join(filmid1, by=c("film_id"="id"))
aastapaarid<-aastapaarid %>% group_by(keyword_id.x, keyword_id.y) %>% mutate(kogus=n()) %>%
ungroup() %>% arrange(desc(kogus))
head(aastapaarid)
## # A tibble: 6 x 5
## film_id keyword_id.x keyword_id.y production_year kogus
## <dbl> <dbl> <dbl> <chr> <int>
## 1 9142 36346 82991 1978 2005
## 2 6651 36346 82991 1974 2005
## 3 9178 36346 82991 1978 2005
## 4 6423 36346 82991 1974 2005
## 5 9195 36346 82991 1978 2005
## 6 9198 36346 82991 1978 2005
paarikogused<-aastapaarid %>% group_by(keyword_id.x, keyword_id.y) %>%
summarise(kogus=n()) %>% ungroup() %>% arrange(desc(kogus)) %>% head(5)
paarikogused
## # A tibble: 5 x 3
## keyword_id.x keyword_id.y kogus
## <dbl> <dbl> <int>
## 1 36346 82991 2005
## 2 73815 82991 1358
## 3 29231 82949 816
## 4 36346 73815 669
## 5 71495 82991 493
paarikogused[2, ]$keyword_id.x
## [1] 73815
for(nr in 1:nrow(paarikogused)){
print(nr)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
paring<-dbSendQuery(yhendus, "SELECT keyword_id, value FROM efis_keyword_translation
WHERE language='et'")
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 0 imported
## as numeric
votmesonad<-fetch(paring, n=-1)
for(nr in 1:nrow(paarikogused)){
kood1=paarikogused[nr, ]$keyword_id.x
kood2=paarikogused[nr, ]$keyword_id.y
sona1<-(votmesonad %>% filter(keyword_id==kood1))$value
sona2<-(votmesonad %>% filter(keyword_id==kood2))$value
(aastapaarid %>% filter(keyword_id.x==kood1, keyword_id.y==kood2))$production_year %>%
as.numeric() %>% hist( main=paste(sona1, " - ", sona2, " paari esinemine aastati"),
xlab="aasta", ylab="kogus")
}