Draiver sisse
library("RSQLite")
## Warning: package 'RSQLite' was built under R version 3.2.5
## Loading required package: DBI
## Warning: package 'DBI' was built under R version 3.2.5
drv=dbDriver("SQLite")
Konkreetse baasi külge
Faili nimeks paistis olema lihtsalt main.db, mitte main-SkypeR.db
db=dbConnect(drv, "C:/Users/Jaagup Kippar/AppData/Roaming/Skype/jaagup.kippar/main.db")
Tabelite loetelu baasis
dbListTables(db)
## [1] "Accounts" "Alerts" "AppSchemaVersion"
## [4] "CallHandlers" "CallMembers" "Calls"
## [7] "ChatMembers" "Chats" "ContactGroups"
## [10] "Contacts" "Conversations" "DbMeta"
## [13] "LegacyMessages" "MediaDocuments" "MessageAnnotations"
## [16] "Messages" "Participants" "SMSes"
## [19] "Transfers" "Translators" "VideoMessages"
## [22] "Videos" "Voicemails" "tracker_journal"
Tulpade loetelu tabelis
dbListFields(db, "Contacts")
## [1] "id" "is_permanent"
## [3] "type" "skypename"
## [5] "pstnnumber" "aliases"
## [7] "fullname" "birthday"
## [9] "gender" "languages"
## [11] "country" "province"
## [13] "city" "phone_home"
## [15] "phone_office" "phone_mobile"
## [17] "emails" "hashed_emails"
## [19] "homepage" "about"
## [21] "avatar_image" "mood_text"
## [23] "rich_mood_text" "timezone"
## [25] "capabilities" "profile_timestamp"
## [27] "nrof_authed_buddies" "ipcountry"
## [29] "avatar_timestamp" "mood_timestamp"
## [31] "received_authrequest" "authreq_timestamp"
## [33] "lastonline_timestamp" "availability"
## [35] "displayname" "firstname"
## [37] "lastname" "refreshing"
## [39] "given_authlevel" "given_displayname"
## [41] "assigned_speeddial" "assigned_comment"
## [43] "alertstring" "lastused_timestamp"
## [45] "authrequest_count" "assigned_phone1"
## [47] "assigned_phone1_label" "assigned_phone2"
## [49] "assigned_phone2_label" "assigned_phone3"
## [51] "assigned_phone3_label" "is_mobile"
## [53] "is_trusted" "buddystatus"
## [55] "isauthorized" "popularity_ord"
## [57] "external_id" "external_system_id"
## [59] "liveid_cid" "isblocked"
## [61] "authorization_certificate" "certificate_send_count"
## [63] "account_modification_serial_nr" "saved_directory_blob"
## [65] "nr_of_buddies" "server_synced"
## [67] "contactlist_track" "last_used_networktime"
## [69] "network_availability" "authorized_time"
## [71] "sent_authrequest" "sent_authrequest_time"
## [73] "sent_authrequest_serial" "buddyblob"
## [75] "cbl_future" "node_capabilities"
## [77] "revoked_auth" "added_in_shared_group"
## [79] "in_shared_group" "authreq_history"
## [81] "profile_attachments" "stack_version"
## [83] "offline_authreq_id" "node_capabilities_and"
## [85] "authreq_crc" "authreq_src"
## [87] "pop_score" "authreq_nodeinfo"
## [89] "main_phone" "unified_servants"
## [91] "phone_home_normalized" "phone_office_normalized"
## [93] "phone_mobile_normalized" "sent_authrequest_initmethod"
## [95] "authreq_initmethod" "verified_email"
## [97] "verified_company" "sent_authrequest_extrasbitmask"
## [99] "is_auto_buddy" "group_membership"
## [101] "avatar_url" "avatar_url_new"
## [103] "extprop_seen_birthday" "extprop_sms_target"
## [105] "extprop_external_data"
Andmed SELECT-päringuga sisse
Ligi pooltel oli buddystatus NA, testkontol ja ühel isikul 2 ning mõneteistkümnel siis 3. Milline staatus millisele numbrile vastab, ei suutnud esimese hooga tuvastada.
andmed=dbGetQuery(db, "SELECT displayname, skypename, languages, birthday, gender, buddystatus FROM Contacts where buddystatus=3")
andmed
## displayname skypename languages birthday gender
## 1 anne.mari.ernesaks anne.mari.ernesaks <NA> NA NA
## 2 Pille Eslon pille.eslon <NA> NA NA
## 3 KƤtlin k2tsukas et 19890217 2
## 4 ruizcalleja ruizcalleja <NA> NA NA
## 5 Peeter Marvet petskratt et NA NA
## 6 Antero Laas anterolaas <NA> NA NA
## 7 Aino Kiis aino.kiis <NA> NA NA
## 8 Ruth Randoja ruth.randoja <NA> NA NA
## 9 Siim Sildver ziim999 en 19911022 1
## 10 Kristo Tammeoja kristotammeoja <NA> NA NA
## 11 Triinu Raigna triinu_raigna <NA> NA NA
## 12 Vaiko Noor vaiko.noor et 19910227 1
## 13 Mari-Liis Peets mari.liis.lokk <NA> NA NA
## 14 Marianne Adson facebook:marianne.adson <NA> 19810311 2
## 15 Toomas HƤide toomas.haide <NA> NA NA
## 16 Ulrika Tuppits ulrika.danziger <NA> 19810423 2
## 17 Annika Tuul 1:annika.tuul@gmail.com <NA> NA NA
## buddystatus
## 1 3
## 2 3
## 3 3
## 4 3
## 5 3
## 6 3
## 7 3
## 8 3
## 9 3
## 10 3
## 11 3
## 12 3
## 13 3
## 14 3
## 15 3
## 16 3
## 17 3
Märgitud kasutajakeelte jagunemine sisseloetud andmete hulgas
table(andmed$languages, useNA="always")
##
## en et <NA>
## 1 3 13
Sünniaja klass loeti kõigepealt sisse arvulisena
class(andmed[!is.na(andmed$birthday), "birthday"])
## [1] "integer"
Kuupäeva saab sobivale kujule määrata vaid nendel, kel see olemas. Kuna olemasoleva kuupäevaga inimesi läheb rohkemgi vaja, siis jätan nende järjekorranumbrid eraldi loetellu.
olemasnr=which(!is.na(andmed$birthday))
olemasnr
## [1] 3 9 12 14 16
Kuupäevatulp kuupäeva tüüpi - neil, kel olemas. Andmete lugemine etteantud vormingu kaudu. Kõigepalt tähtedeks ja siis sealt Date-ks.
andmed[olemasnr, "birthday"]=as.Date(as.character(andmed[olemasnr, "birthday"]), "%Y%m%d")
andmed
## displayname skypename languages birthday gender
## 1 anne.mari.ernesaks anne.mari.ernesaks <NA> NA NA
## 2 Pille Eslon pille.eslon <NA> NA NA
## 3 KƤtlin k2tsukas et 6987 2
## 4 ruizcalleja ruizcalleja <NA> NA NA
## 5 Peeter Marvet petskratt et NA NA
## 6 Antero Laas anterolaas <NA> NA NA
## 7 Aino Kiis aino.kiis <NA> NA NA
## 8 Ruth Randoja ruth.randoja <NA> NA NA
## 9 Siim Sildver ziim999 en 7964 1
## 10 Kristo Tammeoja kristotammeoja <NA> NA NA
## 11 Triinu Raigna triinu_raigna <NA> NA NA
## 12 Vaiko Noor vaiko.noor et 7727 1
## 13 Mari-Liis Peets mari.liis.lokk <NA> NA NA
## 14 Marianne Adson facebook:marianne.adson <NA> 4087 2
## 15 Toomas HƤide toomas.haide <NA> NA NA
## 16 Ulrika Tuppits ulrika.danziger <NA> 4130 2
## 17 Annika Tuul 1:annika.tuul@gmail.com <NA> NA NA
## buddystatus
## 1 3
## 2 3
## 3 3
## 4 3
## 5 3
## 6 3
## 7 3
## 8 3
## 9 3
## 10 3
## 11 3
## 12 3
## 13 3
## 14 3
## 15 3
## 16 3
## 17 3
Klass endiselt muutmata
class(andmed$birthday)
## [1] "numeric"
Kirjutan lihtsalt üle
class(andmed$birthday)="Date"
class(andmed$birthday)
## [1] "Date"
Noorimat kontakti on mõtet leida ainult nende hulgast, kel sünnikuupäev olemas. Kuna muidu kippusid avaldised pikaks minema, siis tõstsin kuupäevaga inimesed eraldi muutujasse.
synnipaevagaandmed=andmed[olemasnr, ]
Sünnikuupäevaga inimesed järjestatuna kahanevalt sünnikuupäeva järgi
synnipaevagaandmed[rev(order(synnipaevagaandmed$birthday)),]
## displayname skypename languages birthday gender
## 9 Siim Sildver ziim999 en 1991-10-22 1
## 12 Vaiko Noor vaiko.noor et 1991-02-27 1
## 3 KƤtlin k2tsukas et 1989-02-17 2
## 16 Ulrika Tuppits ulrika.danziger <NA> 1981-04-23 2
## 14 Marianne Adson facebook:marianne.adson <NA> 1981-03-11 2
## buddystatus
## 9 3
## 12 3
## 3 3
## 16 3
## 14 3
Neist esimese nimi
synnipaevagaandmed[rev(order(synnipaevagaandmed$birthday)),"displayname"][[1]]
## [1] "Siim Sildver"
Sugu faktoriks (enne oli täisarv)
andmed$gender=factor(andmed$gender, exclude=NULL, levels=c(1, 2, NA), labels=c("mees", "naine", "teadmata"))
andmed$gender
## [1] teadmata teadmata naine teadmata teadmata teadmata teadmata
## [8] teadmata mees teadmata teadmata mees teadmata naine
## [15] teadmata naine teadmata
## Levels: mees naine teadmata
Sagedusjaotus
table(andmed$gender)
##
## mees naine teadmata
## 2 3 12
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.5
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
db=src_sqlite("C:/Users/Jaagup Kippar/AppData/Roaming/Skype/jaagup.kippar/main.db")
Tabeli eraldamine
kontaktid=tbl(db, "Contacts")
kontaktid
## Source: sqlite 3.8.6 [C:/Users/Jaagup Kippar/AppData/Roaming/Skype/jaagup.kippar/main.db]
## From: Contacts [43 x 105]
##
## id is_permanent type skypename pstnnumber aliases
## (int) (int) (int) (chr) (chr) (chr)
## 1 23 1 1 echo123 NA NA
## 2 703 1 1 anne.mari.ernesaks NA NA
## 3 707 1 1 pille.eslon NA NA
## 4 711 1 1 k2tsukas NA NA
## 5 715 1 1 ruizcalleja NA NA
## 6 719 1 1 petskratt NA NA
## 7 723 1 1 raido.rokk NA NA
## 8 727 1 1 anterolaas NA NA
## 9 731 1 1 aino.kiis NA NA
## 10 735 1 1 ruth.randoja NA NA
## .. ... ... ... ... ... ...
## Variables not shown: fullname (chr), birthday (int), gender (int),
## languages (chr), country (chr), province (chr), city (chr), phone_home
## (chr), phone_office (chr), phone_mobile (chr), emails (chr),
## hashed_emails (chr), homepage (chr), about (chr), avatar_image (chr),
## mood_text (chr), rich_mood_text (chr), timezone (int), capabilities
## (chr), profile_timestamp (int), nrof_authed_buddies (int), ipcountry
## (chr), avatar_timestamp (int), mood_timestamp (int),
## received_authrequest (chr), authreq_timestamp (int),
## lastonline_timestamp (int), availability (int), displayname (chr),
## firstname (chr), lastname (chr), refreshing (int), given_authlevel
## (int), given_displayname (chr), assigned_speeddial (chr),
## assigned_comment (chr), alertstring (chr), lastused_timestamp (int),
## authrequest_count (int), assigned_phone1 (chr), assigned_phone1_label
## (chr), assigned_phone2 (chr), assigned_phone2_label (chr),
## assigned_phone3 (chr), assigned_phone3_label (chr), is_mobile (int),
## is_trusted (int), buddystatus (int), isauthorized (int), popularity_ord
## (int), external_id (chr), external_system_id (chr), liveid_cid (chr),
## isblocked (int), authorization_certificate (chr), certificate_send_count
## (int), account_modification_serial_nr (int), saved_directory_blob (chr),
## nr_of_buddies (int), server_synced (int), contactlist_track (int),
## last_used_networktime (int), network_availability (int), authorized_time
## (int), sent_authrequest (chr), sent_authrequest_time (int),
## sent_authrequest_serial (int), buddyblob (chr), cbl_future (chr),
## node_capabilities (int), revoked_auth (int), added_in_shared_group
## (int), in_shared_group (int), authreq_history (chr), profile_attachments
## (chr), stack_version (int), offline_authreq_id (int),
## node_capabilities_and (int), authreq_crc (int), authreq_src (int),
## pop_score (int), authreq_nodeinfo (chr), main_phone (chr),
## unified_servants (chr), phone_home_normalized (chr),
## phone_office_normalized (chr), phone_mobile_normalized (chr),
## sent_authrequest_initmethod (int), authreq_initmethod (int),
## verified_email (chr), verified_company (chr),
## sent_authrequest_extrasbitmask (int), is_auto_buddy (int),
## group_membership (int), avatar_url (chr), avatar_url_new (chr),
## extprop_seen_birthday (int), extprop_sms_target (int),
## extprop_external_data (chr)
Kasutajanimed
kontaktid %>% filter(buddystatus==3) %>% select(skypename)
## Source: sqlite 3.8.6 [C:/Users/Jaagup Kippar/AppData/Roaming/Skype/jaagup.kippar/main.db]
## From: Contacts [17 x 1]
## Filter: buddystatus == 3
##
## skypename
## (chr)
## 1 anne.mari.ernesaks
## 2 pille.eslon
## 3 k2tsukas
## 4 ruizcalleja
## 5 petskratt
## 6 anterolaas
## 7 aino.kiis
## 8 ruth.randoja
## 9 ziim999
## 10 kristotammeoja
## 11 triinu_raigna
## 12 vaiko.noor
## 13 mari.liis.lokk
## 14 facebook:marianne.adson
## 15 toomas.haide
## 16 ulrika.danziger
## 17 1:annika.tuul@gmail.com
Keelte jaotus. Ungroup ei ole vist niisama vaatamiseks kohustuslik, aga kasulik hilisema töötluse juures.
kontaktid %>% filter(buddystatus==3) %>% group_by(languages) %>% summarize(n=count()) %>% ungroup()
## Source: sqlite 3.8.6 [C:/Users/Jaagup Kippar/AppData/Roaming/Skype/jaagup.kippar/main.db]
## From: <derived table> [?? x 2]
##
## languages n
## (chr) (int)
## 1 NA 13
## 2 en 1
## 3 et 3
## .. ... ...