korpused(korpusekood, korpusenimi) CREATE TABLE korpusenimed( korpusekood VARCHAR(20) NOT NULL PRIMARY KEY, korpusenimi VARCHAR(50) ); LOAD DATA INFILE '/home/jaagup/public_html/2017/ab/korpusenimed.txt' INTO TABLE korpusenimed FIELDS TERMINATED BY ',' IGNORE 1 LINES; CREATE TABLE dokumendid( kood VARCHAR(50) NOT NULL PRIMARY KEY, korpus VARCHAR(20) NOT NULL, tekstikeel VARCHAR(20), tekstityyp VARCHAR(20), elukoht VARCHAR(30), taust VARCHAR(20), vanus VARCHAR(20), sugu VARCHAR(20), emakeel VARCHAR(20), kodukeel VARCHAR(20), keeletase VARCHAR(20), haridus VARCHAR(20), abivahendid VARCHAR(20), FOREIGN KEY(korpus) REFERENCES korpusenimed(korpusekood) ); Kasuta baasi if17_keelekorpus * Näita iga keeletaseme kohta dokmeta tabelis, mitu teksti selle tasemega on (COUNT(*) ja GROUP BY) * Näita tulemus protsendina tekstide üldarvust SELECT COUNT(*), keeletase FROM dokmeta GROUP BY keeletase; SELECT keeletase, ROUND(100*COUNT(*)/(SELECT COUNT(*) FROM dokmeta), 1) AS protsent FROM dokmeta GROUP BY keeletase; +-----------+----------+ | keeletase | protsent | +-----------+----------+ | NULL | 68.1 | | A | 10.8 | | A1 | 0.0 | | A2 | 1.7 | | B | 9.1 | | B1 | 3.2 | | B2 | 1.8 | | C | 2.8 | | C1 | 1.0 | | C2 | 1.5 | +-----------+----------+ SELECT keeletase, COUNT(*) FROM (SELECT keeletase FROM dokmeta WHERE keeletase IS NOT NULL) AS abitabel GROUP BY keeletase; +-----------+----------+ | keeletase | COUNT(*) | +-----------+----------+ | A | 1378 | | A1 | 1 | | A2 | 217 | | B | 1154 | | B1 | 412 | | B2 | 228 | | C | 359 | | C1 | 130 | | C2 | 185 | +-----------+----------+ SELECT keeletase, tekstityyp, COUNT(*) FROM dokmeta GROUP BY keeletase, tekstityyp; +-----------+---------------+----------+ | keeletase | tekstityyp | COUNT(*) | +-----------+---------------+----------+ | NULL | NULL | 7938 | | NULL | amtkiri | 143 | | NULL | arvamuslugu | 1 | | NULL | essee | 527 | | NULL | harjutus | 26 | | NULL | isikiri | 3 | | NULL | muu | 14 | | NULL | tolge | 7 | | NULL | trilumunud | 1 | | A | NULL | 65 | | A | amtkiri | 54 | | A | analyys | 2 | | A | essee | 599 | | A | harjutus | 167 | | A | isikiri | 250 | | A | muu | 141 | | A | tolge | 11 | | A | vastkys | 66 | | A | ymberjutustus | 23 | | A1 | harjutus | 1 | | A2 | NULL | 123 | | A2 | batoo | 1 | | A2 | essee | 81 | | A2 | harjutus | 3 | | A2 | isikiri | 1 | | A2 | muu | 7 | | A2 | tolge | 1 | | B | NULL | 6 | | B | amtkiri | 83 | | B | analyys | 31 | | B | essee | 421 | | B | harjutus | 36 | | B | isikiri | 59 | | B | muu | 175 | | B | referaat | 9 | | B | tolge | 46 | | B | vastkys | 276 | | B | ymberjutustus | 12 | | B1 | NULL | 116 | | B1 | analyys | 1 | | B1 | essee | 250 | | B1 | harjutus | 8 | | B1 | isikiri | 2 | | B1 | muu | 35 | | B2 | NULL | 65 | | B2 | amtkiri | 2 | | B2 | essee | 124 | | B2 | harjutus | 3 | | B2 | isikiri | 1 | | B2 | muu | 32 | | B2 | tolge | 1 | | C | NULL | 1 | | C | amtkiri | 12 | | C | analyys | 22 | | C | batoo | 3 | | C | essee | 75 | | C | harjutus | 3 | | C | isikiri | 4 | | C | muu | 63 | | C | referaat | 15 | | C | tolge | 7 | | C | vastkys | 153 | | C | ymberjutustus | 1 | | C1 | NULL | 49 | | C1 | amtkiri | 10 | | C1 | essee | 59 | | C1 | muu | 8 | | C1 | referaat | 4 | | C2 | arvamuslugu | 182 | | C2 | essee | 3 | +-----------+---------------+----------+ SELECT keeletase, MAX(IF(tekstityyp='amtkiri', kogus, 0)) AS amtkiri FROM (SELECT keeletase, tekstityyp, COUNT(*) AS kogus FROM dokmeta GROUP BY keeletase, tekstityyp) AS abitabel GROUP BY keeletase; Lõpptulemuse kuvamisel iga vastava keeletasemega (nt A2) rea juures vaadatakse if-iga, et kas hetkel abitabelist loetava rea puhul on tekstitüüp amtkiri. Kui jah, siis võetakse vastav kogus, muidu jäetakse ootele 0. Max annab suurima (ehk praegusel juhul ainsa) väärtuse nendest SELECT keeletase, MAX(IF(tekstityyp='amtkiri', kogus, 0)) AS amtkiri, MAX(IF(tekstityyp='analyys', kogus, 0)) AS analyys, MAX(IF(tekstityyp='essee', kogus, 0)) AS essee, MAX(IF(tekstityyp='harjutus', kogus, 0)) AS harjutus, MAX(IF(tekstityyp='isikiri', kogus, 0)) AS isiklik_kiri, MAX(IF(tekstityyp='muu', kogus, 0)) AS muu FROM (SELECT keeletase, tekstityyp, COUNT(*) AS kogus FROM dokmeta GROUP BY keeletase, tekstityyp) AS abitabel GROUP BY keeletase; +-----------+---------+---------+-------+----------+--------------+------+ | keeletase | amtkiri | analyys | essee | harjutus | isiklik_kiri | muu | +-----------+---------+---------+-------+----------+--------------+------+ | NULL | 143 | 0 | 527 | 26 | 3 | 14 | | A | 54 | 2 | 599 | 167 | 250 | 141 | | A1 | 0 | 0 | 0 | 1 | 0 | 0 | | A2 | 0 | 0 | 81 | 3 | 1 | 7 | | B | 83 | 31 | 421 | 36 | 59 | 175 | | B1 | 0 | 1 | 250 | 8 | 2 | 35 | | B2 | 2 | 0 | 124 | 3 | 1 | 32 | | C | 12 | 22 | 75 | 3 | 4 | 63 | | C1 | 10 | 0 | 59 | 0 | 0 | 8 | | C2 | 0 | 0 | 3 | 0 | 0 | 0 | +-----------+---------+---------+-------+----------+--------------+------+ * Kuvage pikal ja lühikesel kujul, kui palju on kummastki soost inimestel millisel keeletasemel tekste SELECT keeletase, sugu, COUNT(*) AS kogus FROM dokmeta GROUP BY keeletase, sugu; +-----------+-------+-------+ | keeletase | sugu | kogus | +-----------+-------+-------+ | NULL | NULL | 2839 | | NULL | naine | 4498 | | NULL | mees | 1323 | | A | naine | 787 | | A | mees | 591 | | A1 | naine | 1 | | A2 | NULL | 124 | | A2 | naine | 79 | | A2 | mees | 14 | | B | naine | 918 | | B | mees | 236 | | B1 | NULL | 112 | | B1 | naine | 224 | | B1 | mees | 76 | | B2 | NULL | 66 | | B2 | naine | 131 | | B2 | mees | 31 | | C | naine | 313 | | C | mees | 46 | | C1 | NULL | 48 | | C1 | naine | 64 | | C1 | mees | 18 | | C2 | NULL | 2 | | C2 | naine | 51 | | C2 | mees | 132 | +-----------+-------+-------+ SELECT keeletase, MAX(IF(sugu='naine', kogus, 0)) AS naine, MAX(IF(sugu='mees', kogus, 0)) AS mees, MAX(IF(sugu IS NULL, kogus, 0)) AS teadmata FROM (SELECT keeletase, sugu, COUNT(*) AS kogus FROM dokmeta GROUP BY keeletase, sugu) AS abitabel GROUP BY keeletase; +-----------+-------+------+----------+ | keeletase | naine | mees | teadmata | +-----------+-------+------+----------+ | NULL | 4498 | 1323 | 2839 | | A | 787 | 591 | 0 | | A1 | 1 | 0 | 0 | | A2 | 79 | 14 | 124 | | B | 918 | 236 | 0 | | B1 | 224 | 76 | 112 | | B2 | 131 | 31 | 66 | | C | 313 | 46 | 0 | | C1 | 64 | 18 | 48 | | C2 | 51 | 132 | 2 | +-----------+-------+------+----------+ SELECT keeletase, naine, mees, teadmata, naine+mees+teadmata AS kokku FROM (SELECT keeletase, MAX(IF(sugu='naine', kogus, 0)) AS naine, MAX(IF(sugu='mees', kogus, 0)) AS mees, MAX(IF(sugu IS NULL, kogus, 0)) AS teadmata FROM (SELECT keeletase, sugu, COUNT(*) AS kogus FROM dokmeta GROUP BY keeletase, sugu) AS abitabel GROUP BY keeletase) AS abitabel2; +-----------+-------+------+----------+-------+ | keeletase | naine | mees | teadmata | kokku | +-----------+-------+------+----------+-------+ | NULL | 4498 | 1323 | 2839 | 8660 | | A | 787 | 591 | 0 | 1378 | | A1 | 1 | 0 | 0 | 1 | | A2 | 79 | 14 | 124 | 217 | | B | 918 | 236 | 0 | 1154 | | B1 | 224 | 76 | 112 | 412 | | B2 | 131 | 31 | 66 | 228 | | C | 313 | 46 | 0 | 359 | | C1 | 64 | 18 | 48 | 130 | | C2 | 51 | 132 | 2 | 185 | +-----------+-------+------+----------+-------+ * Järjestage keeletasemed naiste osalusprotsendi järgi SELECT keeletase, naine, mees, teadmata, naine+mees+teadmata AS kokku, ROUND(100*naine/(naine+mees+teadmata)) AS protsent FROM (SELECT keeletase, MAX(IF(sugu='naine', kogus, 0)) AS naine, MAX(IF(sugu='mees', kogus, 0)) AS mees, MAX(IF(sugu IS NULL, kogus, 0)) AS teadmata FROM (SELECT keeletase, sugu, COUNT(*) AS kogus FROM dokmeta GROUP BY keeletase, sugu) AS abitabel GROUP BY keeletase) AS abitabel2 ORDER BY protsent; +-----------+-------+------+----------+-------+----------+ | keeletase | naine | mees | teadmata | kokku | protsent | +-----------+-------+------+----------+-------+----------+ | C2 | 51 | 132 | 2 | 185 | 28 | | A2 | 79 | 14 | 124 | 217 | 36 | | C1 | 64 | 18 | 48 | 130 | 49 | | NULL | 4498 | 1323 | 2839 | 8660 | 52 | | B1 | 224 | 76 | 112 | 412 | 54 | | B2 | 131 | 31 | 66 | 228 | 57 | | A | 787 | 591 | 0 | 1378 | 57 | | B | 918 | 236 | 0 | 1154 | 80 | | C | 313 | 46 | 0 | 359 | 87 | | A1 | 1 | 0 | 0 | 1 | 100 | +-----------+-------+------+----------+-------+----------+ SHOW TABLES +----------------------------+ | Tables_in_if17_keelekorpus | +----------------------------+ | dokarvud | | dokmeta | | doksonaliigid | | elukohad | | haridustasemed | | keeled | | keeletasemed | | korpusenimed | | ngram1 | | ngram2 | | ngram3 | | ngram4 | | ngram5 | | sonaddokumendis | | taustad | | tekstityybid | | vanusetasemed | +----------------------------+ SELECT COUNT(*) FROM ngram3; SELECT * FROM ngram3 LIMIT 5; Tutvu tabelitega ngram3 ja dokmeta Kuva vene emakeelega inimeste eestikeelsete tekstide koodid SELECT emakeel, tekstikeel, kood FROM dokmeta LIMIT 20; +---------+------------+-----------------------+ | emakeel | tekstikeel | kood | +---------+------------+-----------------------+ | vene | eesti | doc_100636852915_item | | vene | eesti | doc_100636852916_item | | vene | eesti | doc_100636852917_item | | vene | eesti | doc_1010138197_item | | vene | eesti | doc_1010138198_item | | vene | eesti | doc_1010138199_item | | vene | eesti | doc_1010138200_item | | vene | eesti | doc_101672866015_item | | NULL | eesti | doc_104580264035_item | | vene | eesti | doc_104580264036_item | | NULL | eesti | doc_104580264037_item | SELECT kood FROM dokmeta WHERE emakeel='vene' AND tekstikeel='eesti' LIMIT 20; +-----------------------+ | kood | +-----------------------+ | doc_100636852915_item | | doc_100636852916_item | | doc_100636852917_item | | doc_1010138197_item | | doc_1010138198_item | | doc_1010138199_item | | doc_1010138200_item | | doc_101672866015_item | Kuva vene emakeelega inimeste eestikeelsete tekstide ngramid (pikkusega 3) +------------+-------------+------+-----+---------+-------+ | Field | Type | Null | Key | Default | Extra | +------------+-------------+------+-----+---------+-------+ | tekstikood | varchar(50) | NO | PRI | NULL | | | ngram3 | char(3) | NO | MUL | NULL | | | alguskoht | int(11) | NO | PRI | NULL | | +------------+-------------+------+-----+---------+-------+ Tuhandendast alates 5 tükki MariaDB [if17_keelekorpus]> SELECT * FROM ngram3 LIMIT 1000, 5; +-----------------------+--------+-----------+ | tekstikood | ngram3 | alguskoht | +-----------------------+--------+-----------+ | doc_264202857139_item | AAS | 1091 | | doc_264202857139_item | AAS | 1103 | | doc_264202857139_item | AAS | 1183 | | doc_264202857140_item | AAS | 60 | | doc_264202857141_item | AAS | 420 | +-----------------------+--------+-----------+ SELECT kood, ngram3 FROM dokmeta INNER JOIN ngram3 ON dokmeta.kood=ngram3.tekstikood WHERE emakeel='vene' AND tekstikeel='eesti' LIMIT 20; +-----------------------+--------+ | kood | ngram3 | +-----------------------+--------+ | doc_100636852915_item | SPV | | doc_100636852915_item | PVD | | doc_100636852915_item | VDS | | doc_100636852915_item | DSJ | | doc_100636852915_item | SJS | | doc_100636852915_item | JSZ | | doc_100636852915_item | SZJ | | doc_100636852915_item | ZJS | Järjestage need ngramide esinemissageduse järgi SELECT ngram3, COUNT(*) FROM ngram3 GROUP BY ngram3 ORDER BY COUNT(*); SZP | 58867 | | ZPV | 63126 | | SSZ | 78332 | | SSS | 85558 | | SZS | 92329 | +--------+----------+ SELECT ngram3, COUNT(*) FROM dokmeta INNER JOIN ngram3 ON dokmeta.kood=ngram3.tekstikood WHERE emakeel='vene' AND tekstikeel='eesti' GROUP BY ngram3 ORDER BY COUNT(*); | VPS | 9258 | | VAS | 9280 | | ZSV | 9448 | | SVS | 9766 | | SZD | 10503 | | ZSZ | 13392 | | VSZ | 13905 | | ASZ | 15057 | | SSZ | 16575 | | ZPV | 17346 | | SZP | 17584 | | SZS | 23218 | +--------+----------+ * Kuva ngramide suhtelised sagedused SELECT ngram3, COUNT(*)/ (SELECT COUNT(*) FROM dokmeta INNER JOIN ngram3 ON dokmeta.kood=ngram3.tekstikood WHERE emakeel='vene' AND tekstikeel='eesti') AS suhe FROM dokmeta INNER JOIN ngram3 ON dokmeta.kood=ngram3.tekstikood WHERE emakeel='vene' AND tekstikeel='eesti' GROUP BY ngram3 ORDER BY COUNT(*); | VSZ | 0.0148 | | ASZ | 0.0160 | | SSZ | 0.0176 | | ZPV | 0.0184 | | SZP | 0.0187 | | SZS | 0.0246 | +--------+--------+ 2682 rows in set (1.29 sec) SET @yldkogus=(SELECT COUNT(*) FROM dokmeta INNER JOIN ngram3 ON dokmeta.kood=ngram3.tekstikood WHERE emakeel='vene' AND tekstikeel='eesti'); SELECT ngram3, COUNT(*)/@yldkogus AS suhe FROM dokmeta INNER JOIN ngram3 ON dokmeta.kood=ngram3.tekstikood WHERE emakeel='vene' AND tekstikeel='eesti' GROUP BY ngram3 ORDER BY COUNT(*); | VSZ | 0.0126 | | ASZ | 0.0136 | | SZS | 0.0141 | | SSZ | 0.0156 | | SZP | 0.0197 | | ZPV | 0.0213 | +--------+--------+ SELECT ngram3, COUNT(*)/ (SELECT COUNT(*) FROM dokmeta INNER JOIN ngram3 ON dokmeta.kood=ngram3.tekstikood WHERE emakeel='soome' AND tekstikeel='eesti') AS suhe FROM dokmeta INNER JOIN ngram3 ON dokmeta.kood=ngram3.tekstikood WHERE emakeel='soome' AND tekstikeel='eesti' GROUP BY ngram3 ORDER BY COUNT(*); | VSZ | 0.0126 | | ASZ | 0.0136 | | SZS | 0.0141 | | SSZ | 0.0156 | | SZP | 0.0197 | | ZPV | 0.0213 | +--------+--------+ 2682 rows in set (1.24 sec) SELECT venetabel.ngram3, venesuhe, soomesuhe, soomesuhe/venesuhe AS keeltesuhe FROM (SELECT ngram3, COUNT(*)/ (SELECT COUNT(*) FROM dokmeta INNER JOIN ngram3 ON dokmeta.kood=ngram3.tekstikood WHERE emakeel='vene' AND tekstikeel='eesti') AS venesuhe FROM dokmeta INNER JOIN ngram3 ON dokmeta.kood=ngram3.tekstikood WHERE emakeel='vene' AND tekstikeel='eesti' GROUP BY ngram3) AS venetabel INNER JOIN (SELECT ngram3, COUNT(*)/ (SELECT COUNT(*) FROM dokmeta INNER JOIN ngram3 ON dokmeta.kood=ngram3.tekstikood WHERE emakeel='soome' AND tekstikeel='eesti') AS soomesuhe FROM dokmeta INNER JOIN ngram3 ON dokmeta.kood=ngram3.tekstikood WHERE emakeel='soome' AND tekstikeel='eesti' GROUP BY ngram3) AS soometabel ON venetabel.ngram3=soometabel.ngram3 ORDER BY keeltesuhe DESC LIMIT 30; CREATE TABLE sonaliikide_lyhendid( liigilyhend CHAR(1) NOT NULL PRIMARY KEY, liigikirjeldus VARCHAR(50) ); LOAD DATA LOCAL INFILE '/home/jaagup/public_html/2017/ab/sonaliikide_lyhendid.txt' INTO TABLE sonaliikide_lyhendid CHARACTER SET UTF8 FIELDS TERMINATED BY ',' IGNORE 1 LINES Tabelid: dokmeta, doksonaliigid, siduv tulp "kood" Leidke iga teksti kohta tegusõnade (V - Verb) ning nimisõnade (S - Substantiiv) suhtelised sagedused. MariaDB [if17_keelekorpus]> SELECT kood, S/kokku, V/kokku FROM doksonaliigid LIMIT 10; +-----------------------+---------+---------+ | kood | S/kokku | V/kokku | +-----------------------+---------+---------+ | doc_100636852915_item | 0.2559 | 0.1659 | | doc_100636852916_item | 0.2650 | 0.1880 | | doc_100636852917_item | 0.3354 | 0.1582 | | doc_1010138197_item | 0.2618 | 0.1803 | | doc_1010138198_item | 0.2607 | 0.1848 | | doc_1010138199_item | 0.2542 | 0.1864 | | doc_1010138200_item | 0.2511 | 0.1814 | | doc_101672866015_item | 0.2960 | 0.1079 | | doc_104580264035_item | NULL | NULL | | doc_104580264036_item | 0.1845 | 0.2460 | +-----------------------+---------+---------+ Leidke V ja S osakaalu keskmine ning standardhälve (STD) kõigi tekstide juures, SELECT AVG(S/kokku), STD(S/kokku), AVG(V/kokku), STD(V/kokku) FROM doksonaliigid; SELECT AVG(Sosa), STD(Sosa), AVG(Vosa), STD(Vosa) FROM (SELECT S/kokku AS Sosa, V/kokku AS Vosa FROM doksonaliigid) AS vahetabel; +------------+------------+------------+------------+ | AVG(Sosa) | STD(Sosa) | AVG(Vosa) | STD(Vosa) | +------------+------------+------------+------------+ | 0.26029662 | 0.09688962 | 0.16718369 | 0.05200827 | +------------+------------+------------+------------+ SELECT AVG(Sosa), STD(Sosa), AVG(Vosa), STD(Vosa) FROM (SELECT S/kokku AS Sosa, V/kokku AS Vosa FROM doksonaliigid WHERE kokku>0) AS vahetabel; eestikeelsete tekstide juures, +-------------+-------------------------+------+-----+---------+-------+ | Field | Type | Null | Key | Default | Extra | +-------------+-------------------------+------+-----+---------+-------+ | kood | varchar(50) | NO | PRI | NULL | | | korpus | varchar(20) | NO | MUL | NULL | | | tekstikeel | varchar(20) | YES | MUL | NULL | | | tekstityyp | varchar(20) | YES | MUL | NULL | | | elukoht | varchar(20) | YES | MUL | NULL | | | taust | varchar(20) | YES | MUL | NULL | | | vanus | varchar(20) | YES | MUL | NULL | | | sugu | enum('naine','mees','') | YES | | NULL | | | emakeel | varchar(20) | YES | MUL | NULL | | | kodukeel | varchar(20) | YES | MUL | NULL | | | keeletase | varchar(20) | YES | MUL | NULL | | | haridus | varchar(20) | YES | MUL | NULL | | | abivahendid | enum('jah','ei','') | YES | | NULL | | +-------------+-------------------------+------+-----+---------+-------+ +-------+-------------+------+-----+---------+-------+ | Field | Type | Null | Key | Default | Extra | +-------+-------------+------+-----+---------+-------+ | kood | varchar(50) | NO | PRI | NULL | | | A | int(11) | YES | | NULL | | | C | int(11) | YES | | NULL | | | D | int(11) | YES | | NULL | | | G | int(11) | YES | | NULL | | | H | int(11) | YES | | NULL | | | I | int(11) | YES | | NULL | | | J | int(11) | YES | | NULL | | | K | int(11) | YES | | NULL | | | N | int(11) | YES | | NULL | | | P | int(11) | YES | | NULL | | | S | int(11) | YES | | NULL | | | U | int(11) | YES | | NULL | | | V | int(11) | YES | | NULL | | | X | int(11) | YES | | NULL | | | Y | int(11) | YES | | NULL | | | Z | int(11) | YES | | NULL | | | kokku | int(11) | YES | | NULL | | +-------+-------------+------+-----+---------+-------+ SELECT * FROM dokmeta INNER JOIN doksonaliigid ON dokmeta.kood=doksonaliigid.kood LIMIT 10; +-----------------------+-----------+------------+------------+---------+-------+--------+-------+---------+----------+-----------+---------+-------------+-----------------------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+-------+ | kood | korpus | tekstikeel | tekstityyp | elukoht | taust | vanus | sugu | emakeel | kodukeel | keeletase | haridus | abivahendid | kood | A | C | D | G | H | I | J | K | N | P | S | U | V | X | Y | Z | kokku | +-----------------------+-----------+------------+------------+---------+-------+--------+-------+---------+----------+-----------+---------+-------------+-----------------------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+-------+ | doc_100636852915_item | cFOoRQekA | eesti | essee | idaviru | op | kuni18 | naine | vene | vene | B | pohi | ei | doc_100636852915_item | 25 | 0 | 14 | 0 | 3 | 0 | 19 | 5 | 3 | 17 | 54 | 0 | 35 | 0 | 0 | 36 | 211 | | doc_100636852916_item | cFOoRQekA | eesti | muu | idaviru | op | kuni18 | naine | vene | vene | B | pohi | ei | doc_100636852916_item | 4 | 0 | 5 | 0 | 4 | 0 | 12 | 1 | 3 | 14 | 31 | 0 | 22 | 0 | 0 | 21 | 117 | | doc_100636852917_item | cFOoRQekA | eesti | essee | idaviru | op | kuni18 | naine | vene | vene | B | pohi | ei | doc_100636852917_item | 9 | 0 | 6 | 0 | 2 | 0 | 13 | 1 | 3 | 17 | 53 | 0 | 25 | 0 | 2 | 27 | 158 | | doc_1010138197_item | cFOoRQekA | eesti | muu | tallinn | ylop | kuni26 | naine | vene | vene | A | kesk | ei | doc_1010138197_item | 46 | 7 | 50 | 4 | 20 | 0 | 38 | 3 | 2 | 34 | 183 | 0 | 126 | 0 | 2 | 184 | 699 | | doc_1010138198_item | cFOoRQekA | eesti | muu | tallinn | ylop | kuni26 | naine | vene | vene | B | kesk | ei | doc_1010138198_item | 43 | 7 | 49 | 4 | 21 | 0 | 37 | 6 | 2 | 39 | 182 | 0 | 129 | 0 | 2 | 177 | 698 | | doc_1010138199_item | cFOoRQekA | eesti | muu | tallinn | ylop | kuni26 | naine | vene | vene | A | kesk | ei | doc_1010138199_item | 45 | 7 | 51 | 4 | 20 | 0 | 38 | 4 | 2 | 37 | 180 | 1 | 132 | 0 | 2 | 185 | 708 | | doc_1010138200_item | cFOoRQekA | eesti | muu | tallinn | ylop | kuni26 | naine | vene | vene | A | kesk | ei | doc_1010138200_item | 44 | 7 | 51 | 4 | 19 | 0 | 39 | 4 | 2 | 36 | 173 | 0 | 125 | 0 | 2 | 183 | 689 | | doc_101672866015_item | cFOoRQekA | eesti | referaat | tallinn | ylop | kuni26 | naine | vene | vene | C | kesk | jah | doc_101672866015_item | 317 | 24 | 436 | 15 | 767 | 3 | 380 | 157 | 195 | 494 | 2171 | 6 | 791 | 5 | 193 | 1380 | 7334 | | doc_104580264035_item | cFqPphvYi | eesti | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | doc_104580264035_item | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | doc_104580264036_item | cFqPphvYi | eesti | essee | tallinn | op | kuni18 | mees | vene | vene | C1 | kesk | jah | doc_104580264036_item | 18 | 1 | 24 | 6 | 3 | 0 | 38 | 5 | 3 | 60 | 69 | 0 | 92 | 0 | 0 | 55 | 374 | +-----------------------+-----------+------------+------------+---------+-------+--------+-------+---------+----------+-----------+---------+-------------+-----------------------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+-------+ SELECT tekstikeel, S, V, kokku FROM dokmeta INNER JOIN doksonaliigid ON dokmeta.kood=doksonaliigid.kood LIMIT 10; +------------+------+------+-------+ | tekstikeel | S | V | kokku | +------------+------+------+-------+ | eesti | 54 | 35 | 211 | | eesti | 31 | 22 | 117 | | eesti | 53 | 25 | 158 | | eesti | 183 | 126 | 699 | | eesti | 182 | 129 | 698 | | eesti | 180 | 132 | 708 | | eesti | 173 | 125 | 689 | | eesti | 2171 | 791 | 7334 | | eesti | 0 | 0 | 0 | | eesti | 69 | 92 | 374 | +------------+------+------+-------+ SELECT AVG(S/kokku), STD(S/kokku), AVG(V/kokku), STD(V/kokku) FROM dokmeta INNER JOIN doksonaliigid ON dokmeta.kood=doksonaliigid.kood WHERE tekstikeel='eesti'; +--------------+--------------+--------------+--------------+ | AVG(S/kokku) | STD(S/kokku) | AVG(V/kokku) | STD(V/kokku) | +--------------+--------------+--------------+--------------+ | 0.24250060 | 0.06061893 | 0.17635815 | 0.03514689 | +--------------+--------------+--------------+--------------+ ning eraldi vene, soome ja eesti emakeelega autorite eestikeelsete tekstide juures SELECT AVG(S/kokku), STD(S/kokku), AVG(V/kokku), STD(V/kokku) FROM dokmeta INNER JOIN doksonaliigid ON dokmeta.kood=doksonaliigid.kood WHERE tekstikeel='eesti' AND emakeel='vene'; +--------------+--------------+--------------+--------------+ | AVG(S/kokku) | STD(S/kokku) | AVG(V/kokku) | STD(V/kokku) | +--------------+--------------+--------------+--------------+ | 0.25265170 | 0.06570395 | 0.18079563 | 0.04375605 | +--------------+--------------+--------------+--------------+ SELECT AVG(S/kokku), STD(S/kokku), AVG(V/kokku), STD(V/kokku) FROM dokmeta INNER JOIN doksonaliigid ON dokmeta.kood=doksonaliigid.kood WHERE tekstikeel='eesti' AND emakeel='soome'; +--------------+--------------+--------------+--------------+ | AVG(S/kokku) | STD(S/kokku) | AVG(V/kokku) | STD(V/kokku) | +--------------+--------------+--------------+--------------+ | 0.25623647 | 0.06982209 | 0.18753177 | 0.03684832 | +--------------+--------------+--------------+--------------+ SELECT AVG(S/kokku), STD(S/kokku), AVG(V/kokku), STD(V/kokku) FROM dokmeta INNER JOIN doksonaliigid ON dokmeta.kood=doksonaliigid.kood WHERE tekstikeel='eesti' AND emakeel='eesti'; +--------------+--------------+--------------+--------------+ | AVG(S/kokku) | STD(S/kokku) | AVG(V/kokku) | STD(V/kokku) | +--------------+--------------+--------------+--------------+ | 0.27653070 | 0.05835948 | 0.16714859 | 0.03602365 | +--------------+--------------+--------------+--------------+ SELECT emakeel, AVG(S/kokku), STD(S/kokku), AVG(V/kokku), STD(V/kokku), COUNT(*) FROM dokmeta INNER JOIN doksonaliigid ON dokmeta.kood=doksonaliigid.kood WHERE tekstikeel='eesti' GROUP BY emakeel; +-----------+--------------+--------------+--------------+--------------+----------+ | emakeel | AVG(S/kokku) | STD(S/kokku) | AVG(V/kokku) | STD(V/kokku) | COUNT(*) | +-----------+--------------+--------------+--------------+--------------+----------+ | NULL | 0.23741259 | 0.05720508 | 0.17449214 | 0.03105557 | 8270 | | eesti | 0.27653275 | 0.05835948 | 0.16715284 | 0.03602365 | 229 | | inglise | 0.23998235 | 0.06541324 | 0.16564706 | 0.03405935 | 119 | | jidis | 0.21180000 | 0.00000000 | 0.20650000 | 0.00000000 | 1 | | katalaani | 0.16670000 | 0.00000000 | 0.16670000 | 0.00000000 | 1 | | lati | 0.20155000 | 0.01857353 | 0.20655000 | 0.05760739 | 2 | | leedu | 0.21015417 | 0.03262421 | 0.19550417 | 0.02369293 | 24 | | muud | 0.24077037 | 0.05645881 | 0.19177778 | 0.04061485 | 81 | | poola | 0.29043333 | 0.03014162 | 0.17390000 | 0.02170727 | 3 | | rootsi | 0.23330000 | 0.04898731 | 0.19635000 | 0.01416146 | 2 | | saksa | 0.23332442 | 0.05284213 | 0.17612326 | 0.03456776 | 86 | | sloveenia | 0.75000000 | 0.00000000 | 0.00000000 | 0.00000000 | 1 | | soome | 0.25623882 | 0.06982209 | 0.18753522 | 0.03684832 | 391 | | ukraina | 0.29172941 | 0.06218770 | 0.17237059 | 0.03113590 | 17 | | ungari | 0.28480000 | 0.02775859 | 0.18656667 | 0.02026525 | 6 | | valgevene | 0.23580000 | 0.00000000 | 0.22640000 | 0.00000000 | 1 | | vene | 0.25265215 | 0.06570395 | 0.18079580 | 0.04375605 | 2814 | +-----------+--------------+--------------+--------------+--------------+----------+ SELECT emakeel, AVG(S/kokku), STD(S/kokku), AVG(V/kokku), STD(V/kokku), COUNT(*) FROM dokmeta INNER JOIN doksonaliigid ON dokmeta.kood=doksonaliigid.kood WHERE tekstikeel='eesti' GROUP BY emakeel HAVING COUNT(*)>100; +---------+--------------+--------------+--------------+--------------+----------+ | emakeel | AVG(S/kokku) | STD(S/kokku) | AVG(V/kokku) | STD(V/kokku) | COUNT(*) | +---------+--------------+--------------+--------------+--------------+----------+ | NULL | 0.23741259 | 0.05720508 | 0.17449214 | 0.03105557 | 8270 | | eesti | 0.27653275 | 0.05835948 | 0.16715284 | 0.03602365 | 229 | | inglise | 0.23998235 | 0.06541324 | 0.16564706 | 0.03405935 | 119 | | soome | 0.25623882 | 0.06982209 | 0.18753522 | 0.03684832 | 391 | | vene | 0.25265215 | 0.06570395 | 0.18079580 | 0.04375605 | 2814 | +---------+--------------+--------------+--------------+--------------+----------+ Leidke samad tabelid eraldi A, B ja C keeletasemete juures SELECT emakeel, AVG(S/kokku), STD(S/kokku), AVG(V/kokku), STD(V/kokku), COUNT(*) FROM dokmeta INNER JOIN doksonaliigid ON dokmeta.kood=doksonaliigid.kood WHERE tekstikeel='eesti' AND keeletase LIKE 'A%' GROUP BY emakeel; +-----------+--------------+--------------+--------------+--------------+----------+ | emakeel | AVG(S/kokku) | STD(S/kokku) | AVG(V/kokku) | STD(V/kokku) | COUNT(*) | +-----------+--------------+--------------+--------------+--------------+----------+ | NULL | 0.25095887 | 0.02191059 | 0.16850806 | 0.01299816 | 124 | | inglise | 0.24171858 | 0.06585335 | 0.16563628 | 0.03439077 | 113 | | lati | 0.20155000 | 0.01857353 | 0.20655000 | 0.05760739 | 2 | | muud | 0.24412162 | 0.05519744 | 0.19055135 | 0.04026086 | 74 | | poola | 0.27445000 | 0.02442529 | 0.18685000 | 0.01426519 | 2 | | saksa | 0.21544483 | 0.05959829 | 0.18739310 | 0.04331508 | 29 | | sloveenia | 0.75000000 | 0.00000000 | 0.00000000 | 0.00000000 | 1 | | soome | 0.24724891 | 0.06546727 | 0.19219674 | 0.03297826 | 92 | | ukraina | 0.30374286 | 0.05591549 | 0.16780714 | 0.03207085 | 14 | | ungari | 0.27454000 | 0.01713120 | 0.19082000 | 0.01960580 | 5 | | valgevene | 0.23580000 | 0.00000000 | 0.22640000 | 0.00000000 | 1 | | vene | 0.24344355 | 0.07211236 | 0.18257199 | 0.04692246 | 1139 | +-----------+--------------+--------------+--------------+--------------+----------+ SELECT emakeel, AVG(S/kokku), STD(S/kokku), AVG(V/kokku), STD(V/kokku), COUNT(*) FROM dokmeta INNER JOIN doksonaliigid ON dokmeta.kood=doksonaliigid.kood WHERE tekstikeel='eesti' AND keeletase LIKE 'B%' GROUP BY emakeel; +---------+--------------+--------------+--------------+--------------+----------+ | emakeel | AVG(S/kokku) | STD(S/kokku) | AVG(V/kokku) | STD(V/kokku) | COUNT(*) | +---------+--------------+--------------+--------------+--------------+----------+ | NULL | 0.24193931 | 0.03218034 | 0.18247630 | 0.01471219 | 174 | | inglise | 0.22732500 | 0.04059644 | 0.18407500 | 0.00928794 | 4 | | leedu | 0.21258261 | 0.03112877 | 0.19607391 | 0.02404161 | 23 | | muud | 0.20534286 | 0.05747251 | 0.20474286 | 0.04205001 | 7 | | poola | 0.32240000 | 0.00000000 | 0.14800000 | 0.00000000 | 1 | | saksa | 0.22817297 | 0.03866006 | 0.16913514 | 0.02674426 | 37 | | soome | 0.24724701 | 0.06336929 | 0.18759530 | 0.03736812 | 234 | | ukraina | 0.23566667 | 0.05923022 | 0.19366667 | 0.01194973 | 3 | | ungari | 0.33610000 | 0.00000000 | 0.16530000 | 0.00000000 | 1 | | vene | 0.25530910 | 0.06122979 | 0.18336787 | 0.04187729 | 1310 | +---------+--------------+--------------+--------------+--------------+----------+ SELECT LEFT(keeletase, 1), AVG(S/kokku), STD(S/kokku), AVG(V/kokku), STD(V/kokku), COUNT(*) FROM dokmeta INNER JOIN doksonaliigid ON dokmeta.kood=doksonaliigid.kood WHERE tekstikeel='eesti' GROUP BY LEFT(keeletase, 1); +--------------------+--------------+--------------+--------------+--------------+----------+ | LEFT(keeletase, 1) | AVG(S/kokku) | STD(S/kokku) | AVG(V/kokku) | STD(V/kokku) | COUNT(*) | +--------------------+--------------+--------------+--------------+--------------+----------+ | NULL | 0.23727217 | 0.05859148 | 0.17457430 | 0.03164863 | 7984 | | A | 0.24457268 | 0.06895988 | 0.18113741 | 0.04384087 | 1596 | | B | 0.25164486 | 0.05914116 | 0.18377598 | 0.03904958 | 1794 | | C | 0.27389985 | 0.05338174 | 0.16597979 | 0.03465609 | 674 | +--------------------+--------------+--------------+--------------+--------------+----------+ SELECT emakeel, LEFT(keeletase, 1), AVG(S/kokku), STD(S/kokku), AVG(V/kokku), STD(V/kokku), COUNT(*) FROM dokmeta INNER JOIN doksonaliigid ON dokmeta.kood=doksonaliigid.kood WHERE tekstikeel='eesti' GROUP BY emakeel, LEFT(keeletase, 1); +-----------+--------------------+--------------+--------------+--------------+--------------+----------+ | emakeel | LEFT(keeletase, 1) | AVG(S/kokku) | STD(S/kokku) | AVG(V/kokku) | STD(V/kokku) | COUNT(*) | +-----------+--------------------+--------------+--------------+--------------+--------------+----------+ | NULL | NULL | 0.23673366 | 0.05799101 | 0.17453534 | 0.03152549 | 7916 | | NULL | A | 0.25095887 | 0.02191059 | 0.16850806 | 0.01299816 | 124 | | NULL | B | 0.24193931 | 0.03218034 | 0.18247630 | 0.01471219 | 174 | | NULL | C | 0.28725893 | 0.02891738 | 0.15710714 | 0.01984625 | 56 | | eesti | NULL | 0.31140000 | 0.11911030 | 0.10577500 | 0.04303602 | 4 | | eesti | C | 0.27591289 | 0.05649909 | 0.16824400 | 0.03492363 | 225 | | inglise | NULL | 0.14520000 | 0.00000000 | 0.12370000 | 0.00000000 | 1 | | inglise | A | 0.24171858 | 0.06585335 | 0.16563628 | 0.03439077 | 113 | | inglise | B | 0.22732500 | 0.04059644 | 0.18407500 | 0.00928794 | 4 | | inglise | C | 0.18920000 | 0.00000000 | 0.13510000 | 0.00000000 | 1 | | jidis | C | 0.21180000 | 0.00000000 | 0.20650000 | 0.00000000 | 1 | | katalaani | C | 0.16670000 | 0.00000000 | 0.16670000 | 0.00000000 | 1 | | lati | A | 0.20155000 | 0.01857353 | 0.20655000 | 0.05760739 | 2 | | leedu | B | 0.21258261 | 0.03112877 | 0.19607391 | 0.02404161 | 23 | | leedu | C | 0.15430000 | 0.00000000 | 0.18240000 | 0.00000000 | 1 | | muud | A | 0.24412162 | 0.05519744 | 0.19055135 | 0.04026086 | 74 | | muud | B | 0.20534286 | 0.05747251 | 0.20474286 | 0.04205001 | 7 | | poola | A | 0.27445000 | 0.02442529 | 0.18685000 | 0.01426519 | 2 | | poola | B | 0.32240000 | 0.00000000 | 0.14800000 | 0.00000000 | 1 | | rootsi | NULL | 0.18430000 | 0.00000000 | 0.18220000 | 0.00000000 | 1 | | rootsi | C | 0.28230000 | 0.00000000 | 0.21050000 | 0.00000000 | 1 | | saksa | NULL | 0.26878000 | 0.04817787 | 0.17271000 | 0.02843155 | 20 | | saksa | A | 0.21544483 | 0.05959829 | 0.18739310 | 0.04331508 | 29 | | saksa | B | 0.22817297 | 0.03866006 | 0.16913514 | 0.02674426 | 37 | | sloveenia | A | 0.75000000 | 0.00000000 | 0.00000000 | 0.00000000 | 1 | | soome | NULL | 0.32722105 | 0.08854135 | 0.19308947 | 0.04132459 | 40 | | soome | A | 0.24724891 | 0.06546727 | 0.19219674 | 0.03297826 | 92 | | soome | B | 0.24724701 | 0.06336929 | 0.18759530 | 0.03736812 | 234 | | soome | C | 0.26559200 | 0.04221462 | 0.16137600 | 0.02545833 | 25 | | ukraina | A | 0.30374286 | 0.05591549 | 0.16780714 | 0.03207085 | 14 | | ukraina | B | 0.23566667 | 0.05923022 | 0.19366667 | 0.01194973 | 3 | | ungari | A | 0.27454000 | 0.01713120 | 0.19082000 | 0.01960580 | 5 | | ungari | B | 0.33610000 | 0.00000000 | 0.16530000 | 0.00000000 | 1 | | valgevene | A | 0.23580000 | 0.00000000 | 0.22640000 | 0.00000000 | 1 | | vene | NULL | 0.20510000 | 0.00000000 | 0.12820000 | 0.00000000 | 2 | | vene | A | 0.24344355 | 0.07211236 | 0.18257199 | 0.04692246 | 1139 | | vene | B | 0.25530910 | 0.06122979 | 0.18336787 | 0.04187729 | 1310 | | vene | C | 0.27216464 | 0.05405010 | 0.16606602 | 0.03659073 | 363 | +-----------+--------------------+--------------+--------------+--------------+--------------+----------+ Tähtede andmebaas Koosta tabelid: t_tahetyybid(tyyp) t_tahed(taht, tyyp) t_sonad(sona, tahtedearv) t_taht_sonas(id, sona, taht, koht) t_tahepaar_sonad(id, sona, tahepaar, koht) Seo võõrvõtmetega kokku Lisa andmed näitlause tarbeks "Kui Arno isaga koolimajja jõudis, olid tunnid juba alanud".