SELECT COUNT(*) FROM dokmeta COUNT(*) 12724 SELECT keeletase, COUNT(*) FROM dokmeta GROUP BY keeletase; SELECT keeletase, COUNT(*) AS kogus FROM dokmeta GROUP BY keeletase ORDER BY kogus DESC; SELECT keeletase, COUNT(*)/(SELECT COUNT(*) FROM dokmeta) FROM dokmeta GROUP BY keeletase; SELECT keeletase, ROUND(100*COUNT(*)/(SELECT COUNT(*) FROM dokmeta), 1) AS protsent FROM dokmeta GROUP BY keeletase; -- Kuvage, mitu protsenti tekstide autoritest on naised, mitu mehed SELECT sugu, ROUND(100*COUNT(*)/(SELECT COUNT(*) FROM dokmeta), 1) AS protsent FROM dokmeta GROUP BY sugu; sugu protsent NULL 25.1 naine 55.5 mees 19.4 Autorite hulgas on naisi 55,5 protsenti Autorite hulgas on naisi vähemalt 55,5 protsenti Autorite hulgas on mehi kuni 44 protsenti Autorite hulgas võib olla naisi kuni 80 protsenti 55,5/(55,5+19,4)=0,73 Teadaoleva sooga autorite hulgas on naisi 73 protsenti Teadaoleva sooga autorite hulgas on naisi 2,8 korda rohkem kui mehi Teadaoleva sooga autorite hulgas on naisi 180% rohkem kui mehi SELECT keeletase, COUNT(*) FROM dokmeta WHERE keeletase IS NOT NULL GROUP BY keeletase; -- Näidake naiste ja meeste protsent arvestamata teadmata sooga isikuid SELECT keeletase, COUNT(*)/(SELECT COUNT(*) FROM dokmeta WHERE keeletase IS NOT NULL) AS suhtarv FROM dokmeta WHERE keeletase IS NOT NULL GROUP BY keeletase; SELECT keeletase, COUNT(*) FROM dokmeta WHERE emakeel='vene' GROUP BY keeletase; SELECT keeletase, COUNT(*) FROM dokmeta WHERE emakeel='inglise' GROUP BY keeletase; SELECT tasemenimi FROM keeletasemed; SELECT tasemenimi, kogus FROM keeletasemed LEFT JOIN (SELECT keeletase, COUNT(*) AS kogus FROM dokmeta WHERE emakeel='inglise' GROUP BY keeletase) AS vahetabel ON vahetabel.keeletase=keeletasemed.tasemenimi; SELECT tasemenimi, IF(ISNULL(kogus), 0, kogus) AS kogus FROM keeletasemed LEFT JOIN (SELECT keeletase, COUNT(*) AS kogus FROM dokmeta WHERE emakeel='inglise' GROUP BY keeletase) AS vahetabel ON vahetabel.keeletase=keeletasemed.tasemenimi; SELECT korpus FROM dokmeta WHERE emakeel='soome'; SELECT DISTINCT korpus FROM dokmeta WHERE emakeel='soome'; SELECT DISTINCT korpus FROM dokmeta WHERE emakeel='vene'; SELECT DISTINCT korpus FROM dokmeta WHERE emakeel='vene'; SELECT kood, korpus FROM dokmeta WHERE emakeel='vene'; SELECT kood, korpusekood, korpusenimi FROM dokmeta INNER JOIN korpusenimed ON dokmeta.korpus=korpusenimed.korpusekood WHERE emakeel='vene'; Tutvu tabelitega dokmeta, ngram2, sonaliikide_lyhendid * Leia, mitu korda millist sõnaliikide paari esineb dokumendis koodiga doc_100636852915_item Abilisteks COUNT(*) ja GROUP BY SELECT ngram2, COUNT(*) FROM ngram2 WHERE tekstikood='doc_100636852915_item' GROUP BY ngram2; SELECT ngram2, COUNT(*) FROM ngram2 WHERE tekstikood='doc_100636852915_item' GROUP BY ngram2 ORDER BY COUNT(*) DESC LIMIT 10; +--------+----------+ | ngram2 | COUNT(*) | +--------+----------+ | SZ | 24 | | AS | 20 | | ZJ | 12 | | SV | 9 | | PV | 9 | | VD | 8 | | ZD | 7 | | JS | 7 | | VA | 7 | | SS | 6 | +--------+----------+ * Leia dokumentide koodid, kus autori emakeel on soome ning teksti keel on eesti SELECT kood FROM dokmeta WHERE emakeel='soome' AND tekstikeel='eesti'; +-----------------------+ | kood | +-----------------------+ | doc_104580264060_item | | doc_104580264061_item | | doc_104580264062_item | | doc_104580264063_item | | doc_104580264064_item | | doc_104580264065_item | * Leia, mitu korda millist sõnaliikide paari esineb soome emakeelega autorite eestikeelsetes tekstides Järjesta need sageduse järjekorras SELECT ngram2 FROM ngram2 WHERE tekstikood IN (SELECT kood FROM dokmeta WHERE emakeel='soome' AND tekstikeel='eesti'); | ZV | | VD | | DZ | +--------+ 120630 rows in set (0.20 sec) SELECT ngram2 FROM ngram2 INNER JOIN dokmeta ON dokmeta.kood=ngram2.tekstikood WHERE emakeel='soome' AND tekstikeel='eesti'; SELECT ngram2, COUNT(*) AS kogus FROM ngram2 INNER JOIN dokmeta ON dokmeta.kood=ngram2.tekstikood WHERE emakeel='soome' AND tekstikeel='eesti' GROUP BY ngram2 ORDER BY kogus desc LIMIT 10; +--------+-------+ | ngram2 | kogus | +--------+-------+ | SZ | 9100 | | SV | 6043 | | SS | 5424 | | PV | 5220 | | VS | 4693 | | AS | 4074 | | ZP | 4012 | | VD | 3927 | | PS | 3613 | | VA | 2897 | +--------+-------+ Tabelid dokmeta, dokarvud Kuvage soomekeelsete autorite tekstide koodid ja tähtede arvud SELECT dokmeta.kood, tahti FROM dokmeta INNER JOIN dokarvud ON dokmeta.kood=dokarvud.kood WHERE emakeel='soome'; | doc_919500587388_item | 1184 | | doc_980560860248_item | 1164 | | doc_980560860251_item | 470 | | doc_980560860252_item | 880 | +-----------------------+-------+ Leidke soome autoriga eestikeelsete tekstide juures keskmine tähtede, sõnade ja lausete arv. SELECT AVG(tahti), AVG(lauseid) FROM dokmeta INNER JOIN dokarvud ON dokmeta.kood=dokarvud.kood WHERE emakeel='soome'; +------------+--------------+ | AVG(tahti) | AVG(lauseid) | +------------+--------------+ | 1864.3504 | 27.7391 | +------------+--------------+ SELECT emakeel, AVG(tahti), STD(tahti), AVG(lauseid), STD(lauseid) FROM dokmeta INNER JOIN dokarvud ON dokmeta.kood=dokarvud.kood GROUP BY emakeel ORDER BY AVG(tahti); Leidke sama vene autoriga tekstide juures. MariaDB [if17_keelekorpus]> SELECT emakeel, AVG(tahti), STD(tahti), AVG(lauseid), STD(lauseid) FROM dokmeta -> INNER JOIN dokarvud ON dokmeta.kood=dokarvud.kood -> GROUP BY emakeel -> ORDER BY AVG(tahti); +-----------+------------+------------+--------------+--------------+ | emakeel | AVG(tahti) | STD(tahti) | AVG(lauseid) | STD(lauseid) | +-----------+------------+------------+--------------+--------------+ | katalaani | 18.0000 | 0.0000 | 2.0000 | 0.0000 | | sloveenia | 20.0000 | 0.0000 | 0.0000 | 0.0000 | | valgevene | 561.0000 | 0.0000 | 12.0000 | 0.0000 | | ukraina | 751.7059 | 489.6904 | 17.2353 | 9.5396 | | muud | 809.4691 | 482.3466 | 17.0247 | 8.8930 | | inglise | 921.8655 | 477.1065 | 19.2269 | 10.3432 | | saksa | 929.8488 | 688.1679 | 18.8372 | 10.8326 | | poola | 1167.3333 | 361.1189 | 21.3333 | 4.9889 | | ungari | 1249.3333 | 596.6726 | 26.0000 | 14.1185 | | lati | 1456.0000 | 187.0000 | 27.5000 | 4.5000 | | NULL | 1513.2352 | 1369.6694 | 36.0905 | 28.4056 | | soome | 1864.3504 | 2344.0393 | 27.7391 | 29.9245 | | leedu | 1977.2917 | 1020.1942 | 30.2083 | 10.4880 | | rootsi | 2088.0000 | 749.0000 | 31.5000 | 11.5000 | | vene | 2567.2057 | 3571.3061 | 33.9026 | 55.5868 | | eesti | 4267.9738 | 3566.2095 | 40.4498 | 35.8724 | | jidis | 10373.0000 | 0.0000 | 152.0000 | 0.0000 | +-----------+------------+------------+--------------+--------------+ SELECT emakeel, AVG(tahti), STD(tahti), COUNT(*) FROM dokmeta INNER JOIN dokarvud ON dokmeta.kood=dokarvud.kood GROUP BY emakeel ORDER BY AVG(tahti) DESC; +-----------+------------+------------+----------+ | emakeel | AVG(tahti) | STD(tahti) | COUNT(*) | +-----------+------------+------------+----------+ | jidis | 10373.0000 | 0.0000 | 1 | | eesti | 4267.9738 | 3566.2095 | 229 | | vene | 2567.2057 | 3571.3061 | 3184 | | rootsi | 2088.0000 | 749.0000 | 2 | | leedu | 1977.2917 | 1020.1942 | 24 | | soome | 1864.3504 | 2344.0393 | 391 | | NULL | 1513.2352 | 1369.6694 | 8576 | | lati | 1456.0000 | 187.0000 | 2 | | ungari | 1249.3333 | 596.6726 | 6 | | poola | 1167.3333 | 361.1189 | 3 | | saksa | 929.8488 | 688.1679 | 86 | | inglise | 921.8655 | 477.1065 | 119 | | muud | 809.4691 | 482.3466 | 81 | | ukraina | 751.7059 | 489.6904 | 17 | | valgevene | 561.0000 | 0.0000 | 1 | | sloveenia | 20.0000 | 0.0000 | 1 | | katalaani | 18.0000 | 0.0000 | 1 | +-----------+------------+------------+----------+