Kopeerige http://www.tlu.ee/~jaagup/andmed/keel/korpus/korpusenimed.txt omale greeny kataloogi Logige MySQL-i sisse Looge tabel CREATE TABLE korpusenimed( korpusekood VARCHAR(20) NOT NULL PRIMARY KEY, korpusenimi VARCHAR(50) ); LOAD DATA LOCAL INFILE '/home/jaagup/public_html/2017/ab/korpusenimed.txt' INTO TABLE korpusenimed FIELDS TERMINATED BY ',' IGNORE 1 LINES; DELETE FROM korpusenimed; LOAD DATA LOCAL INFILE '/home/jaagup/public_html/2017/ab/korpusenimed.txt' INTO TABLE korpusenimed CHARACTER SET UTF8 FIELDS TERMINATED BY ',' IGNORE 1 LINES; LOAD DATA LOCAL INFILE '/home/jaagup/public_html/2017/ab/korpusenimed.txt' INTO TABLE korpusenimed CHARACTER SET UTF8 FIELDS TERMINATED BY ',' LINES TERMINATED BY '\r\n' IGNORE 1 LINES; mysqladmin -uif17 -pif17 create if17_keelekorpus kood,korpus,tekstikeel,tekstityyp,elukoht,taust,vanus,sugu,emakeel,kodukeel,keeletase,haridus,abivahendid CREATE TABLE keeled( keelenimi VARCHAR(20) NOT NULL PRIMARY KEY ); CREATE TABLE keeletasemed( tasemenimi VARCHAR(20) NOT NULL PRIMARY KEY, mitmeastmeline ENUM('3', '6') ); CREATE TABLE haridustasemed( haridustase VARCHAR(20) NOT NULL PRIMARY KEY ); CREATE TABLE vanusetasemed( vanusetase VARCHAR(20) NOT NULL PRIMARY KEY ); CREATE TABLE elukohad( elukohakood VARCHAR(20) NOT NULL PRIMARY KEY ); CREATE TABLE taustad( taustakood VARCHAR(20) NOT NULL PRIMARY KEY ); CREATE TABLE tekstityybid( tyybikood VARCHAR(20) NOT NULL PRIMARY KEY ); CREATE TABLE dokmeta( kood VARCHAR(50) NOT NULL PRIMARY KEY, korpus VARCHAR(20) NOT NULL, tekstikeel VARCHAR(20), tekstityyp VARCHAR(20), elukoht VARCHAR(20), taust VARCHAR(20), vanus VARCHAR(20), sugu ENUM('naine', 'mees'), emakeel VARCHAR(20), kodukeel VARCHAR(20), keeletase VARCHAR(20), haridus VARCHAR(20), abivahendid ENUM('jah', 'ei'), FOREIGN KEY(korpus) REFERENCES korpusenimed(korpusekood), FOREIGN KEY(tekstikeel) REFERENCES keeled(keelenimi), FOREIGN KEY(tekstityyp) REFERENCES tekstityybid(tyybikood), FOREIGN KEY(elukoht) REFERENCES elukohad(elukohakood), FOREIGN KEY(taust) REFERENCES taustad(taustakood), FOREIGN KEY(vanus) REFERENCES vanusetasemed(vanusetase), FOREIGN KEY(emakeel) REFERENCES keeled(keelenimi), FOREIGN KEY(kodukeel) REFERENCES keeled(keelenimi), FOREIGN KEY(keeletase) REFERENCES keeletasemed(tasemenimi), FOREIGN KEY(haridus) REFERENCES haridustasemed(haridustase) ); DROP TABLE dokmeta; vene soome inglise [6] saksa eesti leedu rootsi muud [11] poola ukraina lati sloveenia valgevene [16] ungari katalaani jidis INSERT INTO keeled VALUES('eesti'); INSERT INTO keeled VALUES('vene'); INSERT INTO keeled VALUES('soome'); INSERT INTO keeled VALUES('inglise'); INSERT INTO keeled VALUES('saksa'); INSERT INTO keeled VALUES('leedu'); INSERT INTO keeled VALUES('rootsi'); INSERT INTO keeled VALUES('muud'); INSERT INTO keeled VALUES('poola'); INSERT INTO keeled VALUES('ukraina'); INSERT INTO keeled VALUES('lati'); INSERT INTO keeled VALUES('sloveenia'); INSERT INTO keeled VALUES('valgevene'); INSERT INTO keeled VALUES('ungari'); INSERT INTO keeled VALUES('katalaani'); INSERT INTO keeled VALUES('jidis'); INSERT INTO tekstityybid VALUES ('essee'); INSERT INTO tekstityybid VALUES ('muu'); INSERT INTO tekstityybid VALUES ('referaat'); INSERT INTO tekstityybid VALUES ('amtkiri'); INSERT INTO tekstityybid VALUES ('harjutus'); INSERT INTO tekstityybid VALUES ('batoo'); INSERT INTO tekstityybid VALUES ('vastkys'); INSERT INTO tekstityybid VALUES ('isikiri'); INSERT INTO tekstityybid VALUES ('analyys'); INSERT INTO tekstityybid VALUES ('tolge'); INSERT INTO tekstityybid VALUES ('ymberjutustus'); INSERT INTO tekstityybid VALUES ('arvamuslugu'); INSERT INTO tekstityybid VALUES ('trilumunud'); SELECT * FROM if17_keelekorpus.korpusenimed; | korpusekood | korpusenimi | +-------------+--------------------------------+ | cFOoRQekA | EVKK | | cFqPphvYi | Eesti keele olümpiaadi tööd | | cgSRJPKTr | Vene keel kui emakeel | | clWmOIrLa | REKKi kogud | | cwUprXCTL | Eesti teaduskeel | | cwUSEqQLt | Akadeemiline õppijakeel | | cYDRkpymb | K1 referentskorpus | | cZjHWUPtD | Vene keel kui võõrkeel | +-------------+--------------------------------+ MariaDB [if17_jaagup_4]> SELECT korpusenimi FROM if17_keelekorpus.korpusenimed; +--------------------------------+ | korpusenimi | +--------------------------------+ | EVKK | | Eesti keele olümpiaadi tööd | | Vene keel kui emakeel | | REKKi kogud | | Eesti teaduskeel | | Akadeemiline õppijakeel | | K1 referentskorpus | | Vene keel kui võõrkeel | +--------------------------------+ SELECT korpusenimed.korpusenimi FROM if17_keelekorpus.korpusenimed; SELECT if17_keelekorpus.korpusenimed.korpusenimi FROM if17_keelekorpus.korpusenimed; MariaDB [if17_jaagup_4]> use if17_keelekorpus; Reading table information for completion of table and column names You can turn off this feature to get a quicker startup with -A Database changed MariaDB [if17_keelekorpus]> show tables; +----------------------------+ | Tables_in_if17_keelekorpus | +----------------------------+ | dokmeta | | elukohad | | haridustasemed | | keeled | | keeletasemed | | korpusenimed | | taustad | | tekstityybid | | vanusetasemed | +----------------------------+ SELECT * FROM dokmeta WHERE emakeel='soome'; | doc_980560860251_item | cFOoRQekA | eesti | essee | NULL | ylop | kuni40 | naine | soome | inglise | A | korg | jah | | doc_980560860252_item | cFOoRQekA | eesti | harjutus | NULL | ylop | kuni40 | naine | soome | inglise | A | korg | jah | +-----------------------+-----------+------------+------------+---------+---------------+--------+-------+---------+----------+-----------+---------+-------------+ 391 rows in set (0.01 sec) SELECT COUNT(*) FROM dokmeta WHERE emakeel='soome'; +----------+ | COUNT(*) | +----------+ | 391 | +----------+ SELECT * FROM dokmeta INNER JOIN korpusenimed ON dokmeta.korpus=korpusenimed.korpusekood LIMIT 10; SELECT korpusenimi, tekstityyp FROM dokmeta INNER JOIN korpusenimed ON dokmeta.korpus=korpusenimed.korpusekood WHERE emakeel='soome' AND keeletase='B1'; | EVKK | essee | | EVKK | essee | | EVKK | essee | | EVKK | essee | +-------------+------------+ 114 rows in set (0.00 sec) * Kuvage, mitme teksti juures on emakeel soome ning keeletase B1 SELECT COUNT(*) FROM dokmeta WHERE emakeel='soome' AND keeletase='B1'; +----------+ | COUNT(*) | +----------+ | 114 | +----------+ * Kuvage soome emakeelega inimeste juures, mitu korda milline keeletase esineb SELECT keeletase, COUNT(*) FROM dokmeta WHERE emakeel='soome' GROUP BY keeletase; +-----------+----------+ | keeletase | COUNT(*) | +-----------+----------+ | NULL | 40 | | A | 40 | | A1 | 1 | | A2 | 51 | | B | 45 | | B1 | 114 | | B2 | 75 | | C | 4 | | C1 | 20 | | C2 | 1 | +-----------+----------+ 10 rows in set (0.01 sec) * Näidake, mitu protsenti vastava tasemega tekstid moodustavad soome emakeelega tekstide üldarvust SELECT keeletase, COUNT(*)/(SELECT COUNT(*) FROM dokmeta WHERE emakeel='soome') AS suhe FROM dokmeta WHERE emakeel='soome' GROUP BY keeletase; +-----------+--------+ | keeletase | suhe | +-----------+--------+ | NULL | 0.1023 | | A | 0.1023 | | A1 | 0.0026 | | A2 | 0.1304 | | B | 0.1151 | | B1 | 0.2916 | | B2 | 0.1918 | | C | 0.0102 | | C1 | 0.0512 | | C2 | 0.0026 | +-----------+--------+ 10 rows in set (0.00 sec) SELECT keeletase, ROUND(100* COUNT(*)/(SELECT COUNT(*) FROM dokmeta WHERE emakeel='soome'), 1) AS protsent FROM dokmeta WHERE emakeel='soome' GROUP BY keeletase; +-----------+----------+ | keeletase | protsent | +-----------+----------+ | NULL | 10.2 | | A | 10.2 | | A1 | 0.3 | | A2 | 13.0 | | B | 11.5 | | B1 | 29.2 | | B2 | 19.2 | | C | 1.0 | | C1 | 5.1 | | C2 | 0.3 | +-----------+----------+ 10 rows in set (0.00 sec) SELECT IF(ISNULL(keeletase),'määramata', keeletase) AS keeletase, ROUND(100*COUNT(*)/ (SELECT COUNT(*) FROM dokmeta WHERE emakeel='soome'), 1) AS protsent FROM dokmeta WHERE emakeel='soome' GROUP BY dokmeta.keeletase; +-------------+----------+ | keeletase | protsent | +-------------+----------+ | määramata | 10.2 | | A | 10.2 | | A1 | 0.3 | | A2 | 13.0 | | B | 11.5 | | B1 | 29.2 | | B2 | 19.2 | | C | 1.0 | | C1 | 5.1 | | C2 | 0.3 | +-------------+----------+ SELECT IF(ISNULL(keeletase),'määramata', keeletase) AS keeletase, ROUND(100*COUNT(*)/ (SELECT COUNT(*) FROM dokmeta WHERE emakeel='vene'), 1) AS protsent FROM dokmeta WHERE emakeel='vene' GROUP BY dokmeta.keeletase; +-------------+----------+ | keeletase | protsent | +-------------+----------+ | määramata | 11.7 | | A | 34.8 | | A2 | 1.0 | | B | 32.6 | | B1 | 5.7 | | B2 | 2.8 | | C | 9.6 | | C1 | 1.8 | +-------------+----------+ SELECT keeletase, COUNT(*) FROM dokmeta WHERE emakeel='vene' GROUP BY keeletase; +-----------+----------+ | keeletase | COUNT(*) | +-----------+----------+ | NULL | 372 | | A | 1107 | | A2 | 32 | | B | 1038 | | B1 | 183 | | B2 | 89 | | C | 306 | | C1 | 57 | +-----------+----------+ 8 rows in set (0.01 sec) SELECT * FROM keeletasemed LEFT JOIN (SELECT keeletase, COUNT(*) FROM dokmeta WHERE emakeel='vene' GROUP BY keeletase) AS vahetabel ON keeletasemed.tasemenimi=vahetabel.keeletase; +------------+----------------+-----------+----------+ | tasemenimi | mitmeastmeline | keeletase | COUNT(*) | +------------+----------------+-----------+----------+ | A | 3 | A | 1107 | | A1 | 6 | NULL | NULL | | A2 | 6 | A2 | 32 | | B | 3 | B | 1038 | | B1 | 6 | B1 | 183 | | B2 | 6 | B2 | 89 | | C | 3 | C | 306 | | C1 | 6 | C1 | 57 | | C2 | 6 | NULL | NULL | +------------+----------------+-----------+----------+ * Kuvage vaid tasemenimi ja kogus, koguse juures asendage NULL 0-ga SELECT tasemenimi, venekogus FROM keeletasemed LEFT JOIN (SELECT keeletase, COUNT(*) AS venekogus FROM dokmeta WHERE emakeel='vene' GROUP BY keeletase) AS venetabel ON keeletasemed.tasemenimi=venetabel.keeletase; +------------+-----------+ | tasemenimi | venekogus | +------------+-----------+ | A | 1107 | | A1 | NULL | | A2 | 32 | | B | 1038 | | B1 | 183 | | B2 | 89 | | C | 306 | | C1 | 57 | | C2 | NULL | +------------+-----------+ SELECT tasemenimi, COALESCE(venekogus, 0) AS vene, COALESCE(soomekogus, 0) AS soome FROM keeletasemed LEFT JOIN (SELECT keeletase, COUNT(*) AS venekogus FROM dokmeta WHERE emakeel='vene' GROUP BY keeletase) AS venetabel ON keeletasemed.tasemenimi=venetabel.keeletase LEFT JOIN (SELECT keeletase, COUNT(*) AS soomekogus FROM dokmeta WHERE emakeel='soome' GROUP BY keeletase) AS soometabel ON keeletasemed.tasemenimi=soometabel.keeletase; +------------+------+-------+ | tasemenimi | vene | soome | +------------+------+-------+ | A | 1107 | 40 | | A1 | 0 | 1 | | A2 | 32 | 51 | | B | 1038 | 45 | | B1 | 183 | 114 | | B2 | 89 | 75 | | C | 306 | 4 | | C1 | 57 | 20 | | C2 | 0 | 1 | +------------+------+-------+