# Train a Word2Vec model on a lemmatised corpus, cluster the resulting word
# vectors with k-means, write the clusters to a text file, and finally build
# a Ward hierarchical-clustering linkage from a previously saved model.
#
# Imports are collected here so that every name is defined before first use
# (originally Word2Vec was imported only after it had already been called).
from gensim.models import Word2Vec
from sklearn.cluster import KMeans
from scipy.cluster.hierarchy import dendrogram, linkage

# --- 1. Train and save the model -------------------------------------------
koht = "/content/drive/MyDrive/oma/2023/ERR data failid /andmed/lahti/lemmadpikk.txt"

# Read the corpus; the context manager closes the file handle promptly.
with open(koht, encoding="utf-8") as sisendfail:
    tekst = sisendfail.readlines()

# One input line = one sentence, tokens separated by whitespace.
sisend = [rida.strip().split() for rida in tekst]

# min_count=50: words occurring fewer than 50 times are left out of the vocabulary.
mudel = Word2Vec(sisend, min_count=50)
mudel.save("/content/drive/MyDrive/kursused/2023/kvantitatiivne_digihumanitaaria/proov_votmesonad/lemmad1.model")

# Notebook-style inspection: nearest neighbours of "maja" (result is only
# displayed when this is the last expression of a notebook cell).
mudel.wv.most_similar(positive="maja")

# --- 2. K-means clustering of the word vectors ------------------------------
klastreid = 1000
kmeans = KMeans(n_clusters=klastreid, n_init="auto")
kmeans.fit(mudel.wv.get_normed_vectors())

# Notebook-style inspection of the cluster label assigned to each word.
kmeans.labels_

# hoidla[i] collects the words assigned to cluster i.
hoidla = [[] for nr in range(klastreid)]
kataloog = "/content/drive/MyDrive/kursused/2023/kvantitatiivne_digihumanitaaria/proov_votmesonad/vastused/"

# index_to_key is in the same order as the vectors passed to fit(), so the
# labels line up with the words. The loop variable is deliberately NOT named
# `koht`, which would overwrite the corpus path defined above.
for sona, klastri_nr in zip(mudel.wv.index_to_key, kmeans.labels_):
    hoidla[klastri_nr].append(sona)

# Output format per cluster: cluster number, space-separated words, blank line.
# The `with` block guarantees the file is closed even if a write fails.
with open(kataloog + "jaotus" + str(klastreid) + ".txt", "w", encoding="utf-8") as f:
    for nr in range(klastreid):
        print(nr, file=f)
        print(" ".join(hoidla[nr]), file=f)
        print(file=f)

# --- 3. Hierarchical clustering on a previously saved model -----------------
# NOTE(review): this loads the model straight from an HTTP(S) URL; whether that
# works depends on the installed gensim/smart_open and on how the model was
# saved (side-car array files may not be reachable) - confirm, or download
# the file first and load it from a local path.
mudel = Word2Vec.load("https://minitorn.tlu.ee/~jaagup/oma/too/23/05/word2vec/lemmad1.model")

# Ward linkage over the unit-normalised vectors of the whole vocabulary.
# NOTE(review): presumably memory-heavy for a large vocabulary - verify it fits.
Z = linkage(mudel.wv.get_normed_vectors(), 'ward')