"""Lay out word-group centroids on a plane with multidimensional scaling.

Steps performed when the script runs:

1. Download the word index (lines of the form ``index,word``).
2. Download the grouping file; every third line, starting from the second
   one, holds the whitespace-separated words of one group.
3. Load the word2vec model and take its normalised word vectors.
4. Average the vectors of the indexed words of every group.
5. Reduce the group means with MDS and write the first two coordinates of
   each group as an ``x,y`` line to ``paigutus4_ryhmakeskmed.txt``.
"""
import urllib.request

from gensim.models import Word2Vec
from sklearn.manifold import MDS

# Number of groups read from the grouping file.
RYHMADE_ARV = 1000


def _loe_read(url):
    """Download *url* and return its UTF-8 decoded text split on newlines."""
    # The context manager closes the HTTP response even if reading or
    # decoding raises.
    with urllib.request.urlopen(url) as yhendus:
        return yhendus.read().decode("utf-8").split("\n")


# word -> row index; empty lines (e.g. after a trailing newline) are skipped.
sonalist = _loe_read(
    "https://minitorn.tlu.ee/~jaagup/oma/too/23/05/word2vec/sonaindeksid.txt"
)
sonad = {
    osad[1]: int(osad[0])
    for osad in (rida.split(",") for rida in sonalist if rida)
}

# Each group occupies three lines; the words are on the second of them.
ryhmalist = _loe_read(
    "https://minitorn.tlu.ee/~jaagup/oma/too/23/05/word2vec/vastused/jaotus1000.txt"
)
ryhmad = [ryhmalist[i * 3 + 1].split() for i in range(RYHMADE_ARV)]

# NOTE(review): Word2Vec.load unpickles the model file, so it must only be
# pointed at a trusted source. Loading straight from an HTTP URL relies on
# gensim's file opening accepting remote paths -- confirm this works in the
# deployed gensim version, otherwise download the model to disk first.
mudel = Word2Vec.load(
    "https://minitorn.tlu.ee/~jaagup/oma/too/23/05/word2vec/lemmad1.model"
)
print("loetud")

sonakohad = mudel.wv.get_normed_vectors()

keskmised = []
for ryhma_nr, ryhm in enumerate(ryhmad):
    # Words missing from the index are ignored.
    kohad = [sonakohad[sonad[sona]].tolist() for sona in ryhm if sona in sonad]
    if not kohad:
        # Without this check the group would contribute an empty vector and
        # MDS would fail later on a ragged input with an unhelpful message.
        raise ValueError(
            "group %d contains no word present in the word index" % ryhma_nr
        )
    # Component-wise arithmetic mean of the group's word vectors.
    keskmine = [sum(vaartused) / len(vaartused) for vaartused in zip(*kohad)]
    keskmised.append(keskmine)

# NOTE(review): no random_state is passed to MDS, so the resulting layout may
# differ from run to run.
ryhmakeskmed = MDS().fit_transform(keskmised)
print(ryhmakeskmed)

vastus = [str(rida[0]) + "," + str(rida[1]) for rida in ryhmakeskmed]
# "with" guarantees the output file is flushed and closed even on error.
with open("paigutus4_ryhmakeskmed.txt", "w", encoding="utf-8") as f2:
    print("\n".join(vastus), file=f2)