"""Resumable POS-tagging of an Estonian reference corpus with Stanza.

Reads a line-number checkpoint from ``koht.txt``, skips that many lines of
the input corpus, then runs every remaining line through the Stanza
tokenize+pos pipeline.  For each corpus line, the concatenated xpos tags of
all its words are appended to ``sonaliigid.txt`` wrapped as ``^...$``.
After each line the checkpoint in ``koht.txt`` is rewritten, so the job can
be interrupted and restarted without losing or duplicating output.
"""
import stanza

nlp = stanza.Pipeline(lang='et', processors="tokenize, pos")

sisendfail = "/mnt/c/jaagup/22/korpused/etnc19_reference_corpus_clean.txt"
valjund_sonaliigid = "sonaliigid.txt"

# Resume position: number of corpus lines already processed by earlier runs.
with open("koht.txt", encoding="utf-8") as f_koht:
    koht = int(f_koht.read().strip())

with open(sisendfail, "r", encoding="utf-8") as f1, \
     open(valjund_sonaliigid, "a", encoding="utf-8") as f3:
    # Skip the lines already handled (checkpointed) by a previous run.
    for _ in range(koht):
        f1.readline()

    # Iterating the file object is equivalent to the readline loop:
    # it stops only at true EOF (blank corpus lines are "\n", still truthy).
    for rida in f1:
        t = nlp(rida)
        sonad = [sona for lause in t.sentences for sona in lause.words]
        # xpos may be None for some tokens; substitute "" so join cannot
        # raise TypeError and abort the long-running job mid-corpus.
        sliigid = "".join(sona.xpos or "" for sona in sonad)
        print("^" + sliigid + "$", file=f3)
        f3.flush()  # make output durable before advancing the checkpoint
        koht += 1
        with open("koht.txt", "w", encoding="utf-8") as f4:
            print(koht, file=f4)
        print(koht)  # progress indicator on stdout