"""Count character n-grams in a text file and dump them in several orders.

Every line of the input file is stripped, all "Z" characters are removed,
and each substring of length ``npikkus`` is counted.  The counts are then
written as ``ngram,count`` lines to five files prefixed ``<npikkus>gram_``:

* ``eest``    - sorted alphabetically from the front,
* ``tagant``  - sorted by the reversed n-gram (i.e. from the back),
* ``keskelt`` - sorted from the middle outwards,
* ``kahanev`` - sorted by descending count,
* ``kokku``   - a single number: the total count of all n-grams.
"""
from collections import Counter

npikkus = 7  # n-gram length
INPUT_PATH = "sonaliigid_koos.txt"
PROGRESS_EVERY = 10000  # print the running line number every this many lines


def count_ngrams(lines, n, progress_every=PROGRESS_EVERY):
    """Return a Counter of all length-*n* substrings found in *lines*.

    Each line is stripped of surrounding whitespace and has every "Z"
    removed before the n-grams are extracted.  Lines shorter than *n*
    after cleaning contribute nothing.  The running line number is printed
    every *progress_every* lines as a progress indicator.
    """
    counts = Counter()
    for line_no, line in enumerate(lines, start=1):
        if line_no % progress_every == 0:
            print(line_no)
        text = line.strip().replace("Z", "")
        counts.update(
            text[pos:pos + n] for pos in range(len(text) - n + 1)
        )
    return counts


def middle_out_key(ngram):
    """Return a sort key that orders *ngram* from its middle outwards.

    The key is the three central characters, followed by the characters to
    their left (nearest first), followed by the characters to their right
    (nearest first).  For a 7-gram this is
    ``g[2:5] + g[1] + g[0] + g[5] + g[6]``; for a 5-gram it is
    ``g[1:4] + g[0] + g[4]``.
    """
    start = max(0, (len(ngram) - 3) // 2)
    centre = ngram[start:start + 3]
    left_outwards = ngram[:start][::-1]
    right_outwards = ngram[start + 3:]
    return centre + left_outwards + right_outwards


def write_counts(path, ngrams, counts):
    """Write one ``ngram,count`` line to *path* per item of *ngrams*, in order."""
    with open(path, "w") as out:
        for ngram in ngrams:
            print(ngram, counts[ngram], sep=",", file=out)


def main():
    """Count the n-grams of INPUT_PATH and write all five output files."""
    # NOTE: files are opened with the platform default encoding, exactly as
    # the script always did; pass encoding=... here if the data requires it.
    with open(INPUT_PATH) as source:
        counts = count_ngrams(source, npikkus)

    prefix = str(npikkus) + "gram_"

    ngrams = sorted(counts)
    write_counts(prefix + "eest.txt", ngrams, counts)

    ngrams.sort(key=lambda ngram: ngram[::-1])
    write_counts(prefix + "tagant.txt", ngrams, counts)

    ngrams.sort(key=middle_out_key)
    write_counts(prefix + "keskelt.txt", ngrams, counts)

    # sorted() is stable, so n-grams with equal counts keep the
    # middle-outwards order established just above.
    by_count = sorted(ngrams, key=lambda ngram: -counts[ngram])
    write_counts(prefix + "kahanev.txt", by_count, counts)

    with open(prefix + "kokku.txt", "w") as out:
        print(sum(counts.values()), file=out)


if __name__ == "__main__":
    main()