# -*- coding: utf-8 -*-
# Copyright 2004-2006 by Vahur Rebas

import os
import re
import subprocess

# rule search
import scripts
from scripts import objects
from scripts import rules
from scripts.rules import rules as dyn_rules

import Globals
from Globals import InitializeClass
from AccessControl import ClassSecurityInfo
from OFS.PropertyManager import PropertyManager
from OFS.SimpleItem import SimpleItem
from zope.interface import implements

from interfaces import ISearch
from permissions import *
# NOTE: this `all` shadows the builtin of the same name inside this module.
from schemas import all


class Search(PropertyManager, SimpleItem):
    """ manages saved searches """

    meta_type = 'Searches'
    security = ClassSecurityInfo()
    security.declareObjectPublic()
    id = 'Search'

    implements(ISearch)

    security.declareProtected(perm_view, 'index_html')
    def index_html(self):
        """ Default view: delegates to ``self.search_index()``. """
        return self.search_index()

    def search_results_query(self, REQUEST):
        """ Search documents.

        Builds a catalog query from the request and returns whatever
        ``self.zcatalog(query)`` returns.  Returns an empty list when the
        form carries no non-empty value (the 'otsiNupp' key is ignored).
        """
        free_word = unicode(REQUEST.get('vaba_sona', ''), 'utf-8')
        vigane_sona = unicode(REQUEST.get('vigane_sona', ''), 'utf-8')
        vea_liik = REQUEST.get('vea_liik', '')

        # Collect one criterion per distinct schema field name that has a
        # non-blank value in the request.
        adq = {}
        seen = set()
        for schema in all():
            for field in schema.fields:
                name = field.getName()
                if name in seen:
                    continue
                seen.add(name)
                rval = REQUEST.get(name, '')
                if not rval.strip():
                    continue
                adq[name] = rval

        has_fields = 0
        for key in REQUEST.form.keys():
            if key == 'otsiNupp':
                continue
            if REQUEST.get(key):
                has_fields = 1
                break

        if not free_word.strip():
            free_word = ''
        if not vigane_sona.strip():
            vigane_sona = ''
        if not has_fields:
            return []

        # A 'document_status' criterion existed here but was disabled.
        query = {
            'getDocument': free_word,
            'getMarkedWords': vigane_sona,
            'getUsedCodes': vea_liik,
            'is_deleted': 0,
        }
        corpus = REQUEST.get('getCorpus')
        if corpus:
            query['getCorpus'] = corpus
        query.update(adq)
        # The debug ``print query`` that used to be here was removed.
        return self.zcatalog(query)

    security.declarePrivate('constructString')
    def constructString(self, sdoc, starts, ends, txt):
        """ Construct the string that is shown to the user.

        The result is the context preceding ``starts``, then ``txt`` with
        anything between '<' and '>' removed, then the context following
        ``ends``.
        """
        visible = []
        inside_tag = False
        for ch in txt:
            if ch == '<':
                inside_tag = True
            if ch == '>':
                inside_tag = False
                continue
            if not inside_tag:
                visible.append(ch)

        # NOTE(review): the two '' entries mirror the empty string literals
        # of the source as received; they look like highlight markup that was
        # lost somewhere - confirm against the deployed file.
        return ''.join([
            self.fetchContext(sdoc, starts, -1),
            '',
            ''.join(visible),
            '',
            self.fetchContext(sdoc, ends, 1),
        ])

    security.declarePrivate('fetchContext')
    def fetchContext(self, sdoc, rng_n, direction, max_chars=400):
        """ Fetch the text surrounding position ``rng_n`` in ``sdoc``.

        direction: -1 walks backwards from ``rng_n - 1``,
                    1 walks forwards from ``rng_n``.
        Any other direction yields '' (it used to fail with a TypeError).

        Characters between '<' and '>' are skipped and a single space is
        emitted when a tag is left.  Walking stops at the first '.', '!' or
        '?' or once more than ``max_chars`` characters were collected.
        """
        if direction == -1:
            positions = range(rng_n - 1, -1, -1)
            # Walking backwards a tag is entered at '>' and left at '<'.
            enter, leave = '>', '<'
        elif direction == 1:
            positions = range(rng_n, len(sdoc))
            enter, leave = '<', '>'
        else:
            return ''

        pieces = []
        count = 0
        inside_tag = False
        for pos in positions:
            ch = sdoc[pos]
            if ch == enter:
                inside_tag = True
            elif ch == leave:
                inside_tag = False
                pieces.append(' ')
                continue
            if ch in ('.', '!', '?'):  # end of sentence
                break
            if not inside_tag:
                pieces.append(ch)
                count += 1
            if count > max_chars:
                break

        if direction == -1:
            # Collected right-to-left; restore reading order.
            pieces.reverse()
        return ''.join(pieces)

    def search_context(self, brain, req):
        """ Display search context.

        Returns a list of strings: one per occurrence of the free-word term
        in ``brain.textdoc`` when only 'vaba_sona' is given, or one per
        record returned by ``self.Errors.queryErrors`` when 'vigane_sona'
        and/or 'vea_liik' is given.
        """
        vaba_sona = req.get('vaba_sona', '')
        vigane_sona = req.get('vigane_sona', '')
        vea_liik = req.get('vea_liik', '')
        res = []
        if not vigane_sona and not vea_liik and not vaba_sona:
            return []

        if not vigane_sona and not vea_liik:
            sdoc = brain.textdoc
            txt = vaba_sona.replace('*', '').replace('?', '')
            # An empty term would match at every position, so skip it.  The
            # term is escaped because it is user input, not a regex.
            if txt:
                for word in re.finditer(re.escape(txt), sdoc):
                    res.append(self.constructString(
                        sdoc, word.start(), word.end(), txt))

        if vigane_sona or vea_liik:
            sobj = brain.getObject()
            query = {'document': sobj.getId()}
            if vea_liik:
                query['code'] = vea_liik
            if vigane_sona:
                query['content'] = vigane_sona
            for hit in self.Errors.queryErrors(query):
                err = hit.getObject()
                # NOTE(review): the separator literals below contain a line
                # break in the source as received; this may be markup that
                # was lost - confirm against the deployed file.
                txt = err.getPreContext().encode('utf-8')
                txt += ' \n'
                txt += err.getContent()
                txt += '\n '
                txt += err.getPostContext().encode('utf-8')
                res.append(txt)
        return res

    def _runFilter(self, command, tekst=None, cwd=None):
        """ Run ``command`` through the shell and return its stdout.

        ``tekst`` (if given) is written to the command's stdin; ``cwd`` (if
        given) is the working directory of the child.  stderr is captured
        and discarded.
        """
        process = subprocess.Popen(
            command, cwd=cwd, shell=True,
            stderr=subprocess.PIPE,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE)
        stdo, stde = process.communicate(tekst)
        return stdo

    def taggerVastusEesti(self, tekst):
        "TreeTagger result (tree-tagger-estonian)"
        return self._runFilter(
            "/home/jaagup/treetagger/k3/cmd/tree-tagger-estonian", tekst)

    def taggerVastusVene(self, tekst):
        "TreeTagger result (tree-tagger-russian)"
        return self._runFilter(
            "/home/jaagup/treetagger/k3/cmd/tree-tagger-russian", tekst)

    def taggerVastusSoome(self, tekst):
        "TreeTagger result (tree-tagger-finnish)"
        return self._runFilter(
            "/home/jaagup/treetagger/k3/cmd/tree-tagger-finnish", tekst)

    def klasterVastus1(self, tekst):
        """ Cluster analysis.

        Writes ``tekst`` to abiandmed.txt in the tool directory, runs
        klastrileidja.jar there and returns the content of
        users/veeb/abiandmed.csv.
        """
        workdir = "/home/jaagup/klastrileidja"
        # The child gets ``cwd=workdir`` instead of an os.chdir() here:
        # chdir would change the working directory of the whole process.
        # NOTE(review): the file names are fixed, so concurrent calls would
        # presumably overwrite each other's data - confirm whether that
        # matters for this deployment.
        f = open(os.path.join(workdir, "abiandmed.txt"), "w")
        try:
            f.write(tekst)
        finally:
            f.close()

        self._runFilter(
            "/usr/bin/java -jar klastrileidja.jar -fabiandmed.txt -uveeb",
            cwd=workdir)

        f = open(os.path.join(workdir, "users/veeb/abiandmed.csv"), "r")
        try:
            return f.read()
        finally:
            f.close()

    def morfoVastus(self, tekst):
        "Morphology result: stdout of the estmorf / vislcg3 pipeline"
        MORFO_PROGS = "/home/jaagup/estmorf"
        cmd = ("./estmorf.sh | ./kms2cg3.pl"
               " | /usr/local/bin/vislcg3 --grammar trivial.rle"
               " | /usr/local/bin/vislcg3 --grammar morfyhest170609.rlb"
               " | /usr/local/bin/vislcg3 --grammar pindsyn170609.rlb"
               " | /usr/local/bin/vislcg3 --grammar strukt170609.rlb")
        return self._runFilter(cmd, tekst, cwd=MORFO_PROGS)

    def morfoLauseObjektiks(self, morfotekst):
        """ Turn morphology output into a sentence object.

        Returns an ``objects.Pipe`` holding one ``objects.Word`` per line
        that starts with a double quote; tab-indented lines that follow are
        attached to that word with ``addInfo``.
        """
        words = objects.Pipe()
        # Start without a current word so that a leading tab-indented line
        # is ignored instead of raising UnboundLocalError.
        mi = None
        for l in morfotekst.split('\n'):
            l = l.decode('utf-8')
            if not l:
                mi = None
                continue
            # NOTE(review): both marker literals are identical in the source
            # as received; they look like two different markers whose
            # content was lost - confirm against the deployed file.
            if l in ('""', '""'):
                mi = None
                continue
            if l.startswith('"'):
                mi = objects.Word(l[2:-2])
                words.append(mi)
            if l.startswith('\t'):
                if mi:
                    mi.addInfo(l.strip())
        return words

    def lauseObjektidena(self, tekst):
        """ From text to data objects.

        Runs ``morfoVastus`` on ``tekst``, cuts the output into chunks with
        a regular expression and returns the non-empty sentence objects
        built by ``morfoLauseObjektiks``.
        """
        morfotekst = self.morfoVastus(tekst)
        # Raw string: same pattern value as before, without relying on the
        # unknown escape '\/' being passed through.
        morfolaused = re.findall(r'".*?<\/s>"', morfotekst, re.S)
        lauseobjektid = []
        for morfolause in morfolaused:
            lauseobjekt = self.morfoLauseObjektiks(morfolause)
            lauseobjekt.documentid = 0
            if len(lauseobjekt._ds) > 0:
                lauseobjektid.append(lauseobjekt)
        return lauseobjektid

    def lauseteAndmed(self, tekst):
        """ Sentence data.

        Runs the check* methods on every sentence object of ``tekst``,
        matches it against the dynamic rules and records the longest
        matching rule with ``addRuleMatch``.  Returns the sentence objects.
        """
        lauseobjektid = self.lauseObjektidena(tekst)
        for lause in lauseobjektid:
            lause.checkKysilause()
            lause.checkHyydlause()
            lause.checkUmbisikuline()
            lause.checkKuiLauseAlguses()
            lause.checkIgnoredWords(rules.allowed_ignores_words)
            lause.checkIgnoredMarks(rules.allowed_ignores_marks,
                                    rules.needed, rules.skip_list)
            lause.checkIgnoredWithMark(rules.conditional_ignore)
            lause.checkWordsCorrect()
            lause.checkFmvPosition()
            lause.checkPrdPosition()
            lause.checkImvPosition()
            lause.checkFcvFmv()

            maxrulelength = 0
            maxruleid = None
            koef = None
            for rid, rule in dyn_rules:
                if lause.isSkipped():
                    continue
                if lause.match(rule, rules):
                    if len(rule) > maxrulelength:
                        maxrulelength = len(rule)
                        maxruleid = rid
                        # Coefficient as left on the sentence by this match.
                        koef = lause.coef
            # Only record a match when a best rule was actually chosen.
            if maxruleid is not None:
                lause.addRuleMatch(maxruleid, koef)
            # The unused "skipped" note that referenced the undefined name
            # ``words`` (a NameError for every skipped sentence) was removed.
        return lauseobjektid

    def kysiReegliSisu(self, reeglinr):
        "Content of the rule: ``dyn_rules.get(int(reeglinr))``"
        return dyn_rules.get(int(reeglinr))

    def replaceSyllables(self, count=1000):
        """ Replace syllables in the word table.

        Processes at most ``count`` rows of
        ``self.sqls.getWordsWithoutSyllables()`` and updates every row whose
        second column is None.  Returns the failed words followed by 'k' and
        the updated words.
        """
        res = [[r[0], r[1]] for r in self.sqls.getWordsWithoutSyllables()]
        # Convert before clamping so a string ``count`` behaves as a number.
        count = max(min(int(count), len(res)), 0)
        errors = []
        itsokay = ['k']
        for word, syllable in res[:count]:
            if syllable is None:
                try:
                    self.replaceSyllable(word)
                except Exception:
                    errors.append(word)
                else:
                    itsokay.append(word)
        return errors + itsokay

    def replaceSyllable(self, word):
        "Replace the syllable column in the words table for ``word``"
        a = self.getSyllables(word)
        self.sqls.wordUpdateSyllable(word=word, syllable=a)
        return a

    def getSyllables(self, word):
        """ Get syllables from word.

        ``word`` is recoded from utf-8 to iso-8859-13 for the external
        silbitaja.bin program and its output is recoded back to utf-8.
        """
        MORFO_PROGS = "/home/jaagup/silbitaja"
        cmd = "./silbitaja.bin"
        stdo = self._runFilter(
            cmd, word.decode("utf-8").encode("iso-8859-13"), cwd=MORFO_PROGS)
        return stdo.decode("iso-8859-13").encode("utf-8")

    def setDocrefsSyllable(self, word):
        """ Set docrefs syllables for ``word``.

        Returns None on success.  On failure the syllable is stored as ''
        and the string "error" is returned.
        """
        try:
            syllables = self.getSyllables(word)
            self.sqls.wordUpdateDocrefsSyllable(word=word, syllable=syllables)
        except Exception:
            self.sqls.wordUpdateDocrefsSyllable(word=word, syllable='')
            return "error"

    def replaceDocrefsSyllables(self, limit=1000):
        """ Syllabify words.

        Processes at most ``limit`` rows of
        ``self.sqls.getDocrefsWordsWithoutSyllables()``.  Returns the failed
        words followed by 'k' and the processed words.
        """
        res = [r[0] for r in self.sqls.getDocrefsWordsWithoutSyllables()]
        errors = []
        itsokay = ['k']
        # Slicing clamps ``limit`` to the rows that exist (indexing used to
        # raise IndexError when fewer rows were available).
        for word in res[:max(int(limit), 0)]:
            try:
                # setDocrefsSyllable reports failure by returning "error"
                # rather than raising, so check the return value as well.
                failed = self.setDocrefsSyllable(word) == "error"
            except Exception:
                failed = True
            if failed:
                errors.append(word)
            else:
                itsokay.append(word)
        return errors + itsokay

    def _pairs(self, rows):
        "Return ``[row[0], row[1]]`` for every row"
        return [[r[0], r[1]] for r in rows]

    def getErrorAuthors(self):
        "Error annotators together with counts"
        return self._pairs(self.sqls.stat_getauthors())

    def getanneandmed(self):
        "Data of 'anne' together with counts"
        return self._pairs(self.sqls.stat_anneandmed())

    def getautoriandmed(self, REQUEST):
        "Data of the requested author (default 'anne') together with counts"
        return self._pairs(
            self.sqls.stat_autoriandmed(author=REQUEST.get('author', 'anne')))

    def _literal(self, value):
        """ Return ``repr(value)`` without the suffix of a Python 2 long. """
        text = repr(value)
        # The repr of a string ends with a quote, so only an integer literal
        # such as '12L' or '-3L' can match here.
        if text.endswith('L') and text[:-1].lstrip('-').isdigit():
            text = text[:-1]
        return text

    def getDataVariable(self):
        """ Code for the function: a snippet assigning the author data.

        Only the long-integer suffix is stripped now; every capital 'L',
        including those inside author names, used to be removed.
        """
        table = [['Autor', 'Vigade arv']] + self.getErrorAuthors()
        rows = []
        for row in table:
            cells = [self._literal(cell) for cell in row]
            rows.append("[" + ", ".join(cells) + "]")
        t = "andmed = "
        t += "[" + ", ".join(rows) + "]" + ";\n"
        t += "console.log(andmed);"
        return t

    def getSelectedCodes(self, ylatase=()):
        """ List of codes.

        ``ylatase`` is one code or a sequence of codes; the result is the
        concatenation of ``self.Marks.getChildMarks(code)`` for each.
        """
        if isinstance(ylatase, str):
            ylatase = (ylatase,)  # make it a sequence
        v = []
        for kood in ylatase:
            v.extend(self.Marks.getChildMarks(kood))
        return v

    def getMainCodes(self):
        """ Codes of the main errors """
        return ["global_113978483712", "global_11397848460",
                "global_113978485636", "global_113978486457",
                "global_113978487438", "global_113978488098",
                "global_113978489046"]

    def getCodeGroup(self):
        """ Finds the error groups """
        return self._pairs(self.sqls.stat_codegroup())

    def isInURL(self, REQUEST, value):
        # Tells whether ``value`` is among the 'ylatase' values of REQUEST.
        # Documented with comments on purpose: the original had no docstring
        # and ZPublisher only publishes objects that have one, so adding a
        # docstring here would change what is reachable by URL.
        m = REQUEST.get('ylatase', [])
        if isinstance(m, str):
            m = (m,)
        return value in m

    def getUpdateStrings(self):
        "One UPDATE statement per row of ``self.sqls.stat_updates()``"
        t = []
        for r in self.sqls.stat_updates():
            t.append("UPDATE errors SET codegroup =" + str(r[1])
                     + " WHERE id=" + str(r[0]) + ";")
        return "\n".join(t)


InitializeClass(Search)