# -*- coding: utf-8 -*-
# Copyright 2004-2006 by Vahur Rebas
"""Zope product class that runs catalog searches and renders match context.

Also exposes a few helpers that run sentence objects through the rule
checks of the ``scripts`` package.
"""

import re

# rule search
import scripts
from scripts import objects
from scripts import rules
from scripts.rules import rules as dyn_rules

import Globals
from Globals import InitializeClass
from AccessControl import ClassSecurityInfo
from OFS.PropertyManager import PropertyManager
from OFS.SimpleItem import SimpleItem
from zope.interface import implements
from interfaces import ISearch
from permissions import *
# NOTE: this import shadows the builtin all() for the whole module.
from schemas import all

# fetchContext() stops once more than this many visible characters
# have been collected.
_MAX_CONTEXT_CHARS = 400

# Characters fetchContext() treats as the end of a sentence.
_SENTENCE_ENDS = ('.', '!', '?')

# asendaSilbid() only looks at this many leading rows per call.
_SYLLABLE_BATCH = 8


class Search(PropertyManager, SimpleItem):
    """ manages saved searches """
    meta_type = 'Searches'
    security = ClassSecurityInfo()
    security.declareObjectPublic()
    id = 'Search'

    implements(ISearch)

    security.declareProtected(perm_view, 'index_html')
    def index_html(self):
        """ Default view: delegates to ``self.search_index()``. """
        return self.search_index()

    def search_results_query(self, REQUEST):
        """ Search documents.

        Builds a catalog query from the request values 'vaba_sona',
        'vigane_sona' and 'vea_liik' plus every non-blank request value
        whose name matches a field of one of the schemas returned by
        ``all()``, and runs it through ``self.zcatalog``.

        Returns an empty list when no form field other than 'otsiNupp'
        carries a value, otherwise whatever ``self.zcatalog(query)``
        returns.
        """
        free_word = unicode(REQUEST.get('vaba_sona', ''), 'utf-8')
        vigane_sona = unicode(REQUEST.get('vigane_sona', ''), 'utf-8')
        vea_liik = REQUEST.get('vea_liik', '')

        # Collect schema-field criteria; a field name shared by several
        # schemas is only looked at once.
        extra_criteria = {}
        seen = {}
        for schema in all():
            for field in schema.fields:
                name = field.getName()
                if name in seen:
                    continue
                seen[name] = True
                value = REQUEST.get(name, '')
                if not value.strip():
                    continue
                extra_criteria[name] = value

        # The submit button alone does not count as a search criterion.
        has_fields = 0
        for key in REQUEST.form.keys():
            if key == 'otsiNupp':
                continue
            if REQUEST.get(key):
                has_fields = 1
                break
        if not has_fields:
            return []

        # Whitespace-only terms are normalised to the empty string.
        if not free_word.strip():
            free_word = ''
        if not vigane_sona.strip():
            vigane_sona = ''

        query = {
            'getDocument': free_word,
            'getMarkedWords': vigane_sona,
            'getUsedCodes': vea_liik,
            'is_deleted': 0,
            'document_status': 1,
        }
        query.update(extra_criteria)
        return self.zcatalog(query)

    security.declarePrivate('constructString')
    def constructString(self, sdoc, starts, ends, txt):
        """ Construct the string that is shown to the user.

        The result is the context preceding position ``starts`` in
        ``sdoc``, then ``txt`` with everything between '<' and '>'
        removed, then the context following position ``ends``.
        """
        # NOTE(review): both markers are empty strings in the source as
        # received; they look like highlight markup that may have been
        # lost in extraction -- confirm against version control.
        open_marker = ''
        close_marker = ''

        visible = []
        inside_tag = False
        for ch in txt:
            if ch == '<':
                inside_tag = True
            if ch == '>':
                inside_tag = False
                continue
            if not inside_tag:
                visible.append(ch)

        before = self.fetchContext(sdoc, starts, -1)
        after = self.fetchContext(sdoc, ends, 1)
        return before + open_marker + ''.join(visible) + close_marker + after

    security.declarePrivate('fetchContext')
    def fetchContext(self, sdoc, rng_n, direction):
        """ Fetch the visible text next to position ``rng_n`` of ``sdoc``.

        direction : -1 - walk backwards from ``rng_n - 1`` to the start
                     1 - walk forwards from ``rng_n`` to the end

        Text between '<' and '>' is skipped and each skipped tag
        contributes one space. Collection stops at '.', '!' or '?', or
        once more than 400 visible characters have been gathered. The
        result is always returned in reading order.

        Raises ValueError for any other ``direction`` (the original code
        failed there with an opaque TypeError).
        """
        if direction == -1:
            positions = range(rng_n - 1, -1, -1)
            # Walking backwards, a tag is entered at '>' and left at '<'.
            tag_enter, tag_leave = '>', '<'
        elif direction == 1:
            positions = range(rng_n, len(sdoc))
            tag_enter, tag_leave = '<', '>'
        else:
            raise ValueError('direction must be -1 or 1, got %r' % (direction,))

        pieces = []
        recording = True
        count = 0
        for pos in positions:
            ch = sdoc[pos]
            if ch == tag_enter:
                recording = False
            if ch == tag_leave:
                recording = True
                pieces.append(' ')
                continue
            # The sentence-end test also applies inside a tag, exactly as
            # in the original implementation.
            if ch in _SENTENCE_ENDS:
                break
            if recording:
                pieces.append(ch)
                count += 1
            if count > _MAX_CONTEXT_CHARS:
                break

        if direction == -1:
            # Collected right-to-left; flip back into reading order.
            pieces.reverse()
        return ''.join(pieces)

    def search_context(self, brain, req):
        """ Display search context.

        Returns a list of context strings for one catalog result
        ``brain``, based on the request values 'vaba_sona',
        'vigane_sona' and 'vea_liik':

        * only 'vaba_sona' given: one string per literal occurrence of
          the term (with '*' and '?' removed) in ``brain.textdoc``;
        * 'vigane_sona' and/or 'vea_liik' given: one string per result
          of ``self.Errors.queryErrors`` for the document, built from
          the error's pre-context, content and post-context;
        * nothing given: an empty list.
        """
        vaba_sona = req.get('vaba_sona', '')
        vigane_sona = req.get('vigane_sona', '')
        vea_liik = req.get('vea_liik', '')

        if not vigane_sona and not vea_liik and not vaba_sona:
            return []

        res = []
        if not vigane_sona and not vea_liik:
            sdoc = brain.textdoc
            txt = vaba_sona.replace('*', '').replace('?', '')
            # A term consisting only of wildcards would become an empty
            # pattern that matches at every position of the document.
            if txt:
                # The term is user input: match it literally so regex
                # metacharacters can neither raise re.error nor change
                # what is being searched for.
                for found in re.finditer(re.escape(txt), sdoc):
                    res.append(self.constructString(
                        sdoc, found.start(), found.end(), txt))
        else:
            sobj = brain.getObject()
            query = {'document': sobj.getId()}
            if vea_liik:
                query['code'] = vea_liik
            if vigane_sona:
                query['content'] = vigane_sona
            for hit in self.Errors.queryErrors(query):
                err = hit.getObject()
                # NOTE(review): the two separators contained a raw line
                # break in the source as received; markup may have been
                # lost in extraction -- confirm against version control.
                txt = err.getPreContext().encode('utf-8')
                txt += ' \n'
                txt += err.getContent()
                txt += '\n '
                txt += err.getPostContext().encode('utf-8')
                res.append(txt)
        return res

    def asendaSilbid(self):
        """ Process words that have no syllable data yet.

        Looks at the first 8 rows of ``self.sqls.getWords2()``; for each
        row whose second column is None, calls ``self.katse20`` with the
        word from the first column.

        Returns the words that failed, followed by the marker 'k' and
        the words that succeeded.
        """
        rows = [[r[0], r[1]] for r in self.sqls.getWords2()]
        vead = []        # words whose processing raised
        korras = ['k']   # marker followed by the words that went through
        # Slicing copes with fewer than 8 rows; indexing res[i] over
        # range(8) raised IndexError in that case.
        for word, syllables in rows[:_SYLLABLE_BATCH]:
            if syllables is not None:
                continue
            try:
                self.katse20(word)
            except Exception:
                # Best effort: remember the failing word and carry on.
                vead.append(word)
            else:
                korras.append(word)
        return vead + korras

    # ------------------------------------------------------------------
    # NOTE(review): the source of this module is truncated at this point.
    # Everything between the length check in replaceDocrefsSyllables()
    # and the tail of a later word-parsing method is missing (text
    # between '<' and '>' appears to have been stripped in extraction).
    # The surviving fragments are kept verbatim below so that nothing is
    # silently dropped; restore the real code from version control.
    #
    # def replaceDocrefsSyllables(self, limit=1000):
    #     "silbitasonad"
    #     sqlres = self.sqls.getDocrefsWordsWithoutSyllables()
    #     res = []
    #     for r in sqlres:
    #         res.append(r[0])
    #     errors=[]
    #     itsokay=[]
    #     itsokay.append('k')
    #     # for r in res:
    #     if len(res)            <-- source is cut here
    #
    #     ...                    <-- start of the next fragment is missing
    #     "' or l == '""':
    #         mi = None
    #         continue
    #     if l.startswith('"'):
    #         mi = objects.Word(l[2:-2])
    #         #mi = {'word': l[2:-2], 'inf': [], 'ignore': False}
    #         words.append(mi)
    #     if l.startswith('\t'):
    #         #mi['inf'].append(l.replace('"'+mi['word']+'"', '').strip())
    #         if mi:
    #             mi.addInfo(l.strip())
    #     return words
    # ------------------------------------------------------------------

    def lauseObjektidena(self, tekst):
        """ Turn text into sentence data objects.

        Passes ``tekst`` through ``self.morfoVastus``, cuts the answer
        into sentence chunks with a regular expression, converts each
        chunk with ``self.morfoLauseObjektiks`` and returns the objects
        whose ``_ds`` is non-empty. ``documentid`` is set to 0 on every
        converted object.
        """
        morfotekst = self.morfoVastus(tekst)
        # NOTE(review): pattern reproduced exactly as received; an opening
        # marker may have been stripped from it in extraction -- confirm.
        morfolaused = re.findall(r'".*?<\/s>"', morfotekst, re.S)
        lauseobjektid = []
        for morfolause in morfolaused:
            lauseobjekt = self.morfoLauseObjektiks(morfolause)
            lauseobjekt.documentid = 0
            if len(lauseobjekt._ds) > 0:
                lauseobjektid.append(lauseobjekt)
        return lauseobjektid

    def lauseteAndmed(self, tekst):
        """ Run every sentence of ``tekst`` through the rule checks.

        Each sentence object from ``lauseObjektidena`` gets its check
        methods called in a fixed order and is then matched against
        every rule in ``dyn_rules``. When at least one rule matches, the
        longest matching rule is registered through ``addRuleMatch``.

        Returns the list of sentence objects.
        """
        lauseobjektid = self.lauseObjektidena(tekst)
        for lause in lauseobjektid:
            lause.checkKysilause()
            lause.checkHyydlause()
            lause.checkUmbisikuline()
            lause.checkKuiLauseAlguses()
            lause.checkIgnoredWords(rules.allowed_ignores_words)
            lause.checkIgnoredMarks(rules.allowed_ignores_marks,
                                    rules.needed, rules.skip_list)
            lause.checkIgnoredWithMark(rules.conditional_ignore)
            # disabled in the original: checkSLopus()
            lause.checkWordsCorrect()
            # disabled in the original: checkContainsNumber()
            lause.checkFmvPosition()
            lause.checkPrdPosition()
            lause.checkImvPosition()
            lause.checkFcvFmv()

            maxrulelength = 0
            maxruleid = None
            koef = None
            for rid, rule in dyn_rules:
                if lause.isSkipped():
                    continue
                if not lause.match(rule, rules):
                    continue
                if len(rule) > maxrulelength:
                    maxrulelength = len(rule)
                    maxruleid = rid
                    # NOTE(review): the original indentation is lost; the
                    # coefficient is assumed to belong to the longest
                    # match, like maxruleid -- confirm.
                    koef = lause.coef
            # Guarding on maxruleid avoids the unbound-name crash the
            # original hit when only zero-length rules matched.
            if maxruleid is not None:
                lause.addRuleMatch(maxruleid, koef)
        return lauseobjektid

    def kysiReegliSisu(self, reeglinr):
        """ Return the content of the rule numbered ``reeglinr``. """
        return dyn_rules.get(int(reeglinr))

InitializeClass(Search)