# -*- coding: utf-8 -*-
# Copyright 2004-2006 by Vahur Rebas
import re
# rule search (reegliotsing)
import scripts
from scripts import objects
from scripts import rules
from scripts.rules import rules as dyn_rules
import Globals
from Globals import InitializeClass
from AccessControl import ClassSecurityInfo
from OFS.PropertyManager import PropertyManager
from OFS.SimpleItem import SimpleItem
from zope.interface import implements
from interfaces import ISearch
from permissions import *
from schemas import all
class Search(PropertyManager, SimpleItem):
""" Manages saved searches.

Besides the catalog search itself, the methods of this class wrap a few
external command-line tools (taggers, morphological analysis,
syllabification) and SQL statistics helpers reached through self.sqls.
"""
# Zope meta_type of this class.
meta_type = 'Searches'
# Collects the security declarations made for this class.
security = ClassSecurityInfo()
security.declareObjectPublic()
# Fixed object id.
id = 'Search'
implements(ISearch)
# index_html requires the perm_view permission.
security.declareProtected(perm_view, 'index_html')
def index_html(self):
    """ Default view: render and return the search index page. """
    page = self.search_index()
    return page
def search_results_query(self, REQUEST):
    """ Search documents in the catalog using the submitted search form.

    Reads the free-text word ('vaba_sona'), the marked word ('vigane_sona')
    and the error code ('vea_liik') from REQUEST, adds every non-blank
    request value whose key is the name of a schema field, and restricts
    the search to REQUEST['getCorpus'] when that is given.

    Returns an empty list when no form field other than the submit button
    ('otsiNupp') carries a value; otherwise returns the result of
    self.zcatalog(query).
    """
    # `unicode` is the Python 2 builtin; the two words are decoded as UTF-8.
    free_word = unicode(REQUEST.get('vaba_sona', ''), 'utf-8')
    vigane_sona = unicode(REQUEST.get('vigane_sona', ''), 'utf-8')
    vea_liik = REQUEST.get('vea_liik', '')

    # Nothing to search for unless some field besides the button is filled.
    has_fields = False
    for key in REQUEST.form.keys():
        if key != 'otsiNupp' and REQUEST.get(key):
            has_fields = True
            break
    if not has_fields:
        return []

    # `all` is the function imported from `schemas`, not the builtin.
    # Each field name is looked up once even if several schemas share it.
    adq = {}
    seen = set()
    for schema in all():
        for field in schema.fields:
            name = field.getName()
            if name in seen:
                continue
            seen.add(name)
            rval = REQUEST.get(name, '')
            if not rval.strip():
                continue
            adq[name] = rval

    # Whitespace-only words are treated as "not given".
    if not free_word.strip():
        free_word = ''
    if not vigane_sona.strip():
        vigane_sona = ''

    query = {
        'getDocument': free_word,
        'getMarkedWords': vigane_sona,
        'getUsedCodes': vea_liik,
        'is_deleted': 0,
        # 'document_status': 1
    }
    corpus = REQUEST.get('getCorpus')
    if corpus:
        query['getCorpus'] = corpus
    # Schema-field criteria are applied last, so they win on a key clash.
    query.update(adq)
    return self.zcatalog(query)
# constructString is declared private in the class security info.
security.declarePrivate('constructString')
def constructString(self, sdoc, starts, ends, txt):
    """ Build the snippet that is shown to the user for one match.

    The snippet consists of the context before position `starts`, the
    characters of `txt` that are not between '<' and '>', and the context
    after position `ends`.
    """
    leading = self.fetchContext(sdoc, starts, -1)

    # Keep only the characters of txt that lie outside <...> markup.
    visible = ''
    inside_tag = False
    for ch in txt:
        if ch == '<':
            inside_tag = True
        if ch == '>':
            inside_tag = False
            continue
        if not inside_tag:
            visible += ch

    trailing = self.fetchContext(sdoc, ends, 1)
    # NOTE(review): the two empty strings are kept from the original; they
    # look like placeholders for highlight markup around the match - confirm.
    return '' + leading + '' + visible + '' + trailing
# fetchContext is declared private in the class security info.
security.declarePrivate('fetchContext')
def fetchContext(self, sdoc, rng_n, direction):
    """ Fetch the text surrounding a position in sdoc.

    direction : -1 - walk backwards from the character before rng_n
                 1 - walk forwards starting at rng_n

    Characters between '<' and '>' are skipped and each such tag
    contributes a single space. Walking stops at the first '.', '!' or
    '?' (even inside a tag), at the end of the document, or once more
    than 400 characters have been collected. The result is always
    returned in reading order.

    Raises ValueError for any other direction.
    """
    if direction == -1:
        positions = range(rng_n - 1, -1, -1)
        # Walking backwards a tag is entered at '>' and left at '<'.
        tag_enter, tag_leave = '>', '<'
    elif direction == 1:
        positions = range(rng_n, len(sdoc))
        tag_enter, tag_leave = '<', '>'
    else:
        raise ValueError('direction must be -1 or 1, got %r' % (direction,))

    pieces = []
    record = True
    count = 0
    for pos in positions:
        ch = sdoc[pos]
        if ch == tag_enter:
            record = False
        if ch == tag_leave:
            record = True
            pieces.append(' ')
            continue
        if ch in ('.', '!', '?'):
            # end of sentence
            break
        if record:
            pieces.append(ch)
            count += 1
        if count > 400:
            break

    if direction == -1:
        # Collected right-to-left; restore reading order.
        pieces.reverse()
    return ''.join(pieces)
def search_context(self, brain, req):
    """ Return the list of context snippets to display for one search hit.

    brain : catalog result; its `textdoc` attribute is searched for
            free-word queries, its object is used for error queries.
    req   : mapping with the optional keys 'vaba_sona' (free word),
            'vigane_sona' (marked word) and 'vea_liik' (error code).

    With only a free word, every literal occurrence of it in the document
    text yields one snippet built by constructString. With a marked word
    and/or an error code, one snippet is built for every error returned
    by self.Errors.queryErrors. Returns [] when nothing was asked for.
    """
    vaba_sona = req.get('vaba_sona', '')
    vigane_sona = req.get('vigane_sona', '')
    vea_liik = req.get('vea_liik', '')
    res = []
    if not vigane_sona and not vea_liik and not vaba_sona:
        return res

    if not vigane_sona and not vea_liik:
        # Wildcard characters are dropped before looking for the word.
        txt = vaba_sona.replace('*', '').replace('?', '')
        if not txt:
            # An empty pattern would match at every position of the text.
            return res
        sdoc = brain.textdoc
        # The word is user input: match it literally, not as a regex.
        for word in re.finditer(re.escape(txt), sdoc):
            res.append(
                self.constructString(sdoc, word.start(), word.end(), txt))
        return res

    sobj = brain.getObject()
    query = {'document': sobj.getId()}
    if vea_liik:
        query['code'] = vea_liik
    if vigane_sona:
        query['content'] = vigane_sona
    for x in self.Errors.queryErrors(query):
        err = x.getObject()
        txt = err.getPreContext().encode('utf-8')
        # NOTE(review): the separator literal spans a line break in the
        # source as received; it is written here as '\n' - confirm that no
        # markup was lost from it.
        txt += '\n'
        txt += err.getContent()
        txt += '\n'
        txt += err.getPostContext().encode('utf-8')
        res.append(txt)
    return res
def taggerVastusEesti(self, tekst):
    "Run the Estonian TreeTagger script on tekst and return its standard output"
    import subprocess
    tagger = subprocess.Popen(
        "/home/jaagup/treetagger/k3/cmd/tree-tagger-estonian",
        shell=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    # tekst is fed to the tagger's stdin; what it writes to stderr is
    # collected but not used.
    tagged, unused_errors = tagger.communicate(tekst)
    return tagged
def taggerVastusVene(self, tekst):
    "Run the Russian TreeTagger script on tekst and return its standard output"
    import subprocess
    tagger = subprocess.Popen(
        "/home/jaagup/treetagger/k3/cmd/tree-tagger-russian",
        shell=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    # tekst is fed to the tagger's stdin; what it writes to stderr is
    # collected but not used.
    tagged, unused_errors = tagger.communicate(tekst)
    return tagged
def taggerVastusSoome(self, tekst):
    "Run the Finnish TreeTagger script on tekst and return its standard output"
    import subprocess
    tagger = subprocess.Popen(
        "/home/jaagup/treetagger/k3/cmd/tree-tagger-finnish",
        shell=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    # tekst is fed to the tagger's stdin; what it writes to stderr is
    # collected but not used.
    tagged, unused_errors = tagger.communicate(tekst)
    return tagged
def klasterVastus1(self, tekst):
    """ Run the cluster analysis tool on tekst and return its CSV output.

    tekst is written to abiandmed.txt in the tool directory,
    klastrileidja.jar is run there, and the content of
    users/veeb/abiandmed.csv is returned.

    The tool is started with cwd= instead of os.chdir(), so the working
    directory of the server process itself is no longer changed.
    NOTE(review): the input and output file names are fixed, so two
    simultaneous calls would overwrite each other's files.
    """
    import os
    import subprocess
    workdir = "/home/jaagup/klastrileidja"

    infile = open(os.path.join(workdir, "abiandmed.txt"), "w")
    try:
        infile.write(tekst)
    finally:
        infile.close()

    a = subprocess.Popen(
        "/usr/bin/java -jar klastrileidja.jar -fabiandmed.txt -uveeb",
        cwd=workdir, shell=True, stderr=subprocess.PIPE,
        stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # Wait for the tool to finish; its console output is not used.
    a.communicate()

    outfile = open(os.path.join(workdir, "users/veeb/abiandmed.csv"), "r")
    try:
        return outfile.read()
    finally:
        outfile.close()
def morfoVastus(self, tekst):
    "Pipe tekst through the estmorf / vislcg3 shell pipeline and return its standard output"
    import subprocess
    # Directory the pipeline is run from (scripts and grammar files are
    # referenced relative to it).
    MORFO_PROGS = "/home/jaagup/estmorf"
    pipeline = (
        "./estmorf.sh | ./kms2cg3.pl"
        " | /usr/local/bin/vislcg3 --grammar trivial.rle"
        " | /usr/local/bin/vislcg3 --grammar morfyhest170609.rlb"
        " | /usr/local/bin/vislcg3 --grammar pindsyn170609.rlb"
        " | /usr/local/bin/vislcg3 --grammar strukt170609.rlb")
    proc = subprocess.Popen(
        pipeline,
        cwd=MORFO_PROGS,
        shell=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    # What the pipeline writes to stderr is collected but not used.
    analysed, unused_errors = proc.communicate(tekst)
    return analysed
def morfoLauseObjektiks(self, morfotekst):
    """ Convert the analysis text of one sentence into an objects.Pipe.

    The text is processed line by line (each line decoded as UTF-8):
      * a line starting with '"' starts a new objects.Word, built from the
        line without its first two and last two characters;
      * a line starting with a tab is added as info to the current word;
      * an empty line or a marker line resets the current word.
    Returns the Pipe holding the words in input order.
    """
    words = objects.Pipe()
    # Current word. Initialised so that an info line arriving before any
    # word line is ignored instead of raising UnboundLocalError.
    mi = None
    for l in morfotekst.split('\n'):
        l = l.decode('utf-8')
        # NOTE(review): the source as received compared against the same
        # literal '""' twice; the two operands were presumably different
        # markers originally - confirm against the original file.
        if not l or l == '""':
            mi = None
            continue
        if l.startswith('"'):
            mi = objects.Word(l[2:-2])
            words.append(mi)
        elif l.startswith('\t'):
            if mi:
                mi.addInfo(l.strip())
    return words
def lauseObjektidena(self, tekst):
    """ Turn text into a list of sentence objects.

    The text is analysed with morfoVastus, the analysis is cut into
    sentence chunks with a regular expression, and every chunk is
    converted with morfoLauseObjektiks. Each sentence object gets
    documentid 0; sentences whose _ds is empty are left out.
    """
    analysed = self.morfoVastus(tekst)
    sentences = []
    for chunk in re.findall("\".*?<\/s>\"", analysed, re.S):
        sentence = self.morfoLauseObjektiks(chunk)
        sentence.documentid = 0
        if len(sentence._ds) > 0:
            sentences.append(sentence)
    return sentences
def lauseteAndmed(self, tekst):
    """ Analyse tekst and return its sentence objects with rule matches.

    Every sentence object from lauseObjektidena is run through the
    sentence checks, then matched against all dynamic rules. When at least
    one rule matches, the longest matching rule (the first one on a tie)
    is recorded on the sentence with addRuleMatch, together with the
    sentence's coef as it was right after that rule matched.
    """
    lauseobjektid = self.lauseObjektidena(tekst)
    for lause in lauseobjektid:
        lause.checkKysilause()
        lause.checkHyydlause()
        lause.checkUmbisikuline()
        lause.checkKuiLauseAlguses()
        lause.checkIgnoredWords(rules.allowed_ignores_words)
        lause.checkIgnoredMarks(rules.allowed_ignores_marks, rules.needed, rules.skip_list)
        lause.checkIgnoredWithMark(rules.conditional_ignore)
        # checkSLopus was commented out in the original
        lause.checkWordsCorrect()
        # checkContainsNumber was commented out in the original
        lause.checkFmvPosition()
        lause.checkPrdPosition()
        lause.checkImvPosition()
        lause.checkFcvFmv()

        # The best match is tracked per sentence, so nothing can leak over
        # from the previous sentence or be left unassigned.
        matched = False
        maxrulelength = 0
        maxruleid = None
        koef = None
        for rid, rule in dyn_rules:
            if lause.isSkipped():
                continue
            if not lause.match(rule, rules):
                continue
            if not matched or len(rule) > maxrulelength:
                maxrulelength = len(rule)
                maxruleid = rid
                koef = lause.coef
            matched = True
        if matched:
            lause.addRuleMatch(maxruleid, koef)
        # The original built an unused debug string here from the undefined
        # name `words`, which raised NameError for every skipped sentence.
    return lauseobjektid
def kysiReegliSisu(self, reeglinr):
    "Return what the dynamic rules give for rule number reeglinr (converted with int())"
    rule_number = int(reeglinr)
    return dyn_rules.get(rule_number)
def replaceSyllables(self, count=1000):
    """ Fill in the syllable column for words that do not have one yet.

    count : maximum number of rows from getWordsWithoutSyllables to look
            at; may be an int or a numeric string.

    Rows whose second column is not None are skipped. Returns the words
    whose update raised an exception, followed by the marker 'k' and the
    words that were updated.
    """
    res = [[r[0], r[1]] for r in self.sqls.getWordsWithoutSyllables()]
    # Convert before comparing: count may arrive as a string, and it must
    # never exceed the number of available rows.
    limit = max(0, min(int(count), len(res)))
    errors = []
    # NOTE(review): the leading 'k' marker is kept from the original; its
    # purpose is not visible here.
    itsokay = ['k']
    for word, syllable in res[:limit]:
        if syllable is not None:
            continue
        try:
            self.replaceSyllable(word)
        except Exception:
            # Best effort: remember the failing word and carry on.
            errors.append(word)
        else:
            itsokay.append(word)
    return errors + itsokay
def replaceSyllable(self, word):
    "Compute the syllables of word, store them through sqls.wordUpdateSyllable and return them"
    syllables = self.getSyllables(word)
    self.sqls.wordUpdateSyllable(syllable=syllables, word=word)
    return syllables
def getSyllables(self, word):
    """ Return the output of the external syllabifier for word.

    word is a UTF-8 byte string; it is re-encoded to ISO-8859-13 for the
    tool, and the tool's standard output is converted back to UTF-8.
    """
    import subprocess
    # Directory the syllabifier binary is run from.
    MORFO_PROGS = "/home/jaagup/silbitaja"
    cmd = "./silbitaja.bin"
    proc = subprocess.Popen(
        cmd,
        cwd=MORFO_PROGS,
        shell=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    tool_input = word.decode("utf-8").encode("iso-8859-13")
    # What the tool writes to stderr is collected but not used.
    tool_output, unused_errors = proc.communicate(tool_input)
    return tool_output.decode("iso-8859-13").encode("utf-8")
def setDocrefsSyllable(self, word):
    """ Store the syllables of word through sqls.wordUpdateDocrefsSyllable.

    Returns None on success. If computing or storing the syllables raises
    an exception, an empty syllable value is stored instead and the string
    "error" is returned.
    """
    try:
        syllables = self.getSyllables(word)
        self.sqls.wordUpdateDocrefsSyllable(word=word, syllable=syllables)
    except Exception:
        # Best effort: store an empty value for the word. `Exception`
        # rather than a bare except, so KeyboardInterrupt and SystemExit
        # are not swallowed.
        self.sqls.wordUpdateDocrefsSyllable(word=word, syllable='')
        return "error"
def replaceDocrefsSyllables(self, limit=1000):
    """ Syllabify words returned by getDocrefsWordsWithoutSyllables.

    limit : maximum number of words to process; may be an int or a
            numeric string. Fewer words are processed when fewer exist.

    Returns the words for which setDocrefsSyllable raised an exception,
    followed by the marker 'k' and the words that were processed.
    """
    res = [r[0] for r in self.sqls.getDocrefsWordsWithoutSyllables()]
    # Clamp to the number of rows: indexing up to `limit` used to raise
    # IndexError whenever fewer than `limit` words were returned.
    amount = max(0, min(int(limit), len(res)))
    errors = []
    # NOTE(review): the leading 'k' marker is kept from the original; its
    # purpose is not visible here.
    itsokay = ['k']
    for word in res[:amount]:
        try:
            self.setDocrefsSyllable(word)
        except Exception:
            errors.append(word)
        else:
            itsokay.append(word)
    return errors + itsokay
def getErrorAuthors(self):
    "Error annotators with their counts: the first two columns of every stat_getauthors row, as two-item lists"
    return [[row[0], row[1]] for row in self.sqls.stat_getauthors()]
def getanneandmed(self):
    "Data with counts: the first two columns of every stat_anneandmed row, as two-item lists"
    return [[row[0], row[1]] for row in self.sqls.stat_anneandmed()]
def getautoriandmed(self, REQUEST):
    "Data with counts for one author: REQUEST['author'] (default 'anne') is passed to stat_autoriandmed"
    author = REQUEST.get('author', 'anne')
    rows = self.sqls.stat_autoriandmed(author=author)
    return [[row[0], row[1]] for row in rows]
def getDataVariable(self):
    """ Return JavaScript source that defines and logs the variable `andmed`.

    The value is a header row ['Autor', 'Vigade arv'] followed by the rows
    of getErrorAuthors(), written as the str() of that list.
    """
    rows = [['Autor', 'Vigade arv']] + self.getErrorAuthors()
    # Python 2 writes long integers as e.g. 12L. Strip only that suffix
    # from bare numbers; the previous replace('L', '') also removed every
    # capital L from the author names.
    literal = re.sub(r'(?<![\w\'"])(\d+)L\b', r'\1', str(rows))
    t = "andmed = " + literal + ";\n"
    t += "console.log(andmed);"
    return t
def getSelectedCodes(self, ylatase=()):
    """ List the child marks of the given upper-level codes.

    ylatase : a single code given as a string, or a sequence of codes.

    Returns one flat list with the results of self.Marks.getChildMarks for
    every code, in the order the codes were given.
    """
    try:
        text_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        text_types = str
    if isinstance(ylatase, text_types):
        # A single code: wrap it so it is not iterated character by character.
        ylatase = (ylatase,)
    v = []
    for kood in ylatase:
        v.extend(self.Marks.getChildMarks(kood))
    return v
def getMainCodes(self):
    """ Return the list of main error codes (a new list on every call). """
    main_codes = (
        "global_113978483712",
        "global_11397848460",
        "global_113978485636",
        "global_113978486457",
        "global_113978487438",
        "global_113978488098",
        "global_113978489046",
    )
    return list(main_codes)
def getCodeGroup(self):
    """ Find the error groups: the first two columns of every stat_codegroup row, as two-item lists. """
    return [[row[0], row[1]] for row in self.sqls.stat_codegroup()]
def isInURL(self, REQUEST, value):
    # Tell whether `value` is one of the 'ylatase' values of REQUEST.
    # 'ylatase' may be missing, a single string or a sequence of strings.
    #
    # NOTE(review): documented with comments on purpose. The original has
    # no docstring, and Zope's publisher presumably treats methods with
    # and without a docstring differently - confirm before adding one.
    m = REQUEST.get('ylatase', [])
    try:
        text_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        text_types = str
    if isinstance(m, text_types):
        # A single value: wrap it so `in` compares whole values instead
        # of doing a substring test.
        m = (m,)
    return value in m
def getUpdateStrings(self):
    "Return one UPDATE statement per stat_updates row (id in column 0, codegroup in column 1), joined by newlines"
    statements = [
        "UPDATE errors SET codegroup =" + str(row[1]) + " WHERE id=" + str(row[0]) + ";"
        for row in self.sqls.stat_updates()
    ]
    return "\n".join(statements)
# Initialise the Search class with Zope; must stay after the class body.
InitializeClass(Search)