######################################################################
# regexps used by ZWiki
#
# This module only defines pattern *strings*; nothing is compiled here.
# The patterns are composed from one another with %-formatting, so the
# order of the assignments below matters.

# in order to support more flexible numbers in wikinames, eg
# Issue0001, Issue0001SubjectHeading etc while not compromising
# simplicity, how about: "one or more digits is equivalent to a single
# capital letter" ?
# hmm getting a bit tricky

import re

# international (latin?) characters
#
# Pairs of (character, HTML entity).  The second element must be the
# entity reference, not the character itself - a pair that maps a
# character onto itself makes the conversion a no-op.
intl_char_entities = (
    ('\300', '&Agrave;'),      #À#<--char
    ('\302', '&Acirc;'),       #Â#
    ('\311', '&Eacute;'),      #É#
    ('\312', '&Ecirc;'),       #Ê#
    ('\316', '&Icirc;'),       #Î#
    ('\324', '&Ocirc;'),       #Ô#
    ('\333', '&Ucirc;'),       #Û#
    ('\340', '&agrave;'),      #à#
    ('\342', '&acirc;'),       #â#
    ('\347', '&ccedil;'),      #ç#
    ('\350', '&egrave;'),      #è#
    ('\351', '&eacute;'),      #é#
    ('\352', '&ecirc;'),       #ê#
    ('\356', '&icirc;'),       #î#
    ('\364', '&ocirc;'),       #ô#
    ('\371', '&ugrave;'),      #ù#
    ('\373', '&ucirc;'),       #û#
#    ('\253', '&laquo;'),       #«#
#    ('\273', '&raquo;'),       #»#
#    ('(r)',  '&reg;'),
#    ('(tm)', '&trade;'),
#    ('^s',   '&nbsp;'),
    )

# regular expressions (worth compiling these ?)

# use urllib/urlparse ?
# urlchars: one or more characters allowed inside a URL.
# urlendchar: the single character class a remote interwiki name must
# end with (letter, digit or slash).
urlchars         = r'[A-Za-z0-9/:@_%~#=&\.\-\?\+\$]+'
urlendchar       = r'[A-Za-z0-9/]'
# An optional leading quote or '=' followed by scheme ':' urlchars.
url              = r'["=]?((about|gopher|http|https|ftp|mailto|file):%s)' % \
                   (urlchars)

# I think this got complicated in an attempt to co-exist with stx's references
# or perhaps it's this way so it can be used to strip the brackets as well
# (the first character inside the brackets may not be ']' or a digit)
bracketedexpr    = r'\[([^\]0-9][^]]*)\]'

# wikiname1: capitals, lowercase letters, a capital, then any letters,
#            with optional trailing digits (eg. WikiName, Issue0001 no,
#            IssueTracker0001 yes).
# wikiname2: two or more capitals, a lowercase letter, then any letters,
#            with optional trailing digits (eg. ZWiki).
wikiname1        = r'\b[A-Z]+[a-z]+[A-Z][A-Za-z]*[0-9]*'
wikiname2        = r'\b[A-Z][A-Z]+[a-z][A-Za-z]*[0-9]*'

# A leading '!' is matched (optionally) by all of the link patterns.
simplewikilink   = r'!?(%s|%s|%s)' % (wikiname1, wikiname2, url)
wikilink         = r'!?(%s|%s|%s|%s)' % \
                   (wikiname1, wikiname2, bracketedexpr, url)

# local-link ':' remote-name.  The two halves are captured as the named
# groups 'local' and 'remote'; a bare "(?P" without "<name>" is not
# valid regexp syntax.
# NOTE(review): the group names were restored to match the CMFWiki
# variant of this pattern - confirm against the callers' .group() use.
interwikilink    = r'!?((?P<local>%s):(?P<remote>%s))' % \
                   (wikilink, urlchars+urlendchar)

# Multi-line patterns for page metadata lines and '!'-prefixed lines.
remotewikiurl    = r'(?m)RemoteWikiURL[:\s]*([^\s]+)\s*$'
pagesubscribers  = r'(?m)PageSubscribers[:\s]*(.*)$'
protected_line   = r'(?m)^!(.*)$'

# intl. regexps - won't work because we convert intl. chars to
# entities to protect them from browser textareas
#IU = intlupper = '\300\302\311\312\316\324\333'
#IL = intllower = '\340\342\347\350\351\352\356\364\371\373'
#IA = intlall   = IU+IL
#url = r'["=]?((http|ftp|mailto|file):[A-Za-z0-9/:@_%~\.\-\?'+IA+']+)'
#wikiname1 = r'[A-Z'+IU+']+[a-z'+IL+']+[A-Z'+IU+'][A-Za-z'+IA+']*'
#wikiname2 = r'[A-Z]'+IU+'[A-Z'+IU+']+[a-z'+IL+'][A-Za-z'+IA+']*'

# CMFWiki version:
#
# NOTE(review): the angle-bracketed markup in the quoted literals of this
# reference copy (group names, antidecaptext, commentsdelim, preexp,
# unpreexp) had been stripped and is restored here as a best guess -
# verify against upstream CMFWiki before re-enabling any of it.
#
#urlchars          = (r'[A-Za-z0-9/:@_%~#=&\.\-\?]+')
#url               = (r'["=]?((http|https|ftp|mailto|file|about):%s)'
#                     % (urlchars))
#urlexp            = re.compile(url)
## trying to co-exist with stx references:
#bracketedexpr     = r'\[([^\]0-9][^]]*)\]'
#bracketedexprexp  = re.compile(bracketedexpr)
#underlinedexpr    = r'_([^_]+)_'
#underlinedexprexp = re.compile(underlinedexpr)
#wikiname1         = r'\b[A-Z]+[a-z~]+[A-Z0-9][A-Z0-9a-z~]*'
#wikiname2         = r'\b[A-Z][A-Z0-9]+[a-z~][A-Z0-9a-z~]*'
#simplewikilinkexp = re.compile(r'!?(%s|%s)' % (wikiname1, wikiname2))
#
#wikiending        = r"[ \t\n\r\f\v:;.,<)!?']"
#urllinkending     = r'[^A-Za-z0-9/:@_%~\.\-\?]'
#wikilink          = (r'!?(%s%s|%s%s|%s|%s%s)'
#                     % (wikiname1,wikiending,
#                        wikiname2,wikiending,
#                        bracketedexpr,url,urllinkending))
#wikilinkexp       = re.compile(wikilink)
#wikilink_         = r'!?(%s|%s|%s|%s)' % \
#                    (wikiname1,wikiname2,bracketedexpr,url)
#interwikilinkexp  = re.compile(r'!?((?P<local>%s):(?P<remote>[\w]+))'
#                               % (wikilink_))
#remotewikiurlexp  = re.compile(r'(?m)RemoteWikiURL[:\s]*(.*)$')
#protected_lineexp = re.compile(r'(?m)^!(.*)$')
#
#antidecaptext     = '<!--antidecapitationkludge-->\n'
#antidecapexp      = re.compile(antidecaptext)
#
#commentsdelim = "<hr solid id=comments_below>"
#preexp = re.compile(r'<pre>')
#unpreexp = re.compile(r'</pre>')
#citedexp = re.compile(r'^\s*>')
## Match group 1 is citation prefix, group 2 is leading whitespace:
#cite_prefixexp = re.compile('([\s>]*>)?([\s]*)')