""" MoinMoin - SearchInPagesAndSort Macro A line-oriented search macro over multiple pages, with sorting @copyright: Pascal Bauermeister @license: GPL Updates: * [v0.3.1] Pascal Sat Nov 6 16:03:01 CET 2004 * Added NoText, RawText, NbSubs and MoreSubsText arguments * [v0.3.1] Pascal Mon Aug 30 21:27:36 CEST 2004 * Corrected bug: did not work well with multiple pages hit. Bug reported by Craig Johnson. It worked in 0.2.x because one bug corrected another one... * If args are not a kw list (e.g. old macro form) inserts usage in html page (brutal, but we really don't want to support the old form any more) * [v0.3.0] Pascal Wed Aug 18 15:39:54 CEST 2004 * macro arguments are now passed as a list of KEYWORD=VALUE * ACL is handled * new options: Reverse and NoHeader * [v0.2.4] Pascal Mon Jul 19 23:40:54 CEST 2004 * Comparisons to None use the 'is' and 'is not' operator (nicer) * Use get() for dict lookup w/ default value * Do not quote args and retry to compile if they are not valid regexes * Corrected usage samples in the comment below * [v0.2.3] Pascal Sun Jul 18 13:45:46 CEST 2004 Avoid endless recursion when matching page contains this macro * [v0.2.2] Fri Jul 16 14:43:23 CEST 2004 * Use Request.redirect(). Thanks to Craig Johnson and Thomas Waldmann . * No more unused imports. * Catch only expected exceptions. * [v0.2.1] Mon Jun 7 11:54:52 CEST 2004 * options: links, heading * works now with MoinMoin Release 1.2 too * [v0.1.1] Wed Oct 29 14:48:02 CET 2003 works with MoinMoin Release 1.1 [Revision 1.173] and Python 2.3.2 * [v0.1.0] 2003/04/24 10:32:04 Original version ---- Usage: [[ SearchInPagesAndSort ]] [[ SearchInPagesAndSort (KEYWORD=VALUE [, ...] 
) ]] Search for 'searchtext' regex in pages matching 'pages' regex, and sort the found lines (=hits) in this order: 1) substring of the hit matching 'sortkey'; group same matches of 'sortkey' by a header 2) substring of the hit matching 'searchtext' 3) the hit itself If no arguments are given, the usage is inserted in the HTML result. Possible keywords: Help = 0, 1, 2 displays 1:short or 2:full help in the page; default: 0 (i.e. no help) Pages = 'PAGES REGEX' pages in which the text is sought; if empty (default) search in the current page and defaults 'NoLinks' to 1; default: empty (i.e. current page) SearchText = 'TEXT REGEX' to search for lines in matching pages; mandatory! SortKey = 'TEXT REGEX' criterion to sort matching lines (=hits); default: empty (i.e. no sorting) Heading = 'TEXT REGEX' follow each hit by the text matching Regex, that precedes the hit in its source page; default: empty (i.e. no headings) UnassignedText = 'WIKI TEXT' header for hits not matching the sort key; default: '[unassigned]' Reverse = 0 or 1 reverse-sort the hits; default: 0 (i.e. forward sort) RawText = 0 or 1 do not format found text; default: 0 (i.e. formatted) NbSubs = 0, N, or 'all' follow each hit by max N sub lines (i.e. next lines with greater indent) of source text following the hit; if N is 'all', take *all* sub lines; if N is positive, take N *first* sub lines; if N is negative, take the |N| *last* sub lines; default: 0 (i.e. do *not* include subs) MoreSubsText = 'WIKI TEXT' if there are more sub lines than 'NbSubs', follow/precede the last/first sub lines by this text; default: '...' NoHeader = 0 or 1 disable showing the headers as subtitles; default: 0 (i.e. show headers) NoLinks = 0 or 1 disable following each hit by a link to its page; default: 0 (i.e. show links) or 1 if 'Pages' is omitted NoPageText = 'HTML TEXT' text displayed if no page matches 'Pages'; default: an error message w/ Page regex NoText = 0 or 1 disables showing the found text; default: 0 (i.e. 
show found text) Keywords can also be given in upper or lower cases, or abbreviated. Example: SearchText, searchtext, SEARCHTEXT, st, ST, Pages, p, etc. ---- Sample 1: Given a page named 'ProjectA': 1. Action Items 1. [Alan] {2} to launch this task 1. [Alan] {1} to do this urgent thing 1. [Ben][Clara] {3} do this as background task 1. Deadlines 1. 2003-03-12 [Alan][Clara]: deliver 1st version of the Release X ...and a page named 'ProjectB': * [Denise] {2} Development of task Xyz * [Eric] {1} Tests of feature F * [Eric] (./) Tests of feature E ...using the macro in a page named 'ActionItems' like this: = ActionItems = [[SearchInPagesAndSort(pages="Project.*", searchtext="{[123]}", sortkey="\[[A-Za-z_]*\]")]] = Deadlines = [[SearchInPagesAndSort(pages="Project.*", searchtext="")]] = Completed tasks = [[SearchInPagesAndSort(pages="Project.*", searchtext="(\./)", sortkey="\[[A-Za-z_]*\]")]] ...will give this output (note: _text_ are links): ActionItems * [Alan] * [Alan] {1} to do this urgent thing _ProjectA_ * [Alan] {2} to launch this task _ProjectA_ * [Denise] * [Denise] {2} Development of task Xyz _ProjectB_ * [Ben] * [Ben][Clara] {3} do this as background task _ProjectA_ * [Eric] * [Eric] {1} Tests of feature F _ProjectB_ * [Clara] * [Ben][Clara] {3} do this as background task _ProjectA_ Deadlines * 2003-03-12 [Alan][Clara]: deliver 1st version of the Release X _ProjectA_ Completed tasks * [Eric] * [Eric] (./) Tests of feature E _ProjectB_ Sample 2: Given a page containing: == Tasks for (ABC) == * {1} (due:2003-12-16) [Mike] Do this == Tasks for (XYZ) == * {2} (due:2003-12-17) [John_Doe][Mike] Do that ...the following macro call in the same page: [[SearchInPagesAndSort(searchtext="{[123]}", sortkey="\[[A-Za-z_ -]*\]", nolinks=1, heading="\([A-Z]+\)")]] ...will produce: * [John_Doe] * {2} (due:2003-12-17) [John_Doe][Mike] Do that (XYZ) * [Mike] * {1} (due:2003-12-16) [Mike] Do this (ABC) * {2} (due:2003-12-17) [John_Doe][Mike] Do that (XYZ) """ # Imports 
import re
import sys
import cStringIO
from string import ascii_lowercase, maketrans

from MoinMoin import config, wikiutil
from MoinMoin.Page import Page
from MoinMoin.parser import wiki

Dependencies = ["time"]  # macro cannot be cached

# Non-zero while the macro is running; execute() refuses to run re-entrantly
# so that a matching page which itself contains this macro cannot recurse.
_recursions = 0

# Identity translation table, so that str.translate() can be used only for
# its "characters to delete" argument.
FAKETRANS = maketrans("", "")

# Leaders removed from the beginning of a hit before it is displayed.
_LIST_LEADERS = ("*", "1.", "a.", "A.", "i.", "I.")


class _Error(Exception):
    """Raised for any problem that must be reported in the rendered page."""
    pass


def execute(macro, text, args_re=None):
    """Macro entry point.

    'macro' is the macro object handed over by the wiki engine, 'text' the
    raw argument string of the macro call.  'args_re' is accepted but not
    used.  Returns the markup to insert in the page; an _Error raised while
    processing is turned into an error message.  Returns '' when called
    while another call is still in progress."""
    global _recursions
    if _recursions:
        return ''

    _recursions += 1
    try:
        try:
            return _execute(macro, text)
        except _Error:
            # sys.exc_info() instead of "except X, e" / "except X as e" so
            # that the syntax is accepted by every Python version.
            msg = sys.exc_info()[1]
            return "\n\nError: macro SearchInPagesAndSort: %s\n\n" % msg
    finally:
        # Always release the guard: an unexpected exception must not leave
        # the macro disabled for the rest of the process lifetime.
        _recursions -= 1


def _delparam(keyword, params):
    """Removes 'keyword' from the 'params' dict and returns its value."""
    value = params[keyword]
    del params[keyword]
    return value


def _param_get(params, spec, default):
    """Returns the value for a parameter, if specified with one of several
    acceptable keyword names, or returns its default value if it is missing
    from the macro call.  If the parameter is specified, it is removed from
    the list, so that remaining params can be signalled as unknown.

    Accepted spellings of 'spec', tried in this order: literal, all lower
    case, all upper case, abbreviated to its capital letters (first letter
    always included), and that abbreviation in lower case."""
    abbrev = spec[0].upper() + spec[1:]  # capitalize 1st letter
    abbrev = abbrev.translate(FAKETRANS, ascii_lowercase)

    for name in (spec, spec.lower(), spec.upper(), abbrev, abbrev.lower()):
        if name in params:
            return _delparam(name, params)
    return default


def _usage(full=False):
    """Returns the interesting part of the module's doc.

    If 'full' is true the whole module docstring is returned.  Otherwise
    only the lines starting at the one that reads 'usage:' (compared
    case-insensitively, surrounding blanks ignored) and ending before the
    next line that starts with '--' are returned."""
    if full:
        return __doc__

    lines = __doc__.splitlines()
    start = 0
    for i, line in enumerate(lines):
        if line.strip().lower() == "usage:":
            start = i
            break

    end = len(lines)
    for i in range(start, end):
        if lines[i].startswith('--'):
            end = i
            break
    return '\n'.join(lines[start:end])


def _re_compile(text, name):
    """Compiles 'text' as a case-insensitive regex.

    'name' is the macro argument the regex came from; it is only used in
    the message of the _Error raised when compilation fails."""
    try:
        return re.compile(text, re.IGNORECASE)
    except Exception:
        msg = sys.exc_info()[1]
        # The regex comes from the page source and the message ends up in
        # HTML output, hence the escaping.
        shown = wikiutil.escape("%s" % (text,))
        raise _Error("%s for regex argument %s: '%s'" % (msg, name, shown))


def _indent_of(line, pos=0):
    """Returns the number of consecutive spaces in 'line' starting at
    index 'pos'."""
    end = len(line)
    if pos < 0:
        pos = max(end + pos, 0)  # same meaning as in a slice
    i = pos
    # Count in place rather than slicing: 'line' may be a whole page body.
    while i < end and line[i] == ' ':
        i += 1
    return i - pos


def _subtext_get(body, pos, nbsubs, indent, moresubs):
    """Returns the sub lines that follow a hit.

    'body' is the page text and 'pos' the index of the end of the hit line.
    Sub lines are the following lines indented by more than 'indent'
    spaces.  'nbsubs' is 'all', or a positive count (keep the first ones),
    or a negative count (keep the last ones); when lines are dropped,
    'moresubs' is put in their place.  The result is prefixed by 'indent'
    spaces."""
    end = len(body)
    lead = ' ' * indent

    subpos = pos + 1
    while subpos < end and _indent_of(body, subpos) > indent:
        newline = body.find("\n", subpos)
        if newline == -1:
            # last line of the page without trailing newline: it is a sub
            # line too, take it entirely
            subpos = end
            break
        subpos = newline + 1

    subs = body[pos:subpos].strip('\n').split('\n')
    count = len(subs)
    if nbsubs == 'all':
        pass
    elif nbsubs > 0 and count > nbsubs:
        subs = subs[0:nbsubs]
        subs.append(lead + moresubs)
    elif nbsubs < 0 and count > -nbsubs:
        subs = subs[nbsubs:]
        subs.insert(0, lead + moresubs)

    return lead + '\n'.join(subs)


def _line_start(body, pos):
    """Returns the index at which the line containing 'pos' starts."""
    if pos < len(body) and body[pos] == '\n':
        # a match that begins on a newline character stays where it is
        return pos
    # rfind() gives -1 on the first line, which yields index 0
    return body.rfind('\n', 0, pos) + 1


def _strip_list_leaders(line):
    """Returns 'line' without its list item leader(s) and outer blanks."""
    for leader in _LIST_LEADERS:
        if line.startswith(leader):
            line = line[len(leader):]
    return line.strip()


def _sort_keys(line, key_re, unassigned_text):
    """Returns the list of sort keys under which 'line' must be filed:
    [''] without sort regex, every match of 'key_re' in 'line' otherwise,
    or ['unassigned_text'] if the regex does not match at all."""
    if key_re is None:
        return [""]
    # finditer() always makes progress, even for a regex that can match the
    # empty string.
    keys = [m.group(0) for m in key_re.finditer(line)]
    if not keys:
        keys = [unassigned_text]
    return keys


def _page_hits(page_name, body, text_re, key_re, heading_re,
               unassigned_text, nbsubs, moresubs):
    """Returns the hit records found in the text 'body' of one page.

    Each record is the list [sort key, matched text, line text, page name,
    heading text, sub lines text]; a line yields one record per sort key.
    'key_re' and 'heading_re' may be None."""
    records = []
    body_len = len(body)
    pos = 0
    last_start = -1
    last_end = -1
    heading_text = ""

    # The bound stops a regex able to match the empty string at the very
    # end of the page from being found over and over.
    while pos <= body_len:
        match = text_re.search(body, pos)
        if not match:
            break

        # keep the nearest heading preceding the found text
        if heading_re is not None:
            heading_pos = pos
            while True:
                heading_match = heading_re.search(body, heading_pos)
                if not heading_match \
                       or heading_match.start() >= match.start():
                    break
                heading_text = heading_match.group(0)
                # step over empty matches so that the scan always advances
                heading_pos = max(heading_match.end(),
                                  heading_match.start() + 1)

        # next search resumes after the found text
        pos = match.end() + 1

        # boundaries of the line holding the found text
        start_pos = _line_start(body, match.start())
        end_pos = body.find("\n", match.end())
        if end_pos == -1:
            end_pos = body_len  # last line without trailing newline

        # skip a line already recorded, comments and empty lines
        if start_pos == last_start or end_pos == last_end:
            continue
        raw_line = body[start_pos:end_pos]
        line = raw_line.strip()
        if line.startswith("##"):
            continue
        line = _strip_list_leaders(line)
        if not line:
            continue

        if nbsubs:
            subtext = '\n' + _subtext_get(body, end_pos, nbsubs,
                                          _indent_of(raw_line), moresubs)
        else:
            subtext = ''

        found_text = match.group(0)
        for keyval in _sort_keys(line, key_re, unassigned_text):
            records.append([keyval, found_text, line, page_name,
                            heading_text, subtext])

        last_start = start_pos
        last_end = end_pos

    return records


# The "raison d'etre" of this module
def _execute(macro, text):
    """Parses the macro arguments 'text', searches the selected pages and
    returns the markup listing the hits.  Raises _Error for anything that
    must be reported to the reader of the page."""
    # SECURITY: the argument list is taken from the page source and handed
    # to eval(), i.e. whoever can edit a page can run arbitrary Python code
    # in the wiki process.  Left as is because replacing it changes which
    # argument values are accepted; it should be replaced by a real parser.
    try:
        params = eval("(lambda **opts: opts)(%s)" % text)
    except Exception:
        # 'text' may be None (macro called without arguments)
        shown = wikiutil.escape("%s" % (text,))
        raise _Error("malformed arguments list: %s\nusage:\n\n%s\n\n"
                     % (shown, _usage()))

    # The order matters: 'Heading' and 'Help' share the abbreviation 'H'.
    arg_text = _param_get(params, 'SearchText', None)
    arg_pages = _param_get(params, 'Pages', '')
    arg_key = _param_get(params, 'SortKey', None)
    opt_heading = _param_get(params, 'Heading', None)
    opt_unassigned_text = _param_get(params, 'UnassignedText',
                                     "[unassigned]")
    opt_reverse = _param_get(params, 'Reverse', False)
    opt_rawtext = _param_get(params, 'RawText', False)
    if len(arg_pages) > 0:
        def_nolinks = 0
    else:
        def_nolinks = 1
    opt_nolinks = _param_get(params, 'NoLinks', def_nolinks)
    opt_noheader = _param_get(params, 'NoHeader', False)
    opt_notext = _param_get(params, 'NoText', False)
    opt_nopage = _param_get(params, 'NoPageText', None)
    opt_help = _param_get(params, 'Help', 0)
    opt_nbsubs = _param_get(params, 'NbSubs', 0)
    if opt_nbsubs == 'all':
        def_moresubs = None
    else:
        def_moresubs = '...'
    opt_moresubs = _param_get(params, 'MoreSubsText', def_moresubs)

    # help ?
    if opt_help:
        return ("\n\nMacro SearchInPagesAndSort usage:\n\n%s\n\n"
                % _usage(opt_help == 2))

    # check the args a little bit
    if params:
        shown = wikiutil.escape(repr(params.keys()))
        raise _Error("unknown argument(s): %s\nusage:\n\n%s\n\n"
                     % (shown, _usage()))

    if arg_text is None:
        raise _Error("missing 'searchtext' argument")

    # empty page means this page; subpages are also handled.  The page name
    # is a literal, not a regex: escape it, and anchor it so that pages
    # merely containing that name are not searched as well.
    if len(arg_pages) == 0 or arg_pages.startswith('/'):
        this_page = re.escape(macro.formatter.page.page_name)
        if len(arg_pages) == 0:
            arg_pages = "^%s$" % this_page
        else:
            arg_pages = "^%s%s" % (this_page, arg_pages)

    # get a list of pages matching the PageRegex
    pages_re = _re_compile(arg_pages, 'Pages')
    all_pages = wikiutil.getPageList(config.text_dir)
    hits = [name for name in all_pages if pages_re.search(name)]

    # check ACL now (since we may end up with no pages)
    if config.acl_enabled:
        me = macro.request.user.name
        hits = [name for name in hits
                if Page(name).getACL().may(macro.request, me, "read")]

    # sort pages, check if we have pages
    if not hits:
        if opt_nopage:
            return "%s" % opt_nopage
        raise _Error("no page matching '%s'!" % wikiutil.escape(arg_pages))
    hits.sort()

    # compile all regex
    text_re = _re_compile(arg_text, 'SearchText')
    key_re = None
    if arg_key is not None:
        key_re = _re_compile(arg_key, 'SortKey')
    heading_re = None
    if opt_heading is not None:
        heading_re = _re_compile(opt_heading, 'Heading')

    # collect matching lines in each matching page
    all_matches = []
    for page_name in hits:
        body = Page(page_name).get_raw_body()
        all_matches.extend(_page_hits(page_name, body, text_re, key_re,
                                      heading_re, opt_unassigned_text,
                                      opt_nbsubs, opt_moresubs))

    # prepare some formatting text
    bullet_list_open = macro.formatter.bullet_list(1)
    bullet_list_close = macro.formatter.bullet_list(0)
    listitem_open = macro.formatter.listitem(1)
    listitem_close = macro.formatter.listitem(0)

    # now sort and format records
    if not opt_notext:
        all_matches.sort()
    if opt_reverse:
        all_matches.reverse()

    out = ["\n" + bullet_list_open]  # pieces of the result, joined at end
    current_key = ""
    head_count = 0
    last_pagename = ""

    # treat records for output
    for item in all_matches:
        keytext, srchtext, text, pagename, heading_text, subtext = item

        if opt_notext:
            # one entry per page only
            if last_pagename == pagename:
                continue
            last_pagename = pagename
            text_fmtted = ""
        elif opt_rawtext:
            text_fmtted = wikiutil.escape(text)
        else:
            # parse the text (in wiki source format) and make HTML
            text_fmtted = _format(text, macro.request, macro.formatter)
            text_fmtted = text_fmtted.strip(' ')  # preserve newlines
            # empty text => drop this item
            if not text_fmtted:
                continue

        # insert heading (only if not yet done)
        if not opt_noheader \
               and arg_key is not None \
               and keytext != current_key:
            # this is a new heading: close the previous group, open a new
            current_key = keytext
            if head_count:
                out.append("\n " + bullet_list_close)
                out.append("\n " + listitem_close)
            head_count += 1
            out.append("\n " + listitem_open)
            out.append(_format(current_key, macro.request, macro.formatter))
            out.append("\n " + bullet_list_open)

        # correct the text format (berk): take the text out of its
        # paragraph so that link, heading and sub lines can follow it.
        # NOTE(review): these three tag literals are inferred from the
        # slice lengths (4 and 5 characters) - confirm against the
        # formatter's actual output.
        if text_fmtted.startswith("\n<p>"):
            text_fmtted = text_fmtted[4:]
        if text_fmtted.endswith("</p>\n"):
            text_fmtted = text_fmtted[:-5]
            text_trailer = "\n</p>\n"
        else:
            text_trailer = ""

        # insert text
        out.append("\n " + listitem_open)
        out.append(text_fmtted)
        if not opt_nolinks:
            out.append(" ")
            try:
                # try MoinMoin 1.1 API
                link_text = wikiutil.link_tag(pagename)
            except TypeError:
                # try MoinMoin 1.2 API
                link_text = wikiutil.link_tag(macro.request, pagename)
            out.append(link_text)
        if opt_heading is not None:
            out.append(" ")
            # raw page text going into HTML output: escape it
            out.append(wikiutil.escape(heading_text))
        if opt_nbsubs:
            out.append(_format(subtext, macro.request, macro.formatter))
        out.append(text_trailer + "\n " + listitem_close)

    # all items done: close the last group (inner list, then the item
    # holding its heading), then the outer list
    if head_count:
        out.append("\n " + bullet_list_close)
        out.append("\n " + listitem_close)
    out.append("\n" + bullet_list_close)

    # done
    return ''.join(out)


def _format(src_text, request, formatter):
    """Parses 'src_text' (in wiki source format) and returns what the
    parser wrote, after diverting the output of 'request' to a string."""
    str_out = cStringIO.StringIO()  # create str to collect output
    request.redirect(str_out)       # divert output to that string
    try:
        # parse this text
        wiki.Parser(src_text, request).format(formatter)
    finally:
        # restore output, even if the parser failed
        request.redirect()
    return str_out.getvalue()       # return what was generated