From ab24a26d858e53de71946fb678cee96c962723a4 Mon Sep 17 00:00:00 2001 From: UlrichB22 <97119703+UlrichB22@users.noreply.github.com> Date: Sun, 24 Nov 2024 14:31:23 +0100 Subject: [PATCH] ItemList performance: mv regex handling to search_meta --- src/moin/items/__init__.py | 4 ++-- src/moin/macros/ItemList.py | 11 +++-------- src/moin/macros/_base.py | 12 +++++++----- src/moin/storage/middleware/indexing.py | 7 ++++++- src/moin/storage/middleware/protecting.py | 4 ++-- 5 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/moin/items/__init__.py b/src/moin/items/__init__.py index bc0d2ea01..fee474d8b 100644 --- a/src/moin/items/__init__.py +++ b/src/moin/items/__init__.py @@ -1357,7 +1357,7 @@ def build_index_query(self, startswith=None, selected_groups=None, isglobalindex return query - def get_index(self, startswith=None, selected_groups=None): + def get_index(self, startswith=None, selected_groups=None, regex=None): """ Get index enties for descendents of the matching items @@ -1378,7 +1378,7 @@ def get_index(self, startswith=None, selected_groups=None): ) if not fqname.value.startswith(NAMESPACE_ALL + "/") and fqname.value != NAMESPACE_ALL: query = Term(NAMESPACE, fqname.namespace) & query - revs = flaskg.storage.search_meta(query, idx_name=LATEST_REVS, sortedby=NAME_EXACT, limit=None) + revs = flaskg.storage.search_meta(query, idx_name=LATEST_REVS, sortedby=NAME_EXACT, limit=None, regex=regex) return self.make_flat_index(revs, isglobalindex) diff --git a/src/moin/macros/ItemList.py b/src/moin/macros/ItemList.py index 96d138f4b..5ec03608d 100644 --- a/src/moin/macros/ItemList.py +++ b/src/moin/macros/ItemList.py @@ -133,20 +133,15 @@ def macro(self, content, arguments, page_url, alternative): err_msg = _("Item does not exist or read access blocked by ACLs: {0}").format(item) return fail_message(err_msg, alternative) - # process subitems - children = get_item_names(item, startswith=startswith, skiptag=skiptag, tag=tag) if regex: try: - regex_re = re.compile(regex, re.IGNORECASE) + re.compile(regex, re.IGNORECASE) except re.error as err: err_msg = _("Error in regex {0!r}: {1}").format(regex, err) return fail_message(err_msg, alternative) - newlist = [] - for child in children: - if regex_re.search(child.fullname): - newlist.append(child) - children = newlist + children = get_item_names(item, startswith=startswith, skiptag=skiptag, tag=tag, regex=regex) + if not children: return fail_message(_("No matching items were found"), alternative, severity="attention") diff --git a/src/moin/macros/_base.py b/src/moin/macros/_base.py index 0b982bfb1..5bee33157 100644 --- a/src/moin/macros/_base.py +++ b/src/moin/macros/_base.py @@ -19,7 +19,7 @@ from moin.constants.keys import TAGS -def get_item_names(name="", startswith="", kind="files", skiptag="", tag=""): +def get_item_names(name="", startswith="", kind="files", skiptag="", tag="", regex=None): """ For the specified item, return the fullname of matching descendents. @@ -49,7 +49,7 @@ def get_item_names(name="", startswith="", kind="files", skiptag="", tag=""): item = Item.create(name) except AccessDenied: abort(403) - dirs, files = item.get_index(startswith) + dirs, files = item.get_index(startswith, regex=regex) item_names = [] if not kind or kind == "files" or kind == "both": for item in files: @@ -213,8 +213,7 @@ def create_pagelink_list(self, pagenames, alternative, ordered=False, display="F ItemTitle : Use the title from the first header in the linked page """ - page_list = moin_page.list(attrib={moin_page.item_label_generate: ordered and "ordered" or "unordered"}) - + children = [] for pagename in pagenames: fqname = pagename.fullname @@ -245,7 +244,10 @@ def create_pagelink_list(self, pagenames, alternative, ordered=False, display="F pagelink = moin_page.a(attrib={xlink.href: url}, children=[linkname]) item_body = moin_page.list_item_body(children=[pagelink]) item = moin_page.list_item(children=[item_body]) - page_list.append(item) + children.append(item) + page_list = moin_page.list( + attrib={moin_page.item_label_generate: ordered and "ordered" or "unordered"}, children=children + ) return page_list diff --git a/src/moin/storage/middleware/indexing.py b/src/moin/storage/middleware/indexing.py index 21281515e..3f2be940d 100644 --- a/src/moin/storage/middleware/indexing.py +++ b/src/moin/storage/middleware/indexing.py @@ -51,6 +51,7 @@ import gc import os +import re import sys import shutil import time @@ -889,14 +890,18 @@ def search_page(self, q, idx_name=LATEST_REVS, pagenum=1, pagelen=10, **kw): item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID]) yield item.get_revision(doc[REVID], doc=doc) - def search_meta(self, q, idx_name=LATEST_REVS, **kw): + def search_meta(self, q, idx_name=LATEST_REVS, regex=None, **kw): """ Search with query q, yield Revision metadata from index. """ with self.ix[idx_name].searcher() as searcher: # Note: callers must consume everything we yield, so the for loop # ends and the "with" is left to close the index files. + if regex: + regex_re = re.compile(regex, re.IGNORECASE) for hit in searcher.search(q, **kw): + if regex and not regex_re.search(hit[NAME][0]): + continue meta = hit.fields() yield meta diff --git a/src/moin/storage/middleware/protecting.py b/src/moin/storage/middleware/protecting.py index 2f92c809b..4a9509402 100644 --- a/src/moin/storage/middleware/protecting.py +++ b/src/moin/storage/middleware/protecting.py @@ -192,7 +192,7 @@ def search_page(self, q, idx_name=LATEST_REVS, pagenum=1, pagelen=10, **kw): if rev.allows(READ) or rev.allows(PUBREAD): yield rev - def search_meta(self, q, idx_name=LATEST_REVS, **kw): + def search_meta(self, q, idx_name=LATEST_REVS, regex=None, **kw): """ Yield an item's metadata, skipping any items where read permission is denied. @@ -200,7 +200,7 @@ def search_meta(self, q, idx_name=LATEST_REVS, **kw): of the items in namespace subject to query restrictions. This is useful for reports such as Global Index, Global Tags, Wanted Items, Orphaned Items, etc. """ - for meta in self.indexer.search_meta(q, idx_name, **kw): + for meta in self.indexer.search_meta(q, idx_name, regex=regex, **kw): meta[FQNAMES] = gen_fqnames(meta) result = self.may_read_rev(meta) if result: