ItemList performance: mv regex handling to search_meta #1813

Merged: 1 commit, Nov 30, 2024
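
This PR moves the ItemList macro's regex filtering out of the macro and into the index search itself: instead of fetching every descendant name and then filtering the list inside ItemList.py, the pattern is compiled once and applied to each hit inside IndexingMiddleware.search_meta() as results stream from the index searcher. A minimal standalone sketch of that filtering pattern (plain name strings stand in for index hits; this is not the moin API):

import re

def filter_names(names, regex=None):
    # Compile once, then drop non-matching hits while streaming,
    # mirroring what the new search_meta() does below.
    regex_re = re.compile(regex, re.IGNORECASE) if regex else None
    for name in names:
        if regex_re and not regex_re.search(name):
            continue
        yield name

# Example: only names containing "help" (case-insensitive) are yielded.
print(list(filter_names(["Home", "HelpOnLinking", "users/HelpIndex"], regex="help")))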
4 changes: 2 additions & 2 deletions src/moin/items/__init__.py
@@ -1357,7 +1357,7 @@ def build_index_query(self, startswith=None, selected_groups=None, isglobalindex

         return query

-    def get_index(self, startswith=None, selected_groups=None):
+    def get_index(self, startswith=None, selected_groups=None, regex=None):
         """
         Get index enties for descendents of the matching items
@@ -1378,7 +1378,7 @@ def get_index(self, startswith=None, selected_groups=None):
             )
         if not fqname.value.startswith(NAMESPACE_ALL + "/") and fqname.value != NAMESPACE_ALL:
             query = Term(NAMESPACE, fqname.namespace) & query
-        revs = flaskg.storage.search_meta(query, idx_name=LATEST_REVS, sortedby=NAME_EXACT, limit=None)
+        revs = flaskg.storage.search_meta(query, idx_name=LATEST_REVS, sortedby=NAME_EXACT, limit=None, regex=regex)
         return self.make_flat_index(revs, isglobalindex)
11 changes: 3 additions & 8 deletions src/moin/macros/ItemList.py
@@ -133,20 +133,15 @@ def macro(self, content, arguments, page_url, alternative):
             err_msg = _("Item does not exist or read access blocked by ACLs: {0}").format(item)
             return fail_message(err_msg, alternative)

-        # process subitems
-        children = get_item_names(item, startswith=startswith, skiptag=skiptag, tag=tag)
         if regex:
             try:
-                regex_re = re.compile(regex, re.IGNORECASE)
+                re.compile(regex, re.IGNORECASE)
             except re.error as err:
                 err_msg = _("Error in regex {0!r}: {1}").format(regex, err)
                 return fail_message(err_msg, alternative)

-            newlist = []
-            for child in children:
-                if regex_re.search(child.fullname):
-                    newlist.append(child)
-            children = newlist
+        children = get_item_names(item, startswith=startswith, skiptag=skiptag, tag=tag, regex=regex)

         if not children:
             return fail_message(_("No matching items were found"), alternative, severity="attention")
12 changes: 7 additions & 5 deletions src/moin/macros/_base.py
@@ -19,7 +19,7 @@
 from moin.constants.keys import TAGS


-def get_item_names(name="", startswith="", kind="files", skiptag="", tag=""):
+def get_item_names(name="", startswith="", kind="files", skiptag="", tag="", regex=None):
     """
     For the specified item, return the fullname of matching descendents.
@@ -49,7 +49,7 @@ def get_item_names(name="", startswith="", kind="files", skiptag="", tag=""):
         item = Item.create(name)
     except AccessDenied:
         abort(403)
-    dirs, files = item.get_index(startswith)
+    dirs, files = item.get_index(startswith, regex=regex)
     item_names = []
     if not kind or kind == "files" or kind == "both":
         for item in files:
@@ -213,8 +213,7 @@ def create_pagelink_list(self, pagenames, alternative, ordered=False, display="F
         ItemTitle : Use the title from the first header in the linked page
         """

-        page_list = moin_page.list(attrib={moin_page.item_label_generate: ordered and "ordered" or "unordered"})
-
+        children = []
         for pagename in pagenames:

             fqname = pagename.fullname
@@ -245,7 +244,10 @@ def create_pagelink_list(self, pagenames, alternative, ordered=False, display="F
             pagelink = moin_page.a(attrib={xlink.href: url}, children=[linkname])
             item_body = moin_page.list_item_body(children=[pagelink])
             item = moin_page.list_item(children=[item_body])
-            page_list.append(item)
+            children.append(item)
+        page_list = moin_page.list(
+            attrib={moin_page.item_label_generate: ordered and "ordered" or "unordered"}, children=children
+        )

         return page_list
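
In create_pagelink_list() above, list items are now collected in a plain Python list and handed to moin_page.list() once via children=, rather than appending to the already-built list node inside the loop. A generic sketch of that build-then-construct pattern (Node is a stand-in class, not the moin_page API):

class Node:
    def __init__(self, children=()):
        self.children = list(children)

# build the child nodes first ...
items = [Node() for _ in range(3)]
# ... then construct the parent once, with all children attached
page_list = Node(children=items)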
7 changes: 6 additions & 1 deletion src/moin/storage/middleware/indexing.py
@@ -51,6 +51,7 @@

 import gc
 import os
+import re
 import sys
 import shutil
 import time
@@ -889,14 +890,18 @@ def search_page(self, q, idx_name=LATEST_REVS, pagenum=1, pagelen=10, **kw):
                 item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID])
                 yield item.get_revision(doc[REVID], doc=doc)

-    def search_meta(self, q, idx_name=LATEST_REVS, **kw):
+    def search_meta(self, q, idx_name=LATEST_REVS, regex=None, **kw):
         """
         Search with query q, yield Revision metadata from index.
         """
         with self.ix[idx_name].searcher() as searcher:
             # Note: callers must consume everything we yield, so the for loop
             # ends and the "with" is left to close the index files.
+            if regex:
+                regex_re = re.compile(regex, re.IGNORECASE)
             for hit in searcher.search(q, **kw):
+                if regex and not regex_re.search(hit[NAME][0]):
+                    continue
                 meta = hit.fields()
                 yield meta
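
For reference, a hedged example of calling the extended search_meta() through the storage layer, mirroring the get_index() change shown earlier (query, the regex value, and handle_meta() are illustrative; LATEST_REVS and NAME_EXACT are the constants used in the diff):

revs = flaskg.storage.search_meta(
    query, idx_name=LATEST_REVS, sortedby=NAME_EXACT, limit=None, regex=r"^users/"
)
for meta in revs:
    handle_meta(meta)  # placeholder; real callers read fields such as meta[NAME]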
4 changes: 2 additions & 2 deletions src/moin/storage/middleware/protecting.py
@@ -192,15 +192,15 @@ def search_page(self, q, idx_name=LATEST_REVS, pagenum=1, pagelen=10, **kw):
             if rev.allows(READ) or rev.allows(PUBREAD):
                 yield rev

-    def search_meta(self, q, idx_name=LATEST_REVS, **kw):
+    def search_meta(self, q, idx_name=LATEST_REVS, regex=None, **kw):
         """
         Yield an item's metadata, skipping any items where read permission is denied.
         The intended use of this method is to return the current rev metadata for all
         of the items in namespace subject to query restrictions. This is useful for reports
         such as Global Index, Global Tags, Wanted Items, Orphaned Items, etc.
         """
-        for meta in self.indexer.search_meta(q, idx_name, **kw):
+        for meta in self.indexer.search_meta(q, idx_name, regex=regex, **kw):
             meta[FQNAMES] = gen_fqnames(meta)
             result = self.may_read_rev(meta)
             if result:
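
One consequence of this layering, visible in the diff rather than stated in the PR: the indexer applies the regex before ProtectingMiddleware runs its per-item ACL check, so non-matching hits are discarded without any access evaluation and only matching items are ACL-checked. A rough sketch of the two layers (function names are stand-ins, not the moin API):

import re

def indexer_search_meta(hits, regex=None):
    regex_re = re.compile(regex, re.IGNORECASE) if regex else None
    for hit in hits:
        if regex_re and not regex_re.search(hit["name"][0]):
            continue  # dropped before any ACL work
        yield hit

def protected_search_meta(hits, may_read, regex=None):
    for meta in indexer_search_meta(hits, regex):
        if may_read(meta):  # ACL check only runs for regex matches
            yield meta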