3564 - Fix leading wildcard search

This commit is contained in:
Count Infinity 2024-11-20 21:40:54 -07:00
parent 1ad1cf9fc1
commit 0845ba8523
2 changed files with 176 additions and 50 deletions

View file

@ -5,6 +5,7 @@ command test-suite).
"""
from unittest import mock
from parameterized import parameterized
from evennia.help import filehelp
from evennia.help import utils as help_utils
@ -140,3 +141,56 @@ class TestFileHelp(TestCase):
self.assertEqual(HELP_ENTRY_DICTS[inum].get("aliases", []), helpentry.aliases)
self.assertEqual(HELP_ENTRY_DICTS[inum]["category"].lower(), helpentry.help_category)
self.assertEqual(HELP_ENTRY_DICTS[inum]["text"], helpentry.entrytext)
class HelpUtils(TestCase):
    """Tests for the help-search utilities in `evennia.help.utils`."""

    def setUp(self):
        # a small corpus of file-based help entries to search against
        entry_specs = [
            ("*examine", ["*exam", "*ex", "@examine"], "building", "Lorem ipsum examine"),
            ("inventory", [], "general", "A character's inventory"),
            ("userpassword", [], "admin", "change the password of an account"),
        ]
        self.candidate_entries = [
            filehelp.FileHelpEntry(
                key=key,
                aliases=aliases,
                help_category=category,
                entrytext=text,
                lock_storage="",
            )
            for key, aliases, category, text in entry_specs
        ]

    @parameterized.expand(
        [
            ("*examine", "*examine", "Leading wildcard should return exact matches."),
            ("@examine", "*examine", "Aliases should return an entry."),
            ("inventory", "inventory", "It should return exact matches."),
            ("inv*", "inventory", "Trailing wildcard search should return an entry."),
            ("userpaZZword~2", "userpassword", "Fuzzy matching should return an entry."),
            (
                "*word",
                "userpassword",
                "Leading wildcard should return an entry when no exact match.",
            ),
        ]
    )
    def test_help_search_with_index(self, search_term, expected_entry_key, error_msg):
        """Test search terms return correct entries"""
        # the entry (or entries) we expect the search to surface
        expected = [
            entry for entry in self.candidate_entries if entry.key == expected_entry_key
        ]
        found, _ = help_utils.help_search_with_index(search_term, self.candidate_entries)
        self.assertEqual(found, expected, error_msg)

View file

@ -9,26 +9,8 @@ This is used primarily by the default `help` command.
import re
from django.conf import settings
from lunr.stemmer import stemmer
# these are words that Lunr normally ignores but which we want to find
# since we use them (e.g. as command names).
# Lunr's default ignore-word list is found here:
# https://github.com/yeraydiazdiaz/lunr.py/blob/master/lunr/stop_word_filter.py
_LUNR_STOP_WORD_FILTER_EXCEPTIONS = [
"about",
"might",
"get",
"who",
"say",
"where",
] + settings.LUNR_STOP_WORD_FILTER_EXCEPTIONS
_LUNR = None
_LUNR_EXCEPTION = None
_LUNR_GET_BUILDER = None
_LUNR_BUILDER_PIPELINE = None
_RE_HELP_SUBTOPICS_START = re.compile(r"^\s*?#\s*?subtopics\s*?$", re.I + re.M)
_RE_HELP_SUBTOPIC_SPLIT = re.compile(r"^\s*?(\#{2,6}\s*?\w+?[a-z0-9 \-\?!,\.]*?)$", re.M + re.I)
@ -37,6 +19,123 @@ _RE_HELP_SUBTOPIC_PARSE = re.compile(r"^(?P<nesting>\#{2,6})\s*?(?P<name>.*?)$",
MAX_SUBTOPIC_NESTING = 5
def wildcard_stemmer(token, i, tokens):
    """
    Custom LUNR stemmer that returns both the original and stemmed token
    if the token contains a leading wildcard (*).

    Args:
        token (str): The input token to be stemmed
        i (int): Index of current token. Unused here but required by LUNR.
        tokens (list): List of tokens being processed. Unused here but
            required by LUNR.

    Returns:
        list: A list containing the stemmed token plus the original token
            when it has a leading '*'; otherwise just the stemmed token.

    """
    # keep an untouched copy first - the lunr stemmer mutates the token
    unstemmed = token.clone()
    stemmed = stemmer(token)
    if unstemmed.string.startswith("*"):
        # leading wildcard: make both the raw and the stemmed form findable
        return [unstemmed, stemmed]
    return stemmed
class LunrSearch:
    """
    Singleton class for managing Lunr search index configuration and initialization.
    """

    # these are words that Lunr normally ignores but which we want to find
    # since we use them (e.g. as command names).
    # Lunr's default ignore-word list is found here:
    # https://github.com/yeraydiazdiaz/lunr.py/blob/master/lunr/stop_word_filter.py
    _LUNR_STOP_WORD_FILTER_EXCEPTIONS = [
        "about",
        "might",
        "get",
        "who",
        "say",
        "where",
    ] + settings.LUNR_STOP_WORD_FILTER_EXCEPTIONS

    # cached singleton instance; populated on first instantiation
    _instance = None

    def __new__(cls):
        """
        Ensure only one instance of the class is created (Singleton)
        """
        if not cls._instance:
            cls._instance = super(LunrSearch, cls).__new__(cls)
            # _initialize runs only once, on first construction
            cls._instance._initialize()
        return cls._instance

    def _initialize(self):
        """
        Lazy load Lunr libraries and set up custom configuration

        we have to delay-load lunr because it messes with logging if it's imported
        before twisted's logging has been set up
        """
        # Lunr-related imports
        from lunr import get_default_builder
        from lunr import lunr
        from lunr import stop_word_filter
        from lunr.exceptions import QueryParseError
        from lunr.stemmer import stemmer
        from lunr.pipeline import Pipeline

        # Store imported modules as instance attributes
        self.get_default_builder = get_default_builder
        self.lunr = lunr
        self.stop_word_filter = stop_word_filter
        self.QueryParseError = QueryParseError
        self.default_stemmer = stemmer

        # build self.custom_stop_words_filter before the pipeline tuple below uses it
        self._setup_stop_words_filter()
        self.custom_builder_pipeline = (self.custom_stop_words_filter, wildcard_stemmer)

        # Register custom stemmer if we want to serialize.
        Pipeline.register_function(wildcard_stemmer, "wildcard_stemmer")

    def _setup_stop_words_filter(self):
        """
        Create a custom stop words filter, removing specified exceptions
        """
        # copy so we don't mutate lunr's module-level word list
        stop_words = self.stop_word_filter.WORDS.copy()
        for ignore_word in self._LUNR_STOP_WORD_FILTER_EXCEPTIONS:
            try:
                stop_words.remove(ignore_word)
            except ValueError:
                # exception word was not in lunr's default list - nothing to do
                pass
        self.custom_stop_words_filter = self.stop_word_filter.generate_stop_word_filter(stop_words)

    def index(self, ref, fields, documents):
        """
        Creates a Lunr searchable index.

        Args:
            ref (str): Unique identifier field within a document
            fields (list): A list of Lunr field mappings
                ``{"field_name": str, "boost": int}``. See the Lunr documentation
                for more details.
            documents (list[dict]): This is the body of possible entities to search.
                Each dict should have all keys in the `fields` arg.

        Returns: A lunr.Index object

        """
        # Create and configure builder; swap the default pipeline for our
        # custom stop-word filter and wildcard-aware stemmer
        builder = self.get_default_builder()
        builder.pipeline.reset()
        builder.pipeline.add(*self.custom_builder_pipeline)
        return self.lunr(ref, fields, documents, builder=builder)
def help_search_with_index(query, candidate_entries, suggestion_maxnum=5, fields=None):
"""
Lunr-powered fast index search and suggestion wrapper. See https://lunrjs.com/.
@ -57,31 +156,7 @@ def help_search_with_index(query, candidate_entries, suggestion_maxnum=5, fields
how many suggestions are included.
"""
global _LUNR, _LUNR_EXCEPTION, _LUNR_BUILDER_PIPELINE, _LUNR_GET_BUILDER
if not _LUNR:
# we have to delay-load lunr because it messes with logging if it's imported
# before twisted's logging has been set up
from lunr import get_default_builder as _LUNR_GET_BUILDER
from lunr import lunr as _LUNR
from lunr import stop_word_filter
from lunr.exceptions import QueryParseError as _LUNR_EXCEPTION
from lunr.stemmer import stemmer
# from lunr.trimmer import trimmer
# pre-create a lunr index-builder pipeline where we've removed some of
# the stop-words from the default in lunr.
stop_words = stop_word_filter.WORDS
for ignore_word in _LUNR_STOP_WORD_FILTER_EXCEPTIONS:
try:
stop_words.remove(ignore_word)
except ValueError:
pass
custom_stop_words_filter = stop_word_filter.generate_stop_word_filter(stop_words)
# _LUNR_BUILDER_PIPELINE = (trimmer, custom_stop_words_filter, stemmer)
_LUNR_BUILDER_PIPELINE = (custom_stop_words_filter, stemmer)
from lunr.exceptions import QueryParseError
indx = [cnd.search_index_entry for cnd in candidate_entries]
mapping = {indx[ix]["key"]: cand for ix, cand in enumerate(candidate_entries)}
@ -94,16 +169,13 @@ def help_search_with_index(query, candidate_entries, suggestion_maxnum=5, fields
{"field_name": "tags", "boost": 5},
]
# build the search index
builder = _LUNR_GET_BUILDER()
builder.pipeline.reset()
builder.pipeline.add(*_LUNR_BUILDER_PIPELINE)
lunr_search = LunrSearch()
search_index = _LUNR(ref="key", fields=fields, documents=indx, builder=builder)
search_index = lunr_search.index(ref="key", fields=fields, documents=indx)
try:
matches = search_index.search(query)[:suggestion_maxnum]
except _LUNR_EXCEPTION:
except QueryParseError:
# this is a user-input problem
matches = []