Changed how command-not-found errors are handled by default: implemented a cosine-similarity algorithm (Coling 2008) for comparing strings, which allows for decent suggestions at good speed.

2026-04-05 23:47:16 +02:00 · 2012-04-22 16:36:31 +02:00 · 2012-04-22 16:36:31 +02:00 · 0c292b5ff2
commit 0c292b5ff2
parent 4678234e9a
4 changed files with 76 additions and 12 deletions
--- a/src/commands/cmdhandler.py
+++ b/src/commands/cmdhandler.py
@ -42,6 +42,7 @@ from django.conf import settings
 from src.comms.channelhandler import CHANNELHANDLER
 from src.utils import logger, utils
 from src.commands.cmdparser import at_multimatch_cmd
+from src.utils.utils import string_suggestions

 __all__ = ("cmdhandler",)

@ -191,7 +192,12 @@ def cmdhandler(caller, raw_string, testing=False):
                if syscmd:
                    sysarg = raw_string
                else:
-                    sysarg = "Huh? (Type \"help\" for help)"
+                    sysarg = "Command '%s' is not available." % raw_string
+                    suggestions = string_suggestions(raw_string, cmdset.get_all_cmd_keys_and_aliases(), cutoff=0.7, maxnum=3)
+                    if suggestions:
+                        sysarg += " Did you maybe mean %s?" % utils.list_to_string(suggestions, 'or', addquote=True)
+                    else:
+                        sysarg += " Type \"help\" for help."
                raise ExecSystemCommand(syscmd, sysarg)

            if len(matches) > 1:
--- a/src/commands/cmdset.py
+++ b/src/commands/cmdset.py
@ -382,3 +382,12 @@ class CmdSet(object):
        by use of self.add().
        """
        pass
+
+    def get_all_cmd_keys_and_aliases(self):
+        """
+        Returns a list of all command keys and aliases
+        available in this cmdset. All keys come first (in
+        the order of self.commands), followed by the aliases
+        of each command in turn.
+        """
+        names = [cmd.key for cmd in self.commands]
+        # a plain loop rather than a comprehension, since extending
+        # the list is a side effect and builds no new collection.
+        for cmd in self.commands:
+            names.extend(cmd.aliases)
+        return names
--- a/src/commands/default/help.py
+++ b/src/commands/default/help.py
@ -6,6 +6,7 @@ set. The normal, database-tied help system is used for collaborative
 creation of other help topics such as RP help or game-world aides.
 """

+from collections import defaultdict
 from src.utils.utils import fill, dedent
 from src.commands.command import Command
 from src.help.models import HelpEntry
@ -100,20 +101,14 @@ class CmdHelp(Command):

        if query in LIST_ARGS:
            # we want to list all available help entries, grouped by category.
-            hdict_cmd = {}
+            hdict_cmd = defaultdict(list)
            for cmd in (cmd for cmd in cmdset if cmd.auto_help and not cmd.is_exit
                        and not cmd.key.startswith('__') and cmd.access(caller)):
-                try:
-                    hdict_cmd[cmd.help_category].append(cmd.key)
-                except KeyError:
-                    hdict_cmd[cmd.help_category] = [cmd.key]
-            hdict_db = {}
+                hdict_cmd[cmd.help_category].append(cmd.key)
+            hdict_db = defaultdict(list)
            for topic in (topic for topic in HelpEntry.objects.get_all_topics()
                          if topic.access(caller, 'view', default=True)):
-                try:
-                    hdict_db[topic.help_category].append(topic.key)
-                except KeyError:
-                    hdict_db[topic.help_category] = [topic.key]
+                hdict_db[topic.help_category].append(topic.key)
            help_entry = format_help_list(hdict_cmd, hdict_db)
            caller.msg(help_entry)
            return
--- a/src/utils/utils.py
+++ b/src/utils/utils.py
@ -7,7 +7,7 @@ be of use when designing your own game.

 """
 from inspect import ismodule
-import os, sys, imp, types
+import os, sys, imp, types, math
 import textwrap
 import datetime
 import random
@ -70,6 +70,25 @@ def dedent(text):
        return ""
    return textwrap.dedent(text)

+def list_to_string(inlist, endsep="and", addquote=False):
+    """
+    Pretty-format a list as a single string. Entries are separated
+    by commas, except that the last entry is joined on with endsep
+    instead. If addquote is True, every entry is wrapped in double
+    quotes. An empty list gives the empty string.
+
+    [1,2,3] -> '1, 2 and 3'
+    """
+    def _render(entry):
+        # quoting wraps the entry; otherwise it is only stringified
+        if addquote:
+            return "\"%s\"" % entry
+        return str(entry)
+
+    if not inlist:
+        return ""
+    final = inlist[-1]
+    if len(inlist) == 1:
+        return _render(final)
+    leading = ", ".join([_render(entry) for entry in inlist[:-1]])
+    if addquote:
+        final = _render(final)
+    return "%s %s %s" % (leading, endsep, final)
+
+
 def wildcard_to_regexp(instring):
    """
    Converts a player-supplied string that may have wildcards in it to regular
@ -664,3 +683,38 @@ def init_new_player(player):
    if player.character:
        player.character.db.FIRST_LOGIN = True
    player.db.FIRST_LOGIN = True
+
+def string_similarity(string1, string2):
+    """
+    Rate how similar two strings are using "cosine similarity", as described
+    for example in
+       Proceedings of the 22nd International Conference on Computational Linguistics
+       (Coling 2008), pages 593-600, Manchester, August 2008
+    The measure vectors used are simple "bag of words" type histograms (but
+    for letters): each string is reduced to its per-character counts.
+
+    Args:
+        string1 (str) - first string to compare
+        string2 (str) - second string to compare
+    Returns:
+        A float 0...1 rating how similar the two strings are. The strings
+        can contain multiple words. If either string is empty the rating
+        is 0.0.
+    """
+    vocabulary = set(string1 + string2)
+    vec1 = [string1.count(v) for v in vocabulary]
+    vec2 = [string2.count(v) for v in vocabulary]
+    norm = math.sqrt(sum(v1**2 for v1 in vec1)) * math.sqrt(sum(v2**2 for v2 in vec2))
+    if not norm:
+        # an empty string has an all-zero histogram, so the norm is zero and
+        # the division below would raise ZeroDivisionError.
+        return 0.0
+    # float() keeps this a true division also where "/" on ints truncates.
+    return float(sum(v1 * v2 for v1, v2 in zip(vec1, vec2))) / norm
+
+
+def string_suggestions(string, vocabulary, cutoff=0.6, maxnum=3):
+    """
+    Look up a string in a vocabulary, falling back to the entries that
+    are most similar to it.
+
+    Args:
+        string (str) - the string to search for
+        vocabulary (iterable) - the available strings
+        cutoff (float, 0-1) - lowest similarity rating accepted as a suggestion
+                              (the higher, the closer a match must be)
+        maxnum (int) - maximum number of suggestions to return
+    Returns:
+        [string] if string is itself in vocabulary, otherwise a list of at
+        most maxnum suggestions from vocabulary, best match first (empty
+        if nothing reaches the cutoff)
+    """
+    if string in vocabulary:
+        return [string]
+    # no exact match. Rate every candidate, drop those below the cutoff and
+    # order the remainder with the highest rating first.
+    rated = []
+    for candidate in vocabulary:
+        score = string_similarity(string, candidate)
+        if score >= cutoff:
+            rated.append((score, candidate))
+    rated.sort(key=lambda pair: pair[0], reverse=True)
+    return [candidate for _, candidate in rated[:maxnum]]