From 0c292b5ff2f61221cdedc905b33b5a9f72505f91 Mon Sep 17 00:00:00 2001 From: Griatch Date: Sun, 22 Apr 2012 16:36:31 +0200 Subject: [PATCH] Changed how command not found errors are handled by default: Implemented a cos-likeness algorithm (Coling 2008) for comparing strings, which allows for decent suggestions and speed. --- src/commands/cmdhandler.py | 8 +++++- src/commands/cmdset.py | 9 ++++++ src/commands/default/help.py | 15 ++++------ src/utils/utils.py | 56 +++++++++++++++++++++++++++++++++++- 4 files changed, 76 insertions(+), 12 deletions(-) diff --git a/src/commands/cmdhandler.py b/src/commands/cmdhandler.py index 7d235c8b1a..0c6c4df232 100644 --- a/src/commands/cmdhandler.py +++ b/src/commands/cmdhandler.py @@ -42,6 +42,7 @@ from django.conf import settings from src.comms.channelhandler import CHANNELHANDLER from src.utils import logger, utils from src.commands.cmdparser import at_multimatch_cmd +from src.utils.utils import string_suggestions __all__ = ("cmdhandler",) @@ -191,7 +192,12 @@ def cmdhandler(caller, raw_string, testing=False): if syscmd: sysarg = raw_string else: - sysarg = "Huh? (Type \"help\" for help)" + sysarg = "Command '%s' is not available." % raw_string + suggestions = string_suggestions(raw_string, cmdset.get_all_cmd_keys_and_aliases(), cutoff=0.7, maxnum=3) + if suggestions: + sysarg += " Did you maybe mean %s?" % utils.list_to_string(suggestions, 'or', addquote=True) + else: + sysarg += " Type \"help\" for help." raise ExecSystemCommand(syscmd, sysarg) if len(matches) > 1: diff --git a/src/commands/cmdset.py b/src/commands/cmdset.py index d8c993b692..b7efe313e6 100644 --- a/src/commands/cmdset.py +++ b/src/commands/cmdset.py @@ -382,3 +382,12 @@ class CmdSet(object): by use of self.add(). """ pass + + def get_all_cmd_keys_and_aliases(self): + """ + Returns a list of all command keys and aliases + available in this cmdset. + """ + names = [cmd.key for cmd in self.commands] + [names.extend(cmd.aliases) for cmd in self.commands] + return names diff --git a/src/commands/default/help.py b/src/commands/default/help.py index 4d5f60a5de..4218285636 100644 --- a/src/commands/default/help.py +++ b/src/commands/default/help.py @@ -6,6 +6,7 @@ set. The normal, database-tied help system is used for collaborative creation of other help topics such as RP help or game-world aides. """ +from collections import defaultdict from src.utils.utils import fill, dedent from src.commands.command import Command from src.help.models import HelpEntry @@ -100,20 +101,14 @@ class CmdHelp(Command): if query in LIST_ARGS: # we want to list all available help entries, grouped by category. - hdict_cmd = {} + hdict_cmd = defaultdict(list) for cmd in (cmd for cmd in cmdset if cmd.auto_help and not cmd.is_exit and not cmd.key.startswith('__') and cmd.access(caller)): - try: - hdict_cmd[cmd.help_category].append(cmd.key) - except KeyError: - hdict_cmd[cmd.help_category] = [cmd.key] - hdict_db = {} + hdict_cmd[cmd.help_category].append(cmd.key) + hdict_db = defaultdict(list) for topic in (topic for topic in HelpEntry.objects.get_all_topics() if topic.access(caller, 'view', default=True)): - try: - hdict_db[topic.help_category].append(topic.key) - except KeyError: - hdict_db[topic.help_category] = [topic.key] + hdict_db[topic.help_category].append(topic.key) help_entry = format_help_list(hdict_cmd, hdict_db) caller.msg(help_entry) return diff --git a/src/utils/utils.py b/src/utils/utils.py index 758b715b7f..5364a98a7a 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -7,7 +7,7 @@ be of use when designing your own game. """ from inspect import ismodule -import os, sys, imp, types +import os, sys, imp, types, math import textwrap import datetime import random @@ -70,6 +70,25 @@ def dedent(text): return "" return textwrap.dedent(text) +def list_to_string(inlist, endsep="and", addquote=False): + """ + This pretty-formats a list as string output, adding + an optional alternative separator to the second to last entry. + If addquote is True, the outgoing strints will be surrounded by quotes. + + [1,2,3] -> '1, 2 and 3' + """ + if not inlist: + return "" + if addquote: + if len(inlist) == 1: + return "\"%s\"" % inlist[0] + return ", ".join("\"%s\"" % v for v in inlist[:-1]) + " %s %s" % (endsep, "\"%s\"" % inlist[-1]) + else: + if len(inlist) == 1: + return str(inlist[0]) + return ", ".join(str(v) for v in inlist[:-1]) + " %s %s" % (endsep, inlist[-1]) + def wildcard_to_regexp(instring): """ Converts a player-supplied string that may have wildcards in it to regular @@ -664,3 +683,38 @@ def init_new_player(player): if player.character: player.character.db.FIRST_LOGIN = True player.db.FIRST_LOGIN = True + +def string_similarity(string1, string2): + """ + This implements a "cosine-similarity" algorithm as described for example in + Proceedings of the 22nd International Conference on Computation Linguistics + (Coling 2008), pages 593-600, Manchester, August 2008 + The measure vectors used is simply a "bag of words" type histogram (but for letters). + + The function returns a value 0...1 rating how similar the two strings are. The strings can + contain multiple words. + """ + vocabulary = set(list(string1 + string2)) + vec1 = [string1.count(v) for v in vocabulary] + vec2 = [string2.count(v) for v in vocabulary] + return float(sum(vec1[i]*vec2[i] for i in range(len(vocabulary)))) / \ + (math.sqrt(sum(v1**2 for v1 in vec1)) * math.sqrt(sum(v2**2 for v2 in vec2))) + +def string_suggestions(string, vocabulary, cutoff=0.6, maxnum=3): + """ + Given a string and a vocabulary, return a match or a list of suggestsion based on + string similarity. + + Args: + string (str)- a string to search for + vocabulary (iterable) - a list of available strings + cutoff (int, 0-1) - limit the similarity matches (higher, the more exact is required) + maxnum (int) - maximum number of suggestions to return + Returns: + list of suggestions from vocabulary (could be empty if there are no matches) + """ + if string in vocabulary: + return [string] + # no exact match. Determine suggestions and return sorted with highest match first. + return [tup[1] for tup in sorted([(string_similarity(string, sugg), sugg) for sugg in vocabulary], + key=lambda tup: tup[0], reverse=True) if tup[0] >= cutoff][:maxnum]