Changed how command-not-found errors are handled by default: implemented a cosine-similarity algorithm (Coling 2008) for comparing strings, which gives decent suggestions at good speed.

This commit is contained in:
Griatch 2012-04-22 16:36:31 +02:00
parent 4678234e9a
commit 0c292b5ff2
4 changed files with 76 additions and 12 deletions

View file

@ -42,6 +42,7 @@ from django.conf import settings
from src.comms.channelhandler import CHANNELHANDLER
from src.utils import logger, utils
from src.commands.cmdparser import at_multimatch_cmd
from src.utils.utils import string_suggestions
__all__ = ("cmdhandler",)
@ -191,7 +192,12 @@ def cmdhandler(caller, raw_string, testing=False):
if syscmd:
sysarg = raw_string
else:
sysarg = "Huh? (Type \"help\" for help)"
sysarg = "Command '%s' is not available." % raw_string
suggestions = string_suggestions(raw_string, cmdset.get_all_cmd_keys_and_aliases(), cutoff=0.7, maxnum=3)
if suggestions:
sysarg += " Did you maybe mean %s?" % utils.list_to_string(suggestions, 'or', addquote=True)
else:
sysarg += " Type \"help\" for help."
raise ExecSystemCommand(syscmd, sysarg)
if len(matches) > 1:

View file

@ -382,3 +382,12 @@ class CmdSet(object):
by use of self.add().
"""
pass
def get_all_cmd_keys_and_aliases(self):
    """
    Returns a list of all command keys and aliases
    available in this cmdset.

    The keys of all commands come first (in the order of
    self.commands), followed by the aliases of every command
    in that same order. Duplicates are not removed.
    """
    names = [cmd.key for cmd in self.commands]
    # a plain loop: the aliases are appended for their side effect only,
    # so there is no point in building a throwaway list of None values.
    for cmd in self.commands:
        names.extend(cmd.aliases)
    return names

View file

@ -6,6 +6,7 @@ set. The normal, database-tied help system is used for collaborative
creation of other help topics such as RP help or game-world aides.
"""
from collections import defaultdict
from src.utils.utils import fill, dedent
from src.commands.command import Command
from src.help.models import HelpEntry
@ -100,20 +101,14 @@ class CmdHelp(Command):
if query in LIST_ARGS:
# we want to list all available help entries, grouped by category.
hdict_cmd = {}
hdict_cmd = defaultdict(list)
for cmd in (cmd for cmd in cmdset if cmd.auto_help and not cmd.is_exit
and not cmd.key.startswith('__') and cmd.access(caller)):
try:
hdict_cmd[cmd.help_category].append(cmd.key)
except KeyError:
hdict_cmd[cmd.help_category] = [cmd.key]
hdict_db = {}
hdict_cmd[cmd.help_category].append(cmd.key)
hdict_db = defaultdict(list)
for topic in (topic for topic in HelpEntry.objects.get_all_topics()
if topic.access(caller, 'view', default=True)):
try:
hdict_db[topic.help_category].append(topic.key)
except KeyError:
hdict_db[topic.help_category] = [topic.key]
hdict_db[topic.help_category].append(topic.key)
help_entry = format_help_list(hdict_cmd, hdict_db)
caller.msg(help_entry)
return

View file

@ -7,7 +7,7 @@ be of use when designing your own game.
"""
from inspect import ismodule
import os, sys, imp, types
import os, sys, imp, types, math
import textwrap
import datetime
import random
@ -70,6 +70,25 @@ def dedent(text):
return ""
return textwrap.dedent(text)
def list_to_string(inlist, endsep="and", addquote=False):
    """
    This pretty-formats a list as string output, using an alternative
    separator (instead of a comma) between the second to last and the
    last entry.

    Args:
        inlist (iterable) - the entries to format. Any iterable is
                            accepted (list, tuple, set, generator ...)
        endsep (str) - the word placed in front of the last entry
        addquote (bool) - if True, every outgoing string is surrounded
                          by double quotes
    Returns:
        the formatted string, or the empty string if there are no entries.

    Examples:
        [1,2,3] -> '1, 2 and 3'
        ['a','b'] with endsep='or', addquote=True -> '"a" or "b"'
    """
    if not inlist:
        return ""
    # materialize the input so that iterables without slicing support
    # (sets, generators) can be handled the same way as lists.
    items = list(inlist)
    if not items:
        # an empty generator is truthy and only shows up as empty here
        return ""
    template = "\"%s\"" if addquote else "%s"
    # wrapping the entry in a 1-tuple keeps an entry that is itself a
    # tuple from being unpacked as separate format arguments.
    entries = [template % (item,) for item in items]
    if len(entries) == 1:
        return entries[0]
    return ", ".join(entries[:-1]) + " %s %s" % (endsep, entries[-1])
def wildcard_to_regexp(instring):
"""
Converts a player-supplied string that may have wildcards in it to regular
@ -664,3 +683,38 @@ def init_new_player(player):
if player.character:
player.character.db.FIRST_LOGIN = True
player.db.FIRST_LOGIN = True
def string_similarity(string1, string2):
    """
    This implements a "cosine-similarity" algorithm as described for example in
    Proceedings of the 22nd International Conference on Computation Linguistics
    (Coling 2008), pages 593-600, Manchester, August 2008

    The measure vectors used is simply a "bag of words" type histogram (but
    for letters): each string is turned into a vector holding the number of
    times every character of the combined strings occurs in it.

    Args:
        string1 (str) - first string to compare
        string2 (str) - second string to compare
    Returns:
        a float 0...1 rating how similar the two strings are. The strings can
        contain multiple words. If either string is empty there is nothing
        to compare and 0.0 is returned.
    """
    vocabulary = set(string1 + string2)
    vec1 = [string1.count(char) for char in vocabulary]
    vec2 = [string2.count(char) for char in vocabulary]
    # product of the squared vector lengths; this is zero exactly when at
    # least one of the strings is empty.
    norm_product = sum(num * num for num in vec1) * sum(num * num for num in vec2)
    if not norm_product:
        # avoid dividing by zero for empty input
        return 0.0
    dot_product = sum(num1 * num2 for num1, num2 in zip(vec1, vec2))
    # taking one square root of the integer product (rather than multiplying
    # two rounded square roots) keeps identical histograms at exactly 1.0.
    return float(dot_product) / math.sqrt(norm_product)
def string_suggestions(string, vocabulary, cutoff=0.6, maxnum=3):
    """
    Given a string and a vocabulary, return a match or a list of suggestions
    based on string similarity.

    Args:
        string (str) - a string to search for
        vocabulary (iterable) - the available strings. Any iterable works,
                                including a one-shot iterator/generator
        cutoff (float, 0-1) - limit the similarity matches (the higher, the
                              more exact a match is required)
        maxnum (int) - maximum number of suggestions to return
    Returns:
        list of suggestions from vocabulary (could be empty if there are no
        matches). If string itself is found in the vocabulary, it is
        returned as the only entry.
    """
    # materialize once: the membership test below would otherwise exhaust a
    # one-shot iterator and leave nothing for the similarity rating.
    candidates = list(vocabulary)
    if string in candidates:
        return [string]
    # no exact match. Rate all candidates, drop those below the cutoff and
    # return the remaining ones sorted with the highest match first. The
    # sort is stable, so equally rated candidates keep their vocabulary order.
    rated = [(string_similarity(string, sugg), sugg) for sugg in candidates]
    rated = [tup for tup in rated if tup[0] >= cutoff]
    rated.sort(key=lambda tup: tup[0], reverse=True)
    return [tup[1] for tup in rated[:maxnum]]