mirror of
https://github.com/evennia/evennia.git
synced 2026-03-31 21:17:17 +02:00
Made the in-memory representation of ANSIString the clean string, removing the need for the regexable flag.
This commit is contained in:
parent
259860ff25
commit
af0a149148
1 changed files with 20 additions and 67 deletions
|
|
@ -389,6 +389,7 @@ def _transform(func_name):
|
|||
return ANSIString(''.join(to_string), decoded=True)
|
||||
return wrapped
|
||||
|
||||
|
||||
class ANSIMeta(type):
|
||||
"""
|
||||
Many functions on ANSIString are just light wrappers around the unicode
|
||||
|
|
@ -402,7 +403,7 @@ class ANSIMeta(type):
|
|||
setattr(cls, func_name, _query_super(func_name))
|
||||
for func_name in [
|
||||
'__mul__', '__mod__', 'expandtabs', '__rmul__', 'join',
|
||||
'decode', 'replace', 'format']:
|
||||
'decode', 'replace', 'format', 'encode']:
|
||||
setattr(cls, func_name, _on_raw(func_name))
|
||||
for func_name in [
|
||||
'capitalize', 'translate', 'lower', 'upper', 'swapcase']:
|
||||
|
|
@ -434,62 +435,38 @@ class ANSIString(unicode):
|
|||
the same attributes as the standard one, and you may declare the
|
||||
string to be handled as already decoded. It is important not to double
|
||||
decode strings, as escapes can only be respected once.
|
||||
|
||||
If the regexable flag is set, using __getitem__, such as when getting
|
||||
an index or slicing, will return the result from the raw string. If
|
||||
this flag is set False, it will intelligently skip ANSI escapes.
|
||||
|
||||
ANSIString('{rHello{g, W{yorld', regexable=True)[0] will return the
|
||||
first byte of the escape sequence before 'Hello', while
|
||||
ANSIString('{rHello{g, W{yorld')[0] will return a red 'H'.
|
||||
|
||||
When a regexable ANSIString is sliced, the result is returned as a
|
||||
non-regexable ANSI String. This ensures that usage of regexable
|
||||
ANSIStrings is an explicit choice.
|
||||
|
||||
Why all this complication with the regexable flag?
|
||||
|
||||
The reason is that while we are able to subclass the unicode object in
|
||||
Python, the byte representation of the string in memory cannot be
|
||||
changed and still exists under the hood. This doesn't matter for things
|
||||
coded in pure Python, but since Regexes need to be mindful of
|
||||
performance, the module that handles them operates directly on the
|
||||
memory representation of the string in order to do matching. It is thus
|
||||
completely unaware of our customizations to the class. Interestingly,
|
||||
however, while the re module does its matching on the raw string, it
|
||||
slices the string using the object's methods. This means that running
|
||||
a regex on an ANSIString would return matches at bogus indexes, since
|
||||
the __getitem__ method of ANSIString skips ANSI escape sequences, which
|
||||
were part of the raw data regex was matching against.
|
||||
|
||||
So, if you need to use regex on an ANSIString, make sure you get it in
|
||||
regexable mode first, and be ready to deal with a few edge cases.
|
||||
"""
|
||||
string = to_str(args[0], force_string=True)
|
||||
if not isinstance(string, basestring):
|
||||
string = str(string)
|
||||
parser = kwargs.get('parser', ANSI_PARSER)
|
||||
regexable = kwargs.get('regexable', False)
|
||||
decoded = kwargs.get('decoded', False) or hasattr(string, '_raw_string')
|
||||
if not decoded:
|
||||
string = parser.parse_ansi(string)
|
||||
if isinstance(string, unicode):
|
||||
string = super(ANSIString, cls).__new__(ANSIString, string)
|
||||
else:
|
||||
string = super(ANSIString, cls).__new__(ANSIString, string, 'utf-8')
|
||||
string._regexable = regexable
|
||||
return string
|
||||
clean_string = unicode(parser.parse_ansi(
|
||||
string, strip_ansi=True), 'utf-8')
|
||||
ansi_string = super(ANSIString, cls).__new__(ANSIString, clean_string)
|
||||
ansi_string._raw_string = string
|
||||
ansi_string._clean_string = clean_string
|
||||
return ansi_string
|
||||
|
||||
def __str__(self):
|
||||
return self._raw_string.encode('utf-8')
|
||||
|
||||
def __unicode__(self):
|
||||
"""
|
||||
Unfortunately, this is not called during print() statements due to a
|
||||
bug in the Python interpreter. You can always do unicode() or str()
|
||||
around the resulting ANSIString and print that.
|
||||
"""
|
||||
return self._raw_string
|
||||
|
||||
def __repr__(self):
|
||||
"""
|
||||
Let's make the repr the command that would actually be used to
|
||||
construct this object, for convenience and reference.
|
||||
"""
|
||||
if self._regexable:
|
||||
reg = ', regexable=True'
|
||||
else:
|
||||
reg = ''
|
||||
return "ANSIString(%s, decoded=True%s)" % (repr(self._raw_string), reg)
|
||||
return "ANSIString(%s, decoded=True)" % repr(self._raw_string)
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
"""
|
||||
|
|
@ -519,9 +496,6 @@ class ANSIString(unicode):
|
|||
"""
|
||||
self.parser = kwargs.pop('parser', ANSI_PARSER)
|
||||
super(ANSIString, self).__init__(*args, **kwargs)
|
||||
self._raw_string = unicode(self)
|
||||
self._clean_string = unicode(self.parser.parse_ansi(
|
||||
self._raw_string, strip_ansi=True), 'utf-8')
|
||||
self._code_indexes, self._char_indexes = self._get_indexes()
|
||||
|
||||
def __add__(self, other):
|
||||
|
|
@ -598,8 +572,6 @@ class ANSIString(unicode):
|
|||
string instead, bypassing ANSIString's intelligent escape skipping,
|
||||
for reasons explained in the __new__ method's docstring.
|
||||
"""
|
||||
if self._regexable:
|
||||
return ANSIString(self._raw_string[item], decoded=True)
|
||||
if isinstance(item, slice):
|
||||
# Slices must be handled specially.
|
||||
return self._slice(item)
|
||||
|
|
@ -635,25 +607,6 @@ class ANSIString(unicode):
|
|||
"""
|
||||
return self._raw_string
|
||||
|
||||
def is_regexable(self):
|
||||
"""
|
||||
State whether or not this ANSIString is a 'regexable' ANSIString.
|
||||
Regexable ANSIStrings return indexes from _raw_string when sliced.
|
||||
"""
|
||||
return self._regexable
|
||||
|
||||
def regexable(self):
|
||||
"""
|
||||
Return the regexable version of this ANSIString.
|
||||
"""
|
||||
return ANSIString(self, decoded=True, regexable=True)
|
||||
|
||||
def non_regexable(self):
|
||||
"""
|
||||
Return the non-regexable version of this ANSIString.
|
||||
"""
|
||||
return ANSIString(self, decoded=True)
|
||||
|
||||
def partition(self, sep, reverse=False):
|
||||
"""
|
||||
Similar to split, but always creates a tuple with three items:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue