From 07aba24f4d0fae4e21cd9f2953d062b0bd15c748 Mon Sep 17 00:00:00 2001
From: Jonathan Piacenti <kelketek@gmail.com>
Date: Sat, 13 Dec 2014 16:39:41 -0600
Subject: [PATCH] Improve ANSIString performance.

---
 src/utils/ansi.py | 148 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 120 insertions(+), 28 deletions(-)

diff --git a/src/utils/ansi.py b/src/utils/ansi.py
index 53fa571164..799eaf609a 100644
--- a/src/utils/ansi.py
+++ b/src/utils/ansi.py
@@ -317,11 +317,9 @@ class ANSIParser(object):
     mxp_re = r'\{lc(.*?)\{lt(.*?)\{le'
 
     # prepare regex matching
-    #ansi_sub = [(re.compile(sub[0], re.DOTALL), sub[1])
-    #                 for sub in ansi_map]
     xterm256_sub = re.compile(r"|".join([tup[0] for tup in xterm256_map]), re.DOTALL)
     ansi_sub = re.compile(r"|".join([re.escape(tup[0]) for tup in mux_ansi_map + ext_ansi_map]), re.DOTALL)
-    mxp_sub =  re.compile(mxp_re, re.DOTALL)
+    mxp_sub = re.compile(mxp_re, re.DOTALL)
 
     # used by regex replacer to correctly map ansi sequences
     ansi_map = dict(mux_ansi_map + ext_ansi_map)
@@ -436,7 +434,10 @@ def _transform(func_name):
             elif index in self._char_indexes:
                 to_string.append(replacement_string[char_counter])
                 char_counter += 1
-        return ANSIString(''.join(to_string), decoded=True)
+        return ANSIString(
+            ''.join(to_string), decoded=True,
+            code_indexes=self._code_indexes, char_indexes=self._char_indexes,
+            clean_string=replacement_string)
     return wrapped
 
 
@@ -452,8 +453,8 @@ class ANSIMeta(type):
                 'rfind', 'rindex', '__len__']:
             setattr(cls, func_name, _query_super(func_name))
         for func_name in [
-                '__mul__', '__mod__', 'expandtabs', '__rmul__',
-                'decode', 'replace', 'format', 'encode']:
+                '__mod__', 'expandtabs', 'decode', 'replace', 'format',
+                'encode']:
             setattr(cls, func_name, _on_raw(func_name))
         for func_name in [
                 'capitalize', 'translate', 'lower', 'upper', 'swapcase']:
@@ -485,19 +486,38 @@ class ANSIString(unicode):
         the same attributes as the standard one, and you may declare the
         string to be handled as already decoded. It is important not to double
         decode strings, as escapes can only be respected once.
+
+        Internally, ANSIString can also passes itself precached code/character
+        indexes and clean strings to avoid doing extra work when combining
+        ANSIStrings.
         """
         string = args[0]
         if not isinstance(string, basestring):
             string = to_str(string, force_string=True)
         parser = kwargs.get('parser', ANSI_PARSER)
         decoded = kwargs.get('decoded', False) or hasattr(string, '_raw_string')
+        code_indexes = kwargs.pop('code_indexes', None)
+        char_indexes = kwargs.pop('char_indexes', None)
+        clean_string = kwargs.pop('clean_string', None)
+        # All True, or All False, not just one.
+        checks = map(lambda x: x is None, [code_indexes, char_indexes, clean_string])
+        if not len(set(checks)) == 1:
+            raise ValueError("You must specify code_indexes, char_indexes, "
+                             "and clean_string together, or not at all.")
+        if not all(checks):
+            decoded = True
         if not decoded:
             # Completely new ANSI String
             clean_string = to_unicode(parser.parse_ansi(string, strip_ansi=True))
             string = parser.parse_ansi(string)
+        elif clean_string is not None:
+            # We have an explicit clean string.
+            pass
         elif hasattr(string, '_clean_string'):
             # It's already an ANSIString
             clean_string = string._clean_string
+            code_indexes = string._code_indexes
+            char_indexes = string._char_indexes
             string = string._raw_string
         else:
             # It's a string that has been pre-ansi decoded.
@@ -505,12 +525,12 @@ class ANSIString(unicode):
 
         if not isinstance(string, unicode):
             string = string.decode('utf-8')
-        else:
-            # Do this to prevent recursive ANSIStrings.
-            string = unicode(string)
+
         ansi_string = super(ANSIString, cls).__new__(ANSIString, to_str(clean_string), "utf-8")
         ansi_string._raw_string = string
         ansi_string._clean_string = clean_string
+        ansi_string._code_indexes = code_indexes
+        ansi_string._char_indexes = char_indexes
         return ansi_string
 
     def __str__(self):
@@ -559,7 +579,34 @@ class ANSIString(unicode):
         """
         self.parser = kwargs.pop('parser', ANSI_PARSER)
         super(ANSIString, self).__init__()
-        self._code_indexes, self._char_indexes = self._get_indexes()
+        if self._code_indexes is None:
+            self._code_indexes, self._char_indexes = self._get_indexes()
+
+    @staticmethod
+    def _shifter(iterable, offset):
+        """
+        Takes a list of integers, and produces a new one incrementing all
+        by a number.
+        """
+        return [i + offset for i in iterable]
+
+    @classmethod
+    def _adder(cls, first, second):
+        """
+        Joins two ANSIStrings, preserving calculated info.
+        """
+
+        raw_string = first._raw_string + second._raw_string
+        clean_string = first._clean_string + second._clean_string
+        code_indexes = first._code_indexes[:]
+        char_indexes = first._char_indexes[:]
+        code_indexes.extend(
+            cls._shifter(second._code_indexes, len(first._raw_string)))
+        char_indexes.extend(
+            cls._shifter(second._code_indexes, len(first._raw_string)))
+        return ANSIString(raw_string, code_indexes=code_indexes,
+                          char_indexes=char_indexes,
+                          clean_string=clean_string)
 
     def __add__(self, other):
         """
@@ -569,8 +616,9 @@ class ANSIString(unicode):
         """
         if not isinstance(other, basestring):
             return NotImplemented
-        return ANSIString(self._raw_string + getattr(
-            other, '_raw_string', other), decoded=True)
+        if not isinstance(other, ANSIString):
+            other = ANSIString(other)
+        return self._adder(self, other)
 
     def __radd__(self, other):
         """
@@ -578,8 +626,9 @@ class ANSIString(unicode):
         """
         if not isinstance(other, basestring):
             return NotImplemented
-        return ANSIString(getattr(
-            other, '_raw_string', other) + self._raw_string, decoded=True)
+        if not isinstance(other, ANSIString):
+            other = ANSIString(other)
+        return self._adder(other, self)
 
     def __getslice__(self, i, j):
         """
@@ -615,7 +664,7 @@ class ANSIString(unicode):
         # Check between the slice intervals for escape sequences.
         i = None
         for i in slice_indexes[1:]:
-            for index in range(last_mark, i):
+            for index in xrange(last_mark, i):
                 if index in self._code_indexes:
                     string += self._raw_string[index]
             last_mark = i
@@ -654,7 +703,7 @@ class ANSIString(unicode):
         result = ''
         # Get the character they're after, and replay all escape sequences
         # previous to it.
-        for index in range(0, item + 1):
+        for index in xrange(0, item + 1):
             if index in self._code_indexes:
                 result += self._raw_string[index]
         return ANSIString(result + clean + append_tail, decoded=True)
@@ -711,13 +760,6 @@ class ANSIString(unicode):
         It's possible that only one of these tables is actually needed, the
         other assumed to be what isn't in the first.
         """
-        # These are all the indexes which hold code characters.
-        #matches = [(match.start(), match.end())
-        #            for match in self.parser.ansi_regex.finditer(self._raw_string)]
-        #code_indexes = []
-        #         # These are all the indexes which hold code characters.
-        #for start, end in matches:
-        #    code_indexes.extend(range(start, end))
 
         code_indexes = []
         for match in self.parser.ansi_regex.finditer(self._raw_string):
@@ -775,6 +817,28 @@ class ANSIString(unicode):
         res.append(self[start:len(self)])
         return res
 
+    def __mul__(self, other):
+        """
+        Multiplication method. Implemented for performance reasons.
+        """
+        if not isinstance(other, int):
+            return NotImplemented
+        raw_string = self._raw_string * other
+        clean_string = self._clean_string * other
+        code_indexes = self._code_indexes[:]
+        char_indexes = self._char_indexes[:]
+        for i in range(1, other + 1):
+            code_indexes.extend(
+                self._shifter(self._code_indexes, i * len(self._raw_string)))
+            char_indexes.extend(
+                self._shifter(self._char_indexes, i * len(self._raw_string)))
+        return ANSIString(
+            raw_string, code_indexes=code_indexes, char_indexes=char_indexes,
+            clean_string=clean_string)
+
+    def __rmul__(self, other):
+        return self.__mul__(other)
+
     def rsplit(self, by, maxsplit=-1):
         """
         Stolen from PyPy's pure Python string implementation, tweaked for
@@ -810,11 +874,39 @@ class ANSIString(unicode):
         last_item = None
         for item in iterable:
             if last_item is not None:
-                result += self
+                result += self._raw_string
+            if not isinstance(item, ANSIString):
+                item = ANSIString(item)
             result += item
             last_item = item
         return result
 
+    def _filler(self, char, amount):
+        """
+        Generate a line of characters in a more efficient way than just adding
+        ANSIStrings.
+        """
+        if not isinstance(char, ANSIString):
+            line = char * amount
+            return ANSIString(
+                char * amount, code_indexes=[], char_indexes=range(0, len(line)),
+                clean_string=char)
+        try:
+            start = char._code_indexes[0]
+        except IndexError:
+            start = None
+        end = char._char_indexes[0]
+        prefix = char._raw_string[start:end]
+        postfix = char._raw_string[end + 1:]
+        line = char._clean_string * amount
+        code_indexes = [i for i in range(0, len(prefix))]
+        length = len(prefix) + len(line)
+        code_indexes.extend([i for i in range(length, length + len(postfix))])
+        char_indexes = self._shifter(xrange(0, len(line)), len(prefix))
+        raw_string = prefix + line + postfix
+        return ANSIString(
+            raw_string, clean_string=line, char_indexes=char_indexes,
+            code_indexes=code_indexes)
 
     @_spacing_preflight
     def center(self, width, fillchar, difference):
@@ -823,8 +915,8 @@ class ANSIString(unicode):
         """
         remainder = difference % 2
         difference /= 2
-        spacing = difference * fillchar
-        result = spacing + self + spacing + (remainder * fillchar)
+        spacing = self._filler(fillchar, difference)
+        result = spacing + self + spacing + self._filler(fillchar, remainder)
         return result
 
     @_spacing_preflight
@@ -832,11 +924,11 @@ class ANSIString(unicode):
         """
         Left justify some text.
         """
-        return self + (difference * fillchar)
+        return self + self._filler(fillchar, difference)
 
     @_spacing_preflight
     def rjust(self, width, fillchar, difference):
         """
         Right justify some text.
         """
-        return (difference * fillchar) + self
+        return self._filler(fillchar, difference) + self