From f49f33d40dd6774237ab22fe97c1577ffb8cbbe6 Mon Sep 17 00:00:00 2001
From: Griatch <griatch@gmail.com>
Date: Sun, 18 May 2014 10:54:04 +0200
Subject: [PATCH] Made more work around memory profiling, added a small Script
 and display program for logging memory usage

---
 src/commands/default/system.py   | 14 ++----
 src/typeclasses/models.py        |  1 -
 src/utils/dummyrunner/memplot.py | 76 ++++++++++++++++++++++++++++++++
 src/utils/idmapper/base.py       | 32 +++++++-------
 4 files changed, 95 insertions(+), 28 deletions(-)
 create mode 100644 src/utils/dummyrunner/memplot.py

diff --git a/src/commands/default/system.py b/src/commands/default/system.py
index 9e93e144b4..75051ef036 100644
--- a/src/commands/default/system.py
+++ b/src/commands/default/system.py
@@ -713,9 +713,9 @@ class CmdServerLoad(MuxCommand):
             # because it lacks sys.getsizeof
 
             # object cache size
-            total_num, total_size, cachedict = _idmapper.cache_size()
-            sorted_cache = sorted([(key, tup[0], tup[1]) for key, tup in cachedict.items() if key !="_total" and tup[0] > 0],
-                                    key=lambda tup: tup[2], reverse=True)
+            total_num, cachedict = _idmapper.cache_size()
+            sorted_cache = sorted([(key, num) for key, num in cachedict.items() if num > 0],
+                                    key=lambda tup: tup[1], reverse=True)
             memtable = prettytable.PrettyTable(["entity name",
                                                 "number",
                                                 "idmapper %%"])
@@ -723,16 +723,10 @@ class CmdServerLoad(MuxCommand):
             for tup in sorted_cache:
                 memtable.add_row([tup[0],
                                  "%i" % tup[1],
-                                 "%.2f" % (float(tup[1] / 1.0*total_num) * 100)])
+                                 "%.2f" % (float(tup[1]) / total_num * 100)])
 
             # get sizes of other caches
-            #attr_cache_info, prop_cache_info = get_cache_sizes()
             string += "\n{w Entity idmapper cache:{n %i items\n%s" % (total_num, memtable)
-            #string += "\n{w On-entity Attribute cache usage:{n %5.2f MB (%i attrs)" % (attr_cache_info[1], attr_cache_info[0])
-            #string += "\n{w On-entity Property cache usage:{n %5.2f MB (%i props)" % (prop_cache_info[1], prop_cache_info[0])
-            #base_mem = vmem - total_size - attr_cache_info[1] - prop_cache_info[1]
-            #string += "\n{w Base Server usage (virtmem-idmapper-attrcache-propcache):{n %5.2f MB" % base_mem
-            #string += "\n{w Base Server usage (virtmem - cache):{n %5.2f MB" % base_mem
 
         caller.msg(string)
 
diff --git a/src/typeclasses/models.py b/src/typeclasses/models.py
index 4c1ab2e5a9..6595a79562 100644
--- a/src/typeclasses/models.py
+++ b/src/typeclasses/models.py
@@ -467,7 +467,6 @@ class NAttributeHandler(object):
     def add(self, key, value):
         "Add new key and value"
         self._store[key] = value
-        print "set_recache_protection:", self.obj.key, key
         self.obj.set_recache_protection()
 
     def remove(self, key):
diff --git a/src/utils/dummyrunner/memplot.py b/src/utils/dummyrunner/memplot.py
new file mode 100644
index 0000000000..459a60c265
--- /dev/null
+++ b/src/utils/dummyrunner/memplot.py
@@ -0,0 +1,76 @@
+"""
+Script that saves memory and idmapper data over time.
+
+Data will be saved to game/logs/memoryusage.log. Note that
+the script will append to this file if it already exists.
+
+Call this module directly to plot the log (requires matplotlib and numpy).
+"""
+import os, sys
+import time
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
+os.environ['DJANGO_SETTINGS_MODULE'] = 'game.settings'
+import ev
+from src.utils.idmapper import base as _idmapper
+
+LOGFILE = "logs/memoryusage.log"
+INTERVAL = 30 # log every 30 seconds
+
+class Memplot(ev.Script):
+    def at_script_creation(self):
+        self.key = "memplot"
+        self.desc = "Save server memory stats to file"
+        self.start_delay = False
+        self.persistent = True
+        self.interval = INTERVAL
+        self.db.starttime = time.time()
+
+    def at_repeat(self):
+
+        pid = os.getpid()
+        rmem = float(os.popen('ps -p %d -o %s | tail -1' % (pid, "rss")).read()) / 1000.0  # resident memory
+        vmem = float(os.popen('ps -p %d -o %s | tail -1' % (pid, "vsz")).read()) / 1000.0  # virtual memory
+        total_num, cachedict = _idmapper.cache_size()
+        t0 = (time.time() - self.db.starttime) / 60.0 # save in minutes
+
+        with open(LOGFILE, "a") as f:
+            f.write("%s, %s, %s, %s\n" % (t0, rmem, vmem, total_num))
+
+if __name__ == "__main__":
+
+    # plot output from the file
+
+    from matplotlib import pyplot as pp
+    import numpy
+
+    data = numpy.genfromtxt(LOGFILE, delimiter=",")
+    secs = data[:,0]
+    rmem = data[:,1]
+    vmem = data[:,2]
+    nobj = data[:,3]
+
+    # correct for @reload
+    #secs[359:] = secs[359:] + secs[358]
+
+    # count total amount of objects
+    ntot = data[:,3].copy()
+    #ntot[119:] = ntot[119:] + ntot[118] - ntot[119]
+    #ntot[359:] = ntot[359:] + ntot[358] - ntot[359]
+
+    fig = pp.figure()
+    ax1 = fig.add_subplot(111)
+    ax1.set_title("Memory usage")
+    ax1.set_xlabel("Time (mins)")
+    ax1.set_ylabel("Memory usage (MB)")
+    ax1.plot(secs, rmem, "r", label="RMEM", lw=2)
+    ax1.plot(secs, vmem, "b", label="VMEM", lw=2)
+    ax1.legend(loc="upper left")
+
+    ax2 = ax1.twinx()
+    ax2.plot(secs, nobj, "g--", label="objs in cache", lw=2)
+    ax2.plot(secs, ntot, "r--", label="objs total", lw=2)
+    ax2.set_ylabel("Number of objects")
+    ax2.legend(loc="lower right")
+    ax2.annotate("idmapper\nflush", xy=(70,480))
+    ax2.annotate("@reload", xy=(185,600))
+    pp.show()
diff --git a/src/utils/idmapper/base.py b/src/utils/idmapper/base.py
index dd9e08bdbc..b27d1a325d 100755
--- a/src/utils/idmapper/base.py
+++ b/src/utils/idmapper/base.py
@@ -357,30 +357,28 @@ post_save.connect(update_cached_instance)
 
 def cache_size(mb=True):
     """
-    Calculate statistics about the cache
+    Calculate statistics about the cache.
+
+    Note: we cannot get reliable memory statistics from the cache -
+    whereas we could do getsizof each object in cache, the result is
+    highly imprecise and for a large number of object the result is
+    many times larger than the actual memory use of the entire server;
+    Python is clearly reusing memory behind the scenes that we cannot
+    catch in an easy way here.  Ideas are appreciated. /Griatch
 
-    mb - return the result in MB.
     Returns
-      total_num, total_size, {objclass:(total_num, total_size)}
+      total_num, {objclass:total_num, ...}
     """
-    import sys
-    totals = [0, 0] # totalnum, totalsize
-    sizedict = {}
-    def getsize(model):
-        instances = model.get_all_cached_instances()
-        num_inst = len(instances)
-        size = sum(sys.getsizeof(o) for o in instances)
-        size = size / 1000.0 if mb else size
-        return num_inst, size
+    numtotal = [0] # use mutable to keep reference through recursion
+    classdict = {}
     def get_recurse(submodels):
         for submodel in submodels:
             subclasses = submodel.__subclasses__()
             if not subclasses:
-                num_inst, size = getsize(submodel)
-                totals[0] += num_inst
-                totals[1] += size
-                sizedict[submodel.__name__] = (num_inst, size)
+                num = len(submodel.get_all_cached_instances())
+                numtotal[0] += num
+                classdict[submodel.__name__] = num
             else:
                 get_recurse(subclasses)
     get_recurse(SharedMemoryModel.__subclasses__())
-    return totals[0], totals[1], sizedict
+    return numtotal[0], classdict