Change in vdsm[master]: WIP: add simple tracemalloc inspection tool - vdsm-patches - Fedora mailing-lists

5 Nov 2015

Francesco Romani has uploaded a new change for review.
Change subject: WIP: add simple tracemalloc inspection tool
......................................................................
WIP: add simple tracemalloc inspection tool
Change-Id: Ideb2652f345edb6f4a4d66c5299b601e53e85d33
Signed-off-by: Francesco Romani fromani@redhat.com
---
A contrib/memory-stats
A contrib/tracemalloc.py
2 files changed, 592 insertions(+), 0 deletions(-)
git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/15/48115/1

diff --git a/contrib/memory-stats b/contrib/memory-stats
new file mode 100755
index 0000000..5625ad1
--- /dev/null
+++ b/contrib/memory-stats
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+#
+# Copyright 2015 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+# Refer to the README and COPYING files for full details of the license
+#
+
+import argparse
+import os
+import sys
+import time
+
+import tracemalloc
+
+
+def eprint(s):
+    sys.stderr.write('%s\n' % s)
+
+
+def _load(filename):
+    if filename:
+        eprint('Loading snapshot: "%s"' % filename)
+        before = time.time()
+        snap = tracemalloc.Snapshot.load(filename)
+        eprint('Loaded snapshot: "%s" in %.3f seconds' % (
+                filename, time.time() - before))
+        return snap
+    return None
+
+
+def display_top(snapshot1, snapshot2=None, group_by='lineno', limit=10):
+    if snapshot2 is None:
+        top_stats = snapshot1.statistics(group_by)
+    else:
+        top_stats = snapshot2.compare_to(snapshot1, group_by)
+    print_stats(top_stats, limit)
+
+
+def _print_sep(width=52):
+    print('-' * width)
+
+
+def print_stats(top_stats, limit=10):
+    total = float(sum(stat.size for stat in top_stats))
+    print("* Total allocated size: %.2f KiB" % (total / 1024.))
+    print("* Top %s lines:" % limit)
+    _print_sep()
+    for index, stat in enumerate(top_stats[:limit], 1):
+        frame = stat.traceback[0]
+        # replace "/path/to/module/file.py" with "module/file.py"
+        filename = os.sep.join(frame.filename.split(os.sep)[-2:])
+        where = "%s:%s" % (filename, frame.lineno)
+        print("#%3s: %-32s: %.2f KiB (%.2f%%)"
+              % (index, where,
+                 stat.size / 1024.,
+                 (100. * stat.size) / total))
+
+    other = top_stats[limit:]
+    if other:
+        size = sum(stat.size for stat in other)
+        _print_sep()
+        where = "%i entries" % len(other)
+        print("rest: %-32s: %.2f KiB (%.2f%%)"
+              % (where, size / 1024., (100. * size) / total))
+
+
+ap = argparse.ArgumentParser(description='analyze tracemalloc snapshot')
+ap.add_argument('snapshot', nargs='?', help='memory snapshot to analyze')
+ap.add_argument('-c', '--compare-to', metavar='OS', dest='to_compare',
+                help='memory snapshot to compare')
+ap.add_argument('-g', '--group-by', metavar='ATTR', dest='groupby',
+                default='lineno',
+                help='group samples by either: lineno, filename, traceback')
+ap.add_argument('-l', '--limit', metavar='L', dest='limit', default=10,
+                type=int,
+                help='limit output to L top entries')
+                 
+
+args = ap.parse_args()
+
+if not args.snapshot:
+    ap.print_help()
+    sys.exit(1)
+
+display_top(_load(args.snapshot),
+            _load(args.to_compare),
+            args.groupby,
+            args.limit)
diff --git a/contrib/tracemalloc.py b/contrib/tracemalloc.py
new file mode 100644
index 0000000..47b633e
--- /dev/null
+++ b/contrib/tracemalloc.py
@@ -0,0 +1,490 @@
+# Copy of tracemalloc.py from pytracemalloc without _tracemalloc,
+# to be able to read snapshot without having to install the _tracemaloc
+# module.
+from collections import Sequence, Iterable
+import fnmatch
+import linecache
+import os.path
+import pickle
+
+
+try:
+    from functools import total_ordering
+except ImportError:
+    # Python 2.6
+    def total_ordering(cls):
+        # Function backported from Python 2.7
+        convert = {
+            '__lt__': [('__gt__', lambda self, other: _not_op_and_not_eq(self.__lt__, self, other)),
+                       ('__le__', lambda self, other: _op_or_eq(self.__lt__, self, other)),
+                       ('__ge__', lambda self, other: _not_op(self.__lt__, other))],
+            '__le__': [('__ge__', lambda self, other: _not_op_or_eq(self.__le__, self, other)),
+                       ('__lt__', lambda self, other: _op_and_not_eq(self.__le__, self, other)),
+                       ('__gt__', lambda self, other: _not_op(self.__le__, other))],
+            '__gt__': [('__lt__', lambda self, other: _not_op_and_not_eq(self.__gt__, self, other)),
+                       ('__ge__', lambda self, other: _op_or_eq(self.__gt__, self, other)),
+                       ('__le__', lambda self, other: _not_op(self.__gt__, other))],
+            '__ge__': [('__le__', lambda self, other: _not_op_or_eq(self.__ge__, self, other)),
+                       ('__gt__', lambda self, other: _op_and_not_eq(self.__ge__, self, other)),
+                       ('__lt__', lambda self, other: _not_op(self.__ge__, other))]
+        }
+        roots = [op for op in convert if getattr(cls, op, None) is not getattr(object, op, None)]
+        if not roots:
+            raise ValueError('must define at least one ordering operation: < > <= >=')
+        root = max(roots)
+        for opname, opfunc in convert[root]:
+            if opname not in roots:
+                opfunc.__name__ = opname
+                opfunc.__doc__ = getattr(int, opname).__doc__
+                setattr(cls, opname, opfunc)
+        return cls
+
+
+def _format_size(size, sign):
+    for unit in ('B', 'KiB', 'MiB', 'GiB', 'TiB'):
+        if abs(size) < 100 and unit != 'B':
+            # 3 digits (xx.x UNIT)
+            if sign:
+                return "%+.1f %s" % (size, unit)
+            else:
+                return "%.1f %s" % (size, unit)
+        if abs(size) < 10 * 1024 or unit == 'TiB':
+            # 4 or 5 digits (xxxx UNIT)
+            if sign:
+                return "%+.0f %s" % (size, unit)
+            else:
+                return "%.0f %s" % (size, unit)
+        size /= 1024
+
+
+class Statistic(object):
+    """
+    Statistic difference on memory allocations between two Snapshot instance.
+    """
+
+    __slots__ = ('traceback', 'size', 'count')
+
+    def __init__(self, traceback, size, count):
+        self.traceback = traceback
+        self.size = size
+        self.count = count
+
+    def __hash__(self):
+        return hash((self.traceback, self.size, self.count))
+
+    def __eq__(self, other):
+        return (self.traceback == other.traceback
+                and self.size == other.size
+                and self.count == other.count)
+
+    def __str__(self):
+        text = ("%s: size=%s, count=%i"
+                 % (self.traceback,
+                    _format_size(self.size, False),
+                    self.count))
+        if self.count:
+            average = self.size / self.count
+            text += ", average=%s" % _format_size(average, False)
+        return text
+
+    def __repr__(self):
+        return ('<Statistic traceback=%r size=%i count=%i>'
+                % (self.traceback, self.size, self.count))
+
+    def _sort_key(self):
+        return (self.size, self.count, self.traceback)
+
+
+class StatisticDiff(object):
+    """
+    Statistic difference on memory allocations between an old and a new
+    Snapshot instance.
+    """
+    __slots__ = ('traceback', 'size', 'size_diff', 'count', 'count_diff')
+
+    def __init__(self, traceback, size, size_diff, count, count_diff):
+        self.traceback = traceback
+        self.size = size
+        self.size_diff = size_diff
+        self.count = count
+        self.count_diff = count_diff
+
+    def __hash__(self):
+        return hash((self.traceback, self.size, self.size_diff,
+                     self.count, self.count_diff))
+
+    def __eq__(self, other):
+        return (self.traceback == other.traceback
+                and self.size == other.size
+                and self.size_diff == other.size_diff
+                and self.count == other.count
+                and self.count_diff == other.count_diff)
+
+    def __str__(self):
+        text = ("%s: size=%s (%s), count=%i (%+i)"
+                % (self.traceback,
+                   _format_size(self.size, False),
+                   _format_size(self.size_diff, True),
+                   self.count,
+                   self.count_diff))
+        if self.count:
+            average = self.size / self.count
+            text += ", average=%s" % _format_size(average, False)
+        return text
+
+    def __repr__(self):
+        return ('<StatisticDiff traceback=%r size=%i (%+i) count=%i (%+i)>'
+                % (self.traceback, self.size, self.size_diff,
+                   self.count, self.count_diff))
+
+    def _sort_key(self):
+        return (abs(self.size_diff), self.size,
+                abs(self.count_diff), self.count,
+                self.traceback)
+
+
+def _compare_grouped_stats(old_group, new_group):
+    statistics = []
+    for traceback, stat in new_group.items():
+        previous = old_group.pop(traceback, None)
+        if previous is not None:
+            stat = StatisticDiff(traceback,
+                                 stat.size, stat.size - previous.size,
+                                 stat.count, stat.count - previous.count)
+        else:
+            stat = StatisticDiff(traceback,
+                                 stat.size, stat.size,
+                                 stat.count, stat.count)
+        statistics.append(stat)
+
+    for traceback, stat in old_group.items():
+        stat = StatisticDiff(traceback, 0, -stat.size, 0, -stat.count)
+        statistics.append(stat)
+    return statistics
+
+
+@total_ordering
+class Frame(object):
+    """
+    Frame of a traceback.
+    """
+    __slots__ = ("_frame",)
+
+    def __init__(self, frame):
+        # frame is a tuple: (filename: str, lineno: int)
+        self._frame = frame
+
+    @property
+    def filename(self):
+        return self._frame[0]
+
+    @property
+    def lineno(self):
+        return self._frame[1]
+
+    def __eq__(self, other):
+        return (self._frame == other._frame)
+
+    def __lt__(self, other):
+        return (self._frame < other._frame)
+
+    def __hash__(self):
+        return hash(self._frame)
+
+    def __str__(self):
+        return "%s:%s" % (self.filename, self.lineno)
+
+    def __repr__(self):
+        return "<Frame filename=%r lineno=%r>" % (self.filename, self.lineno)
+
+
+@total_ordering
+class Traceback(Sequence):
+    """
+    Sequence of Frame instances sorted from the most recent frame
+    to the oldest frame.
+    """
+    __slots__ = ("_frames",)
+
+    def __init__(self, frames):
+        Sequence.__init__(self)
+        # frames is a tuple of frame tuples: see Frame constructor for the
+        # format of a frame tuple
+        self._frames = frames
+
+    def __len__(self):
+        return len(self._frames)
+
+    def __getitem__(self, index):
+        if isinstance(index, slice):
+            return tuple(Frame(trace) for trace in self._frames[index])
+        else:
+            return Frame(self._frames[index])
+
+    def __contains__(self, frame):
+        return frame._frame in self._frames
+
+    def __hash__(self):
+        return hash(self._frames)
+
+    def __eq__(self, other):
+        return (self._frames == other._frames)
+
+    def __lt__(self, other):
+        return (self._frames < other._frames)
+
+    def __str__(self):
+        return str(self[0])
+
+    def __repr__(self):
+        return "<Traceback %r>" % (tuple(self),)
+
+    def format(self, limit=None):
+        lines = []
+        if limit is not None and limit < 0:
+            return lines
+        for frame in self[:limit]:
+            lines.append('  File "%s", line %s'
+                         % (frame.filename, frame.lineno))
+            line = linecache.getline(frame.filename, frame.lineno).strip()
+            if line:
+                lines.append('    %s' % line)
+        return lines
+
+
+class Trace(object):
+    """
+    Trace of a memory block.
+    """
+    __slots__ = ("_trace",)
+
+    def __init__(self, trace):
+        # trace is a tuple: (size, traceback), see Traceback constructor
+        # for the format of the traceback tuple
+        self._trace = trace
+
+    @property
+    def size(self):
+        return self._trace[0]
+
+    @property
+    def traceback(self):
+        return Traceback(self._trace[1])
+
+    def __eq__(self, other):
+        return (self._trace == other._trace)
+
+    def __hash__(self):
+        return hash(self._trace)
+
+    def __str__(self):
+        return "%s: %s" % (self.traceback, _format_size(self.size, False))
+
+    def __repr__(self):
+        return ("<Trace size=%s, traceback=%r>"
+                % (_format_size(self.size, False), self.traceback))
+
+
+class _Traces(Sequence):
+    def __init__(self, traces):
+        Sequence.__init__(self)
+        # traces is a tuple of trace tuples: see Trace constructor
+        self._traces = traces
+
+    def __len__(self):
+        return len(self._traces)
+
+    def __getitem__(self, index):
+        if isinstance(index, slice):
+            return tuple(Trace(trace) for trace in self._traces[index])
+        else:
+            return Trace(self._traces[index])
+
+    def __contains__(self, trace):
+        return trace._trace in self._traces
+
+    def __eq__(self, other):
+        return (self._traces == other._traces)
+
+    def __repr__(self):
+        return "<Traces len=%s>" % len(self)
+
+
+def _normalize_filename(filename):
+    filename = os.path.normcase(filename)
+    if filename.endswith(('.pyc', '.pyo')):
+        filename = filename[:-1]
+    return filename
+
+
+class Filter(object):
+    def __init__(self, inclusive, filename_pattern,
+                 lineno=None, all_frames=False):
+        self.inclusive = inclusive
+        self._filename_pattern = _normalize_filename(filename_pattern)
+        self.lineno = lineno
+        self.all_frames = all_frames
+
+    @property
+    def filename_pattern(self):
+        return self._filename_pattern
+
+    def __match_frame(self, filename, lineno):
+        filename = _normalize_filename(filename)
+        if not fnmatch.fnmatch(filename, self._filename_pattern):
+            return False
+        if self.lineno is None:
+            return True
+        else:
+            return (lineno == self.lineno)
+
+    def _match_frame(self, filename, lineno):
+        return self.__match_frame(filename, lineno) ^ (not self.inclusive)
+
+    def _match_traceback(self, traceback):
+        if self.all_frames:
+            if any(self.__match_frame(filename, lineno)
+                   for filename, lineno in traceback):
+                return self.inclusive
+            else:
+                return (not self.inclusive)
+        else:
+            filename, lineno = traceback[0]
+            return self._match_frame(filename, lineno)
+
+
+class Snapshot(object):
+    """
+    Snapshot of traces of memory blocks allocated by Python.
+    """
+
+    def __init__(self, traces, traceback_limit):
+        # traces is a tuple of trace tuples: see _Traces constructor for
+        # the exact format
+        self.traces = _Traces(traces)
+        self.traceback_limit = traceback_limit
+
+    def dump(self, filename):
+        """
+        Write the snapshot into a file.
+        """
+        with open(filename, "wb") as fp:
+            pickle.dump(self, fp, pickle.HIGHEST_PROTOCOL)
+
+    @staticmethod
+    def load(filename):
+        """
+        Load a snapshot from a file.
+        """
+        with open(filename, "rb") as fp:
+            return pickle.load(fp)
+
+    def _filter_trace(self, include_filters, exclude_filters, trace):
+        traceback = trace[1]
+        if include_filters:
+            if not any(trace_filter._match_traceback(traceback)
+                       for trace_filter in include_filters):
+                return False
+        if exclude_filters:
+            if any(not trace_filter._match_traceback(traceback)
+                   for trace_filter in exclude_filters):
+                return False
+        return True
+
+    def filter_traces(self, filters):
+        """
+        Create a new Snapshot instance with a filtered traces sequence, filters
+        is a list of Filter instances.  If filters is an empty list, return a
+        new Snapshot instance with a copy of the traces.
+        """
+        if not isinstance(filters, Iterable):
+            raise TypeError("filters must be a list of filters, not %s"
+                            % type(filters).__name__)
+        if filters:
+            include_filters = []
+            exclude_filters = []
+            for trace_filter in filters:
+                if trace_filter.inclusive:
+                    include_filters.append(trace_filter)
+                else:
+                    exclude_filters.append(trace_filter)
+            new_traces = [trace for trace in self.traces._traces
+                          if self._filter_trace(include_filters,
+                                                exclude_filters,
+                                                trace)]
+        else:
+            new_traces = self.traces._traces[:]
+        return Snapshot(new_traces, self.traceback_limit)
+
+    def _group_by(self, key_type, cumulative):
+        if key_type not in ('traceback', 'filename', 'lineno'):
+            raise ValueError("unknown key_type: %r" % (key_type,))
+        if cumulative and key_type not in ('lineno', 'filename'):
+            raise ValueError("cumulative mode cannot by used "
+                             "with key type %r" % key_type)
+
+        stats = {}
+        tracebacks = {}
+        if not cumulative:
+            for trace in self.traces._traces:
+                size, trace_traceback = trace
+                try:
+                    traceback = tracebacks[trace_traceback]
+                except KeyError:
+                    if key_type == 'traceback':
+                        frames = trace_traceback
+                    elif key_type == 'lineno':
+                        frames = trace_traceback[:1]
+                    else: # key_type == 'filename':
+                        frames = ((trace_traceback[0][0], 0),)
+                    traceback = Traceback(frames)
+                    tracebacks[trace_traceback] = traceback
+                try:
+                    stat = stats[traceback]
+                    stat.size += size
+                    stat.count += 1
+                except KeyError:
+                    stats[traceback] = Statistic(traceback, size, 1)
+        else:
+            # cumulative statistics
+            for trace in self.traces._traces:
+                size, trace_traceback = trace
+                for frame in trace_traceback:
+                    try:
+                        traceback = tracebacks[frame]
+                    except KeyError:
+                        if key_type == 'lineno':
+                            frames = (frame,)
+                        else: # key_type == 'filename':
+                            frames = ((frame[0], 0),)
+                        traceback = Traceback(frames)
+                        tracebacks[frame] = traceback
+                    try:
+                        stat = stats[traceback]
+                        stat.size += size
+                        stat.count += 1
+                    except KeyError:
+                        stats[traceback] = Statistic(traceback, size, 1)
+        return stats
+
+    def statistics(self, key_type, cumulative=False):
+        """
+        Group statistics by key_type. Return a sorted list of Statistic
+        instances.
+        """
+        grouped = self._group_by(key_type, cumulative)
+        statistics = list(grouped.values())
+        statistics.sort(reverse=True, key=Statistic._sort_key)
+        return statistics
+
+    def compare_to(self, old_snapshot, key_type, cumulative=False):
+        """
+        Compute the differences with an old snapshot old_snapshot. Get
+        statistics as a sorted list of StatisticDiff instances, grouped by
+        group_by.
+        """
+        new_group = self._group_by(key_type, cumulative)
+        old_group = old_snapshot._group_by(key_type, cumulative)
+        statistics = _compare_grouped_stats(old_group, new_group)
+        statistics.sort(reverse=True, key=StatisticDiff._sort_key)
+        return statistics
-- 
To view, visit https://gerrit.ovirt.org/48115
To unsubscribe, visit https://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ideb2652f345edb6f4a4d66c5299b601e53e85d33
Gerrit-PatchSet: 1
Gerrit-Project: vdsm
Gerrit-Branch: master
Gerrit-Owner: Francesco Romani fromani@redhat.com