diff --git a/extra/gprof2dot/gprof2dot.py b/extra/gprof2dot/gprof2dot.py
new file mode 100755
index 000000000..12405c2bd
--- /dev/null
+++ b/extra/gprof2dot/gprof2dot.py
@@ -0,0 +1,2624 @@
+#!/usr/bin/env python
+#
+# Copyright 2008-2009 Jose Fonseca
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+"""Generate a dot graph from the output of several profilers."""
+
+__author__ = "Jose Fonseca"
+
+__version__ = "1.0"
+
+
+import sys
+import math
+import os.path
+import re
+import textwrap
+import optparse
+import xml.parsers.expat
+
+
+try:
+ # Debugging helper module
+ import debug
+except ImportError:
+ pass
+
+
+def times(x):
+    """Format a count as a unicode string suffixed with U+00D7 (the times sign)."""
+    label = u"%u\xd7" % (x,)
+    return label
+
+def percentage(p):
+    """Format a fraction (1.0 meaning 100%) as a percentage with two decimals."""
+    scaled = p*100.0
+    return "%.02f%%" % (scaled,)
+
+def add(a, b):
+    """Aggregator that combines two event values by summing them."""
+    combined = a + b
+    return combined
+
+def equal(a, b):
+    """Aggregator that keeps a value only when both inputs agree.
+
+    Returns ``a`` when ``a == b`` and ``None`` otherwise.
+    """
+    if not (a == b):
+        return None
+    return a
+
+def fail(a, b):
+    """Aggregator for events that must never be aggregated; asserts unconditionally."""
+    assert False
+
+
+# Slack used by ratio(): quotients that leave [0.0, 1.0] by no more than this
+# amount are clamped without emitting a warning.
+tol = 2 ** -23
+
+def ratio(numerator, denominator):
+    """Return numerator/denominator clamped to the range [0.0, 1.0].
+
+    A zero denominator yields 1.0.  When the raw quotient falls outside the
+    range by more than ``tol`` a warning is written to stderr before clamping.
+    """
+    try:
+        quotient = float(numerator)/float(denominator)
+    except ZeroDivisionError:
+        # 0/0 is undefined, but 1.0 yields more useful results
+        return 1.0
+
+    if quotient < 0.0:
+        beyond_tolerance = quotient < -tol
+        if beyond_tolerance:
+            sys.stderr.write('warning: negative ratio (%s/%s)\n' % (numerator, denominator))
+        return 0.0
+
+    if quotient > 1.0:
+        beyond_tolerance = quotient > 1.0 + tol
+        if beyond_tolerance:
+            sys.stderr.write('warning: ratio greater than one (%s/%s)\n' % (numerator, denominator))
+        return 1.0
+
+    return quotient
+
+
+class UndefinedEvent(Exception):
+    """Signal that an object holds no value for the requested event."""
+
+    def __init__(self, event):
+        Exception.__init__(self)
+        # Keep the event itself so handlers can inspect which one was missing
+        self.event = event
+
+    def __str__(self):
+        event_name = self.event.name
+        return 'unspecified event %s' % event_name
+
+
+class Event(object):
+    """A kind of measurement, bundled with the operations that act on its values.
+
+    name       -- display name of the event
+    null       -- neutral starting value used when aggregating
+    aggregator -- two-argument callable combining two values
+    formatter  -- one-argument callable turning a value into text
+    """
+
+    def __init__(self, name, null, aggregator, formatter = str):
+        self.name = name
+        self._null = null
+        self._aggregator = aggregator
+        self._formatter = formatter
+
+    # Events are compared and hashed by identity, so two events with the same
+    # name are still distinct dictionary keys.
+    def __hash__(self):
+        return id(self)
+
+    def __eq__(self, other):
+        return self is other
+
+    def null(self):
+        return self._null
+
+    def aggregate(self, val1, val2):
+        """Combine two values of this event; neither may be None."""
+        assert val1 is not None
+        assert val2 is not None
+        combine = self._aggregator
+        return combine(val1, val2)
+
+    def format(self, val):
+        """Render a value of this event as text; the value may not be None."""
+        assert val is not None
+        render = self._formatter
+        return render(val)
+
+
+# Predefined events, built as Event(name, null value, aggregator[, formatter]).
+CALLS = Event("Calls", 0, add, times)
+SAMPLES = Event("Samples", 0, add)
+# NOTE(review): SAMPLES2 shares the display name "Samples" with SAMPLES; events
+# compare by identity, so the two still act as distinct keys.
+SAMPLES2 = Event("Samples", 0, add)
+
+# Self-time events are summed and rendered inside parentheses.
+TIME = Event("Time", 0.0, add, lambda x: '(' + str(x) + ')')
+TIME_RATIO = Event("Time ratio", 0.0, add, lambda x: '(' + percentage(x) + ')')
+# Total-time events use `fail` as aggregator, so aggregating them asserts.
+TOTAL_TIME = Event("Total time", 0.0, fail)
+TOTAL_TIME_RATIO = Event("Total time ratio", 0.0, fail, percentage)
+
+
+class Object(object):
+    """Common base for profile entities that carry per-event values.
+
+    Values live in the ``events`` mapping and are accessed with item syntax
+    (``obj[event]``).  Instances are compared and hashed by identity.
+    """
+
+    def __init__(self, events=None):
+        if events is not None:
+            self.events = events
+        else:
+            self.events = {}
+
+    def __eq__(self, other):
+        return self is other
+
+    def __hash__(self):
+        return id(self)
+
+    def __contains__(self, event):
+        return event in self.events
+
+    def __getitem__(self, event):
+        """Return the value stored for `event`; raise UndefinedEvent if absent."""
+        try:
+            value = self.events[event]
+        except KeyError:
+            raise UndefinedEvent(event)
+        return value
+
+    def __setitem__(self, event, value):
+        """Store `value` for `event`; assigning None removes the event instead."""
+        if value is not None:
+            self.events[event] = value
+            return
+        if event in self.events:
+            del self.events[event]
+
+
+class Call(Object):
+    """An edge of the call graph, identified by the id of the called function.
+
+    There should be at most one call object for every pair of functions.
+    """
+
+    def __init__(self, callee_id):
+        Object.__init__(self)
+        # ratio and weight start unset; Profile.call_ratios() and
+        # Profile.prune() fill them in.
+        self.ratio = self.weight = None
+        self.callee_id = callee_id
+
+
+class Function(Object):
+    """A node of the call graph, holding its outgoing calls keyed by callee id."""
+
+    def __init__(self, id, name):
+        Object.__init__(self)
+        self.id = id
+        self.name = name
+        # Outgoing calls, keyed by callee id
+        self.calls = {}
+        # Attributes that parsers / Profile fill in later
+        self.module = None
+        self.process = None
+        self.called = None
+        self.weight = None
+        self.cycle = None
+
+    def add_call(self, call):
+        """Register `call`, warning on stderr when it replaces an existing one."""
+        target = call.callee_id
+        if target in self.calls:
+            sys.stderr.write('warning: overwriting call from function %s to %s\n' % (str(self.id), str(target)))
+        self.calls[target] = call
+
+    # TODO: write utility functions
+
+    def __repr__(self):
+        return self.name
+
+
+class Cycle(Object):
+    """A cycle made from recursive function calls."""
+
+    def __init__(self):
+        Object.__init__(self)
+        # XXX: Do cycles need an id?
+        self.functions = set()
+
+    def add_function(self, function):
+        """Add `function` to this cycle and make it point back at the cycle.
+
+        If the function already belonged to another cycle, every member of
+        that cycle is pulled into this one as well, so the two cycles merge.
+        The function must not already be a member of this cycle.
+        """
+        assert function not in self.functions
+        self.functions.add(function)
+        # XXX: Aggregate events?
+        previous = function.cycle
+        if previous is not None:
+            # Iterate over a snapshot; the recursive calls only mutate
+            # self.functions, but a copy keeps the loop obviously safe.
+            for other in list(previous.functions):
+                # Test `other` (not `function`, which was just added and is
+                # therefore always present) so the old members really migrate.
+                if other not in self.functions:
+                    self.add_function(other)
+        function.cycle = self
+
+
+class Profile(Object):
+    """The whole profile.
+
+    Holds every Function keyed by id (``functions``) and the list of Cycle
+    objects (``cycles``), and implements the passes that derive call ratios,
+    propagate costs along the call graph, and prune the result.
+    """
+
+    def __init__(self):
+        Object.__init__(self)
+        self.functions = {}
+        self.cycles = []
+
+    def add_function(self, function):
+        """Register `function` by id, warning on stderr if the id was taken."""
+        if function.id in self.functions:
+            sys.stderr.write('warning: overwriting function %s (id %s)\n' % (function.name, str(function.id)))
+        self.functions[function.id] = function
+
+    def add_cycle(self, cycle):
+        self.cycles.append(cycle)
+
+    def validate(self):
+        """Validate the edges.
+
+        Calls whose callee id is not a known function are reported on stderr
+        and removed.
+        """
+
+        for function in self.functions.itervalues():
+            # keys() returns a list in Python 2, so deleting while looping is safe
+            for callee_id in function.calls.keys():
+                assert function.calls[callee_id].callee_id == callee_id
+                if callee_id not in self.functions:
+                    sys.stderr.write('warning: call to undefined function %s from function %s\n' % (str(callee_id), function.name))
+                    del function.calls[callee_id]
+
+    def find_cycles(self):
+        """Find cycles using Tarjan's strongly connected components algorithm."""
+
+        # Apply the Tarjan's algorithm successively until all functions are visited
+        visited = set()
+        for function in self.functions.itervalues():
+            if function not in visited:
+                self._tarjan(function, 0, [], {}, {}, visited)
+        # Collect the distinct cycles the functions ended up in
+        cycles = []
+        for function in self.functions.itervalues():
+            if function.cycle is not None and function.cycle not in cycles:
+                cycles.append(function.cycle)
+        self.cycles = cycles
+        # Disabled debugging output
+        if 0:
+            for cycle in cycles:
+                sys.stderr.write("Cycle:\n")
+                for member in cycle.functions:
+                    sys.stderr.write("\tFunction %s\n" % member.name)
+
+    def _tarjan(self, function, order, stack, orders, lowlinks, visited):
+        """Tarjan's strongly connected components algorithm.
+
+        Visits `function` and, recursively, its callees.  Components with
+        more than one member become Cycle objects.  Returns the next unused
+        visiting order number.
+
+        See also:
+        - http://en.wikipedia.org/wiki/Tarjan's_strongly_connected_components_algorithm
+        """
+
+        visited.add(function)
+        orders[function] = order
+        lowlinks[function] = order
+        order += 1
+        # Remember where this function sits on the stack so the component
+        # can be sliced off later
+        pos = len(stack)
+        stack.append(function)
+        for call in function.calls.itervalues():
+            callee = self.functions[call.callee_id]
+            # TODO: use a set to optimize lookup
+            if callee not in orders:
+                order = self._tarjan(callee, order, stack, orders, lowlinks, visited)
+                lowlinks[function] = min(lowlinks[function], lowlinks[callee])
+            elif callee in stack:
+                lowlinks[function] = min(lowlinks[function], orders[callee])
+        if lowlinks[function] == orders[function]:
+            # Strongly connected component found
+            members = stack[pos:]
+            del stack[pos:]
+            # A single function is not treated as a cycle
+            if len(members) > 1:
+                cycle = Cycle()
+                for member in members:
+                    cycle.add_function(member)
+        return order
+
+    def call_ratios(self, event):
+        """Set ``call.ratio`` for every non-recursive call.
+
+        The ratio is the call's `event` value divided by the total of that
+        event over all incoming calls of the callee or, when the call enters
+        a cycle from outside, over all calls entering that cycle.
+        """
+        # Aggregate for incoming calls
+        cycle_totals = {}
+        for cycle in self.cycles:
+            cycle_totals[cycle] = 0.0
+        function_totals = {}
+        for function in self.functions.itervalues():
+            function_totals[function] = 0.0
+        for function in self.functions.itervalues():
+            for call in function.calls.itervalues():
+                # Direct self-recursion is ignored
+                if call.callee_id != function.id:
+                    callee = self.functions[call.callee_id]
+                    function_totals[callee] += call[event]
+                    if callee.cycle is not None and callee.cycle is not function.cycle:
+                        cycle_totals[callee.cycle] += call[event]
+
+        # Compute the ratios
+        for function in self.functions.itervalues():
+            for call in function.calls.itervalues():
+                assert call.ratio is None
+                if call.callee_id != function.id:
+                    callee = self.functions[call.callee_id]
+                    if callee.cycle is not None and callee.cycle is not function.cycle:
+                        total = cycle_totals[callee.cycle]
+                    else:
+                        total = function_totals[callee]
+                    call.ratio = ratio(call[event], total)
+
+    def integrate(self, outevent, inevent):
+        """Propagate function time ratio along the function calls.
+
+        Must be called after finding the cycles.
+
+        See also:
+        - http://citeseer.ist.psu.edu/graham82gprof.html
+        """
+
+        # Sanity checking
+        assert outevent not in self
+        for function in self.functions.itervalues():
+            assert outevent not in function
+            assert inevent in function
+            for call in function.calls.itervalues():
+                assert outevent not in call
+                if call.callee_id != function.id:
+                    assert call.ratio is not None
+
+        # Aggregate the input for each cycle
+        # NOTE(review): the loop body never uses `cycle`; it sums `inevent`
+        # over *all* functions and stores the result on the profile.  Possibly
+        # cycle.functions / cycle[inevent] was intended -- confirm.
+        for cycle in self.cycles:
+            total = inevent.null()
+            for function in self.functions.itervalues():
+                total = inevent.aggregate(total, function[inevent])
+            self[inevent] = total
+
+        # Integrate along the edges
+        total = inevent.null()
+        for function in self.functions.itervalues():
+            total = inevent.aggregate(total, function[inevent])
+            self._integrate_function(function, outevent, inevent)
+        self[outevent] = total
+
+    def _integrate_function(self, function, outevent, inevent):
+        """Return `outevent` for `function`, computing and caching it if needed.
+
+        Functions inside a cycle are delegated to _integrate_cycle().
+        """
+        if function.cycle is not None:
+            return self._integrate_cycle(function.cycle, outevent, inevent)
+        else:
+            if outevent not in function:
+                # Own cost plus the share received through each outgoing call
+                total = function[inevent]
+                for call in function.calls.itervalues():
+                    if call.callee_id != function.id:
+                        total += self._integrate_call(call, outevent, inevent)
+                function[outevent] = total
+            return function[outevent]
+
+    def _integrate_call(self, call, outevent, inevent):
+        """Set and return `outevent` for `call`: call.ratio times the callee's total."""
+        assert outevent not in call
+        assert call.ratio is not None
+        callee = self.functions[call.callee_id]
+        subtotal = call.ratio *self._integrate_function(callee, outevent, inevent)
+        call[outevent] = subtotal
+        return subtotal
+
+    def _integrate_cycle(self, cycle, outevent, inevent):
+        """Return `outevent` for `cycle`, computing it on first use.
+
+        The first computation also distributes the cycle total over the
+        members and the calls between them.
+        """
+        if outevent not in cycle:
+
+            # Compute the outevent for the whole cycle
+            total = inevent.null()
+            for member in cycle.functions:
+                subtotal = member[inevent]
+                for call in member.calls.itervalues():
+                    callee = self.functions[call.callee_id]
+                    if callee.cycle is not cycle:
+                        subtotal += self._integrate_call(call, outevent, inevent)
+                total += subtotal
+            cycle[outevent] = total
+
+            # Compute the time propagated to callers of this cycle
+            # (callees maps each entry point of the cycle to the summed ratio
+            # of the outside calls that reach it)
+            callees = {}
+            for function in self.functions.itervalues():
+                if function.cycle is not cycle:
+                    for call in function.calls.itervalues():
+                        callee = self.functions[call.callee_id]
+                        if callee.cycle is cycle:
+                            try:
+                                callees[callee] += call.ratio
+                            except KeyError:
+                                callees[callee] = call.ratio
+
+            for member in cycle.functions:
+                member[outevent] = outevent.null()
+
+            # Walk the cycle once per entry point, ranking members by their
+            # distance from it and only following calls that increase the rank
+            for callee, call_ratio in callees.iteritems():
+                ranks = {}
+                call_ratios = {}
+                partials = {}
+                self._rank_cycle_function(cycle, callee, 0, ranks)
+                self._call_ratios_cycle(cycle, callee, ranks, call_ratios, set())
+                partial = self._integrate_cycle_function(cycle, callee, call_ratio, partials, ranks, call_ratios, outevent, inevent)
+                assert partial == max(partials.values())
+                assert not total or abs(1.0 - partial/(call_ratio*total)) <= 0.001
+
+        return cycle[outevent]
+
+    def _rank_cycle_function(self, cycle, function, rank, ranks):
+        """Record in `ranks` the smallest call depth at which each member is reached."""
+        if function not in ranks or ranks[function] > rank:
+            ranks[function] = rank
+            for call in function.calls.itervalues():
+                if call.callee_id != function.id:
+                    callee = self.functions[call.callee_id]
+                    if callee.cycle is cycle:
+                        self._rank_cycle_function(cycle, callee, rank + 1, ranks)
+
+    def _call_ratios_cycle(self, cycle, function, ranks, call_ratios, visited):
+        """Sum, per member, the ratios of the rank-increasing calls that reach it."""
+        if function not in visited:
+            visited.add(function)
+            for call in function.calls.itervalues():
+                if call.callee_id != function.id:
+                    callee = self.functions[call.callee_id]
+                    if callee.cycle is cycle:
+                        if ranks[callee] > ranks[function]:
+                            call_ratios[callee] = call_ratios.get(callee, 0.0) + call.ratio
+                            self._call_ratios_cycle(cycle, callee, ranks, call_ratios, visited)
+
+    def _integrate_cycle_function(self, cycle, function, partial_ratio, partials, ranks, call_ratios, outevent, inevent):
+        """Return the partial `outevent` of `function` for one entry point.
+
+        Results are memoised in `partials`; each newly computed partial is
+        also accumulated into function[outevent] and the calls followed.
+        """
+        if function not in partials:
+            partial = partial_ratio*function[inevent]
+            for call in function.calls.itervalues():
+                if call.callee_id != function.id:
+                    callee = self.functions[call.callee_id]
+                    if callee.cycle is not cycle:
+                        # Calls leaving the cycle were integrated beforehand
+                        assert outevent in call
+                        partial += partial_ratio*call[outevent]
+                    else:
+                        if ranks[callee] > ranks[function]:
+                            callee_partial = self._integrate_cycle_function(cycle, callee, partial_ratio, partials, ranks, call_ratios, outevent, inevent)
+                            call_ratio = ratio(call.ratio, call_ratios[callee])
+                            call_partial = call_ratio*callee_partial
+                            try:
+                                call[outevent] += call_partial
+                            except UndefinedEvent:
+                                call[outevent] = call_partial
+                            partial += call_partial
+            partials[function] = partial
+            try:
+                function[outevent] += partial
+            except UndefinedEvent:
+                function[outevent] = partial
+        return partials[function]
+
+    def aggregate(self, event):
+        """Aggregate an event for the whole profile.
+
+        Nothing is stored if any function lacks the event.
+        """
+
+        total = event.null()
+        for function in self.functions.itervalues():
+            try:
+                total = event.aggregate(total, function[event])
+            except UndefinedEvent:
+                return
+        self[event] = total
+
+    def ratio(self, outevent, inevent):
+        """Store `inevent` relative to the profile total as `outevent`.
+
+        Applied to every function and to every call that has `inevent`; the
+        profile's own `outevent` is set to 1.0.
+        """
+        assert outevent not in self
+        assert inevent in self
+        for function in self.functions.itervalues():
+            assert outevent not in function
+            assert inevent in function
+            function[outevent] = ratio(function[inevent], self[inevent])
+            for call in function.calls.itervalues():
+                assert outevent not in call
+                if inevent in call:
+                    call[outevent] = ratio(call[inevent], self[inevent])
+        self[outevent] = 1.0
+
+    def prune(self, node_thres, edge_thres):
+        """Prune the profile.
+
+        Removes functions whose weight is below `node_thres`, and calls that
+        point at removed functions or whose weight is below `edge_thres`.
+        Items without a weight are kept.
+        """
+
+        # compute the prune ratios
+        for function in self.functions.itervalues():
+            try:
+                function.weight = function[TOTAL_TIME_RATIO]
+            except UndefinedEvent:
+                pass
+
+            for call in function.calls.itervalues():
+                callee = self.functions[call.callee_id]
+
+                if TOTAL_TIME_RATIO in call:
+                    # handle exact cases first
+                    call.weight = call[TOTAL_TIME_RATIO]
+                else:
+                    try:
+                        # make a safe estimate
+                        call.weight = min(function[TOTAL_TIME_RATIO], callee[TOTAL_TIME_RATIO])
+                    except UndefinedEvent:
+                        pass
+
+        # prune the nodes
+        # (keys() is a list in Python 2, so deleting inside the loop is safe)
+        for function_id in self.functions.keys():
+            function = self.functions[function_id]
+            if function.weight is not None:
+                if function.weight < node_thres:
+                    del self.functions[function_id]
+
+        # prune the edges
+        for function in self.functions.itervalues():
+            for callee_id in function.calls.keys():
+                call = function.calls[callee_id]
+                if callee_id not in self.functions or call.weight is not None and call.weight < edge_thres:
+                    del function.calls[callee_id]
+
+    def dump(self):
+        """Write every function, call and cycle with its events to stderr."""
+        for function in self.functions.itervalues():
+            sys.stderr.write('Function %s:\n' % (function.name,))
+            self._dump_events(function.events)
+            for call in function.calls.itervalues():
+                callee = self.functions[call.callee_id]
+                sys.stderr.write(' Call %s:\n' % (callee.name,))
+                self._dump_events(call.events)
+        for cycle in self.cycles:
+            sys.stderr.write('Cycle:\n')
+            self._dump_events(cycle.events)
+            for function in cycle.functions:
+                sys.stderr.write(' Function %s\n' % (function.name,))
+
+    def _dump_events(self, events):
+        for event, value in events.iteritems():
+            sys.stderr.write(' %s: %s\n' % (event.name, event.format(value)))
+
+
+class Struct:
+    """Expose the entries of a dictionary as instance attributes."""
+
+    def __init__(self, attrs = None):
+        backing = attrs
+        if backing is None:
+            backing = {}
+        # Go through __dict__ directly: the overridden __setattr__ would
+        # otherwise look up self._attrs before it exists.
+        self.__dict__['_attrs'] = backing
+
+    def __getattr__(self, name):
+        # Only reached when normal attribute lookup fails
+        try:
+            value = self._attrs[name]
+        except KeyError:
+            raise AttributeError(name)
+        return value
+
+    def __setattr__(self, name, value):
+        self._attrs[name] = value
+
+    def __repr__(self):
+        return repr(self._attrs)
+
+    def __str__(self):
+        return str(self._attrs)
+
+
+class ParseError(Exception):
+    """Raised by parsers when the input does not match what was expected."""
+
+    def __init__(self, msg, line):
+        # TODO: store more source line information
+        self.line = line
+        self.msg = msg
+
+    def __str__(self):
+        # The offending line is shown in repr() form
+        parts = (self.msg, self.line)
+        return '%s: %r' % parts
+
+
+class Parser:
+    """Parser interface.
+
+    Subclasses must override parse(); the base implementation raises
+    NotImplementedError.
+    """
+
+    def __init__(self):
+        pass
+
+    def parse(self):
+        raise NotImplementedError
+
+
+class LineParser(Parser):
+    """Base for parsers of line-oriented formats, with one line of lookahead.
+
+    readline() must be called once before lookahead(), consume() or eof().
+    """
+
+    def __init__(self, file):
+        Parser.__init__(self)
+        self._file = file
+        self.__eof = False
+        self.__line = None
+
+    def readline(self):
+        """Load the next line (without its line terminator) as the lookahead."""
+        raw = self._file.readline()
+        if not raw:
+            # An empty read means the end of the file was reached
+            self.__line = ''
+            self.__eof = True
+        self.__line = raw.rstrip('\r\n')
+
+    def lookahead(self):
+        """Return the current line without advancing."""
+        assert self.__line is not None
+        return self.__line
+
+    def consume(self):
+        """Return the current line and advance to the next one."""
+        assert self.__line is not None
+        current = self.__line
+        self.readline()
+        return current
+
+    def eof(self):
+        assert self.__line is not None
+        return self.__eof
+
+
+# Token types produced by XmlTokenizer (numbered 0..3)
+XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF = range(4)
+
+
+class XmlToken:
+    """A single XML token: element start, element end, character data or EOF.
+
+    `name_or_data` holds the element name for start/end tokens and the text
+    for character data.  `attrs` is only meaningful for element starts.
+    `line` and `column` give the position reported by the tokenizer.
+    """
+
+    def __init__(self, type, name_or_data, attrs = None, line = None, column = None):
+        assert type in (XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF)
+        self.type = type
+        self.name_or_data = name_or_data
+        self.attrs = attrs
+        self.line = line
+        self.column = column
+
+    def __str__(self):
+        """Return a short human-readable rendering, used in mismatch messages."""
+        if self.type == XML_ELEMENT_START:
+            return '<' + self.name_or_data + ' ...>'
+        if self.type == XML_ELEMENT_END:
+            # Closing tags are rendered with the leading '</'
+            return '</' + self.name_or_data + '>'
+        if self.type == XML_CHARACTER_DATA:
+            return self.name_or_data
+        if self.type == XML_EOF:
+            return 'end of file'
+        assert 0
+
+
+class XmlTokenizer:
+    """Pull-style XML tokenizer built on top of the expat parser.
+
+    Input is read from `fp` in chunks; the expat callbacks queue XmlToken
+    objects which next() hands out one at a time.
+    """
+
+    def __init__(self, fp, skip_ws = True):
+        self.fp = fp
+        self.skip_ws = skip_ws
+
+        # Tokens produced by the last parsed chunk and the read position in them
+        self.tokens = []
+        self.index = 0
+        self.final = False
+
+        # Character data is buffered until the next element boundary
+        self.character_pos = 0, 0
+        self.character_data = ''
+
+        expat = xml.parsers.expat.ParserCreate()
+        expat.StartElementHandler = self.handle_element_start
+        expat.EndElementHandler = self.handle_element_end
+        expat.CharacterDataHandler = self.handle_character_data
+        self.parser = expat
+
+    def handle_element_start(self, name, attributes):
+        self.finish_character_data()
+        line, column = self.pos()
+        self.tokens.append(XmlToken(XML_ELEMENT_START, name, attributes, line, column))
+
+    def handle_element_end(self, name):
+        self.finish_character_data()
+        line, column = self.pos()
+        self.tokens.append(XmlToken(XML_ELEMENT_END, name, None, line, column))
+
+    def handle_character_data(self, data):
+        # Remember where the run of text started
+        if not self.character_data:
+            self.character_pos = self.pos()
+        self.character_data += data
+
+    def finish_character_data(self):
+        """Emit the buffered text as one token (unless it is skippable whitespace)."""
+        if not self.character_data:
+            return
+        if not self.skip_ws or not self.character_data.isspace():
+            line, column = self.character_pos
+            self.tokens.append(XmlToken(XML_CHARACTER_DATA, self.character_data, None, line, column))
+        self.character_data = ''
+
+    def next(self):
+        """Return the next token, reading more input when the queue is empty.
+
+        An XML_EOF token is returned once the input is exhausted.
+        """
+        size = 16*1024
+        while self.index >= len(self.tokens) and not self.final:
+            self.tokens = []
+            self.index = 0
+            data = self.fp.read(size)
+            # A short read is taken to be the last chunk
+            self.final = len(data) < size
+            try:
+                self.parser.Parse(data, self.final)
+            except xml.parsers.expat.ExpatError, e:
+                #if e.code == xml.parsers.expat.errors.XML_ERROR_NO_ELEMENTS:
+                if e.code != 3:
+                    raise e
+        if self.index < len(self.tokens):
+            token = self.tokens[self.index]
+            self.index += 1
+        else:
+            line, column = self.pos()
+            token = XmlToken(XML_EOF, None, None, line, column)
+        return token
+
+    def pos(self):
+        """Return the parser's current (line, column)."""
+        return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber
+
+
+class XmlTokenMismatch(Exception):
+    """Raised when the token found differs from the one the parser expected."""
+
+    def __init__(self, expected, found):
+        self.found = found
+        self.expected = expected
+
+    def __str__(self):
+        # The reported position is that of the token actually found
+        where = (self.found.line, self.found.column)
+        what = (str(self.expected), str(self.found))
+        return '%u:%u: %s expected, %s found' % (where + what)
+
+
+class XmlParser(Parser):
+    """Base XML document parser, working on one token of lookahead (``self.token``)."""
+
+    def __init__(self, fp):
+        Parser.__init__(self)
+        self.tokenizer = XmlTokenizer(fp)
+        # Prime the lookahead token
+        self.consume()
+
+    def consume(self):
+        self.token = self.tokenizer.next()
+
+    def match_element_start(self, name):
+        current = self.token
+        return current.type == XML_ELEMENT_START and current.name_or_data == name
+
+    def match_element_end(self, name):
+        current = self.token
+        return current.type == XML_ELEMENT_END and current.name_or_data == name
+
+    def element_start(self, name):
+        """Consume the start tag `name` (skipping text before it); return its attributes.
+
+        Raises XmlTokenMismatch if the next non-text token is anything else.
+        """
+        while self.token.type == XML_CHARACTER_DATA:
+            self.consume()
+        if self.token.type != XML_ELEMENT_START or self.token.name_or_data != name:
+            raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token)
+        attrs = self.token.attrs
+        self.consume()
+        return attrs
+
+    def element_end(self, name):
+        """Consume the end tag `name` (skipping text before it).
+
+        Raises XmlTokenMismatch if the next non-text token is anything else.
+        """
+        while self.token.type == XML_CHARACTER_DATA:
+            self.consume()
+        if self.token.type != XML_ELEMENT_END or self.token.name_or_data != name:
+            raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token)
+        self.consume()
+
+    def character_data(self, strip = True):
+        """Consume consecutive text tokens and return them joined (stripped by default)."""
+        text = ''
+        while self.token.type == XML_CHARACTER_DATA:
+            text += self.token.name_or_data
+            self.consume()
+        if not strip:
+            return text
+        return text.strip()
+
+
+class GprofParser(Parser):
+    """Parser for GNU gprof output.
+
+    parse() reads the call graph section from `fp`, closes the file, and
+    returns a Profile with TIME, CALLS and the derived ratio/total events.
+
+    See also:
+    - Chapter "Interpreting gprof's Output" from the GNU gprof manual
+      http://sourceware.org/binutils/docs-2.18/gprof/Call-Graph.html#Call-Graph
+    - File "cg_print.c" from the GNU gprof source code
+      http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/src/gprof/cg_print.c?rev=1.12&cvsroot=src
+    """
+
+    def __init__(self, fp):
+        Parser.__init__(self)
+        self.fp = fp
+        # Parsed call graph entries, keyed by gprof index / cycle number
+        self.functions = {}
+        self.cycles = {}
+
+    def readline(self):
+        """Return the next line without its terminator; exit the program on EOF."""
+        line = self.fp.readline()
+        if not line:
+            sys.stderr.write('error: unexpected end of file\n')
+            sys.exit(1)
+        line = line.rstrip('\r\n')
+        return line
+
+    _int_re = re.compile(r'^\d+$')
+    _float_re = re.compile(r'^\d+\.\d+$')
+
+    def translate(self, mo):
+        """Extract a structure from a match object, while translating the types in the process.
+
+        Named groups become attributes of the returned Struct; all-digit
+        values are converted to int, decimal values to float, and unmatched
+        groups stay None.
+        """
+        attrs = {}
+        for name, value in mo.groupdict().iteritems():
+            if value is not None:
+                if self._int_re.match(value):
+                    value = int(value)
+                elif self._float_re.match(value):
+                    value = float(value)
+            attrs[name] = value
+        return Struct(attrs)
+
+    _cg_header_re = re.compile(
+        # original gprof header
+        r'^\s+called/total\s+parents\s*$|' +
+        r'^index\s+%time\s+self\s+descendents\s+called\+self\s+name\s+index\s*$|' +
+        r'^\s+called/total\s+children\s*$|' +
+        # GNU gprof header
+        r'^index\s+%\s+time\s+self\s+children\s+called\s+name\s*$'
+    )
+
+    _cg_ignore_re = re.compile(
+        # spontaneous
+        r'^\s+<spontaneous>\s*$|'
+        # internal calls (such as "mcount")
+        r'^.*\((\d+)\)$'
+    )
+
+    # NOTE(review): the group names below were restored after being stripped
+    # from the source.  index, self, called, called_self, name and cycle are
+    # the attributes read by parse(); percentage_time, descendants and
+    # called_total are not read in this class -- confirm against upstream.
+    _cg_primary_re = re.compile(
+        r'^\[(?P<index>\d+)\]?' +
+        r'\s+(?P<percentage_time>\d+\.\d+)' +
+        r'\s+(?P<self>\d+\.\d+)' +
+        r'\s+(?P<descendants>\d+\.\d+)' +
+        r'\s+(?:(?P<called>\d+)(?:\+(?P<called_self>\d+))?)?' +
+        r'\s+(?P<name>\S.*?)' +
+        r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' +
+        r'\s\[(\d+)\]$'
+    )
+
+    _cg_parent_re = re.compile(
+        r'^\s+(?P<self>\d+\.\d+)?' +
+        r'\s+(?P<descendants>\d+\.\d+)?' +
+        r'\s+(?P<called>\d+)(?:/(?P<called_total>\d+))?' +
+        r'\s+(?P<name>\S.*?)' +
+        r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' +
+        r'\s\[(?P<index>\d+)\]$'
+    )
+
+    _cg_child_re = _cg_parent_re
+
+    _cg_cycle_header_re = re.compile(
+        r'^\[(?P<index>\d+)\]?' +
+        r'\s+(?P<percentage_time>\d+\.\d+)' +
+        r'\s+(?P<self>\d+\.\d+)' +
+        r'\s+(?P<descendants>\d+\.\d+)' +
+        r'\s+(?:(?P<called>\d+)(?:\+(?P<called_self>\d+))?)?' +
+        r'\s+<cycle\s(?P<cycle>\d+)\sas\sa\swhole>' +
+        r'\s\[(\d+)\]$'
+    )
+
+    _cg_cycle_member_re = re.compile(
+        r'^\s+(?P<self>\d+\.\d+)?' +
+        r'\s+(?P<descendants>\d+\.\d+)?' +
+        r'\s+(?P<called>\d+)(?:\+(?P<called_self>\d+))?' +
+        r'\s+(?P<name>\S.*?)' +
+        r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' +
+        r'\s\[(?P<index>\d+)\]$'
+    )
+
+    _cg_sep_re = re.compile(r'^--+$')
+
+    def parse_function_entry(self, lines):
+        """Parse one function entry: parent lines, the primary line, child lines.
+
+        `lines` is consumed.  The result is stored in self.functions under
+        the function's index.  Unrecognized lines produce a warning.
+        """
+        parents = []
+        children = []
+
+        while True:
+            if not lines:
+                # No primary line was found; there is nothing to record
+                sys.stderr.write('warning: unexpected end of entry\n')
+                return
+            line = lines.pop(0)
+            if line.startswith('['):
+                break
+
+            # read function parent line
+            mo = self._cg_parent_re.match(line)
+            if not mo:
+                if self._cg_ignore_re.match(line):
+                    continue
+                sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
+            else:
+                parent = self.translate(mo)
+                parents.append(parent)
+
+        # read primary line
+        mo = self._cg_primary_re.match(line)
+        if not mo:
+            sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
+            return
+        else:
+            function = self.translate(mo)
+
+        while lines:
+            line = lines.pop(0)
+
+            # read function subroutine line
+            mo = self._cg_child_re.match(line)
+            if not mo:
+                if self._cg_ignore_re.match(line):
+                    continue
+                sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
+            else:
+                child = self.translate(mo)
+                children.append(child)
+
+        function.parents = parents
+        function.children = children
+
+        self.functions[function.index] = function
+
+    def parse_cycle_entry(self, lines):
+        """Parse a "<cycle N as a whole>" entry and store it in self.cycles."""
+
+        # read cycle header line
+        line = lines[0]
+        mo = self._cg_cycle_header_re.match(line)
+        if not mo:
+            sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
+            return
+        cycle = self.translate(mo)
+
+        # read cycle member lines
+        cycle.functions = []
+        for line in lines[1:]:
+            mo = self._cg_cycle_member_re.match(line)
+            if not mo:
+                sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
+                continue
+            call = self.translate(mo)
+            cycle.functions.append(call)
+
+        self.cycles[cycle.cycle] = cycle
+
+    def parse_cg_entry(self, lines):
+        """Dispatch one entry: those starting with '[' are cycle entries."""
+        if not lines:
+            # Two separators in a row: nothing to parse
+            return
+        if lines[0].startswith("["):
+            self.parse_cycle_entry(lines)
+        else:
+            self.parse_function_entry(lines)
+
+    def parse_cg(self):
+        """Parse the call graph."""
+
+        # skip call graph header
+        while not self._cg_header_re.match(self.readline()):
+            pass
+        line = self.readline()
+        while self._cg_header_re.match(line):
+            line = self.readline()
+
+        # process call graph entries
+        entry_lines = []
+        while line != '\014': # form feed
+            if line and not line.isspace():
+                if self._cg_sep_re.match(line):
+                    self.parse_cg_entry(entry_lines)
+                    entry_lines = []
+                else:
+                    entry_lines.append(line)
+            line = self.readline()
+
+    def parse(self):
+        """Parse the input, close it, and return the resulting Profile."""
+        self.parse_cg()
+        self.fp.close()
+
+        profile = Profile()
+        profile[TIME] = 0.0
+
+        cycles = {}
+        for index in self.cycles.iterkeys():
+            cycles[index] = Cycle()
+
+        for entry in self.functions.itervalues():
+            # populate the function
+            function = Function(entry.index, entry.name)
+            function[TIME] = entry.self
+            if entry.called is not None:
+                function.called = entry.called
+            if entry.called_self is not None:
+                call = Call(entry.index)
+                call[CALLS] = entry.called_self
+                function.called += entry.called_self
+
+            # populate the function calls
+            for child in entry.children:
+                call = Call(child.index)
+
+                assert child.called is not None
+                call[CALLS] = child.called
+
+                if child.index not in self.functions and child.index not in profile.functions:
+                    # NOTE: functions that were never called but were discovered by gprof's
+                    # static call graph analysis dont have a call graph entry so we need
+                    # to add them here
+                    missing = Function(child.index, child.name)
+                    # Initialise the placeholder itself, not the caller
+                    missing[TIME] = 0.0
+                    missing.called = 0
+                    profile.add_function(missing)
+
+                function.add_call(call)
+
+            profile.add_function(function)
+
+            if entry.cycle is not None:
+                try:
+                    cycle = cycles[entry.cycle]
+                except KeyError:
+                    sys.stderr.write('warning: <cycle %u as a whole> entry missing\n' % entry.cycle)
+                    cycle = Cycle()
+                    cycles[entry.cycle] = cycle
+                cycle.add_function(function)
+
+            profile[TIME] = profile[TIME] + function[TIME]
+
+        for cycle in cycles.itervalues():
+            profile.add_cycle(cycle)
+
+        # Compute derived events
+        profile.validate()
+        profile.ratio(TIME_RATIO, TIME)
+        profile.call_ratios(CALLS)
+        profile.integrate(TOTAL_TIME, TIME)
+        profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME)
+
+        return profile
+
+
+class CallgrindParser(LineParser):
+    """Parser for valgrind's callgrind tool.
+
+    parse() returns a Profile in which the first cost column of every cost
+    line is accumulated as SAMPLES and ``calls=`` lines populate CALLS.
+
+    See also:
+    - http://valgrind.org/docs/manual/cl-format.html
+    """
+
+    _call_re = re.compile(r'^calls=\s*(\d+)\s+((\d+|\+\d+|-\d+|\*)\s+)+$')
+
+    def __init__(self, infile):
+        LineParser.__init__(self, infile)
+
+        # Textual positions
+        self.position_ids = {}
+        self.positions = {}
+
+        # Numeric positions
+        self.num_positions = 1
+        self.cost_positions = ['line']
+        self.last_positions = [0]
+
+        # Events
+        self.num_events = 0
+        self.cost_events = []
+
+        self.profile = Profile()
+        self.profile[SAMPLES] = 0
+
+    def parse(self):
+        """Parse the whole input and return the populated Profile."""
+        # read lookahead
+        self.readline()
+
+        self.parse_key('version')
+        self.parse_key('creator')
+        self.parse_part()
+
+        # compute derived data
+        self.profile.validate()
+        self.profile.find_cycles()
+        self.profile.ratio(TIME_RATIO, SAMPLES)
+        self.profile.call_ratios(CALLS)
+        self.profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
+
+        return self.profile
+
+    def parse_part(self):
+        """Consume header lines, then body lines, for as long as they match."""
+        while self.parse_header_line():
+            pass
+        while self.parse_body_line():
+            pass
+        return True
+
+    def parse_header_line(self):
+        return \
+            self.parse_empty() or \
+            self.parse_comment() or \
+            self.parse_part_detail() or \
+            self.parse_description() or \
+            self.parse_event_specification() or \
+            self.parse_cost_line_def() or \
+            self.parse_cost_summary()
+
+    _detail_keys = set(('cmd', 'pid', 'thread', 'part'))
+
+    def parse_part_detail(self):
+        return self.parse_keys(self._detail_keys)
+
+    def parse_description(self):
+        return self.parse_key('desc') is not None
+
+    def parse_event_specification(self):
+        event = self.parse_key('event')
+        if event is None:
+            return False
+        return True
+
+    def parse_cost_line_def(self):
+        """Handle "events:" / "positions:" lines that define the cost line columns."""
+        pair = self.parse_keys(('events', 'positions'))
+        if pair is None:
+            return False
+        key, value = pair
+        items = value.split()
+        if key == 'events':
+            self.num_events = len(items)
+            self.cost_events = items
+        if key == 'positions':
+            self.num_positions = len(items)
+            self.cost_positions = items
+            self.last_positions = [0]*self.num_positions
+        return True
+
+    def parse_cost_summary(self):
+        pair = self.parse_keys(('summary', 'totals'))
+        if pair is None:
+            return False
+        return True
+
+    def parse_body_line(self):
+        return \
+            self.parse_empty() or \
+            self.parse_comment() or \
+            self.parse_cost_line() or \
+            self.parse_position_spec() or \
+            self.parse_association_spec()
+
+    _cost_re = re.compile(r'^(\d+|\+\d+|-\d+|\*)( \d+)+$')
+
+    def parse_cost_line(self, calls=None):
+        """Parse a cost line; return False (consuming nothing) if it is not one.
+
+        With `calls` unset the first cost is added to the current function
+        and to the profile.  With `calls` set (the line follows a ``calls=``
+        line) the cost and call count are added to the call from the current
+        function to the current callee.
+        """
+        line = self.lookahead()
+        mo = self._cost_re.match(line)
+        if not mo:
+            return False
+
+        function = self.get_function()
+
+        values = line.split(' ')
+        assert len(values) == self.num_positions + self.num_events
+
+        positions = values[0 : self.num_positions]
+        events = values[self.num_positions : ]
+
+        # Resolve relative ("+n", "-n") and repeated ("*") positions
+        for i in range(self.num_positions):
+            position = positions[i]
+            if position == '*':
+                position = self.last_positions[i]
+            elif position[0] in '-+':
+                position = self.last_positions[i] + int(position)
+            else:
+                position = int(position)
+            self.last_positions[i] = position
+
+        events = [float(value) for value in events]
+
+        if calls is None:
+            function[SAMPLES] += events[0]
+            self.profile[SAMPLES] += events[0]
+        else:
+            callee = self.get_callee()
+            callee.called += calls
+
+            try:
+                call = function.calls[callee.id]
+            except KeyError:
+                call = Call(callee.id)
+                call[CALLS] = calls
+                call[SAMPLES] = events[0]
+                function.add_call(call)
+            else:
+                call[CALLS] += calls
+                call[SAMPLES] += events[0]
+
+        self.consume()
+        return True
+
+    def parse_association_spec(self):
+        """Parse a ``calls=`` line together with the cost line that follows it."""
+        line = self.lookahead()
+        if not line.startswith('calls='):
+            return False
+
+        _, values = line.split('=', 1)
+        values = values.strip().split()
+        # Only the call count is used; the target position is ignored
+        calls = int(values[0])
+        self.consume()
+
+        self.parse_cost_line(calls)
+
+        return True
+
+    # Group names restored after being stripped from the source; they follow
+    # the (position, id, name) unpacking in parse_position_spec().
+    _position_re = re.compile(r'^(?P<position>c?(?:ob|fl|fi|fe|fn))=\s*(?:\((?P<id>\d+)\))?(?:\s*(?P<name>.+))?')
+
+    # Name compression table shared by a position kind and its "c" variant
+    _position_table_map = {
+        'ob': 'ob',
+        'fl': 'fl',
+        'fi': 'fl',
+        'fe': 'fl',
+        'fn': 'fn',
+        'cob': 'ob',
+        'cfl': 'fl',
+        'cfi': 'fl',
+        'cfe': 'fl',
+        'cfn': 'fn',
+    }
+
+    # Key under which the current value of each position kind is stored
+    _position_map = {
+        'ob': 'ob',
+        'fl': 'fl',
+        'fi': 'fl',
+        'fe': 'fl',
+        'fn': 'fn',
+        'cob': 'cob',
+        'cfl': 'cfl',
+        'cfi': 'cfl',
+        'cfe': 'cfl',
+        'cfn': 'cfn',
+    }
+
+    def parse_position_spec(self):
+        """Parse a position line (ob=, fl=, fn=, cfn=, ...) and record the current name.
+
+        A "(id) name" form defines the compressed id; a bare "(id)" looks the
+        name up again.
+        """
+        line = self.lookahead()
+        mo = self._position_re.match(line)
+        if not mo:
+            return False
+
+        position, id, name = mo.groups()
+        if id:
+            table = self._position_table_map[position]
+            if name:
+                self.position_ids[(table, id)] = name
+            else:
+                name = self.position_ids.get((table, id), '')
+        self.positions[self._position_map[position]] = name
+        self.consume()
+        return True
+
+    def parse_empty(self):
+        line = self.lookahead()
+        if line.strip():
+            return False
+        self.consume()
+        return True
+
+    def parse_comment(self):
+        line = self.lookahead()
+        if not line.startswith('#'):
+            return False
+        self.consume()
+        return True
+
+    _key_re = re.compile(r'^(\w+):')
+
+    def parse_key(self, key):
+        """Consume a "key: value" line for `key` and return the value, else None."""
+        pair = self.parse_keys((key,))
+        if not pair:
+            return None
+        key, value = pair
+        return value
+
+    def parse_keys(self, keys):
+        """Consume a "key: value" line whose key is in `keys`.
+
+        Returns the (key, value) pair, or None (consuming nothing) when the
+        current line does not match.
+        """
+        line = self.lookahead()
+        mo = self._key_re.match(line)
+        if not mo:
+            return None
+        key, value = line.split(':', 1)
+        if key not in keys:
+            return None
+        value = value.strip()
+        self.consume()
+        return key, value
+
+    def make_function(self, module, filename, name):
+        """Return the Function named `name`, creating and registering it if needed."""
+        # FIXME: module and filename are not being tracked reliably
+        #id = '|'.join((module, filename, name))
+        id = name
+        try:
+            function = self.profile.functions[id]
+        except KeyError:
+            function = Function(id, name)
+            function[SAMPLES] = 0
+            function.called = 0
+            self.profile.add_function(function)
+        return function
+
+    def get_function(self):
+        """Return the Function for the current ob/fl/fn positions."""
+        module = self.positions.get('ob', '')
+        filename = self.positions.get('fl', '')
+        function = self.positions.get('fn', '')
+        return self.make_function(module, filename, function)
+
+    def get_callee(self):
+        """Return the Function for the current cob/cfl/cfn positions."""
+        module = self.positions.get('cob', '')
+        # _position_map stores cfi/cfe/cfl values under the 'cfl' key
+        filename = self.positions.get('cfl', '')
+        function = self.positions.get('cfn', '')
+        return self.make_function(module, filename, function)
+
+
+class OprofileParser(LineParser):
+ """Parser for oprofile callgraph output.
+
+ See also:
+ - http://oprofile.sourceforge.net/doc/opreport.html#opreport-callgraph
+ """
+
+ _fields_re = {
+ 'samples': r'(\d+)',
+ '%': r'(\S+)',
+ 'linenr info': r'(?P