[pypy] * Wed Jan 5 2011 David Malcolm <dmalcolm at redhat.com> - 1.4.1-4 - rebuild pypy using itself, for speed

Thu Jan 6 22:12:22 UTC 2011

commit 00e57e74f81587a2c49341341a7a291a54e22dc8
Author: David Malcolm <dmalcolm at redhat.com>
Date:   Thu Jan 6 17:09:03 2011 -0500

    * Wed Jan  5 2011 David Malcolm <dmalcolm at redhat.com> - 1.4.1-4
    - rebuild pypy using itself, for speed, with a boolean to break this cycle in
    the build-requirement graph (falling back to using "python-devel" aka CPython)
    - add work-in-progress patch to try to make generated c more readable
    (rhbz#666963)
    - capture the RPython source code files from the build within the debuginfo
    package (rhbz#666975)

 pypy-1.4.1-more-readable-c-code.patch |  695 +++++++++++++++++++++++++++++++++
 pypy.spec                             |   81 ++++-
 2 files changed, 773 insertions(+), 3 deletions(-)
---

diff --git a/pypy-1.4.1-more-readable-c-code.patch b/pypy-1.4.1-more-readable-c-code.patch
new file mode 100644
index 0000000..45fa534
--- /dev/null
+++ b/pypy-1.4.1-more-readable-c-code.patch
@@ -0,0 +1,695 @@
+diff -r cd083843b67a pypy/interpreter/pycode.py
+--- a/pypy/interpreter/pycode.py	Mon Dec 20 17:17:45 2010 +0100
++++ b/pypy/interpreter/pycode.py	Wed Jan 05 16:14:35 2011 -0500
+@@ -14,6 +14,7 @@
+ from pypy.interpreter.astcompiler.consts import (CO_OPTIMIZED,
+     CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS, CO_NESTED,
+     CO_GENERATOR, CO_CONTAINSGLOBALS)
++from pypy.interpreter.pytraceback import offset2lineno
+ from pypy.rlib.rarithmetic import intmask
+ from pypy.rlib.debug import make_sure_not_resized
+ from pypy.rlib import jit
+@@ -81,6 +82,7 @@
+         self.hidden_applevel = hidden_applevel
+         self.magic = magic
+         self._signature = cpython_code_signature(self)
++        self._cached_source = None
+         self._initialize()
+ 
+     def _initialize(self):
+@@ -403,3 +405,25 @@
+     def repr(self, space):
+         return space.wrap(self.get_repr())
+     repr.unwrap_spec = ['self', ObjSpace]
++
++    def get_linenum_for_offset(self, offset):
++        # Given a bytecode offset, return a 1-based index into the lines of the
++        # source code
++        return offset2lineno(self, offset)
++
++    def _ensure_source(self):
++        # Lazily grab the source lines into self._cached_source (or raise
++        # an IOError)
++        if not self._cached_source:
++            f = open(self.co_filename, 'r')
++            source = [line.rstrip() for line in f.readlines()]
++            f.close()
++            self._cached_source = source
++    
++    def get_source_text(self, linenum):
++        # Given a 1-based index, get the corresponding line of source code (or
++        # raise an IOError)
++        self._ensure_source()
++        return self._cached_source[linenum - 1]
++
++        
+diff -r cd083843b67a pypy/objspace/flow/model.py
+--- a/pypy/objspace/flow/model.py	Mon Dec 20 17:17:45 2010 +0100
++++ b/pypy/objspace/flow/model.py	Wed Jan 05 16:14:35 2011 -0500
+@@ -31,6 +31,120 @@
+ 
+ __metaclass__ = type
+ 
++class SourceLoc(object):
++    # A srcloc is a specific location within the RPython source code,
++    # intended for human display
++    __slots__ = ('code', # code object
++                 'linenum' # 1-based index, as displayed to a user
++                 )
++    def __init__(self, code, linenum):
++        self.code = code
++        self.linenum = linenum
++
++    def get_text(self):
++        # Get the actual source text of this line
++        return self.code.get_source_text(self.linenum)
++
++    def __eq__(self, other):
++        return self.code == other.code and self.linenum == other.linenum
++
++    def __ne__(self, other):
++        if other:
++            return self.code != other.code or self.linenum != other.linenum
++        else:
++            return True
++
++class CodeLoc(object):
++    # A codeloc is a specific location within the RPython bytecode
++    __slots__ = ('code', # code object
++                 'offset' # int index into bytecode, or -1
++                 )
++
++    def __init__(self, code, offset):
++        self.code = code
++        self.offset = offset
++
++    def __str__(self):
++        if self.offset >= 0:
++            return "%s@%d" % (self.code.co_name, self.offset)
++        else:
++            return ""
++
++    def __ne__(self, other):
++        if other:
++            return self.code != other.code or self.offset != other.offset
++        else:
++            return True
++
++    def __cmp__(self, other):
++        # Partial ordering, for those locations that have an offset:
++        if other:
++            if self.offset >= 0 and other.offset >= 0:
++                return self.offset - other.offset
++        return 0
++
++    def get_source_loc(self):
++        # Convert to a SourceLoc:
++        return SourceLoc(self.code, self.code.get_linenum_for_offset(self.offset))
++
++class OperationLoc(object):
++    # An oploc is the location within the RPython source code of a given
++    # operation
++    # 
++    # This is a list consisting of CodeLoc instances, some of which may be None
++    #
++    # For the simple case, this is a list of length 1 with a single CodeLoc
++    #
++    # For an operation inside an inlined callsite, we have a list of length 2:
++    #    [codeloc of callsite,
++    #     codeloc of operation within inlined body]
++    #
++    # For more interesting inlined cases, we have a chain of source locations:
++    #    [codeloc of callsite,
++    #     codeloc of inner callsite,
++    #     ... ,
++    #     codeloc of innermost inlined callsite,
++    #     codeloc of operation within inlined body]
++    #
++
++    __slots__ = ('codelocs', )
++
++    def __init__(self, codelocs):
++        self.codelocs = codelocs
++
++    def __str__(self):
++        return '[' + ' > '.join(str(codeloc) for codeloc in self.codelocs) + ']'
++
++    def __cmp__(self, other):
++        return cmp(self.codelocs, other.codelocs)
++
++def block_comparator(blk0, blk1):
++    '''
++    Sort function for blocks, putting them in an ordering that attempts to
++    maximize readability of the generated C code
++    '''
++    # print 'comparing %r and %r' % (blk0, blk1)
++    # Put the start/end block at the top/bottom:
++    if blk0.isstartblock:
++        return -1
++
++    if blk1.isstartblock:
++        return 1
++
++    # Order blocks by the offset, where present:
++    if blk0.operations:
++        if blk1.operations:
++            return cmp(blk0.operations[0].oploc, blk1.operations[0].oploc)
++        else:
++            return -1
++    else:
++        if blk1.operations:
++            return 1
++        else:
++            return 0
++
++def edge_comparator(edge0, edge1):
++    return block_comparator(edge0.target, edge1.target)
+ 
+ class FunctionGraph(object):
+     __slots__ = ['startblock', 'returnblock', 'exceptblock', '__dict__']
+@@ -94,6 +208,21 @@
+                 seen[block] = True
+                 stack += block.exits[::-1]
+ 
++    def iterblocks_by_source(self):
++        # Try to preserve logical source ordering in the blocks
++        block = self.startblock
++        yield block
++        seen = {block: True}
++        stack = list(block.exits[::-1])
++        stack.sort(edge_comparator)
++        while stack:
++            block = stack.pop().target
++            if block not in seen:
++                yield block
++                seen[block] = True
++                stack += block.exits[::-1]
++                stack.sort(edge_comparator)
++
+     def iterlinks(self):
+         block = self.startblock
+         seen = {block: True}
+@@ -183,14 +312,14 @@
+         self.exits      = []              # list of Link(s)
+ 
+     def at(self):
+-        if self.operations and self.operations[0].offset >= 0:
+-            return "@%d" % self.operations[0].offset
++        if self.operations:
++            return str(self.operations[0].oploc)
+         else:
+             return ""
+ 
+     def __str__(self):
+         if self.operations:
+-            txt = "block@%d" % self.operations[0].offset
++            txt = "block%s" % self.operations[0].oploc
+         else:
+             if (not self.exits) and len(self.inputargs) == 1:
+                 txt = "return block"
+@@ -245,6 +374,21 @@
+         from pypy.translator.tool.graphpage import try_show
+         try_show(self)
+ 
++    def isreturnblock(self):
++        return (not self.operations) and (not self.exits) and len(self.inputargs) == 1
++
++    def get_base_label(self, blocknum):
++        # Generate a more friendly C label for this block
++        if self.operations:
++            txt = "block"
++        elif (not self.exits) and len(self.inputargs) == 1:
++            txt = "return_block"
++        elif (not self.exits) and len(self.inputargs) == 2:
++            txt = "raise_block"
++        else:
++            txt = "codeless_block"
++        return '%s%d' % (txt, blocknum)
++
+ 
+ class Variable(object):
+     __slots__ = ["_name", "_nr", "concretetype"]
+@@ -331,13 +475,15 @@
+ 
+ 
+ class SpaceOperation(object):
+-    __slots__ = "opname args result offset".split()
++    __slots__ = "opname args result oploc".split()
+ 
+-    def __init__(self, opname, args, result, offset=-1):
++    def __init__(self, opname, args, result, oploc=None):
+         self.opname = intern(opname)      # operation name
+         self.args   = list(args)  # mixed list of var/const
+         self.result = result      # either Variable or Constant instance
+-        self.offset = offset      # offset in code string
++        if oploc is None:
++            oploc = OperationLoc([None])
++        self.oploc = oploc
+ 
+     def __eq__(self, other):
+         return (self.__class__ is other.__class__ and 
+@@ -352,8 +498,9 @@
+         return hash((self.opname,tuple(self.args),self.result))
+ 
+     def __repr__(self):
+-        return "%r = %s(%s)" % (self.result, self.opname,
+-                                ", ".join(map(repr, self.args)))
++        return "%r = %s(%s) (%s)" % (self.result, self.opname,
++                                     ", ".join(map(repr, self.args)),
++                                     self.oploc)
+ 
+ class Atom(object):
+     def __init__(self, name):
+@@ -448,8 +595,7 @@
+                 for op in oplist:
+                     copyop = SpaceOperation(op.opname,
+                                             [copyvar(v) for v in op.args],
+-                                            copyvar(op.result), op.offset)
+-                    #copyop.offset = op.offset
++                                            copyvar(op.result), op.oploc)
+                     result.append(copyop)
+                 return result
+             newblock.operations = copyoplist(block.operations)
+diff -r cd083843b67a pypy/objspace/flow/objspace.py
+--- a/pypy/objspace/flow/objspace.py	Mon Dec 20 17:17:45 2010 +0100
++++ b/pypy/objspace/flow/objspace.py	Wed Jan 05 16:14:35 2011 -0500
+@@ -310,7 +310,9 @@
+     def do_operation(self, name, *args_w):
+         spaceop = SpaceOperation(name, args_w, Variable())
+         if hasattr(self, 'executioncontext'):  # not here during bootstrapping
+-            spaceop.offset = self.executioncontext.crnt_offset
++            codeloc = CodeLoc(self.executioncontext.code,
++                              self.executioncontext.crnt_offset)
++            spaceop.oploc = OperationLoc([codeloc])
+             self.executioncontext.recorder.append(spaceop)
+         return spaceop.result
+ 
+diff -r cd083843b67a pypy/objspace/flow/test/test_model.py
+--- a/pypy/objspace/flow/test/test_model.py	Mon Dec 20 17:17:45 2010 +0100
++++ b/pypy/objspace/flow/test/test_model.py	Wed Jan 05 16:14:35 2011 -0500
+@@ -132,3 +132,25 @@
+     assert v2.renamed
+     assert v2.name.startswith("foobar_") and v2.name != v.name
+     assert v2.name.split('_', 1)[1].isdigit()
++
++def test_source_locations():
++    # Invent some random offsets into the code:
++    co = sample_function.__code__
++    codelocA = CodeLoc(co, 42)
++    codelocB = CodeLoc(co, 87)
++
++    assert str(codelocA) == 'sample_function@42'
++    assert str(codelocB) == 'sample_function@87'
++
++    assert cmp(codelocA, codelocB) < 0
++    assert cmp(codelocB, codelocA) > 0
++    
++    oplocA = OperationLoc([codelocA])
++    oplocB = OperationLoc([codelocB])
++
++    assert str(oplocA) == '[sample_function@42]'
++    assert str(oplocB) == '[sample_function@87]'
++
++    assert cmp(oplocA, oplocB) < 0
++    assert cmp(oplocB, oplocA) > 0
++
+diff -r cd083843b67a pypy/rpython/rtyper.py
+--- a/pypy/rpython/rtyper.py	Mon Dec 20 17:17:45 2010 +0100
++++ b/pypy/rpython/rtyper.py	Wed Jan 05 16:14:35 2011 -0500
+@@ -800,7 +800,7 @@
+         return vars
+ 
+     def genop(self, opname, args_v, resulttype=None):
+-        return self.llops.genop(opname, args_v, resulttype)
++        return self.llops.genop(opname, args_v, resulttype, self.spaceop.oploc)
+ 
+     def gendirectcall(self, ll_function, *args_v):
+         return self.llops.gendirectcall(ll_function, *args_v)
+@@ -935,7 +935,7 @@
+                                                     v.concretetype))
+         return v
+ 
+-    def genop(self, opname, args_v, resulttype=None):
++    def genop(self, opname, args_v, resulttype=None, oploc=None):
+         try:
+             for v in args_v:
+                 v.concretetype
+@@ -944,7 +944,7 @@
+                                  " and pass its result to genop(),"
+                                  " never hop.args_v directly.")
+         vresult = Variable()
+-        self.append(SpaceOperation(opname, args_v, vresult))
++        self.append(SpaceOperation(opname, args_v, vresult, oploc))
+         if resulttype is None:
+             vresult.concretetype = Void
+             return None
+diff -r cd083843b67a pypy/translator/backendopt/inline.py
+--- a/pypy/translator/backendopt/inline.py	Mon Dec 20 17:17:45 2010 +0100
++++ b/pypy/translator/backendopt/inline.py	Wed Jan 05 16:14:35 2011 -0500
+@@ -4,6 +4,7 @@
+ from pypy.translator.unsimplify import copyvar
+ from pypy.objspace.flow.model import Variable, Constant, Block, Link
+ from pypy.objspace.flow.model import SpaceOperation, c_last_exception
++from pypy.objspace.flow.model import OperationLoc
+ from pypy.objspace.flow.model import FunctionGraph
+ from pypy.objspace.flow.model import traverse, mkentrymap, checkgraph
+ from pypy.annotation import model as annmodel
+@@ -231,6 +232,7 @@
+         self.varmap = {}
+         self._copied_blocks = {}
+         self.op = block.operations[index_operation]
++        self.callsite_oploc = self.op.oploc
+         self.graph_to_inline = self.get_graph_from_op(self.op)
+         self.exception_guarded = False
+         if (block.exitswitch == c_last_exception and
+@@ -297,7 +299,9 @@
+         
+     def copy_operation(self, op):
+         args = [self.get_new_name(arg) for arg in op.args]
+-        result = SpaceOperation(op.opname, args, self.get_new_name(op.result))
++        new_oploc = OperationLoc(self.callsite_oploc.codelocs[:] + op.oploc.codelocs[:])
++        result = SpaceOperation(op.opname, args, self.get_new_name(op.result), 
++                                new_oploc)
+         return result
+ 
+     def copy_block(self, block):
+diff -r cd083843b67a pypy/translator/c/funcgen.py
+--- a/pypy/translator/c/funcgen.py	Mon Dec 20 17:17:45 2010 +0100
++++ b/pypy/translator/c/funcgen.py	Wed Jan 05 16:14:35 2011 -0500
+@@ -1,4 +1,6 @@
+ import sys
++import inspect
++import dis
+ from pypy.translator.c.support import USESLOTS # set to False if necessary while refactoring
+ from pypy.translator.c.support import cdecl
+ from pypy.translator.c.support import llvalue_from_constant, gen_assignments
+@@ -22,6 +24,38 @@
+ 
+ KEEP_INLINED_GRAPHS = False
+ 
++def block_comparator(blk0, blk1):
++    '''
++    Sort function for blocks, putting them in an ordering that attempts to
++    maximize readability of the generated C code
++    '''
++    # print 'comparing %r and %r' % (blk0, blk1)
++    # Put the start/end block at the top/bottom:
++    if blk0.isstartblock:
++        return -1
++
++    if blk1.isstartblock:
++        return 1
++
++    # Order blocks by the offset, where present:
++    if blk0.operations:
++        if blk1.operations:
++            return cmp(blk0.operations[0].oploc, blk1.operations[0].oploc)
++        else:
++            return -1
++    else:
++        if blk1.operations:
++            return 1
++        else:
++            return 0
++
++def escape_c_comments(py_src):
++    # Escape C comments within RPython source, to avoid generating bogus
++    # comments in our generated C source:
++    py_src = py_src.replace('/*', '')
++    py_src = py_src.replace('*/', '')
++    return py_src
++
+ class FunctionCodeGenerator(object):
+     """
+     Collects information about a function which we have to generate
+@@ -210,14 +244,57 @@
+ 
+     def cfunction_body(self):
+         graph = self.graph
+-        yield 'goto block0;'    # to avoid a warning "this label is not used"
++        # Try to print python source code:
++        if hasattr(graph, 'func'):
++            filename = inspect.getfile(graph.func)
++            #yield '/* name: %r */' % filename
++            try:
++                src, startline = inspect.getsourcelines(graph.func)
++            except IOError:
++                pass # No source found
++            except IndexError:
++                pass # Bulletproofing
++            else:
++                yield '/* Python source %r' % filename
++                for i, line in enumerate(src):
++                    line = line.rstrip()
++                    line = escape_c_comments(line)
++                    # FuncNode.funcgen_implementation treats lines ending in ':'
++                    # as C blocks, which messes up the formatting.
++                    # Work around this:
++                    if line.endswith(':'):
++                        line += ' '
++                    yield ' * %4d : %s' % (startline + i, line)
++                yield ' */'
++
++        label = graph.startblock.get_base_label(self.blocknum[graph.startblock])
++        yield 'goto %s;' % label # to avoid a warning "this label is not used"
++
++        # Sort the blocks into a (hopefully) readable order:
++        blocks = list(graph.iterblocks_by_source())
++        blocks.sort(block_comparator)
+ 
+         # generate the body of each block
+-        for block in graph.iterblocks():
++        for block in blocks:
++            cursrcloc = None
+             myblocknum = self.blocknum[block]
+             yield ''
+-            yield 'block%d:' % myblocknum
++            yield '%s:' % block.get_base_label(myblocknum)
++            #yield "/* repr(block): %r */" % (block, )
++            #yield "/* type(block): %r */" % (type(block), )
+             for i, op in enumerate(block.operations):
++                #yield "/* type(op): %r */" % (type(op), )
++                #yield "/* op.oploc: %s */" % (op.oploc, )
++                codeloc = op.oploc.codelocs[-1]
++                if codeloc:
++                    srcloc = codeloc.get_source_loc()
++                    if srcloc != cursrcloc:
++                        try:
++                            yield "/* %s:%d : %s */" % (codeloc.code.co_name, srcloc.linenum, escape_c_comments(srcloc.get_text()))
++                            cursrcloc = srcloc
++                        except IOError:
++                            pass
++
+                 for line in self.gen_op(op):
+                     yield line
+             if len(block.exits) == 0:
+@@ -310,7 +387,7 @@
+             assignments.append((a2typename, dest, src))
+         for line in gen_assignments(assignments):
+             yield line
+-        label = 'block%d' % self.blocknum[link.target]
++        label = link.target.get_base_label(self.blocknum[link.target])
+         if link.target in self.innerloops:
+             loop = self.innerloops[link.target]
+             if link is loop.links[-1]:   # link that ends a loop
+diff -r cd083843b67a pypy/translator/c/test/test_genc.py
+--- a/pypy/translator/c/test/test_genc.py	Mon Dec 20 17:17:45 2010 +0100
++++ b/pypy/translator/c/test/test_genc.py	Wed Jan 05 16:14:35 2011 -0500
+@@ -1,4 +1,5 @@
+ import autopath, sys, os, py
++import re
+ from pypy.rpython.lltypesystem.lltype import *
+ from pypy.annotation import model as annmodel
+ from pypy.translator.translator import TranslationContext
+@@ -498,3 +499,130 @@
+     else:
+         assert 0, "the call was not found in the C source"
+     assert 'PYPY_INHIBIT_TAIL_CALL();' in lines[i+1]
++
++def get_generated_c_source(fn, types):
++    # Return a (optimized fn, c source code, c source filename) 3-tuple
++    t = Translation(fn)
++    t.annotate(types)
++    c_filename_path = t.source_c()
++    h = c_filename_path.open()
++    src = h.read()
++    h.close()
++    c_fn = t.compile_c()
++    return (c_fn, src, c_filename_path)
++
++def extract_c_function(c_src, fname):
++    # Extract the source for a given C function out of the given src string
++    # Makes assumptions about the layout of the source
++    pattern = '^(.+) \**%s\(.*\) {$' % fname
++    within_fn = False
++    result = ''
++    for line in c_src.splitlines():
++        if within_fn:
++            result += line + '\n'
++            if line.startswith('}'):
++                return result
++        else:
++            m = re.match(pattern, line)
++            if m:
++                within_fn = True
++                result += line + '\n'
++    return result
++    
++    
++
++def test_generated_c_source():
++    # Verify that the generated C source "looks good"
++    # We'll use is_perfect_number, as it contains a loop and a conditional
++
++    # Generate C source code
++    from pypy.translator.test.snippet import is_perfect_number
++    c_fn, c_src, c_filename_path = get_generated_c_source(is_perfect_number,
++                                                        [int])
++
++    # Locate the C source for the type-specialized function:
++    c_fn_src = extract_c_function(c_src, 'pypy_g_is_perfect_number')
++    
++    # Verify that the C source contains embedded comments containing the lines
++    # of the python source:
++    expected_comment_lines = [
++        '/* is_perfect_number:31 :     while div < n: */',
++        '/* is_perfect_number:32 :         if n % div == 0: */',
++        '/* is_perfect_number:33 :             sum += div */',
++        '/* is_perfect_number:34 :         div += 1 */',
++        '/* is_perfect_number:35 :     return n == sum */']
++    for exp_line in expected_comment_lines:
++        assert exp_line in c_fn_src
++        
++    # Verify that the lines occur in the correct order
++    # ...we do this by filtering the function's generated C source to just
++    # those lines containing our comments (and dropping whitespace):
++    lines = c_fn_src.splitlines()
++    lines = [line.strip()
++             for line in lines
++             if '/* is_perfect_number:' in line]
++
++    # ...we should now have exact equality: the ordering should be as expected,
++    # and each comment should appear exactly once:
++    assert lines == expected_comment_lines
++
++    # Ensure that the generated C function does the right thing:
++    assert c_fn(5) == False
++    assert c_fn(6) == True
++    assert c_fn(7) == False
++
++    assert c_fn(5.0) == False
++    assert c_fn(6.0) == True
++    assert c_fn(7.0) == False
++
++    assert c_fn(5L) == False
++    assert c_fn(6L) == True
++    assert c_fn(7L) == False
++
++    try:
++        c_fn('hello world')
++    except:
++        pass
++    else:
++        raise 'Was expected exception'
++    
++def test_escaping_c_comments():
++    # Ensure that c comments within RPython code get escaped when we generate
++    # our .c code (to avoid generating bogus C)
++    # See e.g. pypy.module.cpyext.dictobject's PyDict_Next, which has a
++    # docstring embedding a C comment
++    def c_style_comment(a, b):
++        '''Here is a C-style comment within an RPython docstring:
++                /* hello world */
++        '''
++        # and here's one in a string literal:
++        return '/* hello world a:%s b:%s */' % (a, b)
++
++    def cplusplus_style_comment(a, b):
++        '''Here is a C++-style comment within an RPython docstring:
++                // hello world
++        '''
++        # and here are some in string literals, and one as the floor division
++        # operator:
++        return '// hello world: a // b = %s' % (a // b)
++
++    for fn_name, exp_output in [('c_style_comment',
++                                 '/* hello world a:6 b:3 */'),
++                                ('cplusplus_style_comment',
++                                 '// hello world: a // b = 2')]:
++        fn = locals()[fn_name]
++
++        c_fn, c_src, c_filename_path = get_generated_c_source(fn, [int, int])
++        # If the above survived, then the C compiler managed to handle
++        # the generated C code
++
++        # Verify that the generated code works (i.e. that we didn't
++        # accidentally change the meaning):
++        assert c_fn(6, 3) == exp_output
++
++        # Ensure that at least part of the docstrings made it into the C
++        # code:
++        c_fn_src = extract_c_function(c_src, 'pypy_g_' + fn_name)
++        assert 'Here is a ' in c_fn_src
++        assert 'style comment within an RPython docstring' in c_fn_src
++        
+diff -r cd083843b67a pypy/translator/driver.py
+--- a/pypy/translator/driver.py	Mon Dec 20 17:17:45 2010 +0100
++++ b/pypy/translator/driver.py	Wed Jan 05 16:14:35 2011 -0500
+@@ -539,6 +539,7 @@
+             dstname = self.compute_exe_name() + '.staticdata.info'
+             shutil.copy(str(fname), str(dstname))
+             self.log.info('Static data info written to %s' % dstname)
++        return c_source_filename
+ 
+     #
+     task_source_c = taskdef(task_source_c, ['database_c'], "Generating c source")
+diff -r cd083843b67a pypy/translator/gensupp.py
+--- a/pypy/translator/gensupp.py	Mon Dec 20 17:17:45 2010 +0100
++++ b/pypy/translator/gensupp.py	Wed Jan 05 16:14:35 2011 -0500
+@@ -16,8 +16,8 @@
+     def visit(block):
+         if isinstance(block, Block):
+             # first we order by offset in the code string
+-            if block.operations:
+-                ofs = block.operations[0].offset
++            if block.operations and block.operations[0].oploc.codelocs[0]:
++                ofs = block.operations[0].oploc.codelocs[0].offset
+             else:
+                 ofs = sys.maxint
+             # then we order by input variable name or value
+diff -r cd083843b67a pypy/translator/interactive.py
+--- a/pypy/translator/interactive.py	Mon Dec 20 17:17:45 2010 +0100
++++ b/pypy/translator/interactive.py	Wed Jan 05 16:14:35 2011 -0500
+@@ -138,7 +138,7 @@
+     def source_c(self, argtypes=None, **kwds):
+         self.update_options(argtypes, kwds)
+         self.ensure_backend('c')
+-        self.driver.source_c()
++        return self.driver.source_c()
+ 
+     def source_cl(self, argtypes=None, **kwds):
+         self.update_options(argtypes, kwds)
+diff -r cd083843b67a pypy/translator/llsupport/wrapper.py
+--- a/pypy/translator/llsupport/wrapper.py	Mon Dec 20 17:17:45 2010 +0100
++++ b/pypy/translator/llsupport/wrapper.py	Wed Jan 05 16:14:35 2011 -0500
+@@ -59,6 +59,8 @@
+     # "return result"
+     block = Block(wrapper_inputargs)
+     wgraph = FunctionGraph('pyfn_' + (newname or func.func_name), block)
++    if hasattr(graph, 'func'):
++        wgraph.func = graph.func
+     translator.update_call_graph(wgraph, graph, object())
+     translator.graphs.append(wgraph)
+     block.operations[:] = newops
+diff -r cd083843b67a pypy/translator/simplify.py
+--- a/pypy/translator/simplify.py	Mon Dec 20 17:17:45 2010 +0100
++++ b/pypy/translator/simplify.py	Wed Jan 05 16:14:35 2011 -0500
+@@ -294,7 +294,7 @@
+                 return renaming.get(v, v)
+             def rename_op(op):
+                 args = [rename(a) for a in op.args]
+-                op = SpaceOperation(op.opname, args, rename(op.result), op.offset)
++                op = SpaceOperation(op.opname, args, rename(op.result), op.oploc)
+                 # special case...
+                 if op.opname == 'indirect_call':
+                     if isinstance(op.args[0], Constant):
diff --git a/pypy.spec b/pypy.spec
index 96354e2..162ed5f 100644
--- a/pypy.spec
+++ b/pypy.spec
@@ -1,6 +1,6 @@
 Name:           pypy
 Version:        1.4.1
-Release:        3%{?dist}
+Release:        4%{?dist}
 Summary:        Python implementation with a Just-In-Time compiler
 
 Group:          Development/Languages
@@ -136,9 +136,33 @@ Patch2: fix-test_commands-expected-ls-output-issue7108.patch
 #   https://codespeak.net/issue/pypy-dev/issue614
 Patch3: pypy-1.4.1-add-LIBRARY_INSTALLATION_PATH.patch
 
+# Try to improve the readability of the generated .c code, by adding in the
+# RPython source as comments where possible.
+# A version of this was sent upstream as:
+#  http://codespeak.net/pipermail/pypy-dev/2010q4/006532.html
+# TODO: get this into the upstream bug tracker, and finish inlining
+# support (rhbz#666963)
+Patch4: pypy-1.4.1-more-readable-c-code.patch
+
+
 # Build-time requirements:
 
-BuildRequires:  python-devel
+# pypy can be rebuilt using itself, rather than with CPython; doing so
+# halves the build time.
+#
+# Turn it off with this boolean, to revert back to rebuilding using CPython
+# and avoid a cycle in the build-time dependency graph:
+#
+%global use_self_when_building 1
+%if 0%{use_self_when_building}
+BuildRequires: pypy
+%global bootstrap_python_interp pypy
+%else
+BuildRequires: python-devel
+%global bootstrap_python_interp python
+%endif
+
+
 
 # FIXME: I'm seeing errors like this in the logs:
 #   [translation:WARNING] The module '_rawffi' is disabled
@@ -215,6 +239,8 @@ sed -i \
   -e 's|LIBRARY_INSTALLATION_PATH|"%{pypyprefix}"|' \
   pypy/translator/goal/app_main.py
 
+%patch4 -p1 -b .more-readable-c-code
+
 
 # Replace /usr/local/bin/python shebangs with /usr/bin/python:
 find -name "*.py" -exec \
@@ -304,6 +330,7 @@ BuildPyPy() {
   # doesn't interract well with the results of using our standard build flags.
   # For now, filter our CFLAGS of everything that could be conflicting with
   # pypy.  Need to check these and reenable ones that are okay later.
+  # Filed as https://bugzilla.redhat.com/show_bug.cgi?id=666966
   export CFLAGS=$(echo "$RPM_OPT_FLAGS" | sed -e 's/-Wp,-D_FORTIFY_SOURCE=2//' -e 's/-fexceptions//' -e 's/-fstack-protector//' -e 's/--param=ssp-buffer-size=4//' -e 's/-O2//' -e 's/-fasynchronous-unwind-tables//' -e 's/-march=i686//' -e 's/-mtune=atom//')
 
   # If we're already built the JIT-enabled "pypy", then use it for subsequent
@@ -311,7 +338,10 @@ BuildPyPy() {
   if test -x './pypy' ; then
     INTERP='./pypy'
   else
-    INTERP='python'
+    # First pypy build within this rpm build?
+    # Fall back to using the bootstrap python interpreter, which might be a
+    # system copy of pypy from an earlier rpm, or be cpython's /usr/bin/python:
+    INTERP='%{bootstrap_python_interp}'
   fi
 
   # Here's where we actually invoke the build:
@@ -484,6 +514,34 @@ mkdir -p %{buildroot}/%{pypyprefix}/site-packages
   %{buildroot}/%{_bindir}/pypy \
   0
 
+# Capture the RPython source code files from the build within the debuginfo
+# package (rhbz#666975)
+%global pypy_debuginfo_dir /usr/src/debug/pypy-%{version}-src
+mkdir -p %{buildroot}%{pypy_debuginfo_dir}
+
+# copy over everything:
+cp -a pypy %{buildroot}%{pypy_debuginfo_dir}
+
+# ...then delete files that aren't .py files:
+find \
+  %{buildroot}%{pypy_debuginfo_dir} \
+  \( -type f                        \
+     -a                             \
+     \! -name "*.py"                \
+  \)                                \
+  -delete
+
+# We don't need bytecode for these files; they are being included for reference
+# purposes.
+# There are some rpmlint warnings from these files:
+#   non-executable-script
+#   wrong-script-interpreter
+#   zero-length
+#   script-without-shebang
+#   dangling-symlink
+# but given that the objective is to preserve a copy of the source code, those
+# are acceptable.
+
 %check
 topdir=$(pwd)
 
@@ -511,6 +569,7 @@ CheckPyPy() {
 
     # Gather a list of tests to skip, due to known failures
     # TODO: report these failures to pypy upstream
+    # See also rhbz#666967 and rhbz#666969
     TESTS_TO_SKIP=""
 
     # Test failures relating to missing codecs
@@ -622,6 +681,14 @@ CheckPyPy() {
       #     AssertionError: ValueError not raised
       SkipTest test_zlib
 
+    %if 0%{use_self_when_building}
+    # Patch 3 prioritizes the installed copy of pypy's libraries over the
+    # build copy.
+    # This leads to test failures of test_pep263 and test_tarfile
+    # For now, suppress these when building using pypy itself:
+    SkipTest test_pep263   # on-disk encoding issues
+    SkipTest test_tarfile  # permissions issues
+    %endif
 
     # Run the built binary through the selftests:
     time ./$ExeName -m test.regrtest -x $TESTS_TO_SKIP
@@ -696,6 +763,14 @@ rm -rf $RPM_BUILD_ROOT
 
 
 %changelog
+* Wed Jan  5 2011 David Malcolm <dmalcolm at redhat.com> - 1.4.1-4
+- rebuild pypy using itself, for speed, with a boolean to break this cycle in
+the build-requirement graph (falling back to using "python-devel" aka CPython)
+- add work-in-progress patch to try to make generated c more readable
+(rhbz#666963)
+- capture the RPython source code files from the build within the debuginfo
+package (rhbz#666975)
+
 * Wed Dec 22 2010 David Malcolm <dmalcolm at redhat.com> - 1.4.1-3
 - try to respect the FHS by installing libraries below libdir, rather than
 datadir; patch app_main.py to look in this installation location first when