[gperftools] add metapackage, update to svn218
Tom Callaway
spot at fedoraproject.org
Tue Jun 4 16:02:04 UTC 2013
commit 019a221686d8f71ae5bd097fbb043986650083e9
Author: Tom Callaway <spot at fedoraproject.org>
Date: Tue Jun 4 12:01:26 2013 -0400
add metapackage, update to svn218
gperftools-2.0-svn190-to-svn218.patch | 1972 +++++++++++++++++++++++++++++++++
gperftools.spec | 23 +-
2 files changed, 1992 insertions(+), 3 deletions(-)
---
diff --git a/gperftools-2.0-svn190-to-svn218.patch b/gperftools-2.0-svn190-to-svn218.patch
new file mode 100644
index 0000000..771e61a
--- /dev/null
+++ b/gperftools-2.0-svn190-to-svn218.patch
@@ -0,0 +1,1972 @@
+Only in gperftools-2.0: aclocal.m4
+Only in gperftools-2.0: aclocal.m4.svn-r190
+diff -urP gperftools-2.0/autogen.sh gperftools-2.0-svn218/autogen.sh
+--- gperftools-2.0/autogen.sh 2013-06-04 10:20:21.135844736 -0400
++++ gperftools-2.0-svn218/autogen.sh 2013-06-04 10:16:58.887841701 -0400
+@@ -1,54 +1,3 @@
+ #!/bin/sh
+
+-# Before using, you should figure out all the .m4 macros that your
+-# configure.m4 script needs and make sure they exist in the m4/
+-# directory.
+-#
+-# These are the files that this script might edit:
+-# aclocal.m4 configure Makefile.in src/config.h.in \
+-# depcomp config.guess config.sub install-sh missing mkinstalldirs \
+-# ltmain.sh
+-#
+-# Here's a command you can run to see what files aclocal will import:
+-# aclocal -I ../autoconf --output=- | sed -n 's/^m4_include..\([^]]*\).*/\1/p'
+-
+-set -ex
+-rm -rf autom4te.cache
+-
+-trap 'rm -f aclocal.m4.tmp' EXIT
+-
+-# Returns the first binary in $* that exists, or the last arg, if none exists.
+-WhichOf() {
+- for candidate in "$@"; do
+- if "$candidate" --version >/dev/null 2>&1; then
+- echo "$candidate"
+- return
+- fi
+- done
+- echo "$candidate" # the last one in $@
+-}
+-
+-# Use version 1.9 of aclocal and automake if available.
+-ACLOCAL=`WhichOf aclocal-1.9 aclocal`
+-AUTOMAKE=`WhichOf automake-1.9 automake`
+-LIBTOOLIZE=`WhichOf glibtoolize libtoolize15 libtoolize14 libtoolize`
+-
+-# aclocal tries to overwrite aclocal.m4 even if the contents haven't
+-# changed, which is annoying when the file is not open for edit (in
+-# p4). We work around this by writing to a temp file and just
+-# updating the timestamp if the file hasn't change.
+-"$ACLOCAL" --force -I m4 --output=aclocal.m4.tmp
+-if cmp aclocal.m4.tmp aclocal.m4; then
+- touch aclocal.m4 # pretend that we regenerated the file
+- rm -f aclocal.m4.tmp
+-else
+- mv aclocal.m4.tmp aclocal.m4 # we did set -e above, so we die if this fails
+-fi
+-
+-grep -q '^[^#]*AC_PROG_LIBTOOL' configure.ac && "$LIBTOOLIZE" -c -f
+-autoconf -f -W all,no-obsolete
+-autoheader -f -W all
+-"$AUTOMAKE" -a -c -f -W all
+-
+-rm -rf autom4te.cache
+-exit 0
++autoreconf -i
+Only in gperftools-2.0: autogen.sh.svn-r190
+Only in gperftools-2.0: compile
+Only in gperftools-2.0: config.guess
+Only in gperftools-2.0: config.sub
+Only in gperftools-2.0: configure
+diff -urP gperftools-2.0/configure.ac gperftools-2.0-svn218/configure.ac
+--- gperftools-2.0/configure.ac 2013-06-04 10:20:21.138844736 -0400
++++ gperftools-2.0-svn218/configure.ac 2013-06-04 10:16:58.805841700 -0400
+@@ -99,28 +99,7 @@
+ [gpt_cv_objcopy_weaken=no])
+ AM_CONDITIONAL(HAVE_OBJCOPY_WEAKEN, test $gpt_cv_objcopy_weaken = yes)
+
+-case $host_os in
+- *mingw*)
+- # Disabling fast install keeps libtool from creating wrapper scripts
+- # around the executables it builds. Such scripts have caused failures on
+- # MinGW. Using this option means an extra link step is executed during
+- # "make install".
+- _LT_SET_OPTION([LT_INIT],[disable-fast-install])
+-AC_DIAGNOSE([obsolete],[AC_DISABLE_FAST_INSTALL: Remove this warning and the call to _LT_SET_OPTION when you put
+-the `disable-fast-install' option into LT_INIT's first parameter.])
+-
+- ;;
+- *)
+- _LT_SET_OPTION([LT_INIT],[fast-install])
+-AC_DIAGNOSE([obsolete],[AC_ENABLE_FAST_INSTALL: Remove this warning and the call to _LT_SET_OPTION when you put
+-the `fast-install' option into LT_INIT's first parameter.])
+-
+- ;;
+-esac
+-
+-LT_INIT
+-AC_SUBST(LIBTOOL_DEPS)
+-AM_CONDITIONAL(USE_LIBTOOL, test "x$LIBTOOL" != "x")
++LT_INIT([])
+
+ AC_C_INLINE
+ AX_C___ATTRIBUTE__
+@@ -134,6 +113,7 @@
+ AC_CHECK_TYPES([Elf32_Versym],,, [#include <elf.h>]) # for vdso_support.h
+ AC_CHECK_FUNCS(sbrk) # for tcmalloc to get memory
+ AC_CHECK_FUNCS(geteuid) # for turning off services when run as root
++AC_CHECK_FUNCS(fork) # for the pthread_atfork setup
+ AC_CHECK_HEADERS(features.h) # for vdso_support.h
+ AC_CHECK_HEADERS(malloc.h) # some systems define stuff there, others not
+ AC_CHECK_HEADERS(sys/malloc.h) # where some versions of OS X put malloc.h
+@@ -183,6 +163,11 @@
+ # This workaround comes from
+ # http://cygwin.com/ml/cygwin/2004-11/msg00138.html
+ case "$host" in
++ *-*-mingw*)
++ dnl mingw doesn't have mmap, not worth
++ dnl checking. Especially given that mingw can be a
++ dnl cross-compiler
++ ;;
+ *-*-cygwin*)
+ ac_cv_func_mmap_fixed_mapped=yes
+ AC_DEFINE(HAVE_MMAP, 1,
+@@ -310,10 +295,18 @@
+ # Note, however, that our code tickles a bug in gcc < 4.1.2
+ # involving TLS and -fPIC (which our libraries will use) on x86:
+ # http://gcc.gnu.org/ml/gcc-bugs/2006-09/msg02275.html
++#
++# And mingw also does compile __thread but resultant code actually
++# fails to work correctly at least in some not so ancient version:
++# http://mingw-users.1079350.n2.nabble.com/gcc-4-4-multi-threaded-exception-handling-amp-thread-specifier-not-working-td3440749.html
+ AC_MSG_CHECKING([for __thread])
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && ((__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) || (__GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ < 2))
+ #error gcc has this bug: http://gcc.gnu.org/ml/gcc-bugs/2006-09/msg02275.html
+-#endif], [static __thread int p = 0])],
++#endif
++#if defined(__MINGW32__)
++#error mingw doesn't really support tls
++#endif
++], [static __thread int p = 0])],
+ [AC_DEFINE(HAVE_TLS, 1,
+ Define to 1 if compiler supports __thread)
+ AC_MSG_RESULT([yes])],
+Only in gperftools-2.0: configure.ac.svn-r190
+Only in gperftools-2.0: configure.svn-r190
+Only in gperftools-2.0: depcomp
+Only in gperftools-2.0/doc: cpuprofile.html.svn-r190
+Only in gperftools-2.0/doc: heapprofile.html.svn-r190
+Only in gperftools-2.0/doc: pprof.see_also.svn-r190
+diff -urP gperftools-2.0/INSTALL gperftools-2.0-svn218/INSTALL
+--- gperftools-2.0/INSTALL 2012-02-03 14:40:32.000000000 -0500
++++ gperftools-2.0-svn218/INSTALL 2013-06-04 10:16:58.886841701 -0400
+@@ -8,6 +8,28 @@
+ Perftools-Specific Install Notes
+ ================================
+
++*** Building from source repository
++
++As of 2.1 gperftools does not have configure and other autotools
++products checked into its source repository. This is common practice
++for projects using autotools.
++
++NOTE: Source releases (.tar.gz that you download from
++code.google.com/p/gperftools) still have all required files just as
++before. Nothing has changed w.r.t. building from .tar.gz releases.
++
++But, in order to build gperftools checked out from subversion
++repository you need to have autoconf, automake and libtool
++installed. And before running ./configure you have to generate it (and
++a bunch of other files) by running ./autogen.sh script. That script
++will take care of calling correct autotools programs in correct order.
++
++If you're a maintainer then it's business as usual too. Just run make
++dist (or, preferably, make distcheck) and it'll produce .tar.gz or
++.tar.bz2 with all autotools magic already included. So that users can
++build our software without having autotools.
++
++
+ *** NOTE FOR 64-BIT LINUX SYSTEMS
+
+ The glibc built-in stack-unwinder on 64-bit systems has some problems
+Only in gperftools-2.0: install-sh
+Only in gperftools-2.0: libtool
+Only in gperftools-2.0: ltmain.sh
+Only in gperftools-2.0/m4: libtool.m4
+Only in gperftools-2.0/m4: libtool.m4.svn-r190
+Only in gperftools-2.0/m4: lt~obsolete.m4
+Only in gperftools-2.0/m4: ltoptions.m4
+Only in gperftools-2.0/m4: ltsugar.m4
+Only in gperftools-2.0/m4: ltversion.m4
+diff -urP gperftools-2.0/Makefile.am gperftools-2.0-svn218/Makefile.am
+--- gperftools-2.0/Makefile.am 2013-06-04 10:20:21.140844736 -0400
++++ gperftools-2.0-svn218/Makefile.am 2013-06-04 10:16:58.887841701 -0400
+@@ -221,7 +221,7 @@
+ src/windows/preamble_patcher.cc \
+ src/windows/preamble_patcher_with_stub.cc
+ # patch_functions.cc uses Psapi.lib. MSVC has a #pragma for that, but not us.
+-libwindows_la_LIBADD = -lPsapi
++libwindows_la_LIBADD = -lpsapi
+
+ SPINLOCK_INCLUDES = src/base/spinlock.h \
+ src/base/spinlock_internal.h \
+@@ -238,6 +238,7 @@
+ noinst_LTLIBRARIES += libspinlock.la
+ libspinlock_la_SOURCES = src/base/spinlock.cc \
+ src/base/spinlock_internal.cc \
++ src/base/atomicops-internals-x86.cc \
+ $(SPINLOCK_INCLUDES)
+
+ LIBSPINLOCK = libwindows.la libspinlock.la libsysinfo.la liblogging.la
+@@ -355,7 +356,7 @@
+ $(STACKTRACE_INCLUDES)
+ libstacktrace_la_LIBADD = $(UNWIND_LIBS) $(LIBSPINLOCK)
+ STACKTRACE_SYMBOLS = '(GetStackTrace|GetStackFrames|GetStackTraceWithContext|GetStackFramesWithContext)'
+-libstacktrace_la_LDFLAGS = -export-symbols-regex $(STACKTRACE_SYMBOLS)
++libstacktrace_la_LDFLAGS = -export-symbols-regex $(STACKTRACE_SYMBOLS) $(AM_LDFLAGS)
+
+ ### Unittests
+ TESTS += stacktrace_unittest
+@@ -468,7 +469,7 @@
+ -DNO_HEAP_CHECK \
+ $(PTHREAD_CFLAGS) -DNDEBUG \
+ $(AM_CXXFLAGS) $(NO_EXCEPTIONS)
+-libtcmalloc_minimal_internal_la_LDFLAGS = $(PTHREAD_CFLAGS)
++libtcmalloc_minimal_internal_la_LDFLAGS = $(PTHREAD_CFLAGS) $(AM_LDFLAGS)
+ libtcmalloc_minimal_internal_la_LIBADD = $(PTHREAD_LIBS) $(LIBSPINLOCK)
+
+ lib_LTLIBRARIES += libtcmalloc_minimal.la
+@@ -477,7 +478,7 @@
+ libtcmalloc_minimal_la_CXXFLAGS = -DNO_TCMALLOC_SAMPLES \
+ $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS)
+ # -version-info gets passed to libtool
+-libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@
++libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@ $(AM_LDFLAGS)
+ libtcmalloc_minimal_la_LIBADD = libtcmalloc_minimal_internal.la $(PTHREAD_LIBS)
+
+ # For windows, we're playing around with trying to do some stacktrace
+@@ -539,6 +540,12 @@
+ tcmalloc_minimal_large_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
+ tcmalloc_minimal_large_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
+
++TESTS += tcmalloc_minimal_large_heap_fragmentation_unittest
++tcmalloc_minimal_large_heap_fragmentation_unittest_SOURCES = src/tests/large_heap_fragmentation_unittest.cc
++tcmalloc_minimal_large_heap_fragmentation_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
++tcmalloc_minimal_large_heap_fragmentation_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
++tcmalloc_minimal_large_heap_fragmentation_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
++
+ # This tests it works to LD_PRELOAD libtcmalloc (tests maybe_threads.cc)
+ # In theory this should work under mingw, but mingw has trouble running
+ # shell scripts that end in .exe. And it doesn't seem to build shared
+@@ -898,8 +905,16 @@
+
+ ### Unittests
+
+-TESTS += tcmalloc_unittest
+-TCMALLOC_UNITTEST_INCLUDES = src/config_for_unittests.h \
++TESTS += tcmalloc_unittest.sh$(EXEEXT)
++tcmalloc_unittest_sh_SOURCES = src/tests/tcmalloc_unittest.sh
++noinst_SCRIPTS += $(tcmalloc_unittest_sh_SOURCES)
++tcmalloc_unittest.sh$(EXEEXT): $(top_srcdir)/$(tcmalloc_unittest_sh_SOURCES) \
++ tcmalloc_unittest
++ rm -f $@
++ cp -p $(top_srcdir)/$(tcmalloc_unittest_sh_SOURCES) $@
++
++noinst_PROGRAMS += tcmalloc_unittest
++tcmalloc_unittest_INCLUDES = src/config_for_unittests.h \
+ src/gperftools/malloc_extension.h
+ tcmalloc_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \
+ src/tcmalloc.h \
+@@ -956,6 +971,12 @@
+ tcmalloc_large_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
+ tcmalloc_large_unittest_LDADD = $(LIBTCMALLOC) $(PTHREAD_LIBS)
+
++TESTS += tcmalloc_large_heap_fragmentation_unittest
++tcmalloc_large_heap_fragmentation_unittest_SOURCES = src/tests/large_heap_fragmentation_unittest.cc
++tcmalloc_large_heap_fragmentation_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
++tcmalloc_large_heap_fragmentation_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
++tcmalloc_large_heap_fragmentation_unittest_LDADD = $(LIBTCMALLOC) $(PTHREAD_LIBS)
++
+ TESTS += raw_printer_test
+ raw_printer_test_SOURCES = src/tests/raw_printer_test.cc
+ raw_printer_test_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
+Only in gperftools-2.0: Makefile.am.svn-r190
+Only in gperftools-2.0: Makefile.in
+Only in gperftools-2.0: Makefile.in.svn-r190
+Only in gperftools-2.0: missing
+Only in gperftools-2.0: mkinstalldirs
+Only in gperftools-2.0: NEWS.svn-r190
+diff -urP gperftools-2.0/src/base/atomicops.h gperftools-2.0-svn218/src/base/atomicops.h
+--- gperftools-2.0/src/base/atomicops.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/base/atomicops.h 2013-06-04 10:16:58.375841694 -0400
+@@ -50,6 +50,16 @@
+ // implementations on other archtectures will cause your code to break. If you
+ // do not know what you are doing, avoid these routines, and use a Mutex.
+ //
++// These following lower-level operations are typically useful only to people
++// implementing higher-level synchronization operations like spinlocks,
++// mutexes, and condition-variables. They combine CompareAndSwap(), a load, or
++// a store with appropriate memory-ordering instructions. "Acquire" operations
++// ensure that no later memory access can be reordered ahead of the operation.
++// "Release" operations ensure that no previous memory access can be reordered
++// after the operation. "Barrier" operations have both "Acquire" and "Release"
++// semantics. A MemoryBarrier() has "Barrier" semantics, but does no memory
++// access.
++//
+ // It is incorrect to make direct assignments to/from an atomic variable.
+ // You should use one of the Load or Store routines. The NoBarrier
+ // versions are provided when no barriers are needed:
+@@ -95,10 +105,10 @@
+ #include "base/atomicops-internals-arm-v6plus.h"
+ #elif defined(ARMV3)
+ #include "base/atomicops-internals-arm-generic.h"
+-#elif defined(_WIN32)
+-#include "base/atomicops-internals-windows.h"
+ #elif defined(__GNUC__) && (defined(__i386) || defined(__x86_64__))
+ #include "base/atomicops-internals-x86.h"
++#elif defined(_WIN32)
++#include "base/atomicops-internals-windows.h"
+ #elif defined(__linux__) && defined(__PPC__)
+ #include "base/atomicops-internals-linuxppc.h"
+ #else
+@@ -149,6 +159,18 @@
+ reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value);
+ }
+
++AtomicWord Acquire_AtomicExchange(volatile AtomicWord* ptr,
++ AtomicWord new_value) {
++ return Acquire_AtomicExchange(
++ reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value);
++}
++
++AtomicWord Release_AtomicExchange(volatile AtomicWord* ptr,
++ AtomicWord new_value) {
++ return Release_AtomicExchange(
++ reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value);
++}
++
+ // Atomically increment *ptr by "increment". Returns the new value of
+ // *ptr with the increment applied. This routine implies no memory
+ // barriers.
+@@ -164,17 +186,6 @@
+ reinterpret_cast<volatile AtomicWordCastType*>(ptr), increment);
+ }
+
+-// ------------------------------------------------------------------------
+-// These following lower-level operations are typically useful only to people
+-// implementing higher-level synchronization operations like spinlocks,
+-// mutexes, and condition-variables. They combine CompareAndSwap(), a load, or
+-// a store with appropriate memory-ordering instructions. "Acquire" operations
+-// ensure that no later memory access can be reordered ahead of the operation.
+-// "Release" operations ensure that no previous memory access can be reordered
+-// after the operation. "Barrier" operations have both "Acquire" and "Release"
+-// semantics. A MemoryBarrier() has "Barrier" semantics, but does no memory
+-// access.
+-// ------------------------------------------------------------------------
+ inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr,
+ AtomicWord old_value,
+ AtomicWord new_value) {
+@@ -250,6 +261,8 @@
+ Atomic32 old_value,
+ Atomic32 new_value);
+ Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value);
++Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value);
++Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value);
+ Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment);
+ Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
+ Atomic32 increment);
+@@ -271,6 +284,8 @@
+ Atomic64 old_value,
+ Atomic64 new_value);
+ Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value);
++Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value);
++Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value);
+ Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment);
+ Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment);
+
+diff -urP gperftools-2.0/src/base/atomicops-internals-arm-generic.h gperftools-2.0-svn218/src/base/atomicops-internals-arm-generic.h
+--- gperftools-2.0/src/base/atomicops-internals-arm-generic.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/base/atomicops-internals-arm-generic.h 2013-06-04 10:16:58.378841694 -0400
+@@ -89,6 +89,18 @@
+ return old_value;
+ }
+
++inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
++ Atomic32 new_value) {
++ // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
++inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
++ Atomic32 new_value) {
++ // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
+ Atomic32 increment) {
+ for (;;) {
+@@ -176,6 +188,18 @@
+ return 0;
+ }
+
++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
++inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+ Atomic64 increment) {
+ NotImplementedFatalError("NoBarrier_AtomicIncrement");
+diff -urP gperftools-2.0/src/base/atomicops-internals-arm-v6plus.h gperftools-2.0-svn218/src/base/atomicops-internals-arm-v6plus.h
+--- gperftools-2.0/src/base/atomicops-internals-arm-v6plus.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/base/atomicops-internals-arm-v6plus.h 2013-06-04 10:16:58.372841694 -0400
+@@ -94,6 +94,28 @@
+ return old;
+ }
+
++inline void MemoryBarrier() {
++#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6KZ__) || defined(__ARM_ARCH_6T2__)
++ uint32_t dest = 0;
++ __asm__ __volatile__("mcr p15,0,%0,c7,c10,5" :"=&r"(dest) : : "memory");
++#else
++ __asm__ __volatile__("dmb" : : : "memory");
++#endif
++}
++
++inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
++ Atomic32 new_value) {
++ Atomic32 old_value = NoBarrier_AtomicExchange(ptr, new_value);
++ MemoryBarrier();
++ return old_value;
++}
++
++inline Atomic64 Release_AtomicExchange(volatile Atomic32* ptr,
++ Atomic32 new_value) {
++ MemoryBarrier();
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
+ Atomic32 increment) {
+ Atomic32 tmp, res;
+@@ -110,10 +132,6 @@
+ return res;
+ }
+
+-inline void MemoryBarrier() {
+- __asm__ __volatile__("dmb" : : : "memory");
+-}
+-
+ inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
+ Atomic32 increment) {
+ Atomic32 tmp, res;
+@@ -220,6 +238,19 @@
+ return old;
+ }
+
++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ Atomic64 old_value = NoBarrier_AtomicExchange(ptr, new_value);
++ MemoryBarrier();
++ return old_value;
++}
++
++inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ MemoryBarrier();
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+ Atomic64 increment) {
+ int store_failed;
+@@ -303,6 +334,18 @@
+ return 0;
+ }
+
++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ NotImplementedFatalError("Acquire_AtomicExchange");
++ return 0;
++}
++
++inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ NotImplementedFatalError("Release_AtomicExchange");
++ return 0;
++}
++
+ inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+ Atomic64 increment) {
+ NotImplementedFatalError("NoBarrier_AtomicIncrement");
+diff -urP gperftools-2.0/src/base/atomicops-internals-linuxppc.h gperftools-2.0-svn218/src/base/atomicops-internals-linuxppc.h
+--- gperftools-2.0/src/base/atomicops-internals-linuxppc.h 2013-06-04 10:20:21.141844736 -0400
++++ gperftools-2.0-svn218/src/base/atomicops-internals-linuxppc.h 2013-06-04 10:16:58.371841694 -0400
+@@ -163,6 +163,26 @@
+ return old_value;
+ }
+
++inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr,
++ Atomic32 new_value) {
++ Atomic32 old_value;
++ do {
++ old_value = *ptr;
++ } while (!OSAtomicCompareAndSwap32Acquire(old_value, new_value,
++ const_cast<Atomic32*>(ptr)));
++ return old_value;
++}
++
++inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr,
++ Atomic32 new_value) {
++ Atomic32 old_value;
++ do {
++ old_value = *ptr;
++ } while (!OSAtomicCompareAndSwap32Release(old_value, new_value,
++ const_cast<Atomic32*>(ptr)));
++ return old_value;
++}
++
+ inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32 *ptr,
+ Atomic32 increment) {
+ return OSAtomicAdd32(increment, const_cast<Atomic32*>(ptr));
+@@ -294,6 +314,26 @@
+ return old_value;
+ }
+
++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr,
++ Atomic64 new_value) {
++ Atomic64 old_value;
++ do {
++ old_value = *ptr;
++ } while (!OSAtomicCompareAndSwap64Acquire(old_value, new_value,
++ const_cast<Atomic64*>(ptr)));
++ return old_value;
++}
++
++inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr,
++ Atomic64 new_value) {
++ Atomic64 old_value;
++ do {
++ old_value = *ptr;
++ } while (!OSAtomicCompareAndSwap64Release(old_value, new_value,
++ const_cast<Atomic64*>(ptr)));
++ return old_value;
++}
++
+ inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64 *ptr,
+ Atomic64 increment) {
+ return OSAtomicAdd64(increment, const_cast<Atomic64*>(ptr));
+Only in gperftools-2.0/src/base: atomicops-internals-linuxppc.h.svn-r190
+diff -urP gperftools-2.0/src/base/atomicops-internals-macosx.h gperftools-2.0-svn218/src/base/atomicops-internals-macosx.h
+--- gperftools-2.0/src/base/atomicops-internals-macosx.h 2012-02-02 16:36:22.000000000 -0500
++++ gperftools-2.0-svn218/src/base/atomicops-internals-macosx.h 2013-06-04 10:16:58.378841694 -0400
+@@ -132,6 +132,21 @@
+ return old_value;
+ }
+
++inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr,
++ Atomic32 new_value) {
++ Atomic32 old_value;
++ do {
++ old_value = *ptr;
++ } while (!OSAtomicCompareAndSwap32Barrier(old_value, new_value,
++ const_cast<Atomic32*>(ptr)));
++ return old_value;
++}
++
++inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr,
++ Atomic32 new_value) {
++ return Acquire_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32 *ptr,
+ Atomic32 increment) {
+ return OSAtomicAdd32(increment, const_cast<Atomic32*>(ptr));
+@@ -217,6 +232,21 @@
+ return old_value;
+ }
+
++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr,
++ Atomic64 new_value) {
++ Atomic64 old_value;
++ do {
++ old_value = *ptr;
++ } while (!OSAtomicCompareAndSwap64Barrier(old_value, new_value,
++ const_cast<Atomic64*>(ptr)));
++ return old_value;
++}
++
++inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr,
++ Atomic64 new_value) {
++ return Acquire_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64 *ptr,
+ Atomic64 increment) {
+ return OSAtomicAdd64(increment, const_cast<Atomic64*>(ptr));
+diff -urP gperftools-2.0/src/base/atomicops-internals-windows.h gperftools-2.0-svn218/src/base/atomicops-internals-windows.h
+--- gperftools-2.0/src/base/atomicops-internals-windows.h 2013-06-04 10:20:21.142844736 -0400
++++ gperftools-2.0-svn218/src/base/atomicops-internals-windows.h 2013-06-04 10:16:58.378841694 -0400
+@@ -137,6 +137,18 @@
+ return static_cast<Atomic32>(result);
+ }
+
++inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
++ Atomic32 new_value) {
++ // FastInterlockedExchange has both acquire and release memory barriers.
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
++inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
++ Atomic32 new_value) {
++ // FastInterlockedExchange has both acquire and release memory barriers.
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
+ Atomic32 increment) {
+ return FastInterlockedExchangeAdd(
+@@ -188,8 +200,7 @@
+ }
+
+ inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
+- NoBarrier_AtomicExchange(ptr, value);
+- // acts as a barrier in this implementation
++ Acquire_AtomicExchange(ptr, value);
+ }
+
+ inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
+@@ -478,6 +489,18 @@
+ #endif // defined(_WIN64) || defined(__MINGW64__)
+
+
++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ // FastInterlockedExchange has both acquire and release memory barriers.
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
++inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ // FastInterlockedExchange has both acquire and release memory barriers.
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
+ Atomic64 old_value,
+ Atomic64 new_value) {
+Only in gperftools-2.0/src/base: atomicops-internals-windows.h.svn-r190
+diff -urP gperftools-2.0/src/base/atomicops-internals-x86.h gperftools-2.0-svn218/src/base/atomicops-internals-x86.h
+--- gperftools-2.0/src/base/atomicops-internals-x86.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/base/atomicops-internals-x86.h 2013-06-04 10:16:58.373841694 -0400
+@@ -89,6 +89,21 @@
+ return new_value; // Now it's the previous value.
+ }
+
++inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
++ Atomic32 new_value) {
++ Atomic32 old_val = NoBarrier_AtomicExchange(ptr, new_value);
++ if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
++ __asm__ __volatile__("lfence" : : : "memory");
++ }
++ return old_val;
++}
++
++inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
++ Atomic32 new_value) {
++ // xchgl already has release memory barrier semantics.
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
+ Atomic32 increment) {
+ Atomic32 temp = increment;
+@@ -152,7 +167,7 @@
+ __asm__ __volatile__("mfence" : : : "memory");
+ } else { // mfence is faster but not present on PIII
+ Atomic32 x = 0;
+- NoBarrier_AtomicExchange(&x, 0); // acts as a barrier on PIII
++ Acquire_AtomicExchange(&x, 0);
+ }
+ }
+
+@@ -161,8 +176,7 @@
+ *ptr = value;
+ __asm__ __volatile__("mfence" : : : "memory");
+ } else {
+- NoBarrier_AtomicExchange(ptr, value);
+- // acts as a barrier on PIII
++ Acquire_AtomicExchange(ptr, value);
+ }
+ }
+ #endif
+@@ -213,6 +227,21 @@
+ return new_value; // Now it's the previous value.
+ }
+
++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_value);
++ if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
++ __asm__ __volatile__("lfence" : : : "memory");
++ }
++ return old_val;
++}
++
++inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ // xchgq already has release memory barrier semantics.
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+ Atomic64 increment) {
+ Atomic64 temp = increment;
+@@ -334,6 +363,20 @@
+ return old_val;
+ }
+
++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_val) {
++ Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_val);
++ if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
++ __asm__ __volatile__("lfence" : : : "memory");
++ }
++ return old_val;
++}
++
++inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_val) {
++ return NoBarrier_AtomicExchange(ptr, new_val);
++}
++
+ inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+ Atomic64 increment) {
+ Atomic64 old_val, new_val;
+diff -urP gperftools-2.0/src/base/basictypes.h gperftools-2.0-svn218/src/base/basictypes.h
+--- gperftools-2.0/src/base/basictypes.h 2013-06-04 10:20:21.142844736 -0400
++++ gperftools-2.0-svn218/src/base/basictypes.h 2013-06-04 10:16:58.372841694 -0400
+@@ -334,10 +334,13 @@
+ #if defined(HAVE___ATTRIBUTE__)
+ # if (defined(__i386__) || defined(__x86_64__))
+ # define CACHELINE_ALIGNED __attribute__((aligned(64)))
+-# elif defined(__arm__)
+-# define CACHELINE_ALIGNED __attribute__((aligned(32)))
+ # elif (defined(__PPC__) || defined(__PPC64__))
+ # define CACHELINE_ALIGNED __attribute__((aligned(16)))
++# elif (defined(__arm__))
++# define CACHELINE_ALIGNED __attribute__((aligned(64)))
++ // some ARMs have shorter cache lines (ARM1176JZF-S is 32 bytes for example) but obviously 64-byte aligned implies 32-byte aligned
++# else
++# error Could not determine cache line length - unknown architecture
+ # endif
+ #else
+ # define CACHELINE_ALIGNED
+Only in gperftools-2.0/src/base: basictypes.h.svn-r190
+Only in gperftools-2.0/src/base: cycleclock.h.svn-r190
+diff -urP gperftools-2.0/src/base/linux_syscall_support.h gperftools-2.0-svn218/src/base/linux_syscall_support.h
+--- gperftools-2.0/src/base/linux_syscall_support.h 2013-06-04 10:20:21.142844736 -0400
++++ gperftools-2.0-svn218/src/base/linux_syscall_support.h 2013-06-04 10:16:58.379841694 -0400
+@@ -148,6 +148,8 @@
+ #include <errno.h>
+ #include <signal.h>
+ #include <stdarg.h>
++#include <stddef.h>
++#include <stdint.h>
+ #include <string.h>
+ #include <sys/ptrace.h>
+ #include <sys/resource.h>
+@@ -404,24 +406,24 @@
+ };
+ #elif defined(__x86_64__)
+ struct kernel_stat {
+- unsigned long st_dev;
+- unsigned long st_ino;
+- unsigned long st_nlink;
++ uint64_t st_dev;
++ uint64_t st_ino;
++ uint64_t st_nlink;
+ unsigned st_mode;
+ unsigned st_uid;
+ unsigned st_gid;
+ unsigned __pad0;
+- unsigned long st_rdev;
+- long st_size;
+- long st_blksize;
+- long st_blocks;
+- unsigned long st_atime_;
+- unsigned long st_atime_nsec_;
+- unsigned long st_mtime_;
+- unsigned long st_mtime_nsec_;
+- unsigned long st_ctime_;
+- unsigned long st_ctime_nsec_;
+- long __unused[3];
++ uint64_t st_rdev;
++ int64_t st_size;
++ int64_t st_blksize;
++ int64_t st_blocks;
++ uint64_t st_atime_;
++ uint64_t st_atime_nsec_;
++ uint64_t st_mtime_;
++ uint64_t st_mtime_nsec_;
++ uint64_t st_ctime_;
++ uint64_t st_ctime_nsec_;
++ int64_t __unused[3];
+ };
+ #elif defined(__PPC__)
+ struct kernel_stat {
+@@ -1013,74 +1015,141 @@
+ * location (e.g. when using the clone() system call with the CLONE_VM
+ * option).
+ */
++ #undef LSS_ENTRYPOINT
++ #define LSS_ENTRYPOINT "syscall\n"
++
++ /* The x32 ABI has 32 bit longs, but the syscall interface is 64 bit.
++ * We need to explicitly cast to an unsigned 64 bit type to avoid implicit
++ * sign extension. We can't cast pointers directly because those are
++ * 32 bits, and gcc will dump ugly warnings about casting from a pointer
++ * to an integer of a different size.
++ */
++ #undef LSS_SYSCALL_ARG
++ #define LSS_SYSCALL_ARG(a) ((uint64_t)(uintptr_t)(a))
++ #undef _LSS_RETURN
++ #define _LSS_RETURN(type, res, cast) \
++ do { \
++ if ((uint64_t)(res) >= (uint64_t)(-4095)) { \
++ LSS_ERRNO = -(res); \
++ res = -1; \
++ } \
++ return (type)(cast)(res); \
++ } while (0)
++ #undef LSS_RETURN
++ #define LSS_RETURN(type, res) _LSS_RETURN(type, res, uintptr_t)
++
++ #undef _LSS_BODY
++ #define _LSS_BODY(nr, type, name, cast, ...) \
++ long long __res; \
++ __asm__ __volatile__(LSS_BODY_ASM##nr LSS_ENTRYPOINT \
++ : "=a" (__res) \
++ : "0" (__NR_##name) LSS_BODY_ARG##nr(__VA_ARGS__) \
++ : LSS_BODY_CLOBBER##nr "r11", "rcx", "memory"); \
++ _LSS_RETURN(type, __res, cast)
+ #undef LSS_BODY
+- #define LSS_BODY(type,name, ...) \
+- long __res; \
+- __asm__ __volatile__("syscall" : "=a" (__res) : "0" (__NR_##name), \
+- ##__VA_ARGS__ : "r11", "rcx", "memory"); \
+- LSS_RETURN(type, __res)
++ #define LSS_BODY(nr, type, name, args...) \
++ _LSS_BODY(nr, type, name, uintptr_t, ## args)
++
++ #undef LSS_BODY_ASM0
++ #undef LSS_BODY_ASM1
++ #undef LSS_BODY_ASM2
++ #undef LSS_BODY_ASM3
++ #undef LSS_BODY_ASM4
++ #undef LSS_BODY_ASM5
++ #undef LSS_BODY_ASM6
++ #define LSS_BODY_ASM0
++ #define LSS_BODY_ASM1 LSS_BODY_ASM0
++ #define LSS_BODY_ASM2 LSS_BODY_ASM1
++ #define LSS_BODY_ASM3 LSS_BODY_ASM2
++ #define LSS_BODY_ASM4 LSS_BODY_ASM3 "movq %5,%%r10;"
++ #define LSS_BODY_ASM5 LSS_BODY_ASM4 "movq %6,%%r8;"
++ #define LSS_BODY_ASM6 LSS_BODY_ASM5 "movq %7,%%r9;"
++
++ #undef LSS_BODY_CLOBBER0
++ #undef LSS_BODY_CLOBBER1
++ #undef LSS_BODY_CLOBBER2
++ #undef LSS_BODY_CLOBBER3
++ #undef LSS_BODY_CLOBBER4
++ #undef LSS_BODY_CLOBBER5
++ #undef LSS_BODY_CLOBBER6
++ #define LSS_BODY_CLOBBER0
++ #define LSS_BODY_CLOBBER1 LSS_BODY_CLOBBER0
++ #define LSS_BODY_CLOBBER2 LSS_BODY_CLOBBER1
++ #define LSS_BODY_CLOBBER3 LSS_BODY_CLOBBER2
++ #define LSS_BODY_CLOBBER4 LSS_BODY_CLOBBER3 "r10",
++ #define LSS_BODY_CLOBBER5 LSS_BODY_CLOBBER4 "r8",
++ #define LSS_BODY_CLOBBER6 LSS_BODY_CLOBBER5 "r9",
++
++ #undef LSS_BODY_ARG0
++ #undef LSS_BODY_ARG1
++ #undef LSS_BODY_ARG2
++ #undef LSS_BODY_ARG3
++ #undef LSS_BODY_ARG4
++ #undef LSS_BODY_ARG5
++ #undef LSS_BODY_ARG6
++ #define LSS_BODY_ARG0()
++ #define LSS_BODY_ARG1(arg1) \
++ LSS_BODY_ARG0(), "D" (arg1)
++ #define LSS_BODY_ARG2(arg1, arg2) \
++ LSS_BODY_ARG1(arg1), "S" (arg2)
++ #define LSS_BODY_ARG3(arg1, arg2, arg3) \
++ LSS_BODY_ARG2(arg1, arg2), "d" (arg3)
++ #define LSS_BODY_ARG4(arg1, arg2, arg3, arg4) \
++ LSS_BODY_ARG3(arg1, arg2, arg3), "r" (arg4)
++ #define LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5) \
++ LSS_BODY_ARG4(arg1, arg2, arg3, arg4), "r" (arg5)
++ #define LSS_BODY_ARG6(arg1, arg2, arg3, arg4, arg5, arg6) \
++ LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5), "r" (arg6)
++
+ #undef _syscall0
+ #define _syscall0(type,name) \
+ type LSS_NAME(name)() { \
+- LSS_BODY(type, name); \
++ LSS_BODY(0, type, name); \
+ }
+ #undef _syscall1
+ #define _syscall1(type,name,type1,arg1) \
+ type LSS_NAME(name)(type1 arg1) { \
+- LSS_BODY(type, name, "D" ((long)(arg1))); \
++ LSS_BODY(1, type, name, LSS_SYSCALL_ARG(arg1)); \
+ }
+ #undef _syscall2
+ #define _syscall2(type,name,type1,arg1,type2,arg2) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2) { \
+- LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2))); \
++ LSS_BODY(2, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2));\
+ }
+ #undef _syscall3
+ #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \
+- LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2)), \
+- "d" ((long)(arg3))); \
++ LSS_BODY(3, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
++ LSS_SYSCALL_ARG(arg3)); \
+ }
+ #undef _syscall4
+ #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
+- long __res; \
+- __asm__ __volatile__("movq %5,%%r10; syscall" : \
+- "=a" (__res) : "0" (__NR_##name), \
+- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
+- "r" ((long)(arg4)) : "r10", "r11", "rcx", "memory"); \
+- LSS_RETURN(type, __res); \
++ LSS_BODY(4, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
++ LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4));\
+ }
+ #undef _syscall5
+ #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+ type5,arg5) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5) { \
+- long __res; \
+- __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; syscall" : \
+- "=a" (__res) : "0" (__NR_##name), \
+- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
+- "r" ((long)(arg4)), "r" ((long)(arg5)) : \
+- "r8", "r10", "r11", "rcx", "memory"); \
+- LSS_RETURN(type, __res); \
++ LSS_BODY(5, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
++ LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \
++ LSS_SYSCALL_ARG(arg5)); \
+ }
+ #undef _syscall6
+ #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+ type5,arg5,type6,arg6) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5, type6 arg6) { \
+- long __res; \
+- __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; movq %7,%%r9;" \
+- "syscall" : \
+- "=a" (__res) : "0" (__NR_##name), \
+- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
+- "r" ((long)(arg4)), "r" ((long)(arg5)), "r" ((long)(arg6)) : \
+- "r8", "r9", "r10", "r11", "rcx", "memory"); \
+- LSS_RETURN(type, __res); \
++ LSS_BODY(6, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
++ LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \
++ LSS_SYSCALL_ARG(arg5), LSS_SYSCALL_ARG(arg6));\
+ }
+ LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+ int flags, void *arg, int *parent_tidptr,
+ void *newtls, int *child_tidptr) {
+- long __res;
++ long long __res;
+ {
+ __asm__ __volatile__(/* if (fn == NULL)
+ * return -EINVAL;
+@@ -1145,8 +1214,13 @@
+ "1:\n"
+ : "=a" (__res)
+ : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
+- "r"(fn), "S"(child_stack), "D"(flags), "r"(arg),
+- "d"(parent_tidptr), "g"(newtls), "g"(child_tidptr)
++ "r"(LSS_SYSCALL_ARG(fn)),
++ "S"(LSS_SYSCALL_ARG(child_stack)),
++ "D"(LSS_SYSCALL_ARG(flags)),
++ "r"(LSS_SYSCALL_ARG(arg)),
++ "d"(LSS_SYSCALL_ARG(parent_tidptr)),
++ "r"(LSS_SYSCALL_ARG(newtls)),
++ "r"(LSS_SYSCALL_ARG(child_tidptr))
+ : "rsp", "memory", "r8", "r10", "r11", "rcx");
+ }
+ LSS_RETURN(int, __res);
+@@ -1159,7 +1233,7 @@
+ * Unfortunately, we cannot just reference the glibc version of this
+ * function, as glibc goes out of its way to make it inaccessible.
+ */
+- void (*res)(void);
++ long long res;
+ __asm__ __volatile__("call 2f\n"
+ "0:.align 16\n"
+ "1:movq %1,%%rax\n"
+@@ -1168,7 +1242,7 @@
+ "addq $(1b-0b),%0\n"
+ : "=a" (res)
+ : "i" (__NR_rt_sigreturn));
+- return res;
++ return (void (*)(void))(uintptr_t)res;
+ }
+ #elif defined(__arm__)
+ /* Most definitions of _syscallX() neglect to mark "memory" as being
+@@ -1797,8 +1871,16 @@
+ LSS_INLINE _syscall0(pid_t, _gettid)
+ LSS_INLINE _syscall2(int, kill, pid_t, p,
+ int, s)
+- LSS_INLINE _syscall3(off_t, lseek, int, f,
+- off_t, o, int, w)
++ #if defined(__x86_64__)
++ /* Need to make sure off_t isn't truncated to 32-bits under x32. */
++ LSS_INLINE off_t LSS_NAME(lseek)(int f, off_t o, int w) {
++ _LSS_BODY(3, off_t, lseek, off_t, LSS_SYSCALL_ARG(f), (uint64_t)(o),
++ LSS_SYSCALL_ARG(w));
++ }
++ #else
++ LSS_INLINE _syscall3(off_t, lseek, int, f,
++ off_t, o, int, w)
++ #endif
+ LSS_INLINE _syscall2(int, munmap, void*, s,
+ size_t, l)
+ LSS_INLINE _syscall5(void*, _mremap, void*, o,
+@@ -1835,10 +1917,13 @@
+ int, t, int, p)
+ #endif
+ #if defined(__x86_64__)
+- LSS_INLINE _syscall6(void*, mmap, void*, s,
+- size_t, l, int, p,
+- int, f, int, d,
+- __off64_t, o)
++ /* Need to make sure __off64_t isn't truncated to 32-bits under x32. */
++ LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d,
++ __off64_t o) {
++ LSS_BODY(6, void*, mmap, LSS_SYSCALL_ARG(s), LSS_SYSCALL_ARG(l),
++ LSS_SYSCALL_ARG(p), LSS_SYSCALL_ARG(f),
++ LSS_SYSCALL_ARG(d), (uint64_t)(o));
++ }
+
+ LSS_INLINE int LSS_NAME(sigaction)(int signum,
+ const struct kernel_sigaction *act,
+Only in gperftools-2.0/src/base: linux_syscall_support.h.svn-r190
+Only in gperftools-2.0/src/base: linuxthreads.cc.svn-r190
+diff -urP gperftools-2.0/src/base/spinlock.h gperftools-2.0-svn218/src/base/spinlock.h
+--- gperftools-2.0/src/base/spinlock.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/base/spinlock.h 2013-06-04 10:16:58.374841694 -0400
+@@ -31,11 +31,6 @@
+ * Author: Sanjay Ghemawat
+ */
+
+-//
+-// Fast spinlocks (at least on x86, a lock/unlock pair is approximately
+-// half the cost of a Mutex because the unlock just does a store instead
+-// of a compare-and-swap which is expensive).
+-
+ // SpinLock is async signal safe.
+ // If used within a signal handler, all lock holders
+ // should block the signal even outside the signal handler.
+@@ -95,10 +90,9 @@
+ // TODO(csilvers): uncomment the annotation when we figure out how to
+ // support this macro with 0 args (see thread_annotations.h)
+ inline void Unlock() /*UNLOCK_FUNCTION()*/ {
+- uint64 wait_cycles =
+- static_cast<uint64>(base::subtle::NoBarrier_Load(&lockword_));
+ ANNOTATE_RWLOCK_RELEASED(this, 1);
+- base::subtle::Release_Store(&lockword_, kSpinLockFree);
++ uint64 wait_cycles = static_cast<uint64>(
++ base::subtle::Release_AtomicExchange(&lockword_, kSpinLockFree));
+ if (wait_cycles != kSpinLockHeld) {
+ // Collect contentionz profile info, and speed the wakeup of any waiter.
+ // The wait_cycles value indicates how long this thread spent waiting
+Only in gperftools-2.0/src/base: spinlock_internal.cc.svn-r190
+Only in gperftools-2.0/src/base: sysinfo.cc.svn-r190
+diff -urP gperftools-2.0/src/base/sysinfo.h gperftools-2.0-svn218/src/base/sysinfo.h
+--- gperftools-2.0/src/base/sysinfo.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/base/sysinfo.h 2013-06-04 10:16:58.375841694 -0400
+@@ -38,7 +38,7 @@
+ #include <time.h>
+ #if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__))
+ #include <windows.h> // for DWORD
+-#include <TlHelp32.h> // for CreateToolhelp32Snapshot
++#include <tlhelp32.h> // for CreateToolhelp32Snapshot
+ #endif
+ #ifdef HAVE_UNISTD_H
+ #include <unistd.h> // for pid_t
+diff -urP gperftools-2.0/src/central_freelist.h gperftools-2.0-svn218/src/central_freelist.h
+--- gperftools-2.0/src/central_freelist.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/central_freelist.h 2013-06-04 10:16:57.724841684 -0400
+@@ -79,6 +79,16 @@
+ // page full of 5-byte objects would have 2 bytes memory overhead).
+ size_t OverheadBytes();
+
++ // Lock/Unlock the internal SpinLock. Used on the pthread_atfork call
++ // to set the lock in a consistent state before the fork.
++ void Lock() {
++ lock_.Lock();
++ }
++
++ void Unlock() {
++ lock_.Unlock();
++ }
++
+ private:
+ // TransferCache is used to cache transfers of
+ // sizemap.num_objects_to_move(size_class) back and forth between
+diff -urP gperftools-2.0/src/common.cc gperftools-2.0-svn218/src/common.cc
+--- gperftools-2.0/src/common.cc 2013-06-04 10:20:21.143844736 -0400
++++ gperftools-2.0-svn218/src/common.cc 2013-06-04 10:16:57.724841684 -0400
+@@ -30,12 +30,32 @@
+ // ---
+ // Author: Sanjay Ghemawat <opensource at google.com>
+
++#include <stdlib.h> // for getenv and strtol
+ #include "config.h"
+ #include "common.h"
+ #include "system-alloc.h"
++#include "base/spinlock.h"
+
+ namespace tcmalloc {
+
++// Define the maximum number of objects per class type to transfer between
++// thread and central caches.
++static int32 FLAGS_tcmalloc_transfer_num_objects;
++
++static const int32 kDefaultTransferNumObjecs = 32768;
++
++// The init function is provided to explicitly initialize the variable value
++// from the env. var to avoid C++ global construction that might defer its
++// initialization until after a malloc/new call.
++static inline void InitTCMallocTransferNumObjects()
++{
++ if (UNLIKELY(FLAGS_tcmalloc_transfer_num_objects == 0)) {
++ const char *envval = getenv("TCMALLOC_TRANSFER_NUM_OBJ");
++ FLAGS_tcmalloc_transfer_num_objects = !envval ? kDefaultTransferNumObjecs :
++ strtol(envval, NULL, 10);
++ }
++}
++
+ // Note: the following only works for "n"s that fit in 32-bits, but
+ // that is fine since we only use it for small sizes.
+ static inline int LgFloor(size_t n) {
+@@ -90,13 +110,16 @@
+ // - We go to the central freelist too often and we have to acquire
+ // its lock each time.
+ // This value strikes a balance between the constraints above.
+- if (num > 32) num = 32;
++ if (num > FLAGS_tcmalloc_transfer_num_objects)
++ num = FLAGS_tcmalloc_transfer_num_objects;
+
+ return num;
+ }
+
+ // Initialize the mapping arrays
+ void SizeMap::Init() {
++ InitTCMallocTransferNumObjects();
++
+ // Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
+ if (ClassIndex(0) < 0) {
+ Log(kCrash, __FILE__, __LINE__,
+@@ -189,12 +212,56 @@
+
+ // Metadata allocator -- keeps stats about how many bytes allocated.
+ static uint64_t metadata_system_bytes_ = 0;
++static const size_t kMetadataAllocChunkSize = 8*1024*1024;
++static const size_t kMetadataBigAllocThreshold = kMetadataAllocChunkSize / 8;
++// usually malloc uses larger alignments, but because metadata cannot
++// hold any fancy SIMD types, aligning on pointer size seems fine
++static const size_t kMetadataAllignment = sizeof(void *);
++
++static char *metadata_chunk_alloc_;
++static size_t metadata_chunk_avail_;
++
++static SpinLock metadata_alloc_lock(SpinLock::LINKER_INITIALIZED);
++
+ void* MetaDataAlloc(size_t bytes) {
+- void* result = TCMalloc_SystemAlloc(bytes, NULL);
+- if (result != NULL) {
+- metadata_system_bytes_ += bytes;
++ if (bytes >= kMetadataAllocChunkSize) {
++ void *rv = TCMalloc_SystemAlloc(bytes,
++ NULL, kMetadataAllignment);
++ if (rv != NULL) {
++ metadata_system_bytes_ += bytes;
++ }
++ return rv;
+ }
+- return result;
++
++ SpinLockHolder h(&metadata_alloc_lock);
++
++ // the following works by essentially turning address to integer of
++ // log_2 kMetadataAllignment size and negating it. I.e. negated
++ // value + original value gets 0 and that's what we want modulo
++ // kMetadataAllignment. Note, we negate before masking higher bits
++ // off, otherwise we'd have to mask them off after negation anyways.
++ intptr_t alignment = -reinterpret_cast<intptr_t>(metadata_chunk_alloc_) & (kMetadataAllignment-1);
++
++ if (metadata_chunk_avail_ < bytes + alignment) {
++ size_t real_size;
++ void *ptr = TCMalloc_SystemAlloc(kMetadataAllocChunkSize,
++ &real_size, kMetadataAllignment);
++ if (ptr == NULL) {
++ return NULL;
++ }
++
++ metadata_chunk_alloc_ = static_cast<char *>(ptr);
++ metadata_chunk_avail_ = real_size;
++
++ alignment = 0;
++ }
++
++ void *rv = static_cast<void *>(metadata_chunk_alloc_ + alignment);
++ bytes += alignment;
++ metadata_chunk_alloc_ += bytes;
++ metadata_chunk_avail_ -= bytes;
++ metadata_system_bytes_ += bytes;
++ return rv;
+ }
+
+ uint64_t metadata_system_bytes() { return metadata_system_bytes_; }
+Only in gperftools-2.0/src: common.cc.svn-r190
+diff -urP gperftools-2.0/src/common.h gperftools-2.0-svn218/src/common.h
+--- gperftools-2.0/src/common.h 2013-06-04 10:20:21.143844736 -0400
++++ gperftools-2.0-svn218/src/common.h 2013-06-04 10:16:58.382841694 -0400
+@@ -80,7 +80,7 @@
+ static const size_t kMinAlign = 16;
+ #elif defined(TCMALLOC_ALIGN_8BYTES)
+ static const size_t kPageShift = 13;
+-static const size_t kNumClasses = 93;
++static const size_t kNumClasses = 95;
+ // Unless we force to use 8 bytes alignment we use an alignment of
+ // at least 16 bytes to statisfy requirements for some SSE types.
+ // Keep in mind when using the 16 bytes alignment you can have a space
+@@ -88,7 +88,7 @@
+ static const size_t kMinAlign = 8;
+ #else
+ static const size_t kPageShift = 13;
+-static const size_t kNumClasses = 86;
++static const size_t kNumClasses = 88;
+ static const size_t kMinAlign = 16;
+ #endif
+ static const size_t kMaxThreadCacheSize = 4 << 20;
+Only in gperftools-2.0/src: common.h.svn-r190
+diff -urP gperftools-2.0/src/config.h.in gperftools-2.0-svn218/src/config.h.in
+--- gperftools-2.0/src/config.h.in 2013-06-04 10:20:21.143844736 -0400
++++ gperftools-2.0-svn218/src/config.h.in 2013-06-04 10:16:57.816841685 -0400
+@@ -56,6 +56,9 @@
+ /* Define to 1 if you have the <features.h> header file. */
+ #undef HAVE_FEATURES_H
+
++/* Define to 1 if you have the `fork' function. */
++#undef HAVE_FORK
++
+ /* Define to 1 if you have the `geteuid' function. */
+ #undef HAVE_GETEUID
+
+Only in gperftools-2.0/src: config.h.in.svn-r190
+Only in gperftools-2.0/src: debugallocation.cc.svn-r190
+Only in gperftools-2.0/src: getpc.h.svn-r190
+Only in gperftools-2.0/src/gperftools: malloc_extension.h.svn-r190
+Only in gperftools-2.0/src/gperftools: tcmalloc.h.in.svn-r190
+Only in gperftools-2.0/src: heap-checker.cc.svn-r190
+Only in gperftools-2.0/src: heap-profiler.cc.svn-r190
+Only in gperftools-2.0/src: heap-profile-table.cc.svn-r190
+Only in gperftools-2.0/src: malloc_extension.cc.svn-r190
+Only in gperftools-2.0/src: malloc_hook-inl.h.svn-r190
+Only in gperftools-2.0/src: memory_region_map.cc.svn-r190
+diff -urP gperftools-2.0/src/page_heap.cc gperftools-2.0-svn218/src/page_heap.cc
+--- gperftools-2.0/src/page_heap.cc 2013-06-04 10:20:21.145844736 -0400
++++ gperftools-2.0-svn218/src/page_heap.cc 2013-06-04 10:16:58.070841689 -0400
+@@ -108,6 +108,8 @@
+ return AllocLarge(n); // May be NULL
+ }
+
++static const size_t kForcedCoalesceInterval = 128*1024*1024;
++
+ Span* PageHeap::New(Length n) {
+ ASSERT(Check());
+ ASSERT(n > 0);
+@@ -116,6 +118,38 @@
+ if (result != NULL)
+ return result;
+
++ if (stats_.free_bytes != 0 && stats_.unmapped_bytes != 0
++ && stats_.free_bytes + stats_.unmapped_bytes >= stats_.system_bytes / 4
++ && (stats_.system_bytes / kForcedCoalesceInterval
++ != (stats_.system_bytes + (n << kPageShift)) / kForcedCoalesceInterval)) {
++ // We're about to grow heap, but there are lots of free pages.
++ // tcmalloc's design decision to keep unmapped and free spans
++ // separately and never coalesce them means that sometimes there
++ // can be free pages span of sufficient size, but it consists of
++ // "segments" of different type so page heap search cannot find
++ // it. In order to prevent growing heap and wasting memory in such
++ // case we're going to unmap all free pages. So that all free
++ // spans are maximally coalesced.
++ //
++ // We're also limiting 'rate' of going into this path to be at
++ // most once per 128 megs of heap growth. Otherwise programs that
++ // grow heap frequently (and that means by small amount) could be
++ // penalized with higher count of minor page faults.
++ //
++ // See also large_heap_fragmentation_unittest.cc and
++ // https://code.google.com/p/gperftools/issues/detail?id=368
++ ReleaseAtLeastNPages(static_cast<Length>(0x7fffffff));
++
++ // then try again. If we are forced to grow heap because of large
++ // spans fragmentation and not because of problem described above,
++ // then at the very least we've just unmapped free but
++ // insufficiently big large spans back to OS. So in case of really
++ // unlucky memory fragmentation we'll be consuming virtual address
++ // space, but not real memory
++ result = SearchFreeAndLargeLists(n);
++ if (result != NULL) return result;
++ }
++
+ // Grow the heap and try again.
+ if (!GrowHeap(n)) {
+ ASSERT(Check());
+Only in gperftools-2.0/src: page_heap.cc.svn-r190
+Only in gperftools-2.0/src: page_heap.h.svn-r190
+Only in gperftools-2.0/src: pprof.svn-r190
+Only in gperftools-2.0/src: profiler.cc.svn-r190
+diff -urP gperftools-2.0/src/static_vars.cc gperftools-2.0-svn218/src/static_vars.cc
+--- gperftools-2.0/src/static_vars.cc 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/static_vars.cc 2013-06-04 10:16:57.817841685 -0400
+@@ -39,6 +39,39 @@
+
+ namespace tcmalloc {
+
++#if defined(HAVE_FORK) && defined(HAVE_PTHREAD)
++// The following two functions are registered via pthread_atfork to make
++// sure the central_cache locks remain in a consistent state in the forked
++// version of the thread.
++
++static
++void CentralCacheLockAll()
++{
++ Static::pageheap_lock()->Lock();
++ for (int i = 0; i < kNumClasses; ++i)
++ Static::central_cache()[i].Lock();
++}
++
++static
++void CentralCacheUnlockAll()
++{
++ for (int i = 0; i < kNumClasses; ++i)
++ Static::central_cache()[i].Unlock();
++ Static::pageheap_lock()->Unlock();
++}
++#endif
++
++static inline
++void SetupAtForkLocksHandler()
++{
++#if defined(HAVE_FORK) && defined(HAVE_PTHREAD)
++ pthread_atfork(CentralCacheLockAll, // parent calls before fork
++ CentralCacheUnlockAll, // parent calls after fork
++ CentralCacheUnlockAll); // child calls after fork
++#endif
++}
++
++
+ SpinLock Static::pageheap_lock_(SpinLock::LINKER_INITIALIZED);
+ SizeMap Static::sizemap_;
+ CentralFreeListPadded Static::central_cache_[kNumClasses];
+@@ -49,6 +82,7 @@
+ StackTrace* Static::growth_stacks_ = NULL;
+ PageHeap* Static::pageheap_ = NULL;
+
++
+ void Static::InitStaticVars() {
+ sizemap_.Init();
+ span_allocator_.Init();
+@@ -61,6 +95,8 @@
+ for (int i = 0; i < kNumClasses; ++i) {
+ central_cache_[i].Init(i);
+ }
++ SetupAtForkLocksHandler();
++
+ // It's important to have PageHeap allocated, not in static storage,
+ // so that HeapLeakChecker does not consider all the byte patterns stored
+ // in is caches as pointers that are sources of heap object liveness,
+Only in gperftools-2.0/src: static_vars.h.svn-r190
+Only in gperftools-2.0/src: symbolize.cc.svn-r190
+Only in gperftools-2.0/src: system-alloc.cc.svn-r190
+Only in gperftools-2.0/src: system-alloc.h.svn-r190
+Only in gperftools-2.0/src: tcmalloc.cc.svn-r190
+diff -urP gperftools-2.0/src/tests/atomicops_unittest.cc gperftools-2.0-svn218/src/tests/atomicops_unittest.cc
+--- gperftools-2.0/src/tests/atomicops_unittest.cc 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/tests/atomicops_unittest.cc 2013-06-04 10:16:58.072841689 -0400
+@@ -38,13 +38,14 @@
+ #define GG_ULONGLONG(x) static_cast<uint64>(x)
+
+ template <class AtomicType>
+-static void TestAtomicIncrement() {
++static void TestAtomicIncrement(AtomicType (*atomic_increment_func)
++ (volatile AtomicType*, AtomicType)) {
+ // For now, we just test single threaded execution
+
+- // use a guard value to make sure the NoBarrier_AtomicIncrement doesn't go
++ // use a guard value to make sure the atomic_increment_func doesn't go
+ // outside the expected address bounds. This is in particular to
+ // test that some future change to the asm code doesn't cause the
+- // 32-bit NoBarrier_AtomicIncrement doesn't do the wrong thing on 64-bit
++ // 32-bit atomic_increment_func doesn't do the wrong thing on 64-bit
+ // machines.
+ struct {
+ AtomicType prev_word;
+@@ -60,47 +61,47 @@
+ s.count = 0;
+ s.next_word = next_word_value;
+
+- ASSERT_EQ(1, base::subtle::NoBarrier_AtomicIncrement(&s.count, 1));
++ ASSERT_EQ(1, (*atomic_increment_func)(&s.count, 1));
+ ASSERT_EQ(1, s.count);
+ ASSERT_EQ(prev_word_value, s.prev_word);
+ ASSERT_EQ(next_word_value, s.next_word);
+
+- ASSERT_EQ(3, base::subtle::NoBarrier_AtomicIncrement(&s.count, 2));
++ ASSERT_EQ(3, (*atomic_increment_func)(&s.count, 2));
+ ASSERT_EQ(3, s.count);
+ ASSERT_EQ(prev_word_value, s.prev_word);
+ ASSERT_EQ(next_word_value, s.next_word);
+
+- ASSERT_EQ(6, base::subtle::NoBarrier_AtomicIncrement(&s.count, 3));
++ ASSERT_EQ(6, (*atomic_increment_func)(&s.count, 3));
+ ASSERT_EQ(6, s.count);
+ ASSERT_EQ(prev_word_value, s.prev_word);
+ ASSERT_EQ(next_word_value, s.next_word);
+
+- ASSERT_EQ(3, base::subtle::NoBarrier_AtomicIncrement(&s.count, -3));
++ ASSERT_EQ(3, (*atomic_increment_func)(&s.count, -3));
+ ASSERT_EQ(3, s.count);
+ ASSERT_EQ(prev_word_value, s.prev_word);
+ ASSERT_EQ(next_word_value, s.next_word);
+
+- ASSERT_EQ(1, base::subtle::NoBarrier_AtomicIncrement(&s.count, -2));
++ ASSERT_EQ(1, (*atomic_increment_func)(&s.count, -2));
+ ASSERT_EQ(1, s.count);
+ ASSERT_EQ(prev_word_value, s.prev_word);
+ ASSERT_EQ(next_word_value, s.next_word);
+
+- ASSERT_EQ(0, base::subtle::NoBarrier_AtomicIncrement(&s.count, -1));
++ ASSERT_EQ(0, (*atomic_increment_func)(&s.count, -1));
+ ASSERT_EQ(0, s.count);
+ ASSERT_EQ(prev_word_value, s.prev_word);
+ ASSERT_EQ(next_word_value, s.next_word);
+
+- ASSERT_EQ(-1, base::subtle::NoBarrier_AtomicIncrement(&s.count, -1));
++ ASSERT_EQ(-1, (*atomic_increment_func)(&s.count, -1));
+ ASSERT_EQ(-1, s.count);
+ ASSERT_EQ(prev_word_value, s.prev_word);
+ ASSERT_EQ(next_word_value, s.next_word);
+
+- ASSERT_EQ(-5, base::subtle::NoBarrier_AtomicIncrement(&s.count, -4));
++ ASSERT_EQ(-5, (*atomic_increment_func)(&s.count, -4));
+ ASSERT_EQ(-5, s.count);
+ ASSERT_EQ(prev_word_value, s.prev_word);
+ ASSERT_EQ(next_word_value, s.next_word);
+
+- ASSERT_EQ(0, base::subtle::NoBarrier_AtomicIncrement(&s.count, 5));
++ ASSERT_EQ(0, (*atomic_increment_func)(&s.count, 5));
+ ASSERT_EQ(0, s.count);
+ ASSERT_EQ(prev_word_value, s.prev_word);
+ ASSERT_EQ(next_word_value, s.next_word);
+@@ -111,9 +112,10 @@
+
+
+ template <class AtomicType>
+-static void TestCompareAndSwap() {
++static void TestCompareAndSwap(AtomicType (*compare_and_swap_func)
++ (volatile AtomicType*, AtomicType, AtomicType)) {
+ AtomicType value = 0;
+- AtomicType prev = base::subtle::NoBarrier_CompareAndSwap(&value, 0, 1);
++ AtomicType prev = (*compare_and_swap_func)(&value, 0, 1);
+ ASSERT_EQ(1, value);
+ ASSERT_EQ(0, prev);
+
+@@ -122,21 +124,22 @@
+ const AtomicType k_test_val = (GG_ULONGLONG(1) <<
+ (NUM_BITS(AtomicType) - 2)) + 11;
+ value = k_test_val;
+- prev = base::subtle::NoBarrier_CompareAndSwap(&value, 0, 5);
++ prev = (*compare_and_swap_func)(&value, 0, 5);
+ ASSERT_EQ(k_test_val, value);
+ ASSERT_EQ(k_test_val, prev);
+
+ value = k_test_val;
+- prev = base::subtle::NoBarrier_CompareAndSwap(&value, k_test_val, 5);
++ prev = (*compare_and_swap_func)(&value, k_test_val, 5);
+ ASSERT_EQ(5, value);
+ ASSERT_EQ(k_test_val, prev);
+ }
+
+
+ template <class AtomicType>
+-static void TestAtomicExchange() {
++static void TestAtomicExchange(AtomicType (*atomic_exchange_func)
++ (volatile AtomicType*, AtomicType)) {
+ AtomicType value = 0;
+- AtomicType new_value = base::subtle::NoBarrier_AtomicExchange(&value, 1);
++ AtomicType new_value = (*atomic_exchange_func)(&value, 1);
+ ASSERT_EQ(1, value);
+ ASSERT_EQ(0, new_value);
+
+@@ -145,28 +148,29 @@
+ const AtomicType k_test_val = (GG_ULONGLONG(1) <<
+ (NUM_BITS(AtomicType) - 2)) + 11;
+ value = k_test_val;
+- new_value = base::subtle::NoBarrier_AtomicExchange(&value, k_test_val);
++ new_value = (*atomic_exchange_func)(&value, k_test_val);
+ ASSERT_EQ(k_test_val, value);
+ ASSERT_EQ(k_test_val, new_value);
+
+ value = k_test_val;
+- new_value = base::subtle::NoBarrier_AtomicExchange(&value, 5);
++ new_value = (*atomic_exchange_func)(&value, 5);
+ ASSERT_EQ(5, value);
+ ASSERT_EQ(k_test_val, new_value);
+ }
+
+
+ template <class AtomicType>
+-static void TestAtomicIncrementBounds() {
++static void TestAtomicIncrementBounds(AtomicType (*atomic_increment_func)
++ (volatile AtomicType*, AtomicType)) {
+ // Test increment at the half-width boundary of the atomic type.
+ // It is primarily for testing at the 32-bit boundary for 64-bit atomic type.
+ AtomicType test_val = GG_ULONGLONG(1) << (NUM_BITS(AtomicType) / 2);
+ AtomicType value = test_val - 1;
+- AtomicType new_value = base::subtle::NoBarrier_AtomicIncrement(&value, 1);
++ AtomicType new_value = (*atomic_increment_func)(&value, 1);
+ ASSERT_EQ(test_val, value);
+ ASSERT_EQ(value, new_value);
+
+- base::subtle::NoBarrier_AtomicIncrement(&value, -1);
++ (*atomic_increment_func)(&value, -1);
+ ASSERT_EQ(test_val - 1, value);
+ }
+
+@@ -222,16 +226,28 @@
+
+ template <class AtomicType>
+ static void TestAtomicOps() {
+- TestCompareAndSwap<AtomicType>();
+- TestAtomicExchange<AtomicType>();
+- TestAtomicIncrementBounds<AtomicType>();
++ TestCompareAndSwap<AtomicType>(base::subtle::NoBarrier_CompareAndSwap);
++ TestCompareAndSwap<AtomicType>(base::subtle::Acquire_CompareAndSwap);
++ TestCompareAndSwap<AtomicType>(base::subtle::Release_CompareAndSwap);
++
++ TestAtomicExchange<AtomicType>(base::subtle::NoBarrier_AtomicExchange);
++ TestAtomicExchange<AtomicType>(base::subtle::Acquire_AtomicExchange);
++ TestAtomicExchange<AtomicType>(base::subtle::Release_AtomicExchange);
++
++ TestAtomicIncrementBounds<AtomicType>(
++ base::subtle::NoBarrier_AtomicIncrement);
++ TestAtomicIncrementBounds<AtomicType>(
++ base::subtle::Barrier_AtomicIncrement);
++
+ TestStore<AtomicType>();
+ TestLoad<AtomicType>();
+ }
+
+ int main(int argc, char** argv) {
+- TestAtomicIncrement<AtomicWord>();
+- TestAtomicIncrement<Atomic32>();
++ TestAtomicIncrement<AtomicWord>(base::subtle::NoBarrier_AtomicIncrement);
++ TestAtomicIncrement<AtomicWord>(base::subtle::Barrier_AtomicIncrement);
++ TestAtomicIncrement<Atomic32>(base::subtle::NoBarrier_AtomicIncrement);
++ TestAtomicIncrement<Atomic32>(base::subtle::Barrier_AtomicIncrement);
+
+ TestAtomicOps<AtomicWord>();
+ TestAtomicOps<Atomic32>();
+@@ -248,8 +264,10 @@
+ // If we ever *do* want to enable this, try adding -msse (or -mmmx?)
+ // to the CXXFLAGS in Makefile.am.
+ #if 0 and defined(BASE_HAS_ATOMIC64)
+- TestAtomicIncrement<base::subtle::Atomic64>();
+- TestAtomicOps<base::subtle::Atomic64>();
++ TestAtomicIncrement<base::subtle::Atomic64>(
++ base::subtle::NoBarrier_AtomicIncrement);
++ TestAtomicIncrement<base::subtle::Atomic64>(
++ base::subtle::Barrier_AtomicIncrement);
+ #endif
+
+ printf("PASS\n");
+Only in gperftools-2.0/src/tests: getpc_test.cc.svn-r190
+diff -urP gperftools-2.0/src/tests/large_heap_fragmentation_unittest.cc gperftools-2.0-svn218/src/tests/large_heap_fragmentation_unittest.cc
+--- gperftools-2.0/src/tests/large_heap_fragmentation_unittest.cc 1969-12-31 19:00:00.000000000 -0500
++++ gperftools-2.0-svn218/src/tests/large_heap_fragmentation_unittest.cc 2013-06-04 10:16:58.073841689 -0400
+@@ -0,0 +1,62 @@
++// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
++// Redistribution and use in source and binary forms, with or without
++// modification, are permitted provided that the following conditions are
++// met:
++//
++// * Redistributions of source code must retain the above copyright
++// notice, this list of conditions and the following disclaimer.
++// * Redistributions in binary form must reproduce the above
++// copyright notice, this list of conditions and the following disclaimer
++// in the documentation and/or other materials provided with the
++// distribution.
++// * Neither the name of Google Inc. nor the names of its
++// contributors may be used to endorse or promote products derived from
++// this software without specific prior written permission.
++//
++// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
++// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
++// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
++// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
++// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
++// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
++// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++// This is a unit test for exercising fragmentation of large (over 1
++// meg) page spans. It makes sure that allocations/releases of
++// increasing memory chunks do not blowup memory
++// usage. See also https://code.google.com/p/gperftools/issues/detail?id=368
++
++
++#include <stddef.h>
++#include <stdlib.h>
++#include <stdio.h>
++
++#include "base/logging.h"
++#include "common.h"
++#include <gperftools/malloc_extension.h>
++
++
++int main (int argc, char** argv) {
++ for (int pass = 1; pass <= 3; pass++) {
++ size_t size = 100*1024*1024;
++ while (size < 500*1024*1024) {
++ void *ptr = malloc(size);
++ free(ptr);
++ size += 20000;
++
++ size_t heap_size = static_cast<size_t>(-1);
++ MallocExtension::instance()->GetNumericProperty("generic.heap_size",
++ &heap_size);
++
++
++ CHECK_LT(heap_size, 1*1024*1024*1024);
++ }
++ }
++
++ printf("PASS\n");
++ return 0;
++}
+diff -urP gperftools-2.0/src/tests/malloc_extension_c_test.c gperftools-2.0-svn218/src/tests/malloc_extension_c_test.c
+--- gperftools-2.0/src/tests/malloc_extension_c_test.c 2012-02-03 14:18:23.000000000 -0500
++++ gperftools-2.0-svn218/src/tests/malloc_extension_c_test.c 2013-06-04 10:16:58.077841689 -0400
+@@ -59,6 +59,16 @@
+ g_delete_hook_calls++;
+ }
+
++static
++void *forced_malloc(size_t size)
++{
++ void *rv = malloc(size);
++ if (!rv) {
++ FAIL("malloc is not supposed to fail here");
++ }
++ return rv;
++}
++
+ void TestMallocHook(void) {
+ /* TODO(csilvers): figure out why we get:
+ * E0100 00:00:00.000000 7383 malloc_hook.cc:244] RAW: google_malloc section is missing, thus InHookCaller is broken!
+@@ -78,8 +88,9 @@
+ if (!MallocHook_AddDeleteHook(&TestDeleteHook)) {
+ FAIL("Failed to add delete hook");
+ }
+- free(malloc(10));
+- free(malloc(20));
++
++ free(forced_malloc(10));
++ free(forced_malloc(20));
+ if (g_new_hook_calls != 2) {
+ FAIL("Wrong number of calls to the new hook");
+ }
+Only in gperftools-2.0/src/tests: malloc_hook_test.cc.svn-r190
+Only in gperftools-2.0/src/tests: markidle_unittest.cc.svn-r190
+Only in gperftools-2.0/src/tests: page_heap_test.cc.svn-r190
+Only in gperftools-2.0/src/tests: profiler_unittest.sh.svn-r190
+diff -urP gperftools-2.0/src/tests/tcmalloc_unittest.cc gperftools-2.0-svn218/src/tests/tcmalloc_unittest.cc
+--- gperftools-2.0/src/tests/tcmalloc_unittest.cc 2013-06-04 10:20:21.147844736 -0400
++++ gperftools-2.0-svn218/src/tests/tcmalloc_unittest.cc 2013-06-04 10:16:58.073841689 -0400
+@@ -725,7 +725,7 @@
+ // Note the ... in the hook signature: we don't care what arguments
+ // the hook takes.
+ #define MAKE_HOOK_CALLBACK(hook_type) \
+- static int g_##hook_type##_calls = 0; \
++ static volatile int g_##hook_type##_calls = 0; \
+ static void IncrementCallsTo##hook_type(...) { \
+ g_##hook_type##_calls++; \
+ } \
+@@ -760,7 +760,7 @@
+ CHECK((p % sizeof(void*)) == 0);
+ CHECK((p % sizeof(double)) == 0);
+
+- // Must have 16-byte (or 8-byte in case of -DTCMALLOC_ALIGN_8BYTES)
++ // Must have 16-byte (or 8-byte in case of -DTCMALLOC_ALIGN_8BYTES)
+ // alignment for large enough objects
+ if (size >= kMinAlign) {
+ CHECK((p % kMinAlign) == 0);
+Only in gperftools-2.0/src/tests: tcmalloc_unittest.cc.svn-r190
+diff -urP gperftools-2.0/src/tests/tcmalloc_unittest.sh gperftools-2.0-svn218/src/tests/tcmalloc_unittest.sh
+--- gperftools-2.0/src/tests/tcmalloc_unittest.sh 1969-12-31 19:00:00.000000000 -0500
++++ gperftools-2.0-svn218/src/tests/tcmalloc_unittest.sh 2013-06-04 10:16:58.075841689 -0400
+@@ -0,0 +1,68 @@
++#!/bin/sh
++
++# Copyright (c) 2013, Google Inc.
++# All rights reserved.
++#
++# Redistribution and use in source and binary forms, with or without
++# modification, are permitted provided that the following conditions are
++# met:
++#
++# * Redistributions of source code must retain the above copyright
++# notice, this list of conditions and the following disclaimer.
++# * Redistributions in binary form must reproduce the above
++# copyright notice, this list of conditions and the following disclaimer
++# in the documentation and/or other materials provided with the
++# distribution.
++# * Neither the name of Google Inc. nor the names of its
++# contributors may be used to endorse or promote products derived from
++# this software without specific prior written permission.
++#
++# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
++# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
++# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
++# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
++# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
++# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
++# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++# ---
++# Author: Adhemerval Zanella
++#
++# Runs the tcmalloc_unittest with various environment variables.
++# This is necessary because tuning some environment variables
++# (TCMALLOC_TRANSFER_NUM_OBJ for instance) should not change program
++# behavior, just performance.
++
++BINDIR="${BINDIR:-.}"
++TCMALLOC_UNITTEST="${1:-$BINDIR}/tcmalloc_unittest"
++
++TMPDIR=/tmp/tcmalloc_unittest
++rm -rf $TMPDIR || exit 2
++mkdir $TMPDIR || exit 3
++
++# $1: value of tcmalloc_unittest env. var.
++run_check_transfer_num_obj() {
++ [ -n "$1" ] && export TCMALLOC_TRANSFER_NUM_OBJ="$1"
++
++ echo -n "Testing $TCMALLOC_UNITTEST with TCMALLOC_TRANSFER_NUM_OBJ=$1 ... "
++ if $TCMALLOC_UNITTEST > $TMPDIR/output 2>&1; then
++ echo "OK"
++ else
++ echo "FAILED"
++ echo "Output from the failed run:"
++ echo "----"
++ cat $TMPDIR/output
++ echo "----"
++ exit 4
++ fi
++}
++
++run_check_transfer_num_obj ""
++run_check_transfer_num_obj "40"
++run_check_transfer_num_obj "4096"
++
++echo "PASS"
+Only in gperftools-2.0/src: thread_cache.cc.svn-r190
+Only in gperftools-2.0/src: thread_cache.h.svn-r190
+diff -urP gperftools-2.0/src/windows/mingw.h gperftools-2.0-svn218/src/windows/mingw.h
+--- gperftools-2.0/src/windows/mingw.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/windows/mingw.h 2013-06-04 10:16:57.682841683 -0400
+@@ -60,6 +60,8 @@
+ // pretend the pthreads wrapper doesn't exist, even when it does.
+ #undef HAVE_PTHREAD
+
++#define HAVE_PID_T
++
+ #include "windows/port.h"
+
+ #endif /* __MINGW32__ */
+diff -urP gperftools-2.0/src/windows/patch_functions.cc gperftools-2.0-svn218/src/windows/patch_functions.cc
+--- gperftools-2.0/src/windows/patch_functions.cc 2012-02-03 14:18:23.000000000 -0500
++++ gperftools-2.0-svn218/src/windows/patch_functions.cc 2013-06-04 10:16:57.683841683 -0400
+@@ -85,7 +85,7 @@
+ #include <windows.h>
+ #include <stdio.h>
+ #include <malloc.h> // for _msize and _expand
+-#include <Psapi.h> // for EnumProcessModules, GetModuleInformation, etc.
++#include <psapi.h> // for EnumProcessModules, GetModuleInformation, etc.
+ #include <set>
+ #include <map>
+ #include <vector>
+Only in gperftools-2.0/src/windows: port.cc.svn-r190
+diff -urP gperftools-2.0/src/windows/port.h gperftools-2.0-svn218/src/windows/port.h
+--- gperftools-2.0/src/windows/port.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/windows/port.h 2013-06-04 10:16:57.683841683 -0400
+@@ -390,7 +390,10 @@
+
+ /* ----------------------------------- SYSTEM/PROCESS */
+
++#ifndef HAVE_PID_T
+ typedef int pid_t;
++#endif
++
+ #if __STDC__ && !defined(__MINGW32__)
+ inline pid_t getpid(void) { return _getpid(); }
+ #endif
+diff -urP gperftools-2.0/src/windows/preamble_patcher.cc gperftools-2.0-svn218/src/windows/preamble_patcher.cc
+--- gperftools-2.0/src/windows/preamble_patcher.cc 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/windows/preamble_patcher.cc 2013-06-04 10:16:57.601841682 -0400
+@@ -103,6 +103,7 @@
+ new_target = target + 2 + relative_offset;
+ } else if (target[0] == ASM_JMP32ABS_0 &&
+ target[1] == ASM_JMP32ABS_1) {
++ jmp32rel:
+ // Visual studio seems to sometimes do it this way instead of the
+ // previous way. Not sure what the rules are, but it was happening
+ // with operator new in some binaries.
+@@ -118,6 +119,18 @@
+ memcpy(&new_target_v, reinterpret_cast<void*>(target + 2), 4);
+ }
+ new_target = reinterpret_cast<unsigned char*>(*new_target_v);
++ } else if (kIs64BitBinary && target[0] == ASM_REXW
++ && target[1] == ASM_JMP32ABS_0
++ && target[2] == ASM_JMP32ABS_1) {
++ // in Visual Studio 2012 we're seeing jump like that:
++ // rex.W jmpq *0x11d019(%rip)
++ //
++ // according to docs I have, rex prefix is actually unneeded and
++ // can be ignored. I.e. docs say for jumps like that operand
++ // already defaults to 64-bit. But clearly it breaks abs. jump
++ // detection above and we just skip rex
++ target++;
++ goto jmp32rel;
+ } else {
+ break;
+ }
+@@ -535,6 +548,12 @@
+ return (*(target) & 0x70) == 0x70 && instruction_size == 2;
+ }
+
++bool PreamblePatcher::IsShortJump(
++ unsigned char* target,
++ unsigned int instruction_size) {
++ return target[0] == 0xeb && instruction_size == 2;
++}
++
+ bool PreamblePatcher::IsNearConditionalJump(
+ unsigned char* target,
+ unsigned int instruction_size) {
+@@ -575,7 +594,9 @@
+ unsigned char* target,
+ unsigned int* target_bytes,
+ unsigned int target_size) {
+- unsigned char* original_jump_dest = (source + 2) + source[1];
++ // note: rel8 offset is signed. Thus we need to ask for signed char
++ // to negative offsets right
++ unsigned char* original_jump_dest = (source + 2) + static_cast<signed char>(source[1]);
+ unsigned char* stub_jump_from = target + 6;
+ __int64 fixup_jump_offset = original_jump_dest - stub_jump_from;
+ if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) {
+@@ -597,6 +618,36 @@
+ reinterpret_cast<void*>(&fixup_jump_offset), 4);
+ }
+
++ return SIDESTEP_SUCCESS;
++}
++
++SideStepError PreamblePatcher::PatchShortJump(
++ unsigned char* source,
++ unsigned int instruction_size,
++ unsigned char* target,
++ unsigned int* target_bytes,
++ unsigned int target_size) {
++ // note: rel8 offset is _signed_. Thus we need signed char here.
++ unsigned char* original_jump_dest = (source + 2) + static_cast<signed char>(source[1]);
++ unsigned char* stub_jump_from = target + 5;
++ __int64 fixup_jump_offset = original_jump_dest - stub_jump_from;
++ if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) {
++ SIDESTEP_ASSERT(false &&
++ "Unable to fix up short jump because target"
++ " is too far away.");
++ return SIDESTEP_JUMP_INSTRUCTION;
++ }
++
++ *target_bytes = 5;
++ if (target_size > *target_bytes) {
++ // Convert the short jump to a near jump.
++ //
++ // e9 xx xx xx xx = jmp rel32off
++ target[0] = 0xe9;
++ memcpy(reinterpret_cast<void*>(target + 1),
++ reinterpret_cast<void*>(&fixup_jump_offset), 4);
++ }
++
+ return SIDESTEP_SUCCESS;
+ }
+
+diff -urP gperftools-2.0/src/windows/preamble_patcher.h gperftools-2.0-svn218/src/windows/preamble_patcher.h
+--- gperftools-2.0/src/windows/preamble_patcher.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/windows/preamble_patcher.h 2013-06-04 10:16:57.601841682 -0400
+@@ -467,6 +467,8 @@
+ static bool IsShortConditionalJump(unsigned char* target,
+ unsigned int instruction_size);
+
++ static bool IsShortJump(unsigned char *target, unsigned int instruction_size);
++
+ // Helper routine that determines if a target instruction is a near
+ // conditional jump.
+ //
+@@ -547,6 +549,12 @@
+ unsigned int* target_bytes,
+ unsigned int target_size);
+
++ static SideStepError PatchShortJump(unsigned char* source,
++ unsigned int instruction_size,
++ unsigned char* target,
++ unsigned int* target_bytes,
++ unsigned int target_size);
++
+ // Helper routine that converts an instruction that will convert various
+ // jump-like instructions to corresponding instructions in the target buffer.
+ // What this routine does is fix up the relative offsets contained in jump
+diff -urP gperftools-2.0/src/windows/preamble_patcher_with_stub.cc gperftools-2.0-svn218/src/windows/preamble_patcher_with_stub.cc
+--- gperftools-2.0/src/windows/preamble_patcher_with_stub.cc 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/windows/preamble_patcher_with_stub.cc 2013-06-04 10:16:57.682841683 -0400
+@@ -150,6 +150,11 @@
+ preamble_stub + stub_bytes,
+ &jump_bytes,
+ stub_size - stub_bytes);
++ } else if (IsShortJump(target + preamble_bytes, cur_bytes)) {
++ jump_ret = PatchShortJump(target + preamble_bytes, cur_bytes,
++ preamble_stub + stub_bytes,
++ &jump_bytes,
++ stub_size - stub_bytes);
+ } else if (IsNearConditionalJump(target + preamble_bytes, cur_bytes) ||
+ IsNearRelativeJump(target + preamble_bytes, cur_bytes) ||
+ IsNearAbsoluteCall(target + preamble_bytes, cur_bytes) ||
+Only in gperftools-2.0/src/windows: TODO.svn-r190
diff --git a/gperftools.spec b/gperftools.spec
index 7046ecb..3f5e0f9 100644
--- a/gperftools.spec
+++ b/gperftools.spec
@@ -2,7 +2,7 @@
Name: gperftools
Version: 2.0
-Release: 10%{?dist}
+Release: 11%{?dist}
License: BSD
Group: Development/Tools
Summary: Very fast malloc and performance analysis tools
@@ -10,10 +10,14 @@ URL: http://code.google.com/p/gperftools/
Source0: http://gperftools.googlecode.com/files/%{name}-%{version}.tar.gz
# Update to latest svn, since google forgets how to make releases
Patch0: gperftools-svn-r190.patch
+Patch1: gperftools-2.0-svn190-to-svn218.patch
ExclusiveArch: %{ix86} x86_64 ppc ppc64 %{arm}
%ifnarch ppc ppc64
BuildRequires: libunwind-devel
%endif
+BuildRequires: autoconf, automake, libtool
+Requires: gperftools-devel = %{version}-%{release}
+Requires: pprof = %{version}-%{release}
%description
Perf Tools is a collection of performance analysis tools, including a
@@ -21,6 +25,9 @@ high-performance multi-threaded malloc() implementation that works
particularly well with threads and STL, a thread-friendly heap-checker,
a heap profiler, and a cpu-profiler.
+This is a metapackage which pulls in all of the gperftools (and pprof)
+binaries, libraries, and development headers, so that you can use them.
+
%package devel
Summary: Development libraries and headers for gperftools
Group: Development/Libraries
@@ -52,6 +59,7 @@ Pprof is a heap and CPU profiler tool, part of the gperftools suite.
%prep
%setup -q
%patch0 -p1 -b .svn-r190
+%patch1 -p1 -b .svn-r218
# Fix end-of-line encoding
sed -i 's/\r//' README_windows.txt
@@ -59,8 +67,10 @@ sed -i 's/\r//' README_windows.txt
# No need to have exec permissions on source code
chmod -x src/sampler.h src/sampler.cc
+autoreconf -i
+
%build
-CXXFLAGS=`echo $RPM_OPT_FLAGS -DTCMALLOC_LARGE_PAGES| sed -e 's/-Wp,-D_FORTIFY_SOURCE=2//g'`
+CXXFLAGS=`echo $RPM_OPT_FLAGS -fno-strict-aliasing -Wno-unused-local-typedefs -DTCMALLOC_LARGE_PAGES| sed -e 's/-Wp,-D_FORTIFY_SOURCE=2//g'`
%configure --disable-static
# Bad rpath!
@@ -82,13 +92,15 @@ rm -rf %{buildroot}%{_docdir}/%{name}-%{version}/INSTALL
%check
# http://code.google.com/p/google-perftools/issues/detail?id=153
%ifnarch ppc
-# Their test suite is junk. Disabling.
+# Their test suite is almost always broken.
# LD_LIBRARY_PATH=./.libs make check
%endif
%post libs -p /sbin/ldconfig
%postun libs -p /sbin/ldconfig
+%files
+
%files -n pprof
%{_bindir}/pprof
%{_mandir}/man1/*
@@ -104,6 +116,11 @@ rm -rf %{buildroot}%{_docdir}/%{name}-%{version}/INSTALL
%{_libdir}/*.so.*
%changelog
+* Tue Jun 4 2013 Tom Callaway <spot at fedoraproject.org> - 2.0-11
+- pass -fno-strict-aliasing
+- create "gperftools" metapackage.
+- update to svn r218 (cleanups, some ARM fixes)
+
* Thu Mar 14 2013 Dan HorĂ¡k <dan[at]danny.cz> - 2.0-10
- build on ppc64 as well
More information about the scm-commits
mailing list