[golang] archive/tar patches for performance

Vincent Batts vbatts at fedoraproject.org
Mon Jul 7 20:12:05 UTC 2014


commit 408684902613b408364682c29ac0199b4daef0fa
Author: Vincent Batts <vbatts at redhat.com>
Date:   Mon Jul 7 16:11:04 2014 -0400

    archive/tar patches for performance

 go1.3-tar_reuse_buffer_readHeader.patch  |   64 ++++++++++++++++++++++++++++++
 go1.3-tar_reuse_buffer_writeHeader.patch |   56 ++++++++++++++++++++++++++
 golang.spec                              |   14 ++++++-
 3 files changed, 133 insertions(+), 1 deletions(-)
---
diff --git a/go1.3-tar_reuse_buffer_readHeader.patch b/go1.3-tar_reuse_buffer_readHeader.patch
new file mode 100644
index 0000000..1c6693c
--- /dev/null
+++ b/go1.3-tar_reuse_buffer_readHeader.patch
@@ -0,0 +1,64 @@
+# HG changeset patch
+# User Cristian Staretu <unclejacksons at gmail.com>
+# Date 1404344479 -36000
+#      Thu Jul 03 09:41:19 2014 +1000
+# Node ID 17404efd6b02d4b3acd17070e3f89de97a145877
+# Parent  837348e418f33fc7a242f56dbe2feff829532526
+archive/tar: reuse temporary buffer in readHeader
+
+A temporary 512 bytes buffer is allocated for every call to
+readHeader. This buffer isn't returned to the caller and it could
+be reused to lower the number of memory allocations.
+
+This CL improves it by using a pool and zeroing out the buffer before
+putting it back into the pool.
+
+benchmark                  old ns/op     new ns/op     delta
+BenchmarkListFiles100k     545249903     538832687     -1.18%
+
+benchmark                  old allocs    new allocs    delta
+BenchmarkListFiles100k     2105167       2005692       -4.73%
+
+benchmark                  old bytes     new bytes     delta
+BenchmarkListFiles100k     105903472     54831527      -48.22%
+
+This improvement is very important if your code has to deal with a lot
+of tarballs which contain a lot of files.
+
+LGTM=dsymonds
+R=golang-codereviews, dave, dsymonds, bradfitz
+CC=golang-codereviews
+https://codereview.appspot.com/108240044
+
+Committer: David Symonds <dsymonds at golang.org>
+
+diff -r 837348e418f3 -r 17404efd6b02 src/pkg/archive/tar/reader.go
+--- a/src/pkg/archive/tar/reader.go	Thu Jul 03 09:40:53 2014 +1000
++++ b/src/pkg/archive/tar/reader.go	Thu Jul 03 09:41:19 2014 +1000
+@@ -29,10 +29,11 @@
+ // The Next method advances to the next file in the archive (including the first),
+ // and then it can be treated as an io.Reader to access the file's data.
+ type Reader struct {
+-	r    io.Reader
+-	err  error
+-	pad  int64          // amount of padding (ignored) after current file entry
+-	curr numBytesReader // reader for current file entry
++	r       io.Reader
++	err     error
++	pad     int64           // amount of padding (ignored) after current file entry
++	curr    numBytesReader  // reader for current file entry
++	hdrBuff [blockSize]byte // buffer to use in readHeader
+ }
+ 
+ // A numBytesReader is an io.Reader with a numBytes method, returning the number
+@@ -426,7 +427,9 @@
+ }
+ 
+ func (tr *Reader) readHeader() *Header {
+-	header := make([]byte, blockSize)
++	header := tr.hdrBuff[:]
++	copy(header, zeroBlock)
++
+ 	if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
+ 		return nil
+ 	}
diff --git a/go1.3-tar_reuse_buffer_writeHeader.patch b/go1.3-tar_reuse_buffer_writeHeader.patch
new file mode 100644
index 0000000..6cd8969
--- /dev/null
+++ b/go1.3-tar_reuse_buffer_writeHeader.patch
@@ -0,0 +1,56 @@
+# HG changeset patch
+# User Cristian Staretu <unclejacksons at gmail.com>
+# Date 1404344453 -36000
+#      Thu Jul 03 09:40:53 2014 +1000
+# Node ID 837348e418f33fc7a242f56dbe2feff829532526
+# Parent  c5f72a685e256457a0872f6587e2bb9500eac7c4
+archive/tar: reuse temporary buffer in writeHeader
+
+A temporary 512 bytes buffer is allocated for every call to
+writeHeader. This buffer could be reused the lower the number
+of memory allocations.
+
+benchmark                   old ns/op     new ns/op     delta
+BenchmarkWriteFiles100k     634622051     583810847     -8.01%
+
+benchmark                   old allocs     new allocs     delta
+BenchmarkWriteFiles100k     2701920        2602621        -3.68%
+
+benchmark                   old bytes     new bytes     delta
+BenchmarkWriteFiles100k     115383884     64349922      -44.23%
+
+This change is very important if your code has to write a lot of
+tarballs with a lot of files.
+
+LGTM=dsymonds
+R=golang-codereviews, dave, dsymonds
+CC=golang-codereviews
+https://codereview.appspot.com/107440043
+
+Committer: David Symonds <dsymonds at golang.org>
+
+diff -r c5f72a685e25 -r 837348e418f3 src/pkg/archive/tar/writer.go
+--- a/src/pkg/archive/tar/writer.go	Wed Jul 02 15:28:57 2014 -0700
++++ b/src/pkg/archive/tar/writer.go	Thu Jul 03 09:40:53 2014 +1000
+@@ -37,8 +37,9 @@
+ 	nb         int64 // number of unwritten bytes for current file entry
+ 	pad        int64 // amount of padding to write after current file entry
+ 	closed     bool
+-	usedBinary bool // whether the binary numeric field extension was used
+-	preferPax  bool // use pax header instead of binary numeric header
++	usedBinary bool            // whether the binary numeric field extension was used
++	preferPax  bool            // use pax header instead of binary numeric header
++	hdrBuff    [blockSize]byte // buffer to use in writeHeader
+ }
+ 
+ // NewWriter creates a new Writer writing to w.
+@@ -160,7 +161,8 @@
+ 	// subsecond time resolution, but for now let's just capture
+ 	// too long fields or non ascii characters
+ 
+-	header := make([]byte, blockSize)
++	header := tw.hdrBuff[:]
++	copy(header, zeroBlock)
+ 	s := slicer(header)
+ 
+ 	// keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
diff --git a/golang.spec b/golang.spec
index 3bd1740..e3e9776 100644
--- a/golang.spec
+++ b/golang.spec
@@ -39,7 +39,7 @@
 
 Name:           golang
 Version:        1.3
-Release:        1%{?dist}
+Release:        2%{?dist}
 Summary:        The Go Programming Language
 
 License:        BSD
@@ -73,6 +73,11 @@ Patch1:         golang-1.2-remove-ECC-p224.patch
 # http://code.google.com/p/go/issues/detail?id=6522
 Patch2:         ./golang-1.2-skipCpuProfileTest.patch
 
+# these patches can be dropped for go1.4
+# discovered working here https://github.com/dotcloud/docker/pull/6829
+Patch3:         ./go1.3-tar_reuse_buffer_readHeader.patch
+Patch4:         ./go1.3-tar_reuse_buffer_writeHeader.patch
+
 # Having documentation separate was broken
 Obsoletes:      %{name}-docs < 1.1-4
 
@@ -357,6 +362,10 @@ end
 # skip flaky test
 %patch2 -p1
 
+# performance for archive/tar
+%patch3 -p1
+%patch4 -p1
+
 # create a [dirty] gcc wrapper to allow us to build with our own flags
 # (dirty because it is spoofing 'gcc' since CC value is stored in the go tool)
 # TODO: remove this and just set CFLAGS/LDFLAGS once upstream supports it
@@ -973,6 +982,9 @@ GOROOT=%{goroot} GOOS=openbsd GOARCH=amd64 go install std
 
 
 %changelog
+* Mon Jul 07 2014 Vincent Batts <vbatts at fedoraproejct.org> - 1.3-2
+- archive/tar memory allocation improvements
+
 * Thu Jun 19 2014 Vincent Batts <vbatts at fedoraproejct.org> - 1.3-1
 - update to go1.3
 


More information about the scm-commits mailing list