[mesa] 9.2.3 upstream release

Igor Gnatenko ignatenkobrain at fedoraproject.org
Thu Nov 14 14:26:48 UTC 2013


commit 8002493cec883f0be4b95350357f99e85d839fbc
Author: Igor Gnatenko <i.gnatenko.brain at gmail.com>
Date:   Thu Nov 14 15:41:45 2013 +0400

    9.2.3 upstream release
    
    Signed-off-by: Igor Gnatenko <i.gnatenko.brain at gmail.com>

 .gitignore                                         |    1 +
 ...3xx-fix-color-inversion-on-mem-gmem-resto.patch |   31 +
 ...eno-a3xx-fix-viewport-on-gmem-mem-resolve.patch |   32 +
 ...dd-debug-option-to-disable-scissor-optimi.patch |  113 ++
 0004-freedreno-update-register-headers.patch       | 1238 ++++++++++++++++++++
 0005-freedreno-a3xx-some-texture-fixes.patch       |   65 +
 0006-freedreno-a3xx-compiler-fix-CMP.patch         |   45 +
 ...reno-a3xx-compiler-handle-saturate-on-dst.patch |   98 ++
 ...3xx-compiler-use-max_reg-rather-than-file.patch |   59 +
 ...3xx-compiler-cat4-cannot-use-const-reg-as.patch |  104 ++
 ...o-fix-segfault-when-no-color-buffer-bound.patch |  216 ++++
 ...3xx-compiler-make-compiler-errors-more-us.patch |  172 +++
 ...o-a3xx-compiler-bit-of-re-arrange-cleanup.patch |  420 +++++++
 0013-freedreno-a3xx-compiler-fix-SGT-SLT-etc.patch |  231 ++++
 0014-freedreno-a3xx-don-t-leak-so-much.patch       |   36 +
 ...dreno-a3xx-compiler-better-const-handling.patch |  376 ++++++
 ...no-a3xx-compiler-handle-sync-flags-better.patch |  128 ++
 ...-freedreno-updates-for-msm-drm-kms-driver.patch |  328 ++++++
 mesa.spec                                          |   44 +-
 sources                                            |    2 +-
 20 files changed, 3736 insertions(+), 3 deletions(-)
---
diff --git a/.gitignore b/.gitignore
index ebcc756..089b161 100644
--- a/.gitignore
+++ b/.gitignore
@@ -57,3 +57,4 @@ mesa-20100720.tar.bz2
 /mesa-20130902.tar.xz
 /mesa-20130919.tar.xz
 /mesa-20131113.tar.xz
+/mesa-20131114.tar.xz
diff --git a/0001-freedreno-a3xx-fix-color-inversion-on-mem-gmem-resto.patch b/0001-freedreno-a3xx-fix-color-inversion-on-mem-gmem-resto.patch
new file mode 100644
index 0000000..23b9bf6
--- /dev/null
+++ b/0001-freedreno-a3xx-fix-color-inversion-on-mem-gmem-resto.patch
@@ -0,0 +1,31 @@
+From 2d844be97fd5b6b0f02a94d4bb194c0bd19de6f9 Mon Sep 17 00:00:00 2001
+From: Rob Clark <robclark at freedesktop.org>
+Date: Sat, 13 Jul 2013 13:07:46 -0400
+Subject: [PATCH 01/17] freedreno/a3xx: fix color inversion on mem->gmem
+ restore
+
+Signed-off-by: Rob Clark <robclark at freedesktop.org>
+---
+ src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+index a7a4bf7..b8436c9 100644
+--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
++++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+@@ -279,9 +279,9 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf
+ 			CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+ 	OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(psurf->format)) |
+ 			0x40000000 | // XXX
+-			fd3_tex_swiz(psurf->format,  PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_GREEN,
+-					PIPE_SWIZZLE_RED, PIPE_SWIZZLE_ALPHA));
+-	OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(psurf->format)) |
++			fd3_tex_swiz(psurf->format,  PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
++					PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
++	OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) |
+ 			A3XX_TEX_CONST_1_WIDTH(psurf->width) |
+ 			A3XX_TEX_CONST_1_HEIGHT(psurf->height));
+ 	OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(rsc->pitch * rsc->cpp) |
+-- 
+1.8.4.2
+
diff --git a/0002-freedreno-a3xx-fix-viewport-on-gmem-mem-resolve.patch b/0002-freedreno-a3xx-fix-viewport-on-gmem-mem-resolve.patch
new file mode 100644
index 0000000..eede425
--- /dev/null
+++ b/0002-freedreno-a3xx-fix-viewport-on-gmem-mem-resolve.patch
@@ -0,0 +1,32 @@
+From b2a32254d65c356604bbffda6e771dca0509e9ed Mon Sep 17 00:00:00 2001
+From: Rob Clark <robclark at freedesktop.org>
+Date: Sat, 13 Jul 2013 13:08:22 -0400
+Subject: [PATCH 02/17] freedreno/a3xx: fix viewport on gmem->mem resolve
+
+Signed-off-by: Rob Clark <robclark at freedesktop.org>
+---
+ src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+index 1cb170a..9050166 100644
+--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
++++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+@@ -168,6 +168,14 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
+ 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
+ 	OUT_RING(ring, 0x00000000);   /* GRAS_CL_CLIP_CNTL */
+ 
++	OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
++	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)pfb->width/2.0 - 0.5));
++	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)pfb->width/2.0));
++	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)pfb->height/2.0 - 0.5));
++	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)pfb->height/2.0));
++	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
++	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
++
+ 	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+ 	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
+ 			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+-- 
+1.8.4.2
+
diff --git a/0003-freedreno-add-debug-option-to-disable-scissor-optimi.patch b/0003-freedreno-add-debug-option-to-disable-scissor-optimi.patch
new file mode 100644
index 0000000..e0bb580
--- /dev/null
+++ b/0003-freedreno-add-debug-option-to-disable-scissor-optimi.patch
@@ -0,0 +1,113 @@
+From 8b167d34bebcc9aaf67838be71cc3272728d4fe1 Mon Sep 17 00:00:00 2001
+From: Rob Clark <robclark at freedesktop.org>
+Date: Wed, 29 May 2013 10:16:33 -0400
+Subject: [PATCH 03/17] freedreno: add debug option to disable scissor
+ optimization
+
+Useful for testing and debugging.
+
+Signed-off-by: Rob Clark <robclark at freedesktop.org>
+---
+ src/gallium/drivers/freedreno/freedreno_gmem.c   | 26 +++++++++++++++---------
+ src/gallium/drivers/freedreno/freedreno_screen.c |  1 +
+ src/gallium/drivers/freedreno/freedreno_util.h   |  9 ++++----
+ 3 files changed, 22 insertions(+), 14 deletions(-)
+
+diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
+index 12633bd..197d1d9 100644
+--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
++++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
+@@ -71,7 +71,8 @@ calculate_tiles(struct fd_context *ctx)
+ {
+ 	struct fd_gmem_stateobj *gmem = &ctx->gmem;
+ 	struct pipe_scissor_state *scissor = &ctx->max_scissor;
+-	uint32_t cpp = util_format_get_blocksize(ctx->framebuffer.cbufs[0]->format);
++	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
++	uint32_t cpp = util_format_get_blocksize(pfb->cbufs[0]->format);
+ 	uint32_t gmem_size = ctx->screen->gmemsize_bytes;
+ 	uint32_t minx, miny, width, height;
+ 	uint32_t nbins_x = 1, nbins_y = 1;
+@@ -84,10 +85,17 @@ calculate_tiles(struct fd_context *ctx)
+ 		return;
+ 	}
+ 
+-	minx = scissor->minx & ~31; /* round down to multiple of 32 */
+-	miny = scissor->miny & ~31;
+-	width = scissor->maxx - minx;
+-	height = scissor->maxy - miny;
++	if (fd_mesa_debug & FD_DBG_DSCIS) {
++		minx = 0;
++		miny = 0;
++		width = pfb->width;
++		height = pfb->height;
++	} else {
++		minx = scissor->minx & ~31; /* round down to multiple of 32 */
++		miny = scissor->miny & ~31;
++		width = scissor->maxx - minx;
++		height = scissor->maxy - miny;
++	}
+ 
+ // TODO we probably could optimize this a bit if we know that
+ // Z or stencil is not enabled for any of the draw calls..
+@@ -132,9 +140,7 @@ static void
+ render_tiles(struct fd_context *ctx)
+ {
+ 	struct fd_gmem_stateobj *gmem = &ctx->gmem;
+-	uint32_t i, yoff = 0;
+-
+-	yoff= gmem->miny;
++	uint32_t i, yoff = gmem->miny;
+ 
+ 	ctx->emit_tile_init(ctx);
+ 
+@@ -143,13 +149,13 @@ render_tiles(struct fd_context *ctx)
+ 		uint32_t bh = gmem->bin_h;
+ 
+ 		/* clip bin height: */
+-		bh = MIN2(bh, gmem->height - yoff);
++		bh = MIN2(bh, gmem->miny + gmem->height - yoff);
+ 
+ 		for (j = 0; j < gmem->nbins_x; j++) {
+ 			uint32_t bw = gmem->bin_w;
+ 
+ 			/* clip bin width: */
+-			bw = MIN2(bw, gmem->width - xoff);
++			bw = MIN2(bw, gmem->minx + gmem->width - xoff);
+ 
+ 			DBG("bin_h=%d, yoff=%d, bin_w=%d, xoff=%d",
+ 					bh, yoff, bw, xoff);
+diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
+index 52d51c2..36ef8b0 100644
+--- a/src/gallium/drivers/freedreno/freedreno_screen.c
++++ b/src/gallium/drivers/freedreno/freedreno_screen.c
+@@ -60,6 +60,7 @@ static const struct debug_named_value debug_options[] = {
+ 		{"disasm",    FD_DBG_DISASM, "Dump TGSI and adreno shader disassembly"},
+ 		{"dclear",    FD_DBG_DCLEAR, "Mark all state dirty after clear"},
+ 		{"dgmem",     FD_DBG_DGMEM,  "Mark all state dirty after GMEM tile pass"},
++		{"dscis",     FD_DBG_DSCIS,  "Disable scissor optimization"},
+ 		DEBUG_NAMED_VALUE_END
+ };
+ 
+diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
+index f18f0fe..b49cdfc 100644
+--- a/src/gallium/drivers/freedreno/freedreno_util.h
++++ b/src/gallium/drivers/freedreno/freedreno_util.h
+@@ -47,10 +47,11 @@ enum adreno_pa_su_sc_draw fd_polygon_mode(unsigned mode);
+ enum adreno_stencil_op fd_stencil_op(unsigned op);
+ 
+ 
+-#define FD_DBG_MSGS   0x1
+-#define FD_DBG_DISASM 0x2
+-#define FD_DBG_DCLEAR 0x4
+-#define FD_DBG_DGMEM  0x8
++#define FD_DBG_MSGS     0x01
++#define FD_DBG_DISASM   0x02
++#define FD_DBG_DCLEAR   0x04
++#define FD_DBG_DGMEM    0x08
++#define FD_DBG_DSCIS    0x10
+ extern int fd_mesa_debug;
+ 
+ #define DBG(fmt, ...) \
+-- 
+1.8.4.2
+
diff --git a/0004-freedreno-update-register-headers.patch b/0004-freedreno-update-register-headers.patch
new file mode 100644
index 0000000..4654619
--- /dev/null
+++ b/0004-freedreno-update-register-headers.patch
@@ -0,0 +1,1238 @@
+From e1e9f69d3c90803d3c0e2d9b9396c1a06b5f0bb2 Mon Sep 17 00:00:00 2001
+From: Rob Clark <robclark at freedesktop.org>
+Date: Mon, 5 Aug 2013 17:57:24 -0400
+Subject: [PATCH 04/17] freedreno: update register headers
+
+resync w/ rnndb database
+
+Signed-off-by: Rob Clark <robclark at freedesktop.org>
+---
+ src/gallium/drivers/freedreno/a2xx/a2xx.xml.h     |  93 ++---
+ src/gallium/drivers/freedreno/a3xx/a3xx.xml.h     | 439 +++++++++++++++++++---
+ src/gallium/drivers/freedreno/a3xx/fd3_emit.c     |   4 +-
+ src/gallium/drivers/freedreno/a3xx/fd3_program.c  |   2 +-
+ src/gallium/drivers/freedreno/a3xx/fd3_util.c     |   5 +-
+ src/gallium/drivers/freedreno/adreno_common.xml.h | 319 +++++++++++++++-
+ src/gallium/drivers/freedreno/adreno_pm4.xml.h    |   6 +-
+ src/gallium/drivers/freedreno/freedreno_util.h    |   1 +
+ 8 files changed, 758 insertions(+), 111 deletions(-)
+
+diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+index bee01f1..3546386 100644
+--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
++++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+@@ -8,10 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
+ git clone git://0x04.net/rules-ng-ng
+ 
+ The rules-ng-ng source files this header was generated from are:
+-- /home/robclark/src/freedreno/envytools/rnndb/a2xx.xml                (  30127 bytes, from 2013-05-05 18:29:35)
++- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml              (    327 bytes, from 2013-07-05 19:21:12)
+ - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
+-- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml       (   3094 bytes, from 2013-05-05 18:29:22)
++- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml           (  30005 bytes, from 2013-07-19 21:30:48)
++- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml       (   8983 bytes, from 2013-07-24 01:38:36)
+ - /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml          (   9712 bytes, from 2013-05-26 15:22:37)
++- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml           (  51415 bytes, from 2013-08-03 14:26:05)
+ 
+ Copyright (C) 2013 by the following authors:
+ - Rob Clark <robdclark at gmail.com> (robclark)
+@@ -236,56 +238,6 @@ enum sq_tex_filter {
+ 
+ #define REG_A2XX_CP_PFP_UCODE_DATA				0x000000c1
+ 
+-#define REG_A2XX_CP_RB_BASE					0x000001c0
+-
+-#define REG_A2XX_CP_RB_CNTL					0x000001c1
+-
+-#define REG_A2XX_CP_RB_RPTR_ADDR				0x000001c3
+-
+-#define REG_A2XX_CP_RB_RPTR					0x000001c4
+-
+-#define REG_A2XX_CP_RB_WPTR					0x000001c5
+-
+-#define REG_A2XX_CP_RB_WPTR_DELAY				0x000001c6
+-
+-#define REG_A2XX_CP_RB_RPTR_WR					0x000001c7
+-
+-#define REG_A2XX_CP_RB_WPTR_BASE				0x000001c8
+-
+-#define REG_A2XX_CP_QUEUE_THRESHOLDS				0x000001d5
+-
+-#define REG_A2XX_SCRATCH_UMSK					0x000001dc
+-
+-#define REG_A2XX_SCRATCH_ADDR					0x000001dd
+-
+-#define REG_A2XX_CP_STATE_DEBUG_INDEX				0x000001ec
+-
+-#define REG_A2XX_CP_STATE_DEBUG_DATA				0x000001ed
+-
+-#define REG_A2XX_CP_INT_CNTL					0x000001f2
+-
+-#define REG_A2XX_CP_INT_STATUS					0x000001f3
+-
+-#define REG_A2XX_CP_INT_ACK					0x000001f4
+-
+-#define REG_A2XX_CP_ME_CNTL					0x000001f6
+-
+-#define REG_A2XX_CP_ME_STATUS					0x000001f7
+-
+-#define REG_A2XX_CP_ME_RAM_WADDR				0x000001f8
+-
+-#define REG_A2XX_CP_ME_RAM_RADDR				0x000001f9
+-
+-#define REG_A2XX_CP_ME_RAM_DATA					0x000001fa
+-
+-#define REG_A2XX_CP_DEBUG					0x000001fc
+-
+-#define REG_A2XX_CP_CSQ_RB_STAT					0x000001fd
+-
+-#define REG_A2XX_CP_CSQ_IB1_STAT				0x000001fe
+-
+-#define REG_A2XX_CP_CSQ_IB2_STAT				0x000001ff
+-
+ #define REG_A2XX_RBBM_PERFCOUNTER1_SELECT			0x00000395
+ 
+ #define REG_A2XX_RBBM_PERFCOUNTER1_LO				0x00000397
+@@ -338,11 +290,32 @@ enum sq_tex_filter {
+ 
+ #define REG_A2XX_CP_STAT					0x0000047f
+ 
+-#define REG_A2XX_SCRATCH_REG0					0x00000578
+-
+-#define REG_A2XX_SCRATCH_REG2					0x0000057a
+-
+ #define REG_A2XX_RBBM_STATUS					0x000005d0
++#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK			0x0000001f
++#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT			0
++static inline uint32_t A2XX_RBBM_STATUS_CMDFIFO_AVAIL(uint32_t val)
++{
++	return ((val) << A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT) & A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK;
++}
++#define A2XX_RBBM_STATUS_TC_BUSY				0x00000020
++#define A2XX_RBBM_STATUS_HIRQ_PENDING				0x00000100
++#define A2XX_RBBM_STATUS_CPRQ_PENDING				0x00000200
++#define A2XX_RBBM_STATUS_CFRQ_PENDING				0x00000400
++#define A2XX_RBBM_STATUS_PFRQ_PENDING				0x00000800
++#define A2XX_RBBM_STATUS_VGT_BUSY_NO_DMA			0x00001000
++#define A2XX_RBBM_STATUS_RBBM_WU_BUSY				0x00004000
++#define A2XX_RBBM_STATUS_CP_NRT_BUSY				0x00010000
++#define A2XX_RBBM_STATUS_MH_BUSY				0x00040000
++#define A2XX_RBBM_STATUS_MH_COHERENCY_BUSY			0x00080000
++#define A2XX_RBBM_STATUS_SX_BUSY				0x00200000
++#define A2XX_RBBM_STATUS_TPC_BUSY				0x00400000
++#define A2XX_RBBM_STATUS_SC_CNTX_BUSY				0x01000000
++#define A2XX_RBBM_STATUS_PA_BUSY				0x02000000
++#define A2XX_RBBM_STATUS_VGT_BUSY				0x04000000
++#define A2XX_RBBM_STATUS_SQ_CNTX17_BUSY				0x08000000
++#define A2XX_RBBM_STATUS_SQ_CNTX0_BUSY				0x10000000
++#define A2XX_RBBM_STATUS_RB_CNTX_BUSY				0x40000000
++#define A2XX_RBBM_STATUS_GUI_ACTIVE				0x80000000
+ 
+ #define REG_A2XX_A220_VSC_BIN_SIZE				0x00000c01
+ #define A2XX_A220_VSC_BIN_SIZE_WIDTH__MASK			0x0000001f
+@@ -358,13 +331,13 @@ static inline uint32_t A2XX_A220_VSC_BIN_SIZE_HEIGHT(uint32_t val)
+ 	return ((val >> 5) << A2XX_A220_VSC_BIN_SIZE_HEIGHT__SHIFT) & A2XX_A220_VSC_BIN_SIZE_HEIGHT__MASK;
+ }
+ 
+-#define REG_A2XX_VSC_PIPE(i0)				       (0x00000c06 + 0x3*(i0))
++static inline uint32_t REG_A2XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; }
+ 
+-#define REG_A2XX_VSC_PIPE_CONFIG(i0)			       (0x00000c06 + 0x3*(i0))
++static inline uint32_t REG_A2XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; }
+ 
+-#define REG_A2XX_VSC_PIPE_DATA_ADDRESS(i0)		       (0x00000c07 + 0x3*(i0))
++static inline uint32_t REG_A2XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; }
+ 
+-#define REG_A2XX_VSC_PIPE_DATA_LENGTH(i0)		       (0x00000c08 + 0x3*(i0))
++static inline uint32_t REG_A2XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; }
+ 
+ #define REG_A2XX_PC_DEBUG_CNTL					0x00000c38
+ 
+diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+index c7f5085..d183516 100644
+--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
++++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+@@ -8,10 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
+ git clone git://0x04.net/rules-ng-ng
+ 
+ The rules-ng-ng source files this header was generated from are:
+-- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml                (  42578 bytes, from 2013-06-02 13:10:46)
++- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml              (    327 bytes, from 2013-07-05 19:21:12)
+ - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
+-- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml       (   3094 bytes, from 2013-05-05 18:29:22)
++- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml           (  30005 bytes, from 2013-07-19 21:30:48)
++- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml       (   8983 bytes, from 2013-07-24 01:38:36)
+ - /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml          (   9712 bytes, from 2013-05-26 15:22:37)
++- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml           (  51415 bytes, from 2013-08-03 14:26:05)
+ 
+ Copyright (C) 2013 by the following authors:
+ - Rob Clark <robdclark at gmail.com> (robclark)
+@@ -130,6 +132,13 @@ enum a3xx_tex_fmt {
+ 	TFMT_NORM_USHORT_5551 = 6,
+ 	TFMT_NORM_USHORT_4444 = 7,
+ 	TFMT_NORM_UINT_X8Z24 = 10,
++	TFMT_NORM_UINT_NV12_UV_TILED = 17,
++	TFMT_NORM_UINT_NV12_Y_TILED = 19,
++	TFMT_NORM_UINT_NV12_UV = 21,
++	TFMT_NORM_UINT_NV12_Y = 23,
++	TFMT_NORM_UINT_I420_Y = 24,
++	TFMT_NORM_UINT_I420_U = 26,
++	TFMT_NORM_UINT_I420_V = 27,
+ 	TFMT_NORM_UINT_2_10_10_10 = 41,
+ 	TFMT_NORM_UINT_A8 = 44,
+ 	TFMT_NORM_UINT_L8_A8 = 47,
+@@ -207,6 +216,37 @@ enum a3xx_tex_swiz {
+ 	A3XX_TEX_ONE = 5,
+ };
+ 
++enum a3xx_tex_type {
++	A3XX_TEX_1D = 0,
++	A3XX_TEX_2D = 1,
++	A3XX_TEX_CUBE = 2,
++	A3XX_TEX_3D = 3,
++};
++
++#define A3XX_INT0_RBBM_GPU_IDLE					0x00000001
++#define A3XX_INT0_RBBM_AHB_ERROR				0x00000002
++#define A3XX_INT0_RBBM_REG_TIMEOUT				0x00000004
++#define A3XX_INT0_RBBM_ME_MS_TIMEOUT				0x00000008
++#define A3XX_INT0_RBBM_PFP_MS_TIMEOUT				0x00000010
++#define A3XX_INT0_RBBM_ATB_BUS_OVERFLOW				0x00000020
++#define A3XX_INT0_VFD_ERROR					0x00000040
++#define A3XX_INT0_CP_SW_INT					0x00000080
++#define A3XX_INT0_CP_T0_PACKET_IN_IB				0x00000100
++#define A3XX_INT0_CP_OPCODE_ERROR				0x00000200
++#define A3XX_INT0_CP_RESERVED_BIT_ERROR				0x00000400
++#define A3XX_INT0_CP_HW_FAULT					0x00000800
++#define A3XX_INT0_CP_DMA					0x00001000
++#define A3XX_INT0_CP_IB2_INT					0x00002000
++#define A3XX_INT0_CP_IB1_INT					0x00004000
++#define A3XX_INT0_CP_RB_INT					0x00008000
++#define A3XX_INT0_CP_REG_PROTECT_FAULT				0x00010000
++#define A3XX_INT0_CP_RB_DONE_TS					0x00020000
++#define A3XX_INT0_CP_VS_DONE_TS					0x00040000
++#define A3XX_INT0_CP_PS_DONE_TS					0x00080000
++#define A3XX_INT0_CACHE_FLUSH_TS				0x00100000
++#define A3XX_INT0_CP_AHB_ERROR_HALT				0x00200000
++#define A3XX_INT0_MISC_HANG_DETECT				0x01000000
++#define A3XX_INT0_UCHE_OOB_ACCESS				0x02000000
+ #define REG_A3XX_RBBM_HW_VERSION				0x00000000
+ 
+ #define REG_A3XX_RBBM_HW_RELEASE				0x00000001
+@@ -230,6 +270,27 @@ enum a3xx_tex_swiz {
+ #define REG_A3XX_RBBM_GPR0_CTL					0x0000002e
+ 
+ #define REG_A3XX_RBBM_STATUS					0x00000030
++#define A3XX_RBBM_STATUS_HI_BUSY				0x00000001
++#define A3XX_RBBM_STATUS_CP_ME_BUSY				0x00000002
++#define A3XX_RBBM_STATUS_CP_PFP_BUSY				0x00000004
++#define A3XX_RBBM_STATUS_CP_NRT_BUSY				0x00004000
++#define A3XX_RBBM_STATUS_VBIF_BUSY				0x00008000
++#define A3XX_RBBM_STATUS_TSE_BUSY				0x00010000
++#define A3XX_RBBM_STATUS_RAS_BUSY				0x00020000
++#define A3XX_RBBM_STATUS_RB_BUSY				0x00040000
++#define A3XX_RBBM_STATUS_PC_DCALL_BUSY				0x00080000
++#define A3XX_RBBM_STATUS_PC_VSD_BUSY				0x00100000
++#define A3XX_RBBM_STATUS_VFD_BUSY				0x00200000
++#define A3XX_RBBM_STATUS_VPC_BUSY				0x00400000
++#define A3XX_RBBM_STATUS_UCHE_BUSY				0x00800000
++#define A3XX_RBBM_STATUS_SP_BUSY				0x01000000
++#define A3XX_RBBM_STATUS_TPL1_BUSY				0x02000000
++#define A3XX_RBBM_STATUS_MARB_BUSY				0x04000000
++#define A3XX_RBBM_STATUS_VSC_BUSY				0x08000000
++#define A3XX_RBBM_STATUS_ARB_BUSY				0x10000000
++#define A3XX_RBBM_STATUS_HLSQ_BUSY				0x20000000
++#define A3XX_RBBM_STATUS_GPU_BUSY_NOHC				0x40000000
++#define A3XX_RBBM_STATUS_GPU_BUSY				0x80000000
+ 
+ #define REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL			0x00000033
+ 
+@@ -251,20 +312,202 @@ enum a3xx_tex_swiz {
+ 
+ #define REG_A3XX_RBBM_PERFCTR_CTL				0x00000080
+ 
++#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD0				0x00000081
++
++#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD1				0x00000082
++
++#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_LO			0x00000084
++
++#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_HI			0x00000085
++
++#define REG_A3XX_RBBM_PERFCOUNTER0_SELECT			0x00000086
++
++#define REG_A3XX_RBBM_PERFCOUNTER1_SELECT			0x00000087
++
+ #define REG_A3XX_RBBM_GPU_BUSY_MASKED				0x00000088
+ 
++#define REG_A3XX_RBBM_PERFCTR_CP_0_LO				0x00000090
++
++#define REG_A3XX_RBBM_PERFCTR_CP_0_HI				0x00000091
++
++#define REG_A3XX_RBBM_PERFCTR_RBBM_0_LO				0x00000092
++
++#define REG_A3XX_RBBM_PERFCTR_RBBM_0_HI				0x00000093
++
++#define REG_A3XX_RBBM_PERFCTR_RBBM_1_LO				0x00000094
++
++#define REG_A3XX_RBBM_PERFCTR_RBBM_1_HI				0x00000095
++
++#define REG_A3XX_RBBM_PERFCTR_PC_0_LO				0x00000096
++
++#define REG_A3XX_RBBM_PERFCTR_PC_0_HI				0x00000097
++
++#define REG_A3XX_RBBM_PERFCTR_PC_1_LO				0x00000098
++
++#define REG_A3XX_RBBM_PERFCTR_PC_1_HI				0x00000099
++
++#define REG_A3XX_RBBM_PERFCTR_PC_2_LO				0x0000009a
++
++#define REG_A3XX_RBBM_PERFCTR_PC_2_HI				0x0000009b
++
++#define REG_A3XX_RBBM_PERFCTR_PC_3_LO				0x0000009c
++
++#define REG_A3XX_RBBM_PERFCTR_PC_3_HI				0x0000009d
++
++#define REG_A3XX_RBBM_PERFCTR_VFD_0_LO				0x0000009e
++
++#define REG_A3XX_RBBM_PERFCTR_VFD_0_HI				0x0000009f
++
++#define REG_A3XX_RBBM_PERFCTR_VFD_1_LO				0x000000a0
++
++#define REG_A3XX_RBBM_PERFCTR_VFD_1_HI				0x000000a1
++
++#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_LO				0x000000a2
++
++#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_HI				0x000000a3
++
++#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_LO				0x000000a4
++
++#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_HI				0x000000a5
++
++#define REG_A3XX_RBBM_PERFCTR_HLSQ_2_LO				0x000000a6
++
++#define REG_A3XX_RBBM_PERFCTR_HLSQ_2_HI				0x000000a7
++
++#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_LO				0x000000a8
++
++#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_HI				0x000000a9
++
++#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_LO				0x000000aa
++
++#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_HI				0x000000ab
++
++#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_LO				0x000000ac
++
++#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_HI				0x000000ad
++
++#define REG_A3XX_RBBM_PERFCTR_VPC_0_LO				0x000000ae
++
++#define REG_A3XX_RBBM_PERFCTR_VPC_0_HI				0x000000af
++
++#define REG_A3XX_RBBM_PERFCTR_VPC_1_LO				0x000000b0
++
++#define REG_A3XX_RBBM_PERFCTR_VPC_1_HI				0x000000b1
++
++#define REG_A3XX_RBBM_PERFCTR_TSE_0_LO				0x000000b2
++
++#define REG_A3XX_RBBM_PERFCTR_TSE_0_HI				0x000000b3
++
++#define REG_A3XX_RBBM_PERFCTR_TSE_1_LO				0x000000b4
++
++#define REG_A3XX_RBBM_PERFCTR_TSE_1_HI				0x000000b5
++
++#define REG_A3XX_RBBM_PERFCTR_RAS_0_LO				0x000000b6
++
++#define REG_A3XX_RBBM_PERFCTR_RAS_0_HI				0x000000b7
++
++#define REG_A3XX_RBBM_PERFCTR_RAS_1_LO				0x000000b8
++
++#define REG_A3XX_RBBM_PERFCTR_RAS_1_HI				0x000000b9
++
++#define REG_A3XX_RBBM_PERFCTR_UCHE_0_LO				0x000000ba
++
++#define REG_A3XX_RBBM_PERFCTR_UCHE_0_HI				0x000000bb
++
++#define REG_A3XX_RBBM_PERFCTR_UCHE_1_LO				0x000000bc
++
++#define REG_A3XX_RBBM_PERFCTR_UCHE_1_HI				0x000000bd
++
++#define REG_A3XX_RBBM_PERFCTR_UCHE_2_LO				0x000000be
++
++#define REG_A3XX_RBBM_PERFCTR_UCHE_2_HI				0x000000bf
++
++#define REG_A3XX_RBBM_PERFCTR_UCHE_3_LO				0x000000c0
++
++#define REG_A3XX_RBBM_PERFCTR_UCHE_3_HI				0x000000c1
++
++#define REG_A3XX_RBBM_PERFCTR_UCHE_4_LO				0x000000c2
++
++#define REG_A3XX_RBBM_PERFCTR_UCHE_4_HI				0x000000c3
++
++#define REG_A3XX_RBBM_PERFCTR_UCHE_5_LO				0x000000c4
++
++#define REG_A3XX_RBBM_PERFCTR_UCHE_5_HI				0x000000c5
++
++#define REG_A3XX_RBBM_PERFCTR_TP_0_LO				0x000000c6
++
++#define REG_A3XX_RBBM_PERFCTR_TP_0_HI				0x000000c7
++
++#define REG_A3XX_RBBM_PERFCTR_TP_1_LO				0x000000c8
++
++#define REG_A3XX_RBBM_PERFCTR_TP_1_HI				0x000000c9
++
++#define REG_A3XX_RBBM_PERFCTR_TP_2_LO				0x000000ca
++
++#define REG_A3XX_RBBM_PERFCTR_TP_2_HI				0x000000cb
++
++#define REG_A3XX_RBBM_PERFCTR_TP_3_LO				0x000000cc
++
++#define REG_A3XX_RBBM_PERFCTR_TP_3_HI				0x000000cd
++
++#define REG_A3XX_RBBM_PERFCTR_TP_4_LO				0x000000ce
++
++#define REG_A3XX_RBBM_PERFCTR_TP_4_HI				0x000000cf
++
++#define REG_A3XX_RBBM_PERFCTR_TP_5_LO				0x000000d0
++
++#define REG_A3XX_RBBM_PERFCTR_TP_5_HI				0x000000d1
++
++#define REG_A3XX_RBBM_PERFCTR_SP_0_LO				0x000000d2
++
++#define REG_A3XX_RBBM_PERFCTR_SP_0_HI				0x000000d3
++
++#define REG_A3XX_RBBM_PERFCTR_SP_1_LO				0x000000d4
++
++#define REG_A3XX_RBBM_PERFCTR_SP_1_HI				0x000000d5
++
++#define REG_A3XX_RBBM_PERFCTR_SP_2_LO				0x000000d6
++
++#define REG_A3XX_RBBM_PERFCTR_SP_2_HI				0x000000d7
++
++#define REG_A3XX_RBBM_PERFCTR_SP_3_LO				0x000000d8
++
++#define REG_A3XX_RBBM_PERFCTR_SP_3_HI				0x000000d9
++
++#define REG_A3XX_RBBM_PERFCTR_SP_4_LO				0x000000da
++
++#define REG_A3XX_RBBM_PERFCTR_SP_4_HI				0x000000db
++
++#define REG_A3XX_RBBM_PERFCTR_SP_5_LO				0x000000dc
++
++#define REG_A3XX_RBBM_PERFCTR_SP_5_HI				0x000000dd
++
++#define REG_A3XX_RBBM_PERFCTR_SP_6_LO				0x000000de
++
++#define REG_A3XX_RBBM_PERFCTR_SP_6_HI				0x000000df
++
+ #define REG_A3XX_RBBM_PERFCTR_SP_7_LO				0x000000e0
+ 
+ #define REG_A3XX_RBBM_PERFCTR_SP_7_HI				0x000000e1
+ 
++#define REG_A3XX_RBBM_PERFCTR_RB_0_LO				0x000000e2
++
++#define REG_A3XX_RBBM_PERFCTR_RB_0_HI				0x000000e3
++
++#define REG_A3XX_RBBM_PERFCTR_RB_1_LO				0x000000e4
++
++#define REG_A3XX_RBBM_PERFCTR_RB_1_HI				0x000000e5
++
++#define REG_A3XX_RBBM_PERFCTR_PWR_0_LO				0x000000ea
++
++#define REG_A3XX_RBBM_PERFCTR_PWR_0_HI				0x000000eb
++
+ #define REG_A3XX_RBBM_PERFCTR_PWR_1_LO				0x000000ec
+ 
+ #define REG_A3XX_RBBM_PERFCTR_PWR_1_HI				0x000000ed
+ 
+ #define REG_A3XX_RBBM_RBBM_CTL					0x00000100
+ 
+-#define REG_A3XX_RBBM_RBBM_CTL					0x00000100
+-
+ #define REG_A3XX_RBBM_DEBUG_BUS_CTL				0x00000111
+ 
+ #define REG_A3XX_RBBM_DEBUG_BUS_DATA_STATUS			0x00000112
+@@ -287,22 +530,20 @@ enum a3xx_tex_swiz {
+ 
+ #define REG_A3XX_CP_MEQ_DATA					0x000001db
+ 
++#define REG_A3XX_CP_PERFCOUNTER_SELECT				0x00000445
++
+ #define REG_A3XX_CP_HW_FAULT					0x0000045c
+ 
+ #define REG_A3XX_CP_PROTECT_CTRL				0x0000045e
+ 
+ #define REG_A3XX_CP_PROTECT_STATUS				0x0000045f
+ 
+-#define REG_A3XX_CP_PROTECT(i0)				       (0x00000460 + 0x1*(i0))
++static inline uint32_t REG_A3XX_CP_PROTECT(uint32_t i0) { return 0x00000460 + 0x1*i0; }
+ 
+-#define REG_A3XX_CP_PROTECT_REG(i0)			       (0x00000460 + 0x1*(i0))
++static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460 + 0x1*i0; }
+ 
+ #define REG_A3XX_CP_AHB_FAULT					0x0000054d
+ 
+-#define REG_A3XX_CP_SCRATCH_REG2				0x0000057a
+-
+-#define REG_A3XX_CP_SCRATCH_REG3				0x0000057b
+-
+ #define REG_A3XX_GRAS_CL_CLIP_CNTL				0x00002040
+ #define A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER			0x00001000
+ #define A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE			0x00010000
+@@ -528,9 +769,9 @@ static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(uint32_t val)
+ 
+ #define REG_A3XX_UNKNOWN_20C3					0x000020c3
+ 
+-#define REG_A3XX_RB_MRT(i0)				       (0x000020c4 + 0x4*(i0))
++static inline uint32_t REG_A3XX_RB_MRT(uint32_t i0) { return 0x000020c4 + 0x4*i0; }
+ 
+-#define REG_A3XX_RB_MRT_CONTROL(i0)			       (0x000020c4 + 0x4*(i0))
++static inline uint32_t REG_A3XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020c4 + 0x4*i0; }
+ #define A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE			0x00000008
+ #define A3XX_RB_MRT_CONTROL_BLEND				0x00000010
+ #define A3XX_RB_MRT_CONTROL_BLEND2				0x00000020
+@@ -553,7 +794,7 @@ static inline uint32_t A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val)
+ 	return ((val) << A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
+ }
+ 
+-#define REG_A3XX_RB_MRT_BUF_INFO(i0)			       (0x000020c5 + 0x4*(i0))
++static inline uint32_t REG_A3XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x000020c5 + 0x4*i0; }
+ #define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK			0x0000003f
+ #define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT		0
+ static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a3xx_color_fmt val)
+@@ -579,7 +820,7 @@ static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val)
+ 	return ((val >> 5) << A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK;
+ }
+ 
+-#define REG_A3XX_RB_MRT_BUF_BASE(i0)			       (0x000020c6 + 0x4*(i0))
++static inline uint32_t REG_A3XX_RB_MRT_BUF_BASE(uint32_t i0) { return 0x000020c6 + 0x4*i0; }
+ #define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK		0xfffffff0
+ #define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT		4
+ static inline uint32_t A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(uint32_t val)
+@@ -587,7 +828,7 @@ static inline uint32_t A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(uint32_t val)
+ 	return ((val >> 5) << A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT) & A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK;
+ }
+ 
+-#define REG_A3XX_RB_MRT_BLEND_CONTROL(i0)		       (0x000020c7 + 0x4*(i0))
++static inline uint32_t REG_A3XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x000020c7 + 0x4*i0; }
+ #define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK		0x0000001f
+ #define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT		0
+ static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val)
+@@ -627,12 +868,60 @@ static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_r
+ #define A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE			0x20000000
+ 
+ #define REG_A3XX_RB_BLEND_RED					0x000020e4
++#define A3XX_RB_BLEND_RED_UINT__MASK				0x000000ff
++#define A3XX_RB_BLEND_RED_UINT__SHIFT				0
++static inline uint32_t A3XX_RB_BLEND_RED_UINT(uint32_t val)
++{
++	return ((val) << A3XX_RB_BLEND_RED_UINT__SHIFT) & A3XX_RB_BLEND_RED_UINT__MASK;
++}
++#define A3XX_RB_BLEND_RED_FLOAT__MASK				0xffff0000
++#define A3XX_RB_BLEND_RED_FLOAT__SHIFT				16
++static inline uint32_t A3XX_RB_BLEND_RED_FLOAT(float val)
++{
++	return ((util_float_to_half(val)) << A3XX_RB_BLEND_RED_FLOAT__SHIFT) & A3XX_RB_BLEND_RED_FLOAT__MASK;
++}
+ 
+ #define REG_A3XX_RB_BLEND_GREEN					0x000020e5
++#define A3XX_RB_BLEND_GREEN_UINT__MASK				0x000000ff
++#define A3XX_RB_BLEND_GREEN_UINT__SHIFT				0
++static inline uint32_t A3XX_RB_BLEND_GREEN_UINT(uint32_t val)
++{
++	return ((val) << A3XX_RB_BLEND_GREEN_UINT__SHIFT) & A3XX_RB_BLEND_GREEN_UINT__MASK;
++}
++#define A3XX_RB_BLEND_GREEN_FLOAT__MASK				0xffff0000
++#define A3XX_RB_BLEND_GREEN_FLOAT__SHIFT			16
++static inline uint32_t A3XX_RB_BLEND_GREEN_FLOAT(float val)
++{
++	return ((util_float_to_half(val)) << A3XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A3XX_RB_BLEND_GREEN_FLOAT__MASK;
++}
+ 
+ #define REG_A3XX_RB_BLEND_BLUE					0x000020e6
++#define A3XX_RB_BLEND_BLUE_UINT__MASK				0x000000ff
++#define A3XX_RB_BLEND_BLUE_UINT__SHIFT				0
++static inline uint32_t A3XX_RB_BLEND_BLUE_UINT(uint32_t val)
++{
++	return ((val) << A3XX_RB_BLEND_BLUE_UINT__SHIFT) & A3XX_RB_BLEND_BLUE_UINT__MASK;
++}
++#define A3XX_RB_BLEND_BLUE_FLOAT__MASK				0xffff0000
++#define A3XX_RB_BLEND_BLUE_FLOAT__SHIFT				16
++static inline uint32_t A3XX_RB_BLEND_BLUE_FLOAT(float val)
++{
++	return ((util_float_to_half(val)) << A3XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A3XX_RB_BLEND_BLUE_FLOAT__MASK;
++}
+ 
+ #define REG_A3XX_RB_BLEND_ALPHA					0x000020e7
++#define A3XX_RB_BLEND_ALPHA_UINT__MASK				0x000000ff
++#define A3XX_RB_BLEND_ALPHA_UINT__SHIFT				0
++static inline uint32_t A3XX_RB_BLEND_ALPHA_UINT(uint32_t val)
++{
++	return ((val) << A3XX_RB_BLEND_ALPHA_UINT__SHIFT) & A3XX_RB_BLEND_ALPHA_UINT__MASK;
++}
++#define A3XX_RB_BLEND_ALPHA_FLOAT__MASK				0xffff0000
++#define A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT			16
++static inline uint32_t A3XX_RB_BLEND_ALPHA_FLOAT(float val)
++{
++	return ((util_float_to_half(val)) << A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A3XX_RB_BLEND_ALPHA_FLOAT__MASK;
++}
+ 
+ #define REG_A3XX_UNKNOWN_20E8					0x000020e8
+ 
+@@ -1063,9 +1352,9 @@ static inline uint32_t A3XX_VFD_CONTROL_1_REGID4INST(uint32_t val)
+ 
+ #define REG_A3XX_VFD_INDEX_OFFSET				0x00002245
+ 
+-#define REG_A3XX_VFD_FETCH(i0)				       (0x00002246 + 0x2*(i0))
++static inline uint32_t REG_A3XX_VFD_FETCH(uint32_t i0) { return 0x00002246 + 0x2*i0; }
+ 
+-#define REG_A3XX_VFD_FETCH_INSTR_0(i0)			       (0x00002246 + 0x2*(i0))
++static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_0(uint32_t i0) { return 0x00002246 + 0x2*i0; }
+ #define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK			0x0000007f
+ #define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT			0
+ static inline uint32_t A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(uint32_t val)
+@@ -1092,11 +1381,11 @@ static inline uint32_t A3XX_VFD_FETCH_INSTR_0_STEPRATE(uint32_t val)
+ 	return ((val) << A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK;
+ }
+ 
+-#define REG_A3XX_VFD_FETCH_INSTR_1(i0)			       (0x00002247 + 0x2*(i0))
++static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_1(uint32_t i0) { return 0x00002247 + 0x2*i0; }
+ 
+-#define REG_A3XX_VFD_DECODE(i0)				       (0x00002266 + 0x1*(i0))
++static inline uint32_t REG_A3XX_VFD_DECODE(uint32_t i0) { return 0x00002266 + 0x1*i0; }
+ 
+-#define REG_A3XX_VFD_DECODE_INSTR(i0)			       (0x00002266 + 0x1*(i0))
++static inline uint32_t REG_A3XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x00002266 + 0x1*i0; }
+ #define A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK			0x0000000f
+ #define A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT			0
+ static inline uint32_t A3XX_VFD_DECODE_INSTR_WRITEMASK(uint32_t val)
+@@ -1173,13 +1462,13 @@ static inline uint32_t A3XX_VPC_PACK_NUMNONPOSVSVAR(uint32_t val)
+ 	return ((val) << A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT) & A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK;
+ }
+ 
+-#define REG_A3XX_VPC_VARYING_INTERP(i0)			       (0x00002282 + 0x1*(i0))
++static inline uint32_t REG_A3XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00002282 + 0x1*i0; }
+ 
+-#define REG_A3XX_VPC_VARYING_INTERP_MODE(i0)		       (0x00002282 + 0x1*(i0))
++static inline uint32_t REG_A3XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00002282 + 0x1*i0; }
+ 
+-#define REG_A3XX_VPC_VARYING_PS_REPL(i0)		       (0x00002286 + 0x1*(i0))
++static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x00002286 + 0x1*i0; }
+ 
+-#define REG_A3XX_VPC_VARYING_PS_REPL_MODE(i0)		       (0x00002286 + 0x1*(i0))
++static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x00002286 + 0x1*i0; }
+ 
+ #define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0			0x0000228a
+ 
+@@ -1293,9 +1582,9 @@ static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val)
+ 	return ((val) << A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT) & A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK;
+ }
+ 
+-#define REG_A3XX_SP_VS_OUT(i0)				       (0x000022c7 + 0x1*(i0))
++static inline uint32_t REG_A3XX_SP_VS_OUT(uint32_t i0) { return 0x000022c7 + 0x1*i0; }
+ 
+-#define REG_A3XX_SP_VS_OUT_REG(i0)			       (0x000022c7 + 0x1*(i0))
++static inline uint32_t REG_A3XX_SP_VS_OUT_REG(uint32_t i0) { return 0x000022c7 + 0x1*i0; }
+ #define A3XX_SP_VS_OUT_REG_A_REGID__MASK			0x000001ff
+ #define A3XX_SP_VS_OUT_REG_A_REGID__SHIFT			0
+ static inline uint32_t A3XX_SP_VS_OUT_REG_A_REGID(uint32_t val)
+@@ -1321,9 +1610,9 @@ static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val)
+ 	return ((val) << A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK;
+ }
+ 
+-#define REG_A3XX_SP_VS_VPC_DST(i0)			       (0x000022d0 + 0x1*(i0))
++static inline uint32_t REG_A3XX_SP_VS_VPC_DST(uint32_t i0) { return 0x000022d0 + 0x1*i0; }
+ 
+-#define REG_A3XX_SP_VS_VPC_DST_REG(i0)			       (0x000022d0 + 0x1*(i0))
++static inline uint32_t REG_A3XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x000022d0 + 0x1*i0; }
+ #define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK			0x000000ff
+ #define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT			0
+ static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val)
+@@ -1480,9 +1769,9 @@ static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
+ 
+ #define REG_A3XX_SP_FS_OUTPUT_REG				0x000022ec
+ 
+-#define REG_A3XX_SP_FS_MRT(i0)				       (0x000022f0 + 0x1*(i0))
++static inline uint32_t REG_A3XX_SP_FS_MRT(uint32_t i0) { return 0x000022f0 + 0x1*i0; }
+ 
+-#define REG_A3XX_SP_FS_MRT_REG(i0)			       (0x000022f0 + 0x1*(i0))
++static inline uint32_t REG_A3XX_SP_FS_MRT_REG(uint32_t i0) { return 0x000022f0 + 0x1*i0; }
+ #define A3XX_SP_FS_MRT_REG_REGID__MASK				0x000000ff
+ #define A3XX_SP_FS_MRT_REG_REGID__SHIFT				0
+ static inline uint32_t A3XX_SP_FS_MRT_REG_REGID(uint32_t val)
+@@ -1491,9 +1780,9 @@ static inline uint32_t A3XX_SP_FS_MRT_REG_REGID(uint32_t val)
+ }
+ #define A3XX_SP_FS_MRT_REG_HALF_PRECISION			0x00000100
+ 
+-#define REG_A3XX_SP_FS_IMAGE_OUTPUT(i0)			       (0x000022f4 + 0x1*(i0))
++static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT(uint32_t i0) { return 0x000022f4 + 0x1*i0; }
+ 
+-#define REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i0)		       (0x000022f4 + 0x1*(i0))
++static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(uint32_t i0) { return 0x000022f4 + 0x1*i0; }
+ #define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK		0x0000003f
+ #define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT		0
+ static inline uint32_t A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(enum a3xx_color_fmt val)
+@@ -1607,9 +1896,9 @@ static inline uint32_t A3XX_VSC_BIN_SIZE_HEIGHT(uint32_t val)
+ 
+ #define REG_A3XX_VSC_SIZE_ADDRESS				0x00000c02
+ 
+-#define REG_A3XX_VSC_PIPE(i0)				       (0x00000c06 + 0x3*(i0))
++static inline uint32_t REG_A3XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; }
+ 
+-#define REG_A3XX_VSC_PIPE_CONFIG(i0)			       (0x00000c06 + 0x3*(i0))
++static inline uint32_t REG_A3XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; }
+ #define A3XX_VSC_PIPE_CONFIG_X__MASK				0x000003ff
+ #define A3XX_VSC_PIPE_CONFIG_X__SHIFT				0
+ static inline uint32_t A3XX_VSC_PIPE_CONFIG_X(uint32_t val)
+@@ -1635,26 +1924,46 @@ static inline uint32_t A3XX_VSC_PIPE_CONFIG_H(uint32_t val)
+ 	return ((val) << A3XX_VSC_PIPE_CONFIG_H__SHIFT) & A3XX_VSC_PIPE_CONFIG_H__MASK;
+ }
+ 
+-#define REG_A3XX_VSC_PIPE_DATA_ADDRESS(i0)		       (0x00000c07 + 0x3*(i0))
++static inline uint32_t REG_A3XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; }
+ 
+-#define REG_A3XX_VSC_PIPE_DATA_LENGTH(i0)		       (0x00000c08 + 0x3*(i0))
++static inline uint32_t REG_A3XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; }
+ 
+ #define REG_A3XX_UNKNOWN_0C3D					0x00000c3d
+ 
++#define REG_A3XX_PC_PERFCOUNTER0_SELECT				0x00000c48
++
++#define REG_A3XX_PC_PERFCOUNTER1_SELECT				0x00000c49
++
++#define REG_A3XX_PC_PERFCOUNTER2_SELECT				0x00000c4a
++
++#define REG_A3XX_PC_PERFCOUNTER3_SELECT				0x00000c4b
++
+ #define REG_A3XX_UNKNOWN_0C81					0x00000c81
+ 
+-#define REG_A3XX_GRAS_CL_USER_PLANE(i0)			       (0x00000ca0 + 0x4*(i0))
++#define REG_A3XX_GRAS_PERFCOUNTER0_SELECT			0x00000c88
++
++#define REG_A3XX_GRAS_PERFCOUNTER1_SELECT			0x00000c89
+ 
+-#define REG_A3XX_GRAS_CL_USER_PLANE_X(i0)		       (0x00000ca0 + 0x4*(i0))
++#define REG_A3XX_GRAS_PERFCOUNTER2_SELECT			0x00000c8a
+ 
+-#define REG_A3XX_GRAS_CL_USER_PLANE_Y(i0)		       (0x00000ca1 + 0x4*(i0))
++#define REG_A3XX_GRAS_PERFCOUNTER3_SELECT			0x00000c8b
+ 
+-#define REG_A3XX_GRAS_CL_USER_PLANE_Z(i0)		       (0x00000ca2 + 0x4*(i0))
++static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE(uint32_t i0) { return 0x00000ca0 + 0x4*i0; }
+ 
+-#define REG_A3XX_GRAS_CL_USER_PLANE_W(i0)		       (0x00000ca3 + 0x4*(i0))
++static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_X(uint32_t i0) { return 0x00000ca0 + 0x4*i0; }
++
++static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Y(uint32_t i0) { return 0x00000ca1 + 0x4*i0; }
++
++static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Z(uint32_t i0) { return 0x00000ca2 + 0x4*i0; }
++
++static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_W(uint32_t i0) { return 0x00000ca3 + 0x4*i0; }
+ 
+ #define REG_A3XX_RB_GMEM_BASE_ADDR				0x00000cc0
+ 
++#define REG_A3XX_RB_PERFCOUNTER0_SELECT				0x00000cc6
++
++#define REG_A3XX_RB_PERFCOUNTER1_SELECT				0x00000cc7
++
+ #define REG_A3XX_RB_WINDOW_SIZE					0x00000ce0
+ #define A3XX_RB_WINDOW_SIZE_WIDTH__MASK				0x00003fff
+ #define A3XX_RB_WINDOW_SIZE_WIDTH__SHIFT			0
+@@ -1669,18 +1978,46 @@ static inline uint32_t A3XX_RB_WINDOW_SIZE_HEIGHT(uint32_t val)
+ 	return ((val) << A3XX_RB_WINDOW_SIZE_HEIGHT__SHIFT) & A3XX_RB_WINDOW_SIZE_HEIGHT__MASK;
+ }
+ 
+-#define REG_A3XX_UNKNOWN_0E00					0x00000e00
++#define REG_A3XX_HLSQ_PERFCOUNTER0_SELECT			0x00000e00
++
++#define REG_A3XX_HLSQ_PERFCOUNTER1_SELECT			0x00000e01
++
++#define REG_A3XX_HLSQ_PERFCOUNTER2_SELECT			0x00000e02
++
++#define REG_A3XX_HLSQ_PERFCOUNTER3_SELECT			0x00000e03
++
++#define REG_A3XX_HLSQ_PERFCOUNTER4_SELECT			0x00000e04
++
++#define REG_A3XX_HLSQ_PERFCOUNTER5_SELECT			0x00000e05
+ 
+ #define REG_A3XX_UNKNOWN_0E43					0x00000e43
+ 
+ #define REG_A3XX_VFD_PERFCOUNTER0_SELECT			0x00000e44
+ 
++#define REG_A3XX_VFD_PERFCOUNTER1_SELECT			0x00000e45
++
+ #define REG_A3XX_VPC_VPC_DEBUG_RAM_SEL				0x00000e61
+ 
+ #define REG_A3XX_VPC_VPC_DEBUG_RAM_READ				0x00000e62
+ 
++#define REG_A3XX_VPC_PERFCOUNTER0_SELECT			0x00000e64
++
++#define REG_A3XX_VPC_PERFCOUNTER1_SELECT			0x00000e65
++
+ #define REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG			0x00000e82
+ 
++#define REG_A3XX_UCHE_PERFCOUNTER0_SELECT			0x00000e84
++
++#define REG_A3XX_UCHE_PERFCOUNTER1_SELECT			0x00000e85
++
++#define REG_A3XX_UCHE_PERFCOUNTER2_SELECT			0x00000e86
++
++#define REG_A3XX_UCHE_PERFCOUNTER3_SELECT			0x00000e87
++
++#define REG_A3XX_UCHE_PERFCOUNTER4_SELECT			0x00000e88
++
++#define REG_A3XX_UCHE_PERFCOUNTER5_SELECT			0x00000e89
++
+ #define REG_A3XX_UCHE_CACHE_INVALIDATE0_REG			0x00000ea0
+ #define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK		0x0fffffff
+ #define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT		0
+@@ -1724,6 +2061,18 @@ static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(enum a3xx_cache_op
+ 
+ #define REG_A3XX_UNKNOWN_0F03					0x00000f03
+ 
++#define REG_A3XX_TP_PERFCOUNTER0_SELECT				0x00000f04
++
++#define REG_A3XX_TP_PERFCOUNTER1_SELECT				0x00000f05
++
++#define REG_A3XX_TP_PERFCOUNTER2_SELECT				0x00000f06
++
++#define REG_A3XX_TP_PERFCOUNTER3_SELECT				0x00000f07
++
++#define REG_A3XX_TP_PERFCOUNTER4_SELECT				0x00000f08
++
++#define REG_A3XX_TP_PERFCOUNTER5_SELECT				0x00000f09
++
+ #define REG_A3XX_TEX_SAMP_0					0x00000000
+ #define A3XX_TEX_SAMP_0_XY_MAG__MASK				0x0000000c
+ #define A3XX_TEX_SAMP_0_XY_MAG__SHIFT				2
+@@ -1791,6 +2140,12 @@ static inline uint32_t A3XX_TEX_CONST_0_FMT(enum a3xx_tex_fmt val)
+ {
+ 	return ((val) << A3XX_TEX_CONST_0_FMT__SHIFT) & A3XX_TEX_CONST_0_FMT__MASK;
+ }
++#define A3XX_TEX_CONST_0_TYPE__MASK				0xc0000000
++#define A3XX_TEX_CONST_0_TYPE__SHIFT				30
++static inline uint32_t A3XX_TEX_CONST_0_TYPE(enum a3xx_tex_type val)
++{
++	return ((val) << A3XX_TEX_CONST_0_TYPE__SHIFT) & A3XX_TEX_CONST_0_TYPE__MASK;
++}
+ 
+ #define REG_A3XX_TEX_CONST_1					0x00000001
+ #define A3XX_TEX_CONST_1_HEIGHT__MASK				0x00003fff
+diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+index b8436c9..5ffd561 100644
+--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
++++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+@@ -536,8 +536,8 @@ fd3_emit_restore(struct fd_context *ctx)
+ 	OUT_PKT0(ring, REG_A3XX_UNKNOWN_0C3D, 1);
+ 	OUT_RING(ring, 0x00000001);        /* UNKNOWN_0C3D */
+ 
+-	OUT_PKT0(ring, REG_A3XX_UNKNOWN_0E00, 1);
+-	OUT_RING(ring, 0x00000000);        /* UNKNOWN_0E00 */
++	OUT_PKT0(ring, REG_A3XX_HLSQ_PERFCOUNTER0_SELECT, 1);
++	OUT_RING(ring, 0x00000000);        /* HLSQ_PERFCOUNTER0_SELECT */
+ 
+ 	OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 2);
+ 	OUT_RING(ring, A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(0) |
+diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+index b5a027e..259c2dd 100644
+--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
++++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+@@ -249,7 +249,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
+ 	 */
+ 	for (i = 0; i < 6; i++) {
+ 		OUT_PKT0(ring, REG_A3XX_SP_PERFCOUNTER0_SELECT, 1);
+-		OUT_RING(ring, 0x00000000);    /* SP_PERFCOUNTER4_SELECT */
++		OUT_RING(ring, 0x00000000);    /* SP_PERFCOUNTER0_SELECT */
+ 
+ 		OUT_PKT0(ring, REG_A3XX_SP_PERFCOUNTER4_SELECT, 1);
+ 		OUT_RING(ring, 0x00000000);    /* SP_PERFCOUNTER4_SELECT */
+diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_util.c b/src/gallium/drivers/freedreno/a3xx/fd3_util.c
+index a08bc23..6537fb7 100644
+--- a/src/gallium/drivers/freedreno/a3xx/fd3_util.c
++++ b/src/gallium/drivers/freedreno/a3xx/fd3_util.c
+@@ -306,10 +306,11 @@ fd3_pipe2swap(enum pipe_format format)
+ 	case PIPE_FORMAT_B8G8R8A8_UNORM:
+ 	case PIPE_FORMAT_B8G8R8X8_UNORM:
+ 		return WXYZ;
++
++	case PIPE_FORMAT_R8G8B8A8_UNORM:
++	case PIPE_FORMAT_R8G8B8X8_UNORM:
+ 	case PIPE_FORMAT_Z24X8_UNORM:
+ 	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+-		return WZYX;
+-
+ 	default:
+ 		return WZYX;
+ 	}
+diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h
+index b119812..61979d4 100644
+--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
++++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
+@@ -8,10 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
+ git clone git://0x04.net/rules-ng-ng
+ 
+ The rules-ng-ng source files this header was generated from are:
+-- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml                (  42578 bytes, from 2013-06-02 13:10:46)
++- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml              (    327 bytes, from 2013-07-05 19:21:12)
+ - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
+-- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml       (   3094 bytes, from 2013-05-05 18:29:22)
++- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml           (  30005 bytes, from 2013-07-19 21:30:48)
++- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml       (   8983 bytes, from 2013-07-24 01:38:36)
+ - /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml          (   9712 bytes, from 2013-05-26 15:22:37)
++- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml           (  51415 bytes, from 2013-08-03 14:26:05)
+ 
+ Copyright (C) 2013 by the following authors:
+ - Rob Clark <robdclark at gmail.com> (robclark)
+@@ -113,5 +115,318 @@ enum adreno_rb_depth_format {
+ 	DEPTHX_24_8 = 1,
+ };
+ 
++enum adreno_mmu_clnt_beh {
++	BEH_NEVR = 0,
++	BEH_TRAN_RNG = 1,
++	BEH_TRAN_FLT = 2,
++};
++
++#define REG_AXXX_MH_MMU_CONFIG					0x00000040
++#define AXXX_MH_MMU_CONFIG_MMU_ENABLE				0x00000001
++#define AXXX_MH_MMU_CONFIG_SPLIT_MODE_ENABLE			0x00000002
++#define AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK		0x00000030
++#define AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT		4
++static inline uint32_t AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
++{
++	return ((val) << AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK;
++}
++#define AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK		0x000000c0
++#define AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT		6
++static inline uint32_t AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
++{
++	return ((val) << AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK;
++}
++#define AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK		0x00000300
++#define AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT		8
++static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
++{
++	return ((val) << AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK;
++}
++#define AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK		0x00000c00
++#define AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT		10
++static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
++{
++	return ((val) << AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK;
++}
++#define AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK		0x00003000
++#define AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT		12
++static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
++{
++	return ((val) << AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK;
++}
++#define AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK		0x0000c000
++#define AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT		14
++static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
++{
++	return ((val) << AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK;
++}
++#define AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK		0x00030000
++#define AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT		16
++static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
++{
++	return ((val) << AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK;
++}
++#define AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK		0x000c0000
++#define AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT		18
++static inline uint32_t AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
++{
++	return ((val) << AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK;
++}
++#define AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK		0x00300000
++#define AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT		20
++static inline uint32_t AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
++{
++	return ((val) << AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK;
++}
++#define AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK		0x00c00000
++#define AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT		22
++static inline uint32_t AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
++{
++	return ((val) << AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK;
++}
++#define AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK		0x03000000
++#define AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT		24
++static inline uint32_t AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
++{
++	return ((val) << AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK;
++}
++
++#define REG_AXXX_MH_MMU_VA_RANGE				0x00000041
++
++#define REG_AXXX_MH_MMU_PT_BASE					0x00000042
++
++#define REG_AXXX_MH_MMU_PAGE_FAULT				0x00000043
++
++#define REG_AXXX_MH_MMU_TRAN_ERROR				0x00000044
++
++#define REG_AXXX_MH_MMU_INVALIDATE				0x00000045
++
++#define REG_AXXX_MH_MMU_MPU_BASE				0x00000046
++
++#define REG_AXXX_MH_MMU_MPU_END					0x00000047
++
++#define REG_AXXX_CP_RB_BASE					0x000001c0
++
++#define REG_AXXX_CP_RB_CNTL					0x000001c1
++#define AXXX_CP_RB_CNTL_BUFSZ__MASK				0x0000003f
++#define AXXX_CP_RB_CNTL_BUFSZ__SHIFT				0
++static inline uint32_t AXXX_CP_RB_CNTL_BUFSZ(uint32_t val)
++{
++	return ((val) << AXXX_CP_RB_CNTL_BUFSZ__SHIFT) & AXXX_CP_RB_CNTL_BUFSZ__MASK;
++}
++#define AXXX_CP_RB_CNTL_BLKSZ__MASK				0x00003f00
++#define AXXX_CP_RB_CNTL_BLKSZ__SHIFT				8
++static inline uint32_t AXXX_CP_RB_CNTL_BLKSZ(uint32_t val)
++{
++	return ((val) << AXXX_CP_RB_CNTL_BLKSZ__SHIFT) & AXXX_CP_RB_CNTL_BLKSZ__MASK;
++}
++#define AXXX_CP_RB_CNTL_BUF_SWAP__MASK				0x00030000
++#define AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT				16
++static inline uint32_t AXXX_CP_RB_CNTL_BUF_SWAP(uint32_t val)
++{
++	return ((val) << AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT) & AXXX_CP_RB_CNTL_BUF_SWAP__MASK;
++}
++#define AXXX_CP_RB_CNTL_POLL_EN					0x00100000
++#define AXXX_CP_RB_CNTL_NO_UPDATE				0x08000000
++#define AXXX_CP_RB_CNTL_RPTR_WR_EN				0x80000000
++
++#define REG_AXXX_CP_RB_RPTR_ADDR				0x000001c3
++#define AXXX_CP_RB_RPTR_ADDR_SWAP__MASK				0x00000003
++#define AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT			0
++static inline uint32_t AXXX_CP_RB_RPTR_ADDR_SWAP(uint32_t val)
++{
++	return ((val) << AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT) & AXXX_CP_RB_RPTR_ADDR_SWAP__MASK;
++}
++#define AXXX_CP_RB_RPTR_ADDR_ADDR__MASK				0xfffffffc
++#define AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT			2
++static inline uint32_t AXXX_CP_RB_RPTR_ADDR_ADDR(uint32_t val)
++{
++	return ((val >> 2) << AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT) & AXXX_CP_RB_RPTR_ADDR_ADDR__MASK;
++}
++
++#define REG_AXXX_CP_RB_RPTR					0x000001c4
++
++#define REG_AXXX_CP_RB_WPTR					0x000001c5
++
++#define REG_AXXX_CP_RB_WPTR_DELAY				0x000001c6
++
++#define REG_AXXX_CP_RB_RPTR_WR					0x000001c7
++
++#define REG_AXXX_CP_RB_WPTR_BASE				0x000001c8
++
++#define REG_AXXX_CP_QUEUE_THRESHOLDS				0x000001d5
++#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK		0x0000000f
++#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT		0
++static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(uint32_t val)
++{
++	return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK;
++}
++#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK		0x00000f00
++#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT		8
++static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(uint32_t val)
++{
++	return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK;
++}
++#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK		0x000f0000
++#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT		16
++static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(uint32_t val)
++{
++	return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK;
++}
++
++#define REG_AXXX_CP_MEQ_THRESHOLDS				0x000001d6
++
++#define REG_AXXX_CP_CSQ_AVAIL					0x000001d7
++#define AXXX_CP_CSQ_AVAIL_RING__MASK				0x0000007f
++#define AXXX_CP_CSQ_AVAIL_RING__SHIFT				0
++static inline uint32_t AXXX_CP_CSQ_AVAIL_RING(uint32_t val)
++{
++	return ((val) << AXXX_CP_CSQ_AVAIL_RING__SHIFT) & AXXX_CP_CSQ_AVAIL_RING__MASK;
++}
++#define AXXX_CP_CSQ_AVAIL_IB1__MASK				0x00007f00
++#define AXXX_CP_CSQ_AVAIL_IB1__SHIFT				8
++static inline uint32_t AXXX_CP_CSQ_AVAIL_IB1(uint32_t val)
++{
++	return ((val) << AXXX_CP_CSQ_AVAIL_IB1__SHIFT) & AXXX_CP_CSQ_AVAIL_IB1__MASK;
++}
++#define AXXX_CP_CSQ_AVAIL_IB2__MASK				0x007f0000
++#define AXXX_CP_CSQ_AVAIL_IB2__SHIFT				16
++static inline uint32_t AXXX_CP_CSQ_AVAIL_IB2(uint32_t val)
++{
++	return ((val) << AXXX_CP_CSQ_AVAIL_IB2__SHIFT) & AXXX_CP_CSQ_AVAIL_IB2__MASK;
++}
++
++#define REG_AXXX_CP_STQ_AVAIL					0x000001d8
++#define AXXX_CP_STQ_AVAIL_ST__MASK				0x0000007f
++#define AXXX_CP_STQ_AVAIL_ST__SHIFT				0
++static inline uint32_t AXXX_CP_STQ_AVAIL_ST(uint32_t val)
++{
++	return ((val) << AXXX_CP_STQ_AVAIL_ST__SHIFT) & AXXX_CP_STQ_AVAIL_ST__MASK;
++}
++
++#define REG_AXXX_CP_MEQ_AVAIL					0x000001d9
++#define AXXX_CP_MEQ_AVAIL_MEQ__MASK				0x0000001f
++#define AXXX_CP_MEQ_AVAIL_MEQ__SHIFT				0
++static inline uint32_t AXXX_CP_MEQ_AVAIL_MEQ(uint32_t val)
++{
++	return ((val) << AXXX_CP_MEQ_AVAIL_MEQ__SHIFT) & AXXX_CP_MEQ_AVAIL_MEQ__MASK;
++}
++
++#define REG_AXXX_SCRATCH_UMSK					0x000001dc
++#define AXXX_SCRATCH_UMSK_UMSK__MASK				0x000000ff
++#define AXXX_SCRATCH_UMSK_UMSK__SHIFT				0
++static inline uint32_t AXXX_SCRATCH_UMSK_UMSK(uint32_t val)
++{
++	return ((val) << AXXX_SCRATCH_UMSK_UMSK__SHIFT) & AXXX_SCRATCH_UMSK_UMSK__MASK;
++}
++#define AXXX_SCRATCH_UMSK_SWAP__MASK				0x00030000
++#define AXXX_SCRATCH_UMSK_SWAP__SHIFT				16
++static inline uint32_t AXXX_SCRATCH_UMSK_SWAP(uint32_t val)
++{
++	return ((val) << AXXX_SCRATCH_UMSK_SWAP__SHIFT) & AXXX_SCRATCH_UMSK_SWAP__MASK;
++}
++
++#define REG_AXXX_SCRATCH_ADDR					0x000001dd
++
++#define REG_AXXX_CP_ME_RDADDR					0x000001ea
++
++#define REG_AXXX_CP_STATE_DEBUG_INDEX				0x000001ec
++
++#define REG_AXXX_CP_STATE_DEBUG_DATA				0x000001ed
++
++#define REG_AXXX_CP_INT_CNTL					0x000001f2
++
++#define REG_AXXX_CP_INT_STATUS					0x000001f3
++
++#define REG_AXXX_CP_INT_ACK					0x000001f4
++
++#define REG_AXXX_CP_ME_CNTL					0x000001f6
++
++#define REG_AXXX_CP_ME_STATUS					0x000001f7
++
++#define REG_AXXX_CP_ME_RAM_WADDR				0x000001f8
++
++#define REG_AXXX_CP_ME_RAM_RADDR				0x000001f9
++
++#define REG_AXXX_CP_ME_RAM_DATA					0x000001fa
++
++#define REG_AXXX_CP_DEBUG					0x000001fc
++#define AXXX_CP_DEBUG_PREDICATE_DISABLE				0x00800000
++#define AXXX_CP_DEBUG_PROG_END_PTR_ENABLE			0x01000000
++#define AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE			0x02000000
++#define AXXX_CP_DEBUG_PREFETCH_PASS_NOPS			0x04000000
++#define AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE			0x08000000
++#define AXXX_CP_DEBUG_PREFETCH_MATCH_DISABLE			0x10000000
++#define AXXX_CP_DEBUG_SIMPLE_ME_FLOW_CONTROL			0x40000000
++#define AXXX_CP_DEBUG_MIU_WRITE_PACK_DISABLE			0x80000000
++
++#define REG_AXXX_CP_CSQ_RB_STAT					0x000001fd
++#define AXXX_CP_CSQ_RB_STAT_RPTR__MASK				0x0000007f
++#define AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT				0
++static inline uint32_t AXXX_CP_CSQ_RB_STAT_RPTR(uint32_t val)
++{
++	return ((val) << AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_RPTR__MASK;
++}
++#define AXXX_CP_CSQ_RB_STAT_WPTR__MASK				0x007f0000
++#define AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT				16
++static inline uint32_t AXXX_CP_CSQ_RB_STAT_WPTR(uint32_t val)
++{
++	return ((val) << AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_WPTR__MASK;
++}
++
++#define REG_AXXX_CP_CSQ_IB1_STAT				0x000001fe
++#define AXXX_CP_CSQ_IB1_STAT_RPTR__MASK				0x0000007f
++#define AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT			0
++static inline uint32_t AXXX_CP_CSQ_IB1_STAT_RPTR(uint32_t val)
++{
++	return ((val) << AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_RPTR__MASK;
++}
++#define AXXX_CP_CSQ_IB1_STAT_WPTR__MASK				0x007f0000
++#define AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT			16
++static inline uint32_t AXXX_CP_CSQ_IB1_STAT_WPTR(uint32_t val)
++{
++	return ((val) << AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_WPTR__MASK;
++}
++
++#define REG_AXXX_CP_CSQ_IB2_STAT				0x000001ff
++#define AXXX_CP_CSQ_IB2_STAT_RPTR__MASK				0x0000007f
++#define AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT			0
++static inline uint32_t AXXX_CP_CSQ_IB2_STAT_RPTR(uint32_t val)
++{
++	return ((val) << AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_RPTR__MASK;
++}
++#define AXXX_CP_CSQ_IB2_STAT_WPTR__MASK				0x007f0000
++#define AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT			16
++static inline uint32_t AXXX_CP_CSQ_IB2_STAT_WPTR(uint32_t val)
++{
++	return ((val) << AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_WPTR__MASK;
++}
++
++#define REG_AXXX_CP_SCRATCH_REG0				0x00000578
++
++#define REG_AXXX_CP_SCRATCH_REG1				0x00000579
++
++#define REG_AXXX_CP_SCRATCH_REG2				0x0000057a
++
++#define REG_AXXX_CP_SCRATCH_REG3				0x0000057b
++
++#define REG_AXXX_CP_SCRATCH_REG4				0x0000057c
++
++#define REG_AXXX_CP_SCRATCH_REG5				0x0000057d
++
++#define REG_AXXX_CP_SCRATCH_REG6				0x0000057e
++
++#define REG_AXXX_CP_SCRATCH_REG7				0x0000057f
++
++#define REG_AXXX_CP_ME_CF_EVENT_SRC				0x0000060a
++
++#define REG_AXXX_CP_ME_CF_EVENT_ADDR				0x0000060b
++
++#define REG_AXXX_CP_ME_CF_EVENT_DATA				0x0000060c
++
++#define REG_AXXX_CP_ME_NRT_ADDR					0x0000060d
++
++#define REG_AXXX_CP_ME_NRT_DATA					0x0000060e
++
+ 
+ #endif /* ADRENO_COMMON_XML */
+diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+index d3a7bac..94c13f4 100644
+--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
++++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+@@ -8,10 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
+ git clone git://0x04.net/rules-ng-ng
+ 
+ The rules-ng-ng source files this header was generated from are:
+-- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml                (  42578 bytes, from 2013-06-02 13:10:46)
++- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml              (    327 bytes, from 2013-07-05 19:21:12)
+ - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
+-- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml       (   3094 bytes, from 2013-05-05 18:29:22)
++- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml           (  30005 bytes, from 2013-07-19 21:30:48)
++- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml       (   8983 bytes, from 2013-07-24 01:38:36)
+ - /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml          (   9712 bytes, from 2013-05-26 15:22:37)
++- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml           (  51415 bytes, from 2013-08-03 14:26:05)
+ 
+ Copyright (C) 2013 by the following authors:
+ - Rob Clark <robdclark at gmail.com> (robclark)
+diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
+index b49cdfc..22857d2 100644
+--- a/src/gallium/drivers/freedreno/freedreno_util.h
++++ b/src/gallium/drivers/freedreno/freedreno_util.h
+@@ -35,6 +35,7 @@
+ #include "pipe/p_format.h"
+ #include "util/u_debug.h"
+ #include "util/u_math.h"
++#include "util/u_half.h"
+ 
+ #include "adreno_common.xml.h"
+ #include "adreno_pm4.xml.h"
+-- 
+1.8.4.2
+
diff --git a/0005-freedreno-a3xx-some-texture-fixes.patch b/0005-freedreno-a3xx-some-texture-fixes.patch
new file mode 100644
index 0000000..4fd4c68
--- /dev/null
+++ b/0005-freedreno-a3xx-some-texture-fixes.patch
@@ -0,0 +1,65 @@
+From 3da8868b5df98d8544091feeea7b6bb0f736324f Mon Sep 17 00:00:00 2001
+From: Rob Clark <robclark at freedesktop.org>
+Date: Mon, 5 Aug 2013 18:03:33 -0400
+Subject: [PATCH 05/17] freedreno/a3xx: some texture fixes
+
+Stop hard coding bits that indicate texture type (2d/3d/cube/etc).
+
+Signed-off-by: Rob Clark <robclark at freedesktop.org>
+---
+ src/gallium/drivers/freedreno/a3xx/fd3_texture.c | 25 +++++++++++++++++++++++-
+ 1 file changed, 24 insertions(+), 1 deletion(-)
+
+diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+index ae08b8a..e56325b 100644
+--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
++++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+@@ -87,6 +87,7 @@ fd3_sampler_state_create(struct pipe_context *pctx,
+ 	so->base = *cso;
+ 
+ 	so->texsamp0 =
++			COND(!cso->normalized_coords, A3XX_TEX_SAMP_0_UNNORM_COORDS) |
+ 			A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter)) |
+ 			A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter)) |
+ 			A3XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) |
+@@ -97,6 +98,28 @@ fd3_sampler_state_create(struct pipe_context *pctx,
+ 	return so;
+ }
+ 
++static enum a3xx_tex_type
++tex_type(unsigned target)
++{
++	switch (target) {
++	default:
++		assert(0);
++	case PIPE_BUFFER:
++	case PIPE_TEXTURE_1D:
++	case PIPE_TEXTURE_1D_ARRAY:
++		return A3XX_TEX_1D;
++	case PIPE_TEXTURE_RECT:
++	case PIPE_TEXTURE_2D:
++	case PIPE_TEXTURE_2D_ARRAY:
++		return A3XX_TEX_2D;
++	case PIPE_TEXTURE_3D:
++		return A3XX_TEX_3D;
++	case PIPE_TEXTURE_CUBE:
++	case PIPE_TEXTURE_CUBE_ARRAY:
++		return A3XX_TEX_CUBE;
++	}
++}
++
+ static struct pipe_sampler_view *
+ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
+ 		const struct pipe_sampler_view *cso)
+@@ -116,7 +139,7 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
+ 	so->tex_resource =  rsc;
+ 
+ 	so->texconst0 =
+-			0x40000000 | /* ??? */
++			A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
+ 			A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) |
+ 			fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
+ 						cso->swizzle_b, cso->swizzle_a);
+-- 
+1.8.4.2
+
diff --git a/0006-freedreno-a3xx-compiler-fix-CMP.patch b/0006-freedreno-a3xx-compiler-fix-CMP.patch
new file mode 100644
index 0000000..ded5b9d
--- /dev/null
+++ b/0006-freedreno-a3xx-compiler-fix-CMP.patch
@@ -0,0 +1,45 @@
+From 83e65320012f327d2e8f1573443b2e20f059e76f Mon Sep 17 00:00:00 2001
+From: Rob Clark <robclark at freedesktop.org>
+Date: Tue, 20 Aug 2013 13:46:30 -0400
+Subject: [PATCH 06/17] freedreno/a3xx/compiler: fix CMP
+
+The 1st src to add.s needs (r) flag (repeat), otherwise it will end up:
+
+  add.s dst.xyzw, tmp.xxxx, -1
+
+instead of:
+
+  add.s dst.xyzw, tmp.xyzw, -1
+
+Also, if we are using a temporary dst to avoid clobbering one of the src
+registers, we actually need to use that as the dst for the sel
+instruction.
+
+Signed-off-by: Rob Clark <robclark at freedesktop.org>
+---
+ src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+index eabe21c..07bede4 100644
+--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
++++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+@@ -790,13 +790,13 @@ trans_cmp(const struct instr_translater *t,
+ 	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
+ 	instr->repeat = 3;
+ 	add_dst_reg(ctx, instr, &tmp_dst, 0);
+-	add_src_reg(ctx, instr, &tmp_src, 0);
++	add_src_reg(ctx, instr, &tmp_src, 0)->flags |= IR3_REG_R;
+ 	ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
+ 
+ 	/* sel.{f32,f16} dst, src2, tmp, src1 */
+ 	instr = ir3_instr_create(ctx->ir, 3, ctx->so->half_precision ?
+ 			OPC_SEL_F16 : OPC_SEL_F32);
+-	vectorize(ctx, instr, &inst->Dst[0].Register, 3,
++	vectorize(ctx, instr, dst, 3,
+ 			&inst->Src[2].Register, 0,
+ 			&tmp_src, 0,
+ 			&inst->Src[1].Register, 0);
+-- 
+1.8.4.2
+
diff --git a/0007-freedreno-a3xx-compiler-handle-saturate-on-dst.patch b/0007-freedreno-a3xx-compiler-handle-saturate-on-dst.patch
new file mode 100644
index 0000000..362c04f
--- /dev/null
+++ b/0007-freedreno-a3xx-compiler-handle-saturate-on-dst.patch
@@ -0,0 +1,98 @@
+From c83387438633233ae6bcc55e1f4eaa2793ce7449 Mon Sep 17 00:00:00 2001
+From: Rob Clark <robclark at freedesktop.org>
+Date: Tue, 20 Aug 2013 13:51:35 -0400
+Subject: [PATCH 07/17] freedreno/a3xx/compiler: handle saturate on dst
+
+Sometimes things other than color dst need saturating, like if there is
+a 'clamp(foo, 0.0, 1.0)'.  So for saturated dst add the extra
+instructions to fix up dst.
+
+Signed-off-by: Rob Clark <robclark at freedesktop.org>
+---
+ src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 49 +++++++++++++++++++++++
+ 1 file changed, 49 insertions(+)
+
+diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+index 07bede4..e2c7853 100644
+--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
++++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+@@ -131,6 +131,11 @@ struct fd3_compile_context {
+ 	struct tgsi_src_register tmp_src;
+ };
+ 
++
++static void vectorize(struct fd3_compile_context *ctx,
++		struct ir3_instruction *instr, struct tgsi_dst_register *dst,
++		int nsrcs, ...);
++
+ static unsigned
+ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
+ 		const struct tgsi_token *tokens)
+@@ -234,6 +239,10 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
+ 		flags |= IR3_REG_CONST;
+ 		num = src->Index + ctx->base_reg[src->File];
+ 		break;
++	case TGSI_FILE_OUTPUT:
++		/* NOTE: we should only end up w/ OUTPUT file for things like
++		 * clamp()'ing saturated dst instructions
++		 */
+ 	case TGSI_FILE_INPUT:
+ 	case TGSI_FILE_TEMPORARY:
+ 		num = src->Index + ctx->base_reg[src->File];
+@@ -407,6 +416,35 @@ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
+ 
+ }
+ 
++static void
++create_clamp(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
++		struct tgsi_src_register *minval, struct tgsi_src_register *maxval)
++{
++	struct ir3_instruction *instr;
++	struct tgsi_src_register src;
++
++	src_from_dst(&src, dst);
++
++	instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
++	vectorize(ctx, instr, dst, 2, &src, 0, minval, 0);
++
++	instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F);
++	vectorize(ctx, instr, dst, 2, &src, 0, maxval, 0);
++}
++
++static void
++create_clamp_imm(struct fd3_compile_context *ctx,
++		struct tgsi_dst_register *dst,
++		uint32_t minval, uint32_t maxval)
++{
++	struct tgsi_src_register minconst, maxconst;
++
++	get_immediate(ctx, &minconst, minval);
++	get_immediate(ctx, &maxconst, maxval);
++
++	create_clamp(ctx, dst, &minconst, &maxconst);
++}
++
+ static struct tgsi_dst_register *
+ get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst)
+ {
+@@ -1229,6 +1267,17 @@ compile_instructions(struct fd3_compile_context *ctx)
+ 				assert(0);
+ 			}
+ 
++			switch (inst->Instruction.Saturate) {
++			case TGSI_SAT_ZERO_ONE:
++				create_clamp_imm(ctx, &inst->Dst[0].Register,
++						fui(0.0), fui(1.0));
++				break;
++			case TGSI_SAT_MINUS_PLUS_ONE:
++				create_clamp_imm(ctx, &inst->Dst[0].Register,
++						fui(-1.0), fui(1.0));
++				break;
++			}
++
+ 			break;
+ 		}
+ 		default:
+-- 
+1.8.4.2
+
diff --git a/0008-freedreno-a3xx-compiler-use-max_reg-rather-than-file.patch b/0008-freedreno-a3xx-compiler-use-max_reg-rather-than-file.patch
new file mode 100644
index 0000000..c1f76c3
--- /dev/null
+++ b/0008-freedreno-a3xx-compiler-use-max_reg-rather-than-file.patch
@@ -0,0 +1,59 @@
+From 5394a872f30022f64e6b2b58ef983b1fe5f6c08d Mon Sep 17 00:00:00 2001
+From: Rob Clark <robclark at freedesktop.org>
+Date: Tue, 20 Aug 2013 13:54:01 -0400
+Subject: [PATCH 08/17] freedreno/a3xx/compiler: use max_reg rather than
+ file_count
+
+Our current (rather naive) register assignment is based on mapping
+different register files (INPUT, OUTPUT, TEMP, CONST, etc) based on the
+max register index of the preceding file.  But in some cases, the lowest
+used register in a file might not be zero.  In which case
+file_count[file] != file_max[file] + 1.
+
+Signed-off-by: Rob Clark <robclark at freedesktop.org>
+---
+ src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+index e2c7853..dc5c873 100644
+--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
++++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+@@ -159,19 +159,19 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
+ 	/* Immediates go after constants: */
+ 	ctx->base_reg[TGSI_FILE_CONSTANT]  = 0;
+ 	ctx->base_reg[TGSI_FILE_IMMEDIATE] =
+-			ctx->info.file_count[TGSI_FILE_CONSTANT];
++			ctx->info.file_max[TGSI_FILE_CONSTANT] + 1;
+ 
+ 	/* Temporaries after outputs after inputs: */
+ 	ctx->base_reg[TGSI_FILE_INPUT]     = 0;
+ 	ctx->base_reg[TGSI_FILE_OUTPUT]    =
+-			ctx->info.file_count[TGSI_FILE_INPUT];
++			ctx->info.file_max[TGSI_FILE_INPUT] + 1;
+ 	ctx->base_reg[TGSI_FILE_TEMPORARY] =
+-			ctx->info.file_count[TGSI_FILE_INPUT] +
+-			ctx->info.file_count[TGSI_FILE_OUTPUT];
++			ctx->info.file_max[TGSI_FILE_INPUT] + 1 +
++			ctx->info.file_max[TGSI_FILE_OUTPUT] + 1;
+ 
+ 	so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE];
+-	ctx->immediate_idx = 4 * (ctx->info.file_count[TGSI_FILE_CONSTANT] +
+-			ctx->info.file_count[TGSI_FILE_IMMEDIATE]);
++	ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1 +
++			ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
+ 
+ 	ret = tgsi_parse_init(&ctx->parser, tokens);
+ 	if (ret != TGSI_PARSE_OK)
+@@ -309,7 +309,7 @@ get_internal_temp(struct fd3_compile_context *ctx,
+ 	/* assign next temporary: */
+ 	n = ctx->num_internal_temps++;
+ 
+-	tmp_dst->Index = ctx->info.file_count[TGSI_FILE_TEMPORARY] + n;
++	tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1;
+ 
+ 	src_from_dst(tmp_src, tmp_dst);
+ }
+-- 
+1.8.4.2
+
diff --git a/0009-freedreno-a3xx-compiler-cat4-cannot-use-const-reg-as.patch b/0009-freedreno-a3xx-compiler-cat4-cannot-use-const-reg-as.patch
new file mode 100644
index 0000000..a43dde8
--- /dev/null
+++ b/0009-freedreno-a3xx-compiler-cat4-cannot-use-const-reg-as.patch
@@ -0,0 +1,104 @@
+From f3a7e28fe47ec547c1c9b561b04af208ae2f0f04 Mon Sep 17 00:00:00 2001
+From: Rob Clark <robclark at freedesktop.org>
+Date: Tue, 20 Aug 2013 13:57:22 -0400
+Subject: [PATCH 09/17] freedreno/a3xx/compiler: cat4 cannot use const reg as
+ src
+
+Category 4 instructions (rsq, rcp, sqrt, etc) seem to be unable to take
+a const register as src.  In these cases we need to move the src to a
+temporary gpr first.
+
+This is the second case of such a restriction, where the instruction
+encoding appears to support a const src, but in fact the hw appears to
+ignore that bit.  So split things out into a helper that can be re-used
+for any instructions which have this limitation.
+
+Signed-off-by: Rob Clark <robclark at freedesktop.org>
+---
+ src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 37 +++++++++++++++++------
+ 1 file changed, 27 insertions(+), 10 deletions(-)
+
+diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+index dc5c873..772c7d2 100644
+--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
++++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+@@ -135,6 +135,8 @@ struct fd3_compile_context {
+ static void vectorize(struct fd3_compile_context *ctx,
+ 		struct ir3_instruction *instr, struct tgsi_dst_register *dst,
+ 		int nsrcs, ...);
++static void create_mov(struct fd3_compile_context *ctx,
++		struct tgsi_dst_register *dst, struct tgsi_src_register *src);
+ 
+ static unsigned
+ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
+@@ -374,6 +376,23 @@ get_immediate(struct fd3_compile_context *ctx,
+ 	reg->SwizzleW  = swiz2tgsi[swiz];
+ }
+ 
++/* for instructions that cannot take a const register as src, if needed
++ * generate a move to temporary gpr:
++ */
++static struct tgsi_src_register *
++get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src,
++		struct tgsi_src_register *tmp_src)
++{
++	static struct tgsi_dst_register tmp_dst;
++	if ((src->File == TGSI_FILE_CONSTANT) ||
++			(src->File == TGSI_FILE_IMMEDIATE)) {
++		get_internal_temp(ctx, &tmp_dst, tmp_src);
++		create_mov(ctx, &tmp_dst, src);
++		src = tmp_src;
++	}
++	return src;
++}
++
+ static type_t
+ get_type(struct fd3_compile_context *ctx)
+ {
+@@ -1027,8 +1046,7 @@ instr_cat3(const struct instr_translater *t,
+ 		struct tgsi_full_instruction *inst)
+ {
+ 	struct tgsi_dst_register *dst = get_dst(ctx, inst);
+-	struct tgsi_src_register *src1 = &inst->Src[1].Register;
+-	struct tgsi_dst_register tmp_dst;
++	struct tgsi_src_register *src1;
+ 	struct tgsi_src_register tmp_src;
+ 	struct ir3_instruction *instr;
+ 
+@@ -1038,12 +1056,7 @@ instr_cat3(const struct instr_translater *t,
+ 	 * const.  Not sure if this is a hw bug, or simply that the
+ 	 * disassembler lies.
+ 	 */
+-	if ((src1->File == TGSI_FILE_CONSTANT) ||
+-			(src1->File == TGSI_FILE_IMMEDIATE)) {
+-		get_internal_temp(ctx, &tmp_dst, &tmp_src);
+-		create_mov(ctx, &tmp_dst, src1);
+-		src1 = &tmp_src;
+-	}
++	src1 = get_unconst(ctx, &inst->Src[1].Register, &tmp_src);
+ 
+ 	instr = ir3_instr_create(ctx->ir, 3,
+ 			ctx->so->half_precision ? t->hopc : t->opc);
+@@ -1060,13 +1073,17 @@ instr_cat4(const struct instr_translater *t,
+ 		struct tgsi_full_instruction *inst)
+ {
+ 	struct tgsi_dst_register *dst = get_dst(ctx, inst);
++	struct tgsi_src_register *src;
++	struct tgsi_src_register tmp_src;
+ 	struct ir3_instruction *instr;
+ 
++	/* seems like blob compiler avoids const as src.. */
++	src = get_unconst(ctx, &inst->Src[0].Register, &tmp_src);
++
+ 	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
+ 	instr = ir3_instr_create(ctx->ir, 4, t->opc);
+ 
+-	vectorize(ctx, instr, dst, 1,
+-			&inst->Src[0].Register, 0);
++	vectorize(ctx, instr, dst, 1, src, 0);
+ 
+ 	regmask_set(ctx->needs_ss, instr->regs[0]);
+ 
+-- 
+1.8.4.2
+
diff --git a/0010-freedreno-fix-segfault-when-no-color-buffer-bound.patch b/0010-freedreno-fix-segfault-when-no-color-buffer-bound.patch
new file mode 100644
index 0000000..8978dd6
--- /dev/null
+++ b/0010-freedreno-fix-segfault-when-no-color-buffer-bound.patch
@@ -0,0 +1,216 @@
+From 12da4c1a6aa4b2a9cc337f669986a63c59fc3095 Mon Sep 17 00:00:00 2001
+From: Rob Clark <robclark at freedesktop.org>
+Date: Wed, 21 Aug 2013 13:20:05 -0400
+Subject: [PATCH 10/17] freedreno: fix segfault when no color buffer bound
+
+Don't crash when no color buffer bound.  Something caught when starting
+to run piglit, fixes a handful of piglit tests.
+
+Signed-off-by: Rob Clark <robclark at freedesktop.org>
+---
+ src/gallium/drivers/freedreno/a2xx/fd2_gmem.c     |  6 +++---
+ src/gallium/drivers/freedreno/a3xx/fd3_gmem.c     | 15 +++++++++++----
+ src/gallium/drivers/freedreno/freedreno_context.c |  3 ++-
+ src/gallium/drivers/freedreno/freedreno_draw.c    |  4 ++--
+ src/gallium/drivers/freedreno/freedreno_gmem.c    | 18 +++++++++++-------
+ src/gallium/drivers/freedreno/freedreno_state.c   |  2 +-
+ src/gallium/drivers/freedreno/freedreno_util.h    | 10 ++++++++++
+ 7 files changed, 40 insertions(+), 18 deletions(-)
+
+diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
+index e239eed..93695bc 100644
+--- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
++++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
+@@ -337,7 +337,7 @@ fd2_emit_tile_init(struct fd_context *ctx)
+ 	struct fd_ringbuffer *ring = ctx->ring;
+ 	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ 	struct fd_gmem_stateobj *gmem = &ctx->gmem;
+-	enum pipe_format format = pfb->cbufs[0]->format;
++	enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
+ 	uint32_t reg;
+ 
+ 	OUT_PKT3(ring, CP_SET_CONSTANT, 4);
+@@ -358,7 +358,7 @@ fd2_emit_tile_prep(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
+ {
+ 	struct fd_ringbuffer *ring = ctx->ring;
+ 	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+-	enum pipe_format format = pfb->cbufs[0]->format;
++	enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
+ 
+ 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ 	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
+@@ -379,7 +379,7 @@ fd2_emit_tile_renderprep(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
+ {
+ 	struct fd_ringbuffer *ring = ctx->ring;
+ 	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+-	enum pipe_format format = pfb->cbufs[0]->format;
++	enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
+ 
+ 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+ 	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
+diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+index 9050166..b9d0580 100644
+--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
++++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+@@ -214,8 +214,12 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
+ 		}, 1);
+ 
+ 	if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
+-		uint32_t base = depth_base(&ctx->gmem) *
+-				fd_resource(pfb->cbufs[0]->texture)->cpp;
++		uint32_t base = 0;
++		if (pfb->cbufs[0]) {
++			struct fd_resource *rsc =
++					fd_resource(pfb->cbufs[0]->texture);
++			base = depth_base(&ctx->gmem) * rsc->cpp;
++		}
+ 		emit_gmem2mem_surf(ring, RB_COPY_DEPTH_STENCIL, base, pfb->zsbuf);
+ 	}
+ 
+@@ -410,8 +414,11 @@ static void
+ fd3_emit_sysmem_prep(struct fd_context *ctx)
+ {
+ 	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+-	struct fd_resource *rsc = fd_resource(pfb->cbufs[0]->texture);
+ 	struct fd_ringbuffer *ring = ctx->ring;
++	uint32_t pitch = 0;
++
++	if (pfb->cbufs[0])
++		pitch = fd_resource(pfb->cbufs[0]->texture)->pitch;
+ 
+ 	fd3_emit_restore(ctx);
+ 
+@@ -422,7 +429,7 @@ fd3_emit_sysmem_prep(struct fd_context *ctx)
+ 	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0);
+ 
+ 	fd3_emit_rbrc_tile_state(ring,
+-			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(rsc->pitch));
++			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
+ 
+ 	/* setup scissor/offset for current tile: */
+ 	OUT_PKT0(ring, REG_A3XX_PA_SC_WINDOW_OFFSET, 1);
+diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
+index 44d525b..1d03351 100644
+--- a/src/gallium/drivers/freedreno/freedreno_context.c
++++ b/src/gallium/drivers/freedreno/freedreno_context.c
+@@ -86,7 +86,8 @@ fd_context_render(struct pipe_context *pctx)
+ 	ctx->gmem_reason = 0;
+ 	ctx->num_draws = 0;
+ 
+-	fd_resource(pfb->cbufs[0]->texture)->dirty = false;
++	if (pfb->cbufs[0])
++		fd_resource(pfb->cbufs[0]->texture)->dirty = false;
+ 	if (pfb->zsbuf)
+ 		fd_resource(pfb->zsbuf->texture)->dirty = false;
+ }
+diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
+index b02b8b9..d4f8d34 100644
+--- a/src/gallium/drivers/freedreno/freedreno_draw.c
++++ b/src/gallium/drivers/freedreno/freedreno_draw.c
+@@ -193,8 +193,8 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
+ 	}
+ 
+ 	DBG("%x depth=%f, stencil=%u (%s/%s)", buffers, depth, stencil,
+-			util_format_name(pfb->cbufs[0]->format),
+-			pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none");
++		util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
++		util_format_short_name(pipe_surface_format(pfb->zsbuf)));
+ 
+ 	ctx->clear(ctx, buffers, color, depth, stencil);
+ 
+diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
+index 197d1d9..3d959c6 100644
+--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
++++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
+@@ -72,12 +72,15 @@ calculate_tiles(struct fd_context *ctx)
+ 	struct fd_gmem_stateobj *gmem = &ctx->gmem;
+ 	struct pipe_scissor_state *scissor = &ctx->max_scissor;
+ 	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+-	uint32_t cpp = util_format_get_blocksize(pfb->cbufs[0]->format);
+ 	uint32_t gmem_size = ctx->screen->gmemsize_bytes;
+ 	uint32_t minx, miny, width, height;
+ 	uint32_t nbins_x = 1, nbins_y = 1;
+ 	uint32_t bin_w, bin_h;
+ 	uint32_t max_width = 992;
++	uint32_t cpp = 4;
++
++	if (pfb->cbufs[0])
++		cpp = util_format_get_blocksize(pfb->cbufs[0]->format);
+ 
+ 	if ((gmem->cpp == cpp) &&
+ 			!memcmp(&gmem->scissor, scissor, sizeof(gmem->scissor))) {
+@@ -211,15 +214,15 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
+ 
+ 	if (sysmem) {
+ 		DBG("rendering sysmem (%s/%s)",
+-			util_format_name(pfb->cbufs[0]->format),
+-			pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none");
++			util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
++			util_format_short_name(pipe_surface_format(pfb->zsbuf)));
+ 		render_sysmem(ctx);
+ 	} else {
+ 		struct fd_gmem_stateobj *gmem = &ctx->gmem;
+-		DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y,
+-			util_format_name(pfb->cbufs[0]->format),
+-			pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none");
+ 		calculate_tiles(ctx);
++		DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y,
++			util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
++			util_format_short_name(pipe_surface_format(pfb->zsbuf)));
+ 		render_tiles(ctx);
+ 	}
+ 
+@@ -231,7 +234,8 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
+ 
+ 	/* update timestamps on render targets: */
+ 	timestamp = fd_ringbuffer_timestamp(ctx->ring);
+-	fd_resource(pfb->cbufs[0]->texture)->timestamp = timestamp;
++	if (pfb->cbufs[0])
++		fd_resource(pfb->cbufs[0]->texture)->timestamp = timestamp;
+ 	if (pfb->zsbuf)
+ 		fd_resource(pfb->zsbuf->texture)->timestamp = timestamp;
+ 
+diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c
+index 2f5d52c..f5290a9 100644
+--- a/src/gallium/drivers/freedreno/freedreno_state.c
++++ b/src/gallium/drivers/freedreno/freedreno_state.c
+@@ -120,7 +120,7 @@ fd_set_framebuffer_state(struct pipe_context *pctx,
+ 	unsigned i;
+ 
+ 	DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->needs_flush,
+-			cso->cbufs[0], cso->zsbuf);
++			framebuffer->cbufs[0], framebuffer->zsbuf);
+ 
+ 	fd_context_render(pctx);
+ 
+diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
+index 22857d2..9f10686 100644
+--- a/src/gallium/drivers/freedreno/freedreno_util.h
++++ b/src/gallium/drivers/freedreno/freedreno_util.h
+@@ -33,6 +33,7 @@
+ #include <freedreno_ringbuffer.h>
+ 
+ #include "pipe/p_format.h"
++#include "pipe/p_state.h"
+ #include "util/u_debug.h"
+ #include "util/u_math.h"
+ #include "util/u_half.h"
+@@ -79,6 +80,15 @@ static inline uint32_t DRAW(enum pc_di_primtype prim_type,
+ 			(1                 << 14);
+ }
+ 
++
++static inline enum pipe_format
++pipe_surface_format(struct pipe_surface *psurf)
++{
++	if (!psurf)
++		return PIPE_FORMAT_NONE;
++	return psurf->format;
++}
++
+ #define LOG_DWORDS 0
+ 
+ 
+-- 
+1.8.4.2
+
diff --git a/0011-freedreno-a3xx-compiler-make-compiler-errors-more-us.patch b/0011-freedreno-a3xx-compiler-make-compiler-errors-more-us.patch
new file mode 100644
index 0000000..53aecaa
--- /dev/null
+++ b/0011-freedreno-a3xx-compiler-make-compiler-errors-more-us.patch
@@ -0,0 +1,172 @@
+From c726a6a907f119dfc4fb1c26fef7babf51dc1dea Mon Sep 17 00:00:00 2001
+From: Rob Clark <robclark at freedesktop.org>
+Date: Sat, 24 Aug 2013 12:56:22 -0400
+Subject: [PATCH 11/17] freedreno/a3xx/compiler: make compiler errors more
+ useful
+
+We probably should get rid of assert() entirely, but at this stage it is
+more useful for things to crash where we can catch it in a debugger.
+With compile_error() we have a single place to set an error flag (to
+bail out and return an error on the next instruction) so that will be a
+small change later when enough of the compiler bugs are sorted.
+
+But re-arrange/cleanup the error/assert stuff so we at least get a dump
+of the TGSI that triggered it.  So we see some useful output in piglit
+logs.
+
+Signed-off-by: Rob Clark <robclark at freedesktop.org>
+---
+ src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 47 +++++++++++++++--------
+ src/gallium/drivers/freedreno/a3xx/ir-a3xx.h      |  3 +-
+ 2 files changed, 33 insertions(+), 17 deletions(-)
+
+diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+index 772c7d2..e6c5bb7 100644
+--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
++++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+@@ -185,6 +185,21 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
+ }
+ 
+ static void
++compile_error(struct fd3_compile_context *ctx, const char *format, ...)
++{
++	va_list ap;
++	va_start(ap, format);
++	_debug_vprintf(format, ap);
++	va_end(ap);
++	tgsi_dump(ctx->tokens, 0);
++	assert(0);
++}
++
++#define compile_assert(ctx, cond) do { \
++		if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \
++	} while (0)
++
++static void
+ compile_free(struct fd3_compile_context *ctx)
+ {
+ 	tgsi_parse_free(&ctx->parser);
+@@ -212,9 +227,8 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
+ 		num = dst->Index + ctx->base_reg[dst->File];
+ 		break;
+ 	default:
+-		DBG("unsupported dst register file: %s",
++		compile_error(ctx, "unsupported dst register file: %s\n",
+ 			tgsi_file_name(dst->File));
+-		assert(0);
+ 		break;
+ 	}
+ 
+@@ -250,9 +264,8 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
+ 		num = src->Index + ctx->base_reg[src->File];
+ 		break;
+ 	default:
+-		DBG("unsupported src register file: %s",
++		compile_error(ctx, "unsupported src register file: %s\n",
+ 			tgsi_file_name(src->File));
+-		assert(0);
+ 		break;
+ 	}
+ 
+@@ -329,6 +342,13 @@ get_internal_temp_repl(struct fd3_compile_context *ctx,
+ 		tmp_src->SwizzleZ = tmp_src->SwizzleW = TGSI_SWIZZLE_X;
+ }
+ 
++static inline bool
++is_const(struct tgsi_src_register *src)
++{
++	return (src->File == TGSI_FILE_CONSTANT) ||
++			(src->File == TGSI_FILE_IMMEDIATE);
++}
++
+ static void
+ get_immediate(struct fd3_compile_context *ctx,
+ 		struct tgsi_src_register *reg, uint32_t val)
+@@ -578,8 +598,7 @@ trans_dotp(const struct instr_translater *t,
+ 	 * is a const.  Not sure if this is a hw bug, or simply that the
+ 	 * disassembler lies.
+ 	 */
+-	if ((src1->File == TGSI_FILE_IMMEDIATE) ||
+-			(src1->File == TGSI_FILE_CONSTANT)) {
++	if (is_const(src1)) {
+ 
+ 		/* the mov to tmp unswizzles src1, so now we have tmp.xyzw:
+ 		 */
+@@ -768,7 +787,7 @@ trans_samp(const struct instr_translater *t,
+ 		flags |= IR3_INSTR_P;
+ 		break;
+ 	default:
+-		assert(0);
++		compile_assert(ctx, 0);
+ 		break;
+ 	}
+ 
+@@ -1187,7 +1206,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
+ 	unsigned name = decl->Semantic.Name;
+ 	unsigned i;
+ 
+-	assert(decl->Declaration.Semantic);  // TODO is this ever not true?
++	compile_assert(ctx, decl->Declaration.Semantic);  // TODO is this ever not true?
+ 
+ 	DBG("decl out[%d] -> r%d", name, decl->Range.First + base);   // XXX
+ 
+@@ -1207,9 +1226,8 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
+ 				so->outputs[so->outputs_count++].regid = regid(i + base, 0);
+ 			break;
+ 		default:
+-			DBG("unknown VS semantic name: %s",
++			compile_error(ctx, "unknown VS semantic name: %s\n",
+ 					tgsi_semantic_names[name]);
+-			assert(0);
+ 		}
+ 	} else {
+ 		switch (name) {
+@@ -1217,9 +1235,8 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
+ 			so->color_regid = regid(decl->Range.First + base, 0);
+ 			break;
+ 		default:
+-			DBG("unknown VS semantic name: %s",
++			compile_error(ctx, "unknown VS semantic name: %s\n",
+ 					tgsi_semantic_names[name]);
+-			assert(0);
+ 		}
+ 	}
+ }
+@@ -1278,10 +1295,8 @@ compile_instructions(struct fd3_compile_context *ctx)
+ 				t->fxn(t, ctx, inst);
+ 				ctx->num_internal_temps = 0;
+ 			} else {
+-				debug_printf("unknown TGSI opc: %s\n",
++				compile_error(ctx, "unknown TGSI opc: %s\n",
+ 						tgsi_get_opcode_name(opc));
+-				tgsi_dump(ctx->tokens, 0);
+-				assert(0);
+ 			}
+ 
+ 			switch (inst->Instruction.Saturate) {
+@@ -1319,6 +1334,8 @@ fd3_compile_shader(struct fd3_shader_stateobj *so,
+ 
+ 	so->ir = ir3_shader_create();
+ 
++	assert(so->ir);
++
+ 	so->color_regid = regid(63,0);
+ 	so->pos_regid   = regid(63,0);
+ 	so->psize_regid = regid(63,0);
+diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h
+index 2fedc7b..61c01a7 100644
+--- a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h
++++ b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h
+@@ -166,8 +166,7 @@ struct ir3_instruction {
+ 	};
+ };
+ 
+-/* this is just large to cope w/ the large test *.asm: */
+-#define MAX_INSTRS 10240
++#define MAX_INSTRS 1024
+ 
+ struct ir3_shader {
+ 	unsigned instrs_count;
+-- 
+1.8.4.2
+
diff --git a/0012-freedreno-a3xx-compiler-bit-of-re-arrange-cleanup.patch b/0012-freedreno-a3xx-compiler-bit-of-re-arrange-cleanup.patch
new file mode 100644
index 0000000..a96fc8d
--- /dev/null
+++ b/0012-freedreno-a3xx-compiler-bit-of-re-arrange-cleanup.patch
@@ -0,0 +1,420 @@
+From ca5514b85161d480fb711ac26d74fc447e1e9bda Mon Sep 17 00:00:00 2001
+From: Rob Clark <robclark at freedesktop.org>
+Date: Sat, 24 Aug 2013 13:00:07 -0400
+Subject: [PATCH 12/17] freedreno/a3xx/compiler: bit of re-arrange/cleanup
+
+It seems there are a number of cases where instructions have limitations
+about reading src's from the const register file, so make
+get_unconst() a bit easier to use.
+
+Signed-off-by: Rob Clark <robclark at freedesktop.org>
+---
+ src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 132 ++++++++++++----------
+ 1 file changed, 71 insertions(+), 61 deletions(-)
+
+diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+index e6c5bb7..b5cdda8 100644
+--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
++++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+@@ -91,6 +91,7 @@ struct fd3_compile_context {
+ 
+ 	unsigned next_inloc;
+ 	unsigned num_internal_temps;
++	struct tgsi_src_register internal_temps[6];
+ 
+ 	/* track registers which need to synchronize w/ "complex alu" cat3
+ 	 * instruction pipeline:
+@@ -128,7 +129,7 @@ struct fd3_compile_context {
+ 	 * up the vector operation
+ 	 */
+ 	struct tgsi_dst_register tmp_dst;
+-	struct tgsi_src_register tmp_src;
++	struct tgsi_src_register *tmp_src;
+ };
+ 
+ 
+@@ -309,11 +310,11 @@ src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
+ /* Get internal-temp src/dst to use for a sequence of instructions
+  * generated by a single TGSI op.
+  */
+-static void
++static struct tgsi_src_register *
+ get_internal_temp(struct fd3_compile_context *ctx,
+-		struct tgsi_dst_register *tmp_dst,
+-		struct tgsi_src_register *tmp_src)
++		struct tgsi_dst_register *tmp_dst)
+ {
++	struct tgsi_src_register *tmp_src;
+ 	int n;
+ 
+ 	tmp_dst->File      = TGSI_FILE_TEMPORARY;
+@@ -323,23 +324,28 @@ get_internal_temp(struct fd3_compile_context *ctx,
+ 
+ 	/* assign next temporary: */
+ 	n = ctx->num_internal_temps++;
++	compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
++	tmp_src = &ctx->internal_temps[n];
+ 
+ 	tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1;
+ 
+ 	src_from_dst(tmp_src, tmp_dst);
++
++	return tmp_src;
+ }
+ 
+ /* same as get_internal_temp, but w/ src.xxxx (for instructions that
+  * replicate their results)
+  */
+-static void
++static struct tgsi_src_register *
+ get_internal_temp_repl(struct fd3_compile_context *ctx,
+-		struct tgsi_dst_register *tmp_dst,
+-		struct tgsi_src_register *tmp_src)
++		struct tgsi_dst_register *tmp_dst)
+ {
+-	get_internal_temp(ctx, tmp_dst, tmp_src);
++	struct tgsi_src_register *tmp_src =
++			get_internal_temp(ctx, tmp_dst);
+ 	tmp_src->SwizzleX = tmp_src->SwizzleY =
+ 		tmp_src->SwizzleZ = tmp_src->SwizzleW = TGSI_SWIZZLE_X;
++	return tmp_src;
+ }
+ 
+ static inline bool
+@@ -349,6 +355,22 @@ is_const(struct tgsi_src_register *src)
+ 			(src->File == TGSI_FILE_IMMEDIATE);
+ }
+ 
++/* for instructions that cannot take a const register as src, if needed
++ * generate a move to temporary gpr:
++ */
++static struct tgsi_src_register *
++get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src)
++{
++	if (is_const(src)) {
++		static struct tgsi_dst_register tmp_dst;
++		struct tgsi_src_register *tmp_src =
++				get_internal_temp(ctx, &tmp_dst);
++		create_mov(ctx, &tmp_dst, src);
++		src = tmp_src;
++	}
++	return src;
++}
++
+ static void
+ get_immediate(struct fd3_compile_context *ctx,
+ 		struct tgsi_src_register *reg, uint32_t val)
+@@ -396,27 +418,16 @@ get_immediate(struct fd3_compile_context *ctx,
+ 	reg->SwizzleW  = swiz2tgsi[swiz];
+ }
+ 
+-/* for instructions that cannot take a const register as src, if needed
+- * generate a move to temporary gpr:
+- */
+-static struct tgsi_src_register *
+-get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src,
+-		struct tgsi_src_register *tmp_src)
++static type_t
++get_ftype(struct fd3_compile_context *ctx)
+ {
+-	static struct tgsi_dst_register tmp_dst;
+-	if ((src->File == TGSI_FILE_CONSTANT) ||
+-			(src->File == TGSI_FILE_IMMEDIATE)) {
+-		get_internal_temp(ctx, &tmp_dst, tmp_src);
+-		create_mov(ctx, &tmp_dst, src);
+-		src = tmp_src;
+-	}
+-	return src;
++	return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
+ }
+ 
+ static type_t
+-get_type(struct fd3_compile_context *ctx)
++get_utype(struct fd3_compile_context *ctx)
+ {
+-	return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
++	return ctx->so->half_precision ? TYPE_U16 : TYPE_U32;
+ }
+ 
+ static unsigned
+@@ -436,7 +447,7 @@ static void
+ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
+ 		struct tgsi_src_register *src)
+ {
+-	type_t type_mov = get_type(ctx);
++	type_t type_mov = get_ftype(ctx);
+ 	unsigned i;
+ 
+ 	for (i = 0; i < 4; i++) {
+@@ -492,7 +503,7 @@ get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst)
+ 	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ 		struct tgsi_src_register *src = &inst->Src[i].Register;
+ 		if ((src->File == dst->File) && (src->Index == dst->Index)) {
+-			get_internal_temp(ctx, &ctx->tmp_dst, &ctx->tmp_src);
++			ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst);
+ 			ctx->tmp_dst.WriteMask = dst->WriteMask;
+ 			dst = &ctx->tmp_dst;
+ 			break;
+@@ -507,7 +518,7 @@ put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst,
+ {
+ 	/* if necessary, add mov back into original dst: */
+ 	if (dst != &inst->Dst[0].Register) {
+-		create_mov(ctx, &inst->Dst[0].Register, &ctx->tmp_src);
++		create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src);
+ 	}
+ }
+ 
+@@ -580,7 +591,7 @@ trans_dotp(const struct instr_translater *t,
+ {
+ 	struct ir3_instruction *instr;
+ 	struct tgsi_dst_register tmp_dst;
+-	struct tgsi_src_register tmp_src;
++	struct tgsi_src_register *tmp_src;
+ 	struct tgsi_dst_register *dst  = &inst->Dst[0].Register;
+ 	struct tgsi_src_register *src0 = &inst->Src[0].Register;
+ 	struct tgsi_src_register *src1 = &inst->Src[1].Register;
+@@ -590,7 +601,7 @@ trans_dotp(const struct instr_translater *t,
+ 	unsigned n = t->arg;     /* number of components */
+ 	unsigned i;
+ 
+-	get_internal_temp_repl(ctx, &tmp_dst, &tmp_src);
++	tmp_src = get_internal_temp_repl(ctx, &tmp_dst);
+ 
+ 	/* Blob compiler never seems to use a const in src1 position for
+ 	 * mad.*, although there does seem (according to disassembler
+@@ -609,7 +620,7 @@ trans_dotp(const struct instr_translater *t,
+ 		 * because after that point we no longer need tmp.x:
+ 		 */
+ 		create_mov(ctx, &tmp_dst, src1);
+-		src1 = &tmp_src;
++		src1 = tmp_src;
+ 	}
+ 
+ 	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+@@ -624,7 +635,7 @@ trans_dotp(const struct instr_translater *t,
+ 		add_dst_reg(ctx, instr, &tmp_dst, 0);
+ 		add_src_reg(ctx, instr, src0, swiz0[i]);
+ 		add_src_reg(ctx, instr, src1, swiz1[i]);
+-		add_src_reg(ctx, instr, &tmp_src, 0);
++		add_src_reg(ctx, instr, tmp_src, 0);
+ 	}
+ 
+ 	/* DPH(a,b) = (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + b.w */
+@@ -634,7 +645,7 @@ trans_dotp(const struct instr_translater *t,
+ 		instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+ 		add_dst_reg(ctx, instr, &tmp_dst, 0);
+ 		add_src_reg(ctx, instr, src1, swiz1[i]);
+-		add_src_reg(ctx, instr, &tmp_src, 0);
++		add_src_reg(ctx, instr, tmp_src, 0);
+ 
+ 		n++;
+ 	}
+@@ -646,7 +657,7 @@ trans_dotp(const struct instr_translater *t,
+ 		ir3_instr_create(ctx->ir, 0, OPC_NOP);
+ 	}
+ 
+-	create_mov(ctx, dst, &tmp_src);
++	create_mov(ctx, dst, tmp_src);
+ }
+ 
+ /* LRP(a,b,c) = (a * b) + ((1 - a) * c) */
+@@ -657,11 +668,11 @@ trans_lrp(const struct instr_translater *t,
+ {
+ 	struct ir3_instruction *instr;
+ 	struct tgsi_dst_register tmp_dst1, tmp_dst2;
+-	struct tgsi_src_register tmp_src1, tmp_src2;
++	struct tgsi_src_register *tmp_src1, *tmp_src2;
+ 	struct tgsi_src_register tmp_const;
+ 
+-	get_internal_temp(ctx, &tmp_dst1, &tmp_src1);
+-	get_internal_temp(ctx, &tmp_dst2, &tmp_src2);
++	tmp_src1 = get_internal_temp(ctx, &tmp_dst1);
++	tmp_src2 = get_internal_temp(ctx, &tmp_dst2);
+ 
+ 	get_immediate(ctx, &tmp_const, fui(1.0));
+ 
+@@ -680,14 +691,14 @@ trans_lrp(const struct instr_translater *t,
+ 	/* tmp2 = tmp2 * c */
+ 	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+ 	vectorize(ctx, instr, &tmp_dst2, 2,
+-			&tmp_src2, 0,
++			tmp_src2, 0,
+ 			&inst->Src[2].Register, 0);
+ 
+ 	/* dst = tmp1 + tmp2 */
+ 	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+ 	vectorize(ctx, instr, &inst->Dst[0].Register, 2,
+-			&tmp_src1, 0,
+-			&tmp_src2, 0);
++			tmp_src1, 0,
++			tmp_src2, 0);
+ }
+ 
+ /* FRC(x) = x - FLOOR(x) */
+@@ -698,9 +709,9 @@ trans_frac(const struct instr_translater *t,
+ {
+ 	struct ir3_instruction *instr;
+ 	struct tgsi_dst_register tmp_dst;
+-	struct tgsi_src_register tmp_src;
++	struct tgsi_src_register *tmp_src;
+ 
+-	get_internal_temp(ctx, &tmp_dst, &tmp_src);
++	tmp_src = get_internal_temp(ctx, &tmp_dst);
+ 
+ 	/* tmp = FLOOR(x) */
+ 	instr = ir3_instr_create(ctx->ir, 2, OPC_FLOOR_F);
+@@ -711,7 +722,7 @@ trans_frac(const struct instr_translater *t,
+ 	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+ 	vectorize(ctx, instr, &inst->Dst[0].Register, 2,
+ 			&inst->Src[0].Register, 0,
+-			&tmp_src, IR3_REG_NEGATE);
++			tmp_src, IR3_REG_NEGATE);
+ }
+ 
+ /* POW(a,b) = EXP2(b * LOG2(a)) */
+@@ -723,12 +734,12 @@ trans_pow(const struct instr_translater *t,
+ 	struct ir3_instruction *instr;
+ 	struct ir3_register *r;
+ 	struct tgsi_dst_register tmp_dst;
+-	struct tgsi_src_register tmp_src;
++	struct tgsi_src_register *tmp_src;
+ 	struct tgsi_dst_register *dst  = &inst->Dst[0].Register;
+ 	struct tgsi_src_register *src0 = &inst->Src[0].Register;
+ 	struct tgsi_src_register *src1 = &inst->Src[1].Register;
+ 
+-	get_internal_temp_repl(ctx, &tmp_dst, &tmp_src);
++	tmp_src = get_internal_temp_repl(ctx, &tmp_dst);
+ 
+ 	/* log2 Rtmp, Rsrc0 */
+ 	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
+@@ -740,7 +751,7 @@ trans_pow(const struct instr_translater *t,
+ 	/* mul.f Rtmp, Rtmp, Rsrc1 */
+ 	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+ 	add_dst_reg(ctx, instr, &tmp_dst, 0);
+-	add_src_reg(ctx, instr, &tmp_src, 0);
++	add_src_reg(ctx, instr, tmp_src, 0);
+ 	add_src_reg(ctx, instr, src1, src1->SwizzleX);
+ 
+ 	/* blob compiler seems to ensure there are at least 6 instructions
+@@ -752,10 +763,10 @@ trans_pow(const struct instr_translater *t,
+ 	/* exp2 Rdst, Rtmp */
+ 	instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2);
+ 	r = add_dst_reg(ctx, instr, &tmp_dst, 0);
+-	add_src_reg(ctx, instr, &tmp_src, 0);
++	add_src_reg(ctx, instr, tmp_src, 0);
+ 	regmask_set(ctx->needs_ss, r);
+ 
+-	create_mov(ctx, dst, &tmp_src);
++	create_mov(ctx, dst, tmp_src);
+ }
+ 
+ /* texture fetch/sample instructions: */
+@@ -766,8 +777,6 @@ trans_samp(const struct instr_translater *t,
+ {
+ 	struct ir3_register *r;
+ 	struct ir3_instruction *instr;
+-	struct tgsi_dst_register tmp_dst;
+-	struct tgsi_src_register tmp_src;
+ 	struct tgsi_src_register *coord = &inst->Src[0].Register;
+ 	struct tgsi_src_register *samp  = &inst->Src[1].Register;
+ 	unsigned tex = inst->Texture.Texture;
+@@ -802,10 +811,13 @@ trans_samp(const struct instr_translater *t,
+ 	 */
+ 	for (i = 1; (i < 4) && (order[i] >= 0); i++) {
+ 		if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i])) {
+-			type_t type_mov = get_type(ctx);
++			struct tgsi_dst_register tmp_dst;
++			struct tgsi_src_register *tmp_src;
++
++			type_t type_mov = get_ftype(ctx);
+ 
+ 			/* need to move things around: */
+-			get_internal_temp(ctx, &tmp_dst, &tmp_src);
++			tmp_src = get_internal_temp(ctx, &tmp_dst);
+ 
+ 			for (j = 0; (j < 4) && (order[j] >= 0); j++) {
+ 				instr = ir3_instr_create(ctx->ir, 1, 0);
+@@ -816,7 +828,7 @@ trans_samp(const struct instr_translater *t,
+ 						src_swiz(coord, order[j]));
+ 			}
+ 
+-			coord = &tmp_src;
++			coord = tmp_src;
+ 
+ 			if (j < 4)
+ 				ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 4 - j - 1;
+@@ -826,7 +838,7 @@ trans_samp(const struct instr_translater *t,
+ 	}
+ 
+ 	instr = ir3_instr_create(ctx->ir, 5, t->opc);
+-	instr->cat5.type = get_type(ctx);
++	instr->cat5.type = get_ftype(ctx);
+ 	instr->cat5.samp = samp->Index;
+ 	instr->cat5.tex  = samp->Index;
+ 	instr->flags |= flags;
+@@ -847,12 +859,12 @@ trans_cmp(const struct instr_translater *t,
+ {
+ 	struct ir3_instruction *instr;
+ 	struct tgsi_dst_register tmp_dst;
+-	struct tgsi_src_register tmp_src;
++	struct tgsi_src_register *tmp_src;
+ 	struct tgsi_src_register constval;
+ 	/* final instruction uses original src1 and src2, so we need get_dst() */
+ 	struct tgsi_dst_register *dst = get_dst(ctx, inst);
+ 
+-	get_internal_temp(ctx, &tmp_dst, &tmp_src);
++	tmp_src = get_internal_temp(ctx, &tmp_dst);
+ 
+ 	/* cmps.f.ge tmp, src0, 0.0 */
+ 	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
+@@ -866,7 +878,7 @@ trans_cmp(const struct instr_translater *t,
+ 	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
+ 	instr->repeat = 3;
+ 	add_dst_reg(ctx, instr, &tmp_dst, 0);
+-	add_src_reg(ctx, instr, &tmp_src, 0)->flags |= IR3_REG_R;
++	add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R;
+ 	ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
+ 
+ 	/* sel.{f32,f16} dst, src2, tmp, src1 */
+@@ -874,7 +886,7 @@ trans_cmp(const struct instr_translater *t,
+ 			OPC_SEL_F16 : OPC_SEL_F32);
+ 	vectorize(ctx, instr, dst, 3,
+ 			&inst->Src[2].Register, 0,
+-			&tmp_src, 0,
++			tmp_src, 0,
+ 			&inst->Src[1].Register, 0);
+ 
+ 	put_dst(ctx, inst, dst);
+@@ -1066,7 +1078,6 @@ instr_cat3(const struct instr_translater *t,
+ {
+ 	struct tgsi_dst_register *dst = get_dst(ctx, inst);
+ 	struct tgsi_src_register *src1;
+-	struct tgsi_src_register tmp_src;
+ 	struct ir3_instruction *instr;
+ 
+ 	/* Blob compiler never seems to use a const in src1 position..
+@@ -1075,7 +1086,7 @@ instr_cat3(const struct instr_translater *t,
+ 	 * const.  Not sure if this is a hw bug, or simply that the
+ 	 * disassembler lies.
+ 	 */
+-	src1 = get_unconst(ctx, &inst->Src[1].Register, &tmp_src);
++	src1 = get_unconst(ctx, &inst->Src[1].Register);
+ 
+ 	instr = ir3_instr_create(ctx->ir, 3,
+ 			ctx->so->half_precision ? t->hopc : t->opc);
+@@ -1093,11 +1104,10 @@ instr_cat4(const struct instr_translater *t,
+ {
+ 	struct tgsi_dst_register *dst = get_dst(ctx, inst);
+ 	struct tgsi_src_register *src;
+-	struct tgsi_src_register tmp_src;
+ 	struct ir3_instruction *instr;
+ 
+ 	/* seems like blob compiler avoids const as src.. */
+-	src = get_unconst(ctx, &inst->Src[0].Register, &tmp_src);
++	src = get_unconst(ctx, &inst->Src[0].Register);
+ 
+ 	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
+ 	instr = ir3_instr_create(ctx->ir, 4, t->opc);
+-- 
+1.8.4.2
+
diff --git a/0013-freedreno-a3xx-compiler-fix-SGT-SLT-etc.patch b/0013-freedreno-a3xx-compiler-fix-SGT-SLT-etc.patch
new file mode 100644
index 0000000..621070a
--- /dev/null
+++ b/0013-freedreno-a3xx-compiler-fix-SGT-SLT-etc.patch
@@ -0,0 +1,231 @@
+From c20aa295ec0e1f7b70986a32ef2d74e5097cf640 Mon Sep 17 00:00:00 2001
+From: Rob Clark <robclark at freedesktop.org>
+Date: Sat, 24 Aug 2013 13:02:53 -0400
+Subject: [PATCH 13/17] freedreno/a3xx/compiler: fix SGT/SLT/etc
+
+The cmps.f.* instruction doesn't actually seem to give a float 1.0 or
+0.0 output.  It either needs a cov.u16f16 or add.s + sel.f16.  This
+makes SGT/SLT/etc more similar to CMP, so handle them in trans_cmp().
+
+This fixes a bunch of piglit tests.
+
+Signed-off-by: Rob Clark <robclark at freedesktop.org>
+---
+ src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 154 ++++++++++++++++++----
+ 1 file changed, 125 insertions(+), 29 deletions(-)
+
+diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+index b5cdda8..477053b 100644
+--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
++++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+@@ -851,7 +851,39 @@ trans_samp(const struct instr_translater *t,
+ 	regmask_set(ctx->needs_sy, r);
+ }
+ 
+-/* CMP(a,b,c) = (a < 0) ? b : c */
++/*
++ * SEQ(a,b) = (a == b) ? 1.0 : 0.0
++ *   cmps.f.eq tmp0, b, a
++ *   cov.u16f16 dst, tmp0
++ *
++ * SNE(a,b) = (a != b) ? 1.0 : 0.0
++ *   cmps.f.eq tmp0, b, a
++ *   add.s tmp0, tmp0, -1
++ *   sel.f16 dst, {0.0}, tmp0, {1.0}
++ *
++ * SGE(a,b) = (a >= b) ? 1.0 : 0.0
++ *   cmps.f.ge tmp0, a, b
++ *   cov.u16f16 dst, tmp0
++ *
++ * SLE(a,b) = (a <= b) ? 1.0 : 0.0
++ *   cmps.f.ge tmp0, b, a
++ *   cov.u16f16 dst, tmp0
++ *
++ * SGT(a,b) = (a > b)  ? 1.0 : 0.0
++ *   cmps.f.ge tmp0, b, a
++ *   add.s tmp0, tmp0, -1
++ *   sel.f16 dst, {0.0}, tmp0, {1.0}
++ *
++ * SLT(a,b) = (a < b)  ? 1.0 : 0.0
++ *   cmps.f.ge tmp0, a, b
++ *   add.s tmp0, tmp0, -1
++ *   sel.f16 dst, {0.0}, tmp0, {1.0}
++ *
++ * CMP(a,b,c) = (a < 0.0) ? b : c
++ *   cmps.f.ge tmp0, a, {0.0}
++ *   add.s tmp0, tmp0, -1
++ *   sel.f16 dst, c, tmp0, b
++ */
+ static void
+ trans_cmp(const struct instr_translater *t,
+ 		struct fd3_compile_context *ctx,
+@@ -860,34 +892,97 @@ trans_cmp(const struct instr_translater *t,
+ 	struct ir3_instruction *instr;
+ 	struct tgsi_dst_register tmp_dst;
+ 	struct tgsi_src_register *tmp_src;
+-	struct tgsi_src_register constval;
+-	/* final instruction uses original src1 and src2, so we need get_dst() */
++	struct tgsi_src_register constval0, constval1;
++	/* final instruction for CMP() uses orig src1 and src2: */
+ 	struct tgsi_dst_register *dst = get_dst(ctx, inst);
++	struct tgsi_src_register *a0, *a1;
++	unsigned condition;
+ 
+ 	tmp_src = get_internal_temp(ctx, &tmp_dst);
+ 
+-	/* cmps.f.ge tmp, src0, 0.0 */
++	switch (t->tgsi_opc) {
++	case TGSI_OPCODE_SEQ:
++	case TGSI_OPCODE_SNE:
++		a0 = &inst->Src[1].Register;  /* b */
++		a1 = &inst->Src[0].Register;  /* a */
++		condition = IR3_COND_EQ;
++		break;
++	case TGSI_OPCODE_SGE:
++	case TGSI_OPCODE_SLT:
++		a0 = &inst->Src[0].Register;  /* a */
++		a1 = &inst->Src[1].Register;  /* b */
++		condition = IR3_COND_GE;
++		break;
++	case TGSI_OPCODE_SLE:
++	case TGSI_OPCODE_SGT:
++		a0 = &inst->Src[1].Register;  /* b */
++		a1 = &inst->Src[0].Register;  /* a */
++		condition = IR3_COND_GE;
++		break;
++	case TGSI_OPCODE_CMP:
++		get_immediate(ctx, &constval0, fui(0.0));
++		a0 = &inst->Src[0].Register;  /* a */
++		a1 = &constval0;              /* {0.0} */
++		condition = IR3_COND_GE;
++		break;
++	default:
++		compile_assert(ctx, 0);
++		return;
++	}
++
++	/* NOTE: seems blob compiler will move a const to a gpr if both
++	 * src args to cmps.f are const.  Need to check if this applies
++	 * to other instructions..
++	 */
++	if (is_const(a0) && is_const(a1))
++		a0 = get_unconst(ctx, a0);
++
++	/* cmps.f.ge tmp, a0, a1 */
+ 	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
+-	instr->cat2.condition = IR3_COND_GE;
+-	get_immediate(ctx, &constval, fui(0.0));
+-	vectorize(ctx, instr, &tmp_dst, 2,
+-			&inst->Src[0].Register, 0,
+-			&constval, 0);
++	instr->cat2.condition = condition;
++	vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
+ 
+-	/* add.s tmp, tmp, -1 */
+-	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
+-	instr->repeat = 3;
+-	add_dst_reg(ctx, instr, &tmp_dst, 0);
+-	add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R;
+-	ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
++	switch (t->tgsi_opc) {
++	case TGSI_OPCODE_SEQ:
++	case TGSI_OPCODE_SGE:
++	case TGSI_OPCODE_SLE:
++		/* cov.u16f16 dst, tmp0 */
++		instr = ir3_instr_create(ctx->ir, 1, 0);
++		instr->cat1.src_type = get_utype(ctx);
++		instr->cat1.dst_type = get_ftype(ctx);
++		vectorize(ctx, instr, dst, 1, tmp_src, 0);
++		break;
++	case TGSI_OPCODE_SNE:
++	case TGSI_OPCODE_SGT:
++	case TGSI_OPCODE_SLT:
++	case TGSI_OPCODE_CMP:
++		/* add.s tmp, tmp, -1 */
++		instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
++		instr->repeat = 3;
++		add_dst_reg(ctx, instr, &tmp_dst, 0);
++		add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R;
++		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
++
++		if (t->tgsi_opc == TGSI_OPCODE_CMP) {
++			/* sel.{f32,f16} dst, src2, tmp, src1 */
++			instr = ir3_instr_create(ctx->ir, 3,
++					ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
++			vectorize(ctx, instr, dst, 3,
++					&inst->Src[2].Register, 0,
++					tmp_src, 0,
++					&inst->Src[1].Register, 0);
++		} else {
++			get_immediate(ctx, &constval0, fui(0.0));
++			get_immediate(ctx, &constval1, fui(1.0));
++			/* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */
++			instr = ir3_instr_create(ctx->ir, 3,
++					ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
++			vectorize(ctx, instr, dst, 3,
++					&constval0, 0, tmp_src, 0, &constval1, 0);
++		}
+ 
+-	/* sel.{f32,f16} dst, src2, tmp, src1 */
+-	instr = ir3_instr_create(ctx->ir, 3, ctx->so->half_precision ?
+-			OPC_SEL_F16 : OPC_SEL_F32);
+-	vectorize(ctx, instr, dst, 3,
+-			&inst->Src[2].Register, 0,
+-			tmp_src, 0,
+-			&inst->Src[1].Register, 0);
++		break;
++	}
+ 
+ 	put_dst(ctx, inst, dst);
+ }
+@@ -948,8 +1043,8 @@ trans_if(const struct instr_translater *t,
+ 
+ 	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
+ 	ir3_reg_create(instr, regid(REG_P0, 0), 0);
+-	add_src_reg(ctx, instr, &constval, constval.SwizzleX);
+ 	add_src_reg(ctx, instr, src, src->SwizzleX);
++	add_src_reg(ctx, instr, &constval, constval.SwizzleX);
+ 	instr->cat2.condition = IR3_COND_EQ;
+ 
+ 	instr = ir3_instr_create(ctx->ir, 0, OPC_BR);
+@@ -1033,10 +1128,6 @@ instr_cat2(const struct instr_translater *t,
+ 	instr = ir3_instr_create(ctx->ir, 2, t->opc);
+ 
+ 	switch (t->tgsi_opc) {
+-	case TGSI_OPCODE_SLT:
+-	case TGSI_OPCODE_SGE:
+-		instr->cat2.condition = t->arg;
+-		break;
+ 	case TGSI_OPCODE_ABS:
+ 		src0_flags = IR3_REG_ABS;
+ 		break;
+@@ -1135,12 +1226,11 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
+ 	INSTR(DPH,          trans_dotp, .arg = 3),   /* almost like DP3 */
+ 	INSTR(MIN,          instr_cat2, .opc = OPC_MIN_F),
+ 	INSTR(MAX,          instr_cat2, .opc = OPC_MAX_F),
+-	INSTR(SLT,          instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_LT),
+-	INSTR(SGE,          instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_GE),
+ 	INSTR(MAD,          instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
+ 	INSTR(LRP,          trans_lrp),
+ 	INSTR(FRC,          trans_frac),
+ 	INSTR(FLR,          instr_cat2, .opc = OPC_FLOOR_F),
++	INSTR(ARL,          instr_cat2, .opc = OPC_FLOOR_F),
+ 	INSTR(EX2,          instr_cat4, .opc = OPC_EXP2),
+ 	INSTR(LG2,          instr_cat4, .opc = OPC_LOG2),
+ 	INSTR(POW,          trans_pow),
+@@ -1149,6 +1239,12 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
+ 	INSTR(SIN,          instr_cat4, .opc = OPC_COS),
+ 	INSTR(TEX,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX),
+ 	INSTR(TXP,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP),
++	INSTR(SGT,          trans_cmp),
++	INSTR(SLT,          trans_cmp),
++	INSTR(SGE,          trans_cmp),
++	INSTR(SLE,          trans_cmp),
++	INSTR(SNE,          trans_cmp),
++	INSTR(SEQ,          trans_cmp),
+ 	INSTR(CMP,          trans_cmp),
+ 	INSTR(IF,           trans_if),
+ 	INSTR(ELSE,         trans_else),
+-- 
+1.8.4.2
+
diff --git a/0014-freedreno-a3xx-don-t-leak-so-much.patch b/0014-freedreno-a3xx-don-t-leak-so-much.patch
new file mode 100644
index 0000000..0ac9110
--- /dev/null
+++ b/0014-freedreno-a3xx-don-t-leak-so-much.patch
@@ -0,0 +1,36 @@
+From 0b2c5119cb772751edb3c42c9c0545443e26fd7f Mon Sep 17 00:00:00 2001
+From: Rob Clark <robclark at freedesktop.org>
+Date: Mon, 17 Jun 2013 20:11:54 -0400
+Subject: [PATCH 14/17] freedreno/a3xx: don't leak so much
+
+Signed-off-by: Rob Clark <robclark at freedesktop.org>
+---
+ src/gallium/drivers/freedreno/a3xx/fd3_context.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
+index 3ae9b29..589aeed 100644
+--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c
++++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
+@@ -40,7 +40,18 @@
+ static void
+ fd3_context_destroy(struct pipe_context *pctx)
+ {
++	struct fd3_context *fd3_ctx = fd3_context(fd_context(pctx));
++
+ 	fd3_prog_fini(pctx);
++
++	fd_bo_del(fd3_ctx->vs_pvt_mem);
++	fd_bo_del(fd3_ctx->fs_pvt_mem);
++	fd_bo_del(fd3_ctx->vsc_size_mem);
++	fd_bo_del(fd3_ctx->vsc_pipe_mem);
++
++	pipe_resource_reference(&fd3_ctx->solid_vbuf, NULL);
++	pipe_resource_reference(&fd3_ctx->blit_texcoord_vbuf, NULL);
++
+ 	fd_context_destroy(pctx);
+ }
+ 
+-- 
+1.8.4.2
+
diff --git a/0015-freedreno-a3xx-compiler-better-const-handling.patch b/0015-freedreno-a3xx-compiler-better-const-handling.patch
new file mode 100644
index 0000000..221e083
--- /dev/null
+++ b/0015-freedreno-a3xx-compiler-better-const-handling.patch
@@ -0,0 +1,376 @@
+From f1998c8aa7d82006f9ef7e6710a0f68f30bfc109 Mon Sep 17 00:00:00 2001
+From: Rob Clark <robclark at freedesktop.org>
+Date: Sat, 24 Aug 2013 17:30:50 -0400
+Subject: [PATCH 15/17] freedreno/a3xx/compiler: better const handling
+
+Seems like most/all instructions have some restrictions about const src
+registers.  It seems like the 2 src (cat2) instructions can take at most
+one const, and the 3 src (cat3) instructions can take at most one const
+in the first 2 arguments.  And so on.  Handle this properly now.
+
+Signed-off-by: Rob Clark <robclark at freedesktop.org>
+---
+ src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 211 +++++++++++++---------
+ 1 file changed, 121 insertions(+), 90 deletions(-)
+
+diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+index 477053b..dcdd2d9 100644
+--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
++++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+@@ -355,20 +355,47 @@ is_const(struct tgsi_src_register *src)
+ 			(src->File == TGSI_FILE_IMMEDIATE);
+ }
+ 
++static type_t
++get_ftype(struct fd3_compile_context *ctx)
++{
++	return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
++}
++
++static type_t
++get_utype(struct fd3_compile_context *ctx)
++{
++	return ctx->so->half_precision ? TYPE_U16 : TYPE_U32;
++}
++
++static unsigned
++src_swiz(struct tgsi_src_register *src, int chan)
++{
++	switch (chan) {
++	case 0: return src->SwizzleX;
++	case 1: return src->SwizzleY;
++	case 2: return src->SwizzleZ;
++	case 3: return src->SwizzleW;
++	}
++	assert(0);
++	return 0;
++}
++
+ /* for instructions that cannot take a const register as src, if needed
+  * generate a move to temporary gpr:
+  */
+ static struct tgsi_src_register *
+ get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src)
+ {
+-	if (is_const(src)) {
+-		static struct tgsi_dst_register tmp_dst;
+-		struct tgsi_src_register *tmp_src =
+-				get_internal_temp(ctx, &tmp_dst);
+-		create_mov(ctx, &tmp_dst, src);
+-		src = tmp_src;
+-	}
+-	return src;
++	struct tgsi_dst_register tmp_dst;
++	struct tgsi_src_register *tmp_src;
++
++	compile_assert(ctx, is_const(src));
++
++	tmp_src = get_internal_temp(ctx, &tmp_dst);
++
++	create_mov(ctx, &tmp_dst, src);
++
++	return tmp_src;
+ }
+ 
+ static void
+@@ -418,31 +445,6 @@ get_immediate(struct fd3_compile_context *ctx,
+ 	reg->SwizzleW  = swiz2tgsi[swiz];
+ }
+ 
+-static type_t
+-get_ftype(struct fd3_compile_context *ctx)
+-{
+-	return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
+-}
+-
+-static type_t
+-get_utype(struct fd3_compile_context *ctx)
+-{
+-	return ctx->so->half_precision ? TYPE_U16 : TYPE_U32;
+-}
+-
+-static unsigned
+-src_swiz(struct tgsi_src_register *src, int chan)
+-{
+-	switch (chan) {
+-	case 0: return src->SwizzleX;
+-	case 1: return src->SwizzleY;
+-	case 2: return src->SwizzleZ;
+-	case 3: return src->SwizzleW;
+-	}
+-	assert(0);
+-	return 0;
+-}
+-
+ static void
+ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
+ 		struct tgsi_src_register *src)
+@@ -463,7 +465,6 @@ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
+ 			ir3_instr_create(ctx->ir, 0, OPC_NOP);
+ 		}
+ 	}
+-
+ }
+ 
+ static void
+@@ -584,6 +585,15 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
+  * native instructions:
+  */
+ 
++static inline void
++get_swiz(unsigned *swiz, struct tgsi_src_register *src)
++{
++	swiz[0] = src->SwizzleX;
++	swiz[1] = src->SwizzleY;
++	swiz[2] = src->SwizzleZ;
++	swiz[3] = src->SwizzleW;
++}
++
+ static void
+ trans_dotp(const struct instr_translater *t,
+ 		struct fd3_compile_context *ctx,
+@@ -595,34 +605,31 @@ trans_dotp(const struct instr_translater *t,
+ 	struct tgsi_dst_register *dst  = &inst->Dst[0].Register;
+ 	struct tgsi_src_register *src0 = &inst->Src[0].Register;
+ 	struct tgsi_src_register *src1 = &inst->Src[1].Register;
+-	unsigned swiz0[] = { src0->SwizzleX, src0->SwizzleY, src0->SwizzleZ, src0->SwizzleW };
+-	unsigned swiz1[] = { src1->SwizzleX, src1->SwizzleY, src1->SwizzleZ, src1->SwizzleW };
++	unsigned swiz0[4];
++	unsigned swiz1[4];
+ 	opc_t opc_mad    = ctx->so->half_precision ? OPC_MAD_F16 : OPC_MAD_F32;
+ 	unsigned n = t->arg;     /* number of components */
+-	unsigned i;
++	unsigned i, swapped = 0;
+ 
+ 	tmp_src = get_internal_temp_repl(ctx, &tmp_dst);
+ 
+-	/* Blob compiler never seems to use a const in src1 position for
+-	 * mad.*, although there does seem (according to disassembler
+-	 * hidden in libllvm-a3xx.so) to be a bit to indicate that src1
+-	 * is a const.  Not sure if this is a hw bug, or simply that the
+-	 * disassembler lies.
++	/* in particular, can't handle const for src1 for cat3/mad:
+ 	 */
+ 	if (is_const(src1)) {
+-
+-		/* the mov to tmp unswizzles src1, so now we have tmp.xyzw:
+-		 */
+-		for (i = 0; i < 4; i++)
+-			swiz1[i] = i;
+-
+-		/* the first mul.f will clobber tmp.x, but that is ok
+-		 * because after that point we no longer need tmp.x:
+-		 */
+-		create_mov(ctx, &tmp_dst, src1);
+-		src1 = tmp_src;
++		if (!is_const(src0)) {
++			struct tgsi_src_register *tmp;
++			tmp = src0;
++			src0 = src1;
++			src1 = tmp;
++			swapped = 1;
++		} else {
++			src0 = get_unconst(ctx, src0);
++		}
+ 	}
+ 
++	get_swiz(swiz0, src0);
++	get_swiz(swiz1, src1);
++
+ 	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+ 	add_dst_reg(ctx, instr, &tmp_dst, 0);
+ 	add_src_reg(ctx, instr, src0, swiz0[0]);
+@@ -640,22 +647,20 @@ trans_dotp(const struct instr_translater *t,
+ 
+ 	/* DPH(a,b) = (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + b.w */
+ 	if (t->tgsi_opc == TGSI_OPCODE_DPH) {
+-		ir3_instr_create(ctx->ir, 0, OPC_NOP);
++		ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 1;
+ 
+ 		instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+ 		add_dst_reg(ctx, instr, &tmp_dst, 0);
+-		add_src_reg(ctx, instr, src1, swiz1[i]);
++		if (swapped)
++			add_src_reg(ctx, instr, src0, swiz0[i]);
++		else
++			add_src_reg(ctx, instr, src1, swiz1[i]);
+ 		add_src_reg(ctx, instr, tmp_src, 0);
+ 
+ 		n++;
+ 	}
+ 
+-	ir3_instr_create(ctx->ir, 0, OPC_NOP);
+-
+-	/* pad out to multiple of 4 scalar instructions: */
+-	for (i = 2 * n; i % 4; i++) {
+-		ir3_instr_create(ctx->ir, 0, OPC_NOP);
+-	}
++	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 2;
+ 
+ 	create_mov(ctx, dst, tmp_src);
+ }
+@@ -670,6 +675,11 @@ trans_lrp(const struct instr_translater *t,
+ 	struct tgsi_dst_register tmp_dst1, tmp_dst2;
+ 	struct tgsi_src_register *tmp_src1, *tmp_src2;
+ 	struct tgsi_src_register tmp_const;
++	struct tgsi_src_register *src0 = &inst->Src[0].Register;
++	struct tgsi_src_register *src1 = &inst->Src[1].Register;
++
++	if (is_const(src0) && is_const(src1))
++		src0 = get_unconst(ctx, src0);
+ 
+ 	tmp_src1 = get_internal_temp(ctx, &tmp_dst1);
+ 	tmp_src2 = get_internal_temp(ctx, &tmp_dst2);
+@@ -678,15 +688,12 @@ trans_lrp(const struct instr_translater *t,
+ 
+ 	/* tmp1 = (a * b) */
+ 	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+-	vectorize(ctx, instr, &tmp_dst1, 2,
+-			&inst->Src[0].Register, 0,
+-			&inst->Src[1].Register, 0);
++	vectorize(ctx, instr, &tmp_dst1, 2, src0, 0, src1, 0);
+ 
+ 	/* tmp2 = (1 - a) */
+ 	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+-	vectorize(ctx, instr, &tmp_dst2, 2,
+-			&tmp_const, 0,
+-			&inst->Src[0].Register, IR3_REG_NEGATE);
++	vectorize(ctx, instr, &tmp_dst2, 2, &tmp_const, 0,
++			src0, IR3_REG_NEGATE);
+ 
+ 	/* tmp2 = tmp2 * c */
+ 	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+@@ -930,10 +937,6 @@ trans_cmp(const struct instr_translater *t,
+ 		return;
+ 	}
+ 
+-	/* NOTE: seems blob compiler will move a const to a gpr if both
+-	 * src args to cmps.f are const.  Need to check if this applies
+-	 * to other instructions..
+-	 */
+ 	if (is_const(a0) && is_const(a1))
+ 		a0 = get_unconst(ctx, a0);
+ 
+@@ -1041,6 +1044,9 @@ trans_if(const struct instr_translater *t,
+ 
+ 	get_immediate(ctx, &constval, fui(0.0));
+ 
++	if (is_const(src))
++		src = get_unconst(ctx, src);
++
+ 	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
+ 	ir3_reg_create(instr, regid(REG_P0, 0), 0);
+ 	add_src_reg(ctx, instr, src, src->SwizzleX);
+@@ -1122,11 +1128,11 @@ instr_cat2(const struct instr_translater *t,
+ 		struct tgsi_full_instruction *inst)
+ {
+ 	struct tgsi_dst_register *dst = get_dst(ctx, inst);
++	struct tgsi_src_register *src0 = &inst->Src[0].Register;
++	struct tgsi_src_register *src1 = &inst->Src[1].Register;
+ 	struct ir3_instruction *instr;
+ 	unsigned src0_flags = 0;
+ 
+-	instr = ir3_instr_create(ctx->ir, 2, t->opc);
+-
+ 	switch (t->tgsi_opc) {
+ 	case TGSI_OPCODE_ABS:
+ 		src0_flags = IR3_REG_ABS;
+@@ -1149,41 +1155,65 @@ instr_cat2(const struct instr_translater *t,
+ 	case OPC_SETRM:
+ 	case OPC_CBITS_B:
+ 		/* these only have one src reg */
+-		vectorize(ctx, instr, dst, 1,
+-				&inst->Src[0].Register, src0_flags);
++		instr = ir3_instr_create(ctx->ir, 2, t->opc);
++		vectorize(ctx, instr, dst, 1, src0, src0_flags);
+ 		break;
+ 	default:
+-		vectorize(ctx, instr, dst, 2,
+-				&inst->Src[0].Register, src0_flags,
+-				&inst->Src[1].Register, 0);
++		if (is_const(src0) && is_const(src1))
++			src0 = get_unconst(ctx, src0);
++
++		instr = ir3_instr_create(ctx->ir, 2, t->opc);
++		vectorize(ctx, instr, dst, 2, src0, src0_flags, src1, 0);
+ 		break;
+ 	}
+ 
+ 	put_dst(ctx, inst, dst);
+ }
+ 
++static bool is_mad(opc_t opc)
++{
++	switch (opc) {
++	case OPC_MAD_U16:
++	case OPC_MADSH_U16:
++	case OPC_MAD_S16:
++	case OPC_MADSH_M16:
++	case OPC_MAD_U24:
++	case OPC_MAD_S24:
++	case OPC_MAD_F16:
++	case OPC_MAD_F32:
++		return true;
++	default:
++		return false;
++	}
++}
++
+ static void
+ instr_cat3(const struct instr_translater *t,
+ 		struct fd3_compile_context *ctx,
+ 		struct tgsi_full_instruction *inst)
+ {
+ 	struct tgsi_dst_register *dst = get_dst(ctx, inst);
+-	struct tgsi_src_register *src1;
++	struct tgsi_src_register *src0 = &inst->Src[0].Register;
++	struct tgsi_src_register *src1 = &inst->Src[1].Register;
+ 	struct ir3_instruction *instr;
+ 
+-	/* Blob compiler never seems to use a const in src1 position..
+-	 * although there does seem (according to disassembler hidden
+-	 * in libllvm-a3xx.so) to be a bit to indicate that src1 is a
+-	 * const.  Not sure if this is a hw bug, or simply that the
+-	 * disassembler lies.
++	/* in particular, can't handle const for src1 for cat3..
++	 * for mad, we can swap first two src's if needed:
+ 	 */
+-	src1 = get_unconst(ctx, &inst->Src[1].Register);
++	if (is_const(src1)) {
++		if (is_mad(t->opc) && !is_const(src0)) {
++			struct tgsi_src_register *tmp;
++			tmp = src0;
++			src0 = src1;
++			src1 = tmp;
++		} else {
++			src0 = get_unconst(ctx, src0);
++		}
++	}
+ 
+ 	instr = ir3_instr_create(ctx->ir, 3,
+ 			ctx->so->half_precision ? t->hopc : t->opc);
+-	vectorize(ctx, instr, dst, 3,
+-			&inst->Src[0].Register, 0,
+-			src1, 0,
++	vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
+ 			&inst->Src[2].Register, 0);
+ 	put_dst(ctx, inst, dst);
+ }
+@@ -1194,11 +1224,12 @@ instr_cat4(const struct instr_translater *t,
+ 		struct tgsi_full_instruction *inst)
+ {
+ 	struct tgsi_dst_register *dst = get_dst(ctx, inst);
+-	struct tgsi_src_register *src;
++	struct tgsi_src_register *src = &inst->Src[0].Register;
+ 	struct ir3_instruction *instr;
+ 
+ 	/* seems like blob compiler avoids const as src.. */
+-	src = get_unconst(ctx, &inst->Src[0].Register);
++	if (is_const(src))
++		src = get_unconst(ctx, src);
+ 
+ 	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
+ 	instr = ir3_instr_create(ctx->ir, 4, t->opc);
+-- 
+1.8.4.2
+
diff --git a/0016-freedreno-a3xx-compiler-handle-sync-flags-better.patch b/0016-freedreno-a3xx-compiler-handle-sync-flags-better.patch
new file mode 100644
index 0000000..279b118
--- /dev/null
+++ b/0016-freedreno-a3xx-compiler-handle-sync-flags-better.patch
@@ -0,0 +1,128 @@
+From 4f0be333e7ee93fbb006c5570a594e49b4441731 Mon Sep 17 00:00:00 2001
+From: Rob Clark <robclark at freedesktop.org>
+Date: Tue, 27 Aug 2013 19:24:53 -0400
+Subject: [PATCH 16/17] freedreno/a3xx/compiler: handle sync flags better
+
+We need to set the flag on all the .xyzw components that are written by
+the instruction, not just on .x.  Otherwise a later use of rN.y (for
+example) will not trigger the appropriate sync bit to be set.
+
+Signed-off-by: Rob Clark <robclark at freedesktop.org>
+---
+ src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 50 +++++++++++++++--------
+ 1 file changed, 34 insertions(+), 16 deletions(-)
+
+diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+index dcdd2d9..5115411 100644
+--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
++++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+@@ -62,10 +62,16 @@ static unsigned regmask_idx(struct ir3_register *reg)
+ 	return num;
+ }
+ 
+-static void regmask_set(regmask_t regmask, struct ir3_register *reg)
++static void regmask_set(regmask_t regmask, struct ir3_register *reg,
++		unsigned wrmask)
+ {
+-	unsigned idx = regmask_idx(reg);
+-	regmask[idx / 8] |= 1 << (idx % 8);
++	unsigned i;
++	for (i = 0; i < 4; i++) {
++		if (wrmask & (1 << i)) {
++			unsigned idx = regmask_idx(reg) + i;
++			regmask[idx / 8] |= 1 << (idx % 8);
++		}
++	}
+ }
+ 
+ static unsigned regmask_get(regmask_t regmask, struct ir3_register *reg)
+@@ -216,6 +222,24 @@ struct instr_translater {
+ 	unsigned arg;
+ };
+ 
++static unsigned
++src_flags(struct fd3_compile_context *ctx, struct ir3_register *reg)
++{
++	unsigned flags = 0;
++
++	if (regmask_get(ctx->needs_ss, reg)) {
++		flags |= IR3_INSTR_SS;
++		memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
++	}
++
++	if (regmask_get(ctx->needs_sy, reg)) {
++		flags |= IR3_INSTR_SY;
++		memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy));
++	}
++
++	return flags;
++}
++
+ static struct ir3_register *
+ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
+ 		const struct tgsi_dst_register *dst, unsigned chan)
+@@ -279,15 +303,7 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
+ 
+ 	reg = ir3_reg_create(instr, regid(num, chan), flags);
+ 
+-	if (regmask_get(ctx->needs_ss, reg)) {
+-		instr->flags |= IR3_INSTR_SS;
+-		memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
+-	}
+-
+-	if (regmask_get(ctx->needs_sy, reg)) {
+-		instr->flags |= IR3_INSTR_SY;
+-		memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy));
+-	}
++	instr->flags |= src_flags(ctx, reg);
+ 
+ 	return reg;
+ }
+@@ -567,6 +583,7 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
+ 				cur->regs[j+1]->num =
+ 					regid(cur->regs[j+1]->num >> 2,
+ 						src_swiz(src, i));
++				cur->flags |= src_flags(ctx, cur->regs[j+1]);
+ 			}
+ 			va_end(ap);
+ 		}
+@@ -753,7 +770,7 @@ trans_pow(const struct instr_translater *t,
+ 	instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2);
+ 	r = add_dst_reg(ctx, instr, &tmp_dst, 0);
+ 	add_src_reg(ctx, instr, src0, src0->SwizzleX);
+-	regmask_set(ctx->needs_ss, r);
++	regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X);
+ 
+ 	/* mul.f Rtmp, Rtmp, Rsrc1 */
+ 	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+@@ -771,7 +788,7 @@ trans_pow(const struct instr_translater *t,
+ 	instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2);
+ 	r = add_dst_reg(ctx, instr, &tmp_dst, 0);
+ 	add_src_reg(ctx, instr, tmp_src, 0);
+-	regmask_set(ctx->needs_ss, r);
++	regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X);
+ 
+ 	create_mov(ctx, dst, tmp_src);
+ }
+@@ -855,7 +872,7 @@ trans_samp(const struct instr_translater *t,
+ 
+ 	add_src_reg(ctx, instr, coord, coord->SwizzleX);
+ 
+-	regmask_set(ctx->needs_sy, r);
++	regmask_set(ctx->needs_sy, r, r->wrmask);
+ }
+ 
+ /*
+@@ -1236,7 +1253,8 @@ instr_cat4(const struct instr_translater *t,
+ 
+ 	vectorize(ctx, instr, dst, 1, src, 0);
+ 
+-	regmask_set(ctx->needs_ss, instr->regs[0]);
++	regmask_set(ctx->needs_ss, instr->regs[0],
++			inst->Dst[0].Register.WriteMask);
+ 
+ 	put_dst(ctx, inst, dst);
+ }
+-- 
+1.8.4.2
+
diff --git a/0017-freedreno-updates-for-msm-drm-kms-driver.patch b/0017-freedreno-updates-for-msm-drm-kms-driver.patch
new file mode 100644
index 0000000..2fe4f05
--- /dev/null
+++ b/0017-freedreno-updates-for-msm-drm-kms-driver.patch
@@ -0,0 +1,328 @@
+From 4fd03f26aa1c2ddef24b2c4f8d1a10c96fbf7f40 Mon Sep 17 00:00:00 2001
+From: Rob Clark <robclark at freedesktop.org>
+Date: Thu, 29 Aug 2013 17:24:33 -0400
+Subject: [PATCH 17/17] freedreno: updates for msm drm/kms driver
+
+There were some small API tweaks in libdrm_freedreno to enable support
+for msm drm/kms driver.
+
+Signed-off-by: Rob Clark <robclark at freedesktop.org>
+---
+ src/gallium/drivers/freedreno/a2xx/fd2_emit.c      |  4 +--
+ src/gallium/drivers/freedreno/a2xx/fd2_gmem.c      |  6 ++---
+ src/gallium/drivers/freedreno/a3xx/fd3_emit.c      | 14 +++++------
+ src/gallium/drivers/freedreno/a3xx/fd3_gmem.c      |  8 +++---
+ src/gallium/drivers/freedreno/a3xx/fd3_program.c   |  4 +--
+ src/gallium/drivers/freedreno/freedreno_draw.c     |  2 +-
+ src/gallium/drivers/freedreno/freedreno_resource.c | 18 ++++++++++++--
+ src/gallium/drivers/freedreno/freedreno_util.h     | 29 +++++++++++++++-------
+ 8 files changed, 55 insertions(+), 30 deletions(-)
+
+diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
+index b03390e..35511ba 100644
+--- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
++++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
+@@ -137,7 +137,7 @@ emit_texture(struct fd_ringbuffer *ring, struct fd_context *ctx,
+ 	OUT_RING(ring, 0x00010000 + (0x6 * const_idx));
+ 
+ 	OUT_RING(ring, sampler->tex0 | view->tex0);
+-	OUT_RELOC(ring, view->tex_resource->bo, 0, view->fmt);
++	OUT_RELOC(ring, view->tex_resource->bo, 0, view->fmt, 0);
+ 	OUT_RING(ring, view->tex2);
+ 	OUT_RING(ring, sampler->tex3 | view->tex3);
+ 	OUT_RING(ring, sampler->tex4);
+@@ -171,7 +171,7 @@ fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
+ 	OUT_RING(ring, (0x1 << 16) | (val & 0xffff));
+ 	for (i = 0; i < n; i++) {
+ 		struct fd_resource *rsc = fd_resource(vbufs[i].prsc);
+-		OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3);
++		OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3, 0);
+ 		OUT_RING (ring, vbufs[i].size);
+ 	}
+ }
+diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
+index 93695bc..89f5a4d 100644
+--- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
++++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
+@@ -70,7 +70,7 @@ emit_gmem2mem_surf(struct fd_ringbuffer *ring, uint32_t base,
+ 	OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ 	OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
+ 	OUT_RING(ring, 0x00000000);             /* RB_COPY_CONTROL */
+-	OUT_RELOC(ring, rsc->bo, 0, 0);         /* RB_COPY_DEST_BASE */
++	OUT_RELOCW(ring, rsc->bo, 0, 0, 0);     /* RB_COPY_DEST_BASE */
+ 	OUT_RING(ring, rsc->pitch >> 5);        /* RB_COPY_DEST_PITCH */
+ 	OUT_RING(ring,                          /* RB_COPY_DEST_INFO */
+ 			A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(psurf->format)) |
+@@ -199,7 +199,7 @@ emit_mem2gmem_surf(struct fd_ringbuffer *ring, uint32_t base,
+ 			A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) |
+ 			A2XX_SQ_TEX_0_PITCH(rsc->pitch));
+ 	OUT_RELOC(ring, rsc->bo, 0,
+-			fd2_pipe2surface(psurf->format) | 0x800);
++			fd2_pipe2surface(psurf->format) | 0x800, 0);
+ 	OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) |
+ 			A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1));
+ 	OUT_RING(ring, 0x01000000 | // XXX
+@@ -241,7 +241,7 @@ fd2_emit_tile_mem2gmem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
+ 	y0 = ((float)yoff) / ((float)pfb->height);
+ 	y1 = ((float)yoff + bin_h) / ((float)pfb->height);
+ 	OUT_PKT3(ring, CP_MEM_WRITE, 9);
+-	OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0x60, 0);
++	OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0x60, 0, 0);
+ 	OUT_RING(ring, fui(x0));
+ 	OUT_RING(ring, fui(y0));
+ 	OUT_RING(ring, fui(x1));
+diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+index 5ffd561..5e58618 100644
+--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
++++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+@@ -81,7 +81,7 @@ fd3_emit_constant(struct fd_ringbuffer *ring,
+ 	if (prsc) {
+ 		struct fd_bo *bo = fd_resource(prsc)->bo;
+ 		OUT_RELOC(ring, bo, offset,
+-				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
++				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
+ 	} else {
+ 		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+ 				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+@@ -212,7 +212,7 @@ emit_textures(struct fd_ringbuffer *ring,
+ 	for (i = 0; i < tex->num_textures; i++) {
+ 		struct fd3_pipe_sampler_view *view =
+ 				fd3_pipe_sampler_view(tex->textures[i]);
+-		OUT_RELOC(ring, view->tex_resource->bo, 0, 0);
++		OUT_RELOC(ring, view->tex_resource->bo, 0, 0, 0);
+ 		/* I think each entry is a ptr to mipmap level.. for now, just
+ 		 * pad w/ null's until I get around to actually implementing
+ 		 * mipmap support..
+@@ -296,7 +296,7 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf
+ 			CP_LOAD_STATE_0_NUM_UNIT(1));
+ 	OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
+ 			CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+-	OUT_RELOC(ring, rsc->bo, 0, 0);
++	OUT_RELOC(ring, rsc->bo, 0, 0, 0);
+ }
+ 
+ void
+@@ -322,7 +322,7 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
+ 				COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
+ 				A3XX_VFD_FETCH_INSTR_0_INDEXCODE(i) |
+ 				A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
+-		OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0);
++		OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0, 0);
+ 
+ 		OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(i), 1);
+ 		OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
+@@ -481,12 +481,12 @@ fd3_emit_restore(struct fd_context *ctx)
+ 
+ 	OUT_PKT0(ring, REG_A3XX_SP_VS_PVT_MEM_CTRL_REG, 3);
+ 	OUT_RING(ring, 0x08000001);                  /* SP_VS_PVT_MEM_CTRL_REG */
+-	OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0, 0);  /* SP_VS_PVT_MEM_ADDR_REG */
++	OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR_REG */
+ 	OUT_RING(ring, 0x00000000);                  /* SP_VS_PVT_MEM_SIZE_REG */
+ 
+ 	OUT_PKT0(ring, REG_A3XX_SP_FS_PVT_MEM_CTRL_REG, 3);
+ 	OUT_RING(ring, 0x08000001);                  /* SP_FS_PVT_MEM_CTRL_REG */
+-	OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0, 0);  /* SP_FS_PVT_MEM_ADDR_REG */
++	OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR_REG */
+ 	OUT_RING(ring, 0x00000000);                  /* SP_FS_PVT_MEM_SIZE_REG */
+ 
+ 	OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
+@@ -549,7 +549,7 @@ fd3_emit_restore(struct fd_context *ctx)
+ 	OUT_RING(ring, 0x00000001);        /* UCHE_CACHE_MODE_CONTROL_REG */
+ 
+ 	OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1);
+-	OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0); /* VSC_SIZE_ADDRESS */
++	OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
+ 
+ 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
+ 	OUT_RING(ring, 0x00000000);                  /* GRAS_CL_CLIP_CNTL */
+diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+index b9d0580..8d2df47 100644
+--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
++++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+@@ -89,7 +89,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
+ 		if (bin_w || (i >= nr_bufs)) {
+ 			OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base));
+ 		} else {
+-			OUT_RELOCS(ring, res->bo, 0, 0, -1);
++			OUT_RELOCW(ring, res->bo, 0, 0, -1);
+ 		}
+ 
+ 		OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1);
+@@ -116,7 +116,7 @@ emit_gmem2mem_surf(struct fd_ringbuffer *ring,
+ 	OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
+ 			A3XX_RB_COPY_CONTROL_MODE(mode) |
+ 			A3XX_RB_COPY_CONTROL_GMEM_BASE(base));
+-	OUT_RELOCS(ring, rsc->bo, 0, 0, -1);    /* RB_COPY_DEST_BASE */
++	OUT_RELOCW(ring, rsc->bo, 0, 0, -1);    /* RB_COPY_DEST_BASE */
+ 	OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(rsc->pitch * rsc->cpp));
+ 	OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
+ 			A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(psurf->format)) |
+@@ -272,7 +272,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
+ 	y1 = ((float)yoff + bin_h) / ((float)pfb->height);
+ 
+ 	OUT_PKT3(ring, CP_MEM_WRITE, 5);
+-	OUT_RELOC(ring, fd_resource(fd3_ctx->blit_texcoord_vbuf)->bo, 0, 0);
++	OUT_RELOC(ring, fd_resource(fd3_ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);
+ 	OUT_RING(ring, fui(x0));
+ 	OUT_RING(ring, fui(y0));
+ 	OUT_RING(ring, fui(x1));
+@@ -395,7 +395,7 @@ update_vsc_pipe(struct fd_context *ctx)
+ 			A3XX_VSC_PIPE_CONFIG_Y(0) |
+ 			A3XX_VSC_PIPE_CONFIG_W(gmem->nbins_x) |
+ 			A3XX_VSC_PIPE_CONFIG_H(gmem->nbins_y));
+-	OUT_RELOC(ring, bo, 0, 0);              /* VSC_PIPE[0].DATA_ADDRESS */
++	OUT_RELOC(ring, bo, 0, 0, 0);           /* VSC_PIPE[0].DATA_ADDRESS */
+ 	OUT_RING(ring, fd_bo_size(bo) - 32);    /* VSC_PIPE[0].DATA_LENGTH */
+ 
+ 	for (i = 1; i < 8; i++) {
+diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+index 259c2dd..c6c51b1 100644
+--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
++++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+@@ -320,7 +320,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
+ 	OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2);
+ 	OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) |
+ 			A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
+-	OUT_RELOC(ring, vp->bo, 0, 0);    /* SP_VS_OBJ_START_REG */
++	OUT_RELOC(ring, vp->bo, 0, 0, 0);  /* SP_VS_OBJ_START_REG */
+ #endif
+ 
+ 	OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
+@@ -345,7 +345,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
+ 	OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
+ 	OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
+ 			A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(128 - fp->instrlen));
+-	OUT_RELOC(ring, fp->bo, 0, 0);    /* SP_FS_OBJ_START_REG */
++	OUT_RELOC(ring, fp->bo, 0, 0, 0);  /* SP_FS_OBJ_START_REG */
+ #endif
+ 
+ 	OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
+diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
+index d4f8d34..4a98ab4 100644
+--- a/src/gallium/drivers/freedreno/freedreno_draw.c
++++ b/src/gallium/drivers/freedreno/freedreno_draw.c
+@@ -104,7 +104,7 @@ fd_draw_emit(struct fd_context *ctx, const struct pipe_draw_info *info)
+ 			src_sel, idx_type, IGNORE_VISIBILITY));
+ 	OUT_RING(ring, info->count);       /* NumIndices */
+ 	if (info->indexed) {
+-		OUT_RELOC(ring, idx_bo, idx_offset, 0);
++		OUT_RELOC(ring, idx_bo, idx_offset, 0, 0);
+ 		OUT_RING (ring, idx_size);
+ 	}
+ }
+diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
+index 1b1eaa5..3e051ea 100644
+--- a/src/gallium/drivers/freedreno/freedreno_resource.c
++++ b/src/gallium/drivers/freedreno/freedreno_resource.c
+@@ -59,6 +59,9 @@ fd_resource_transfer_unmap(struct pipe_context *pctx,
+ 		struct pipe_transfer *ptrans)
+ {
+ 	struct fd_context *ctx = fd_context(pctx);
++	struct fd_resource *rsc = fd_resource(ptrans->resource);
++	if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED))
++		fd_bo_cpu_fini(rsc->bo);
+ 	pipe_resource_reference(&ptrans->resource, NULL);
+ 	util_slab_free(&ctx->transfer_pool, ptrans);
+ }
+@@ -74,12 +77,13 @@ fd_resource_transfer_map(struct pipe_context *pctx,
+ 	struct fd_resource *rsc = fd_resource(prsc);
+ 	struct pipe_transfer *ptrans = util_slab_alloc(&ctx->transfer_pool);
+ 	enum pipe_format format = prsc->format;
++	uint32_t op = 0;
+ 	char *buf;
+ 
+ 	if (!ptrans)
+ 		return NULL;
+ 
+-	/* util_slap_alloc() doesn't zero: */
++	/* util_slab_alloc() doesn't zero: */
+ 	memset(ptrans, 0, sizeof(*ptrans));
+ 
+ 	pipe_resource_reference(&ptrans->resource, prsc);
+@@ -90,7 +94,8 @@ fd_resource_transfer_map(struct pipe_context *pctx,
+ 	ptrans->layer_stride = ptrans->stride;
+ 
+ 	/* some state trackers (at least XA) don't do this.. */
+-	fd_resource_transfer_flush_region(pctx, ptrans, box);
++	if (!(usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
++		fd_resource_transfer_flush_region(pctx, ptrans, box);
+ 
+ 	buf = fd_bo_map(rsc->bo);
+ 	if (!buf) {
+@@ -98,6 +103,15 @@ fd_resource_transfer_map(struct pipe_context *pctx,
+ 		return NULL;
+ 	}
+ 
++	if (usage & PIPE_TRANSFER_READ)
++		op |= DRM_FREEDRENO_PREP_READ;
++
++	if (usage & PIPE_TRANSFER_WRITE)
++		op |= DRM_FREEDRENO_PREP_WRITE;
++
++	if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED))
++		fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op);
++
+ 	*pptrans = ptrans;
+ 
+ 	return buf +
+diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
+index 9f10686..7bbbe80 100644
+--- a/src/gallium/drivers/freedreno/freedreno_util.h
++++ b/src/gallium/drivers/freedreno/freedreno_util.h
+@@ -104,25 +104,36 @@ OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
+ 
+ static inline void
+ OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo,
+-		uint32_t offset, uint32_t or)
++		uint32_t offset, uint32_t or, int32_t shift)
+ {
+ 	if (LOG_DWORDS) {
+-		DBG("ring[%p]: OUT_RELOC  %04x:  %p+%u", ring,
+-				(uint32_t)(ring->cur - ring->last_start), bo, offset);
++		DBG("ring[%p]: OUT_RELOC   %04x:  %p+%u << %d", ring,
++				(uint32_t)(ring->cur - ring->last_start), bo, offset, shift);
+ 	}
+-	fd_ringbuffer_emit_reloc(ring, bo, offset, or);
++	fd_ringbuffer_reloc(ring, &(struct fd_reloc){
++		.bo = bo,
++		.flags = FD_RELOC_READ,
++		.offset = offset,
++		.or = or,
++		.shift = shift,
++	});
+ }
+ 
+-/* shifted reloc: */
+ static inline void
+-OUT_RELOCS(struct fd_ringbuffer *ring, struct fd_bo *bo,
++OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo,
+ 		uint32_t offset, uint32_t or, int32_t shift)
+ {
+ 	if (LOG_DWORDS) {
+-		DBG("ring[%p]: OUT_RELOCS  %04x:  %p+%u << %d", ring,
++		DBG("ring[%p]: OUT_RELOC   %04x:  %p+%u << %d", ring,
+ 				(uint32_t)(ring->cur - ring->last_start), bo, offset, shift);
+ 	}
+-	fd_ringbuffer_emit_reloc_shift(ring, bo, offset, or, shift);
++	fd_ringbuffer_reloc(ring, &(struct fd_reloc){
++		.bo = bo,
++		.flags = FD_RELOC_READ | FD_RELOC_WRITE,
++		.offset = offset,
++		.or = or,
++		.shift = shift,
++	});
+ }
+ 
+ static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
+@@ -155,7 +166,7 @@ OUT_IB(struct fd_ringbuffer *ring, struct fd_ringmarker *start,
+ 		struct fd_ringmarker *end)
+ {
+ 	OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
+-	fd_ringbuffer_emit_reloc_ring(ring, start);
++	fd_ringbuffer_emit_reloc_ring(ring, start, end);
+ 	OUT_RING(ring, fd_ringmarker_dwords(start, end));
+ }
+ 
+-- 
+1.8.4.2
+
diff --git a/mesa.spec b/mesa.spec
index a8da90c..0619a6c 100644
--- a/mesa.spec
+++ b/mesa.spec
@@ -48,12 +48,12 @@
 
 %define _default_patch_fuzz 2
 
-%define gitdate 20131113
+%define gitdate 20131114
 #% define snapshot 
 
 Summary: Mesa graphics libraries
 Name: mesa
-Version: 9.2.2
+Version: 9.2.3
 Release: 1.%{gitdate}%{?dist}
 License: MIT
 Group: System Environment/Libraries
@@ -77,6 +77,25 @@ Patch15: mesa-9.2-hardware-float.patch
 Patch16: mesa-9.2-no-useless-vdpau.patch
 Patch20: mesa-9.2-evergreen-big-endian.patch
 
+# https://bugs.freedesktop.org/show_bug.cgi?id=71573
+Patch21: 0001-freedreno-a3xx-fix-color-inversion-on-mem-gmem-resto.patch
+Patch22: 0002-freedreno-a3xx-fix-viewport-on-gmem-mem-resolve.patch
+Patch23: 0003-freedreno-add-debug-option-to-disable-scissor-optimi.patch
+Patch24: 0004-freedreno-update-register-headers.patch
+Patch25: 0005-freedreno-a3xx-some-texture-fixes.patch
+Patch26: 0006-freedreno-a3xx-compiler-fix-CMP.patch
+Patch27: 0007-freedreno-a3xx-compiler-handle-saturate-on-dst.patch
+Patch28: 0008-freedreno-a3xx-compiler-use-max_reg-rather-than-file.patch
+Patch29: 0009-freedreno-a3xx-compiler-cat4-cannot-use-const-reg-as.patch
+Patch30: 0010-freedreno-fix-segfault-when-no-color-buffer-bound.patch
+Patch31: 0011-freedreno-a3xx-compiler-make-compiler-errors-more-us.patch
+Patch32: 0012-freedreno-a3xx-compiler-bit-of-re-arrange-cleanup.patch
+Patch33: 0013-freedreno-a3xx-compiler-fix-SGT-SLT-etc.patch
+Patch34: 0014-freedreno-a3xx-don-t-leak-so-much.patch
+Patch35: 0015-freedreno-a3xx-compiler-better-const-handling.patch
+Patch36: 0016-freedreno-a3xx-compiler-handle-sync-flags-better.patch
+Patch37: 0017-freedreno-updates-for-msm-drm-kms-driver.patch
+
 BuildRequires: pkgconfig autoconf automake libtool
 %if %{with_hardware}
 BuildRequires: kernel-headers
@@ -302,6 +321,24 @@ grep -q ^/ src/gallium/auxiliary/vl/vl_decoder.c && exit 1
 %patch16 -p1 -b .vdpau
 %patch20 -p1 -b .egbe
 
+%patch21 -p1
+%patch22 -p1
+%patch23 -p1
+%patch24 -p1
+%patch25 -p1
+%patch26 -p1
+%patch27 -p1
+%patch28 -p1
+%patch29 -p1
+%patch30 -p1
+%patch31 -p1
+%patch32 -p1
+%patch33 -p1
+%patch34 -p1
+%patch35 -p1
+%patch36 -p1
+%patch37 -p1
+
 %if 0%{with_private_llvm}
 sed -i 's/llvm-config/mesa-private-llvm-config-%{__isa_bits}/g' configure.ac
 sed -i 's/`$LLVM_CONFIG --version`/&-mesa/' configure.ac
@@ -600,6 +637,9 @@ rm -rf $RPM_BUILD_ROOT
 %endif
 
 %changelog
+* Thu Nov 14 2013 Igor Gnatenko <i.gnatenko.brain at gmail.com> - 9.2.3-1.20131114
+- 9.2.3 upstream release
+
 * Wed Nov 13 2013 Igor Gnatenko <i.gnatenko.brain at gmail.com> - 9.2.2-1.20131113
 - 9.2.2 upstream release + fixes from git 9.2 branch
 
diff --git a/sources b/sources
index e08cadd..caf10db 100644
--- a/sources
+++ b/sources
@@ -1 +1 @@
-eafb41bd8c3160972e192b1d502ad8b6  mesa-20131113.tar.xz
+54f46fc070303e0d467779ab39103d58  mesa-20131114.tar.xz


More information about the scm-commits mailing list