[mesa/f19] rebase to Mesa 9.1.1 + fixes from git

Sat Apr 27 07:30:20 UTC 2013

commit 8a2a35f7ff928053cfc424f2155c67e0fac4308f
Author: Dave Airlie <airlied at gmail.com>
Date:   Sat Apr 27 17:29:43 2013 +1000

    rebase to Mesa 9.1.1 + fixes from git

 .gitignore                   |    1 +
 mesa-9.0-19-g895a587.patch   | 1889 -----------------------------------
 mesa-9.0.1-22-gd0a9ab2.patch | 1834 ----------------------------------
 mesa-9.1-53-gd0ccb5b.patch   | 1974 -------------------------------------
 mesa-9.1.1-53-g3cff41c.patch | 2242 ++++++++++++++++++++++++++++++++++++++++++
 mesa.spec                    |    9 +-
 sources                      |    2 +-
 7 files changed, 2250 insertions(+), 5701 deletions(-)
---

diff --git a/.gitignore b/.gitignore
index b475cc4..78b25bc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -46,3 +46,4 @@ mesa-20100720.tar.bz2
 /MesaLib-9.0.1.tar.bz2
 /mesa-20130213.tar.xz
 /MesaLib-9.1.tar.bz2
+/MesaLib-9.1.1.tar.bz2
diff --git a/mesa-9.1.1-53-g3cff41c.patch b/mesa-9.1.1-53-g3cff41c.patch
new file mode 100644
index 0000000..d6b302a
--- /dev/null
+++ b/mesa-9.1.1-53-g3cff41c.patch
@@ -0,0 +1,2242 @@
+diff --git a/configure.ac b/configure.ac
+index 4a98996..1c9d606 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -452,6 +452,9 @@ if test "x$enable_asm" = xyes; then
+         linux* | *freebsd* | dragonfly* | *netbsd*)
+             test "x$enable_64bit" = xyes && asm_arch=x86_64 || asm_arch=x86
+             ;;
++        gnu*)
++            asm_arch=x86
++            ;;
+         esac
+         ;;
+     x86_64)
+@@ -826,20 +829,6 @@ if test "x$enable_dri" = xyes; then
+     fi
+ fi
+ 
+-dnl Find out if X is available.
+-PKG_CHECK_MODULES([X11], [x11], [no_x=no], [no_x=yes])
+-
+-dnl Try to tell the user that the --x-* options are only used when
+-dnl pkg-config is not available. This must be right after AC_PATH_XTRA.
+-m4_divert_once([HELP_BEGIN],
+-[These options are only used when the X libraries cannot be found by the
+-pkg-config utility.])
+-
+-dnl We need X for xlib and dri, so bomb now if it's not found
+-if test "x$enable_glx" = xyes -a "x$no_x" = xyes; then
+-    AC_MSG_ERROR([X11 development libraries needed for GLX])
+-fi
+-
+ dnl Direct rendering or just indirect rendering
+ case "$host_os" in
+ gnu*)
+diff --git a/docs/relnotes-9.1.1.html b/docs/relnotes-9.1.1.html
+index 8921c8f..a73c974 100644
+--- a/docs/relnotes-9.1.1.html
++++ b/docs/relnotes-9.1.1.html
+@@ -30,6 +30,9 @@ because GL_ARB_compatibility is not supported.
+ 
+ <h2>MD5 checksums</h2>
+ <pre>
++6508d9882d8dce7106717f365632700c  MesaLib-9.1.1.tar.gz
++6ea2bdc3b7ecfb4257b39814b4182580  MesaLib-9.1.1.tar.bz2
++3434c0eb47849a08c53cd32833d10d13  MesaLib-9.1.1.zip
+ </pre>
+ 
+ <h2>New features</h2>
+diff --git a/include/c99_compat.h b/include/c99_compat.h
+new file mode 100644
+index 0000000..3a9f502
+--- /dev/null
++++ b/include/c99_compat.h
+@@ -0,0 +1,147 @@
++/**************************************************************************
++ *
++ * Copyright 2007-2013 VMware, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
++ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
++ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ **************************************************************************/
++
++#ifndef _C99_COMPAT_H_
++#define _C99_COMPAT_H_
++
++
++/*
++ * MSVC hacks.
++ */
++#if defined(_MSC_VER)
++   /*
++    * Visual Studio 2012 will complain if we define the `inline` keyword, but
++    * actually it only supports the keyword on C++.
++    *
++    * We could skip this check by defining _ALLOW_KEYWORD_MACROS, but there is
++    * probably value in checking this for other keywords.  So simply include
++    * the checking before we define it below.
++    */
++#  if _MSC_VER >= 1700
++#    include <xkeycheck.h>
++#  endif
++
++   /*
++    * XXX: MSVC has a `__restrict` keyword, but it also has a
++    * `__declspec(restrict)` modifier, so it is impossible to define a
++    * `restrict` macro without interfering with the latter.  Furthermore the
++    * MSVC standard library uses __declspec(restrict) under the _CRTRESTRICT
++    * macro.  For now resolve this issue by redefining _CRTRESTRICT, but going
++    * forward we should probably stop using restrict, especially
++    * considering that our code does not obey strict aliasing rules anyway.
++    */
++#  include <crtdefs.h>
++#  undef _CRTRESTRICT
++#  define _CRTRESTRICT
++#endif
++
++
++/*
++ * C99 inline keyword
++ */
++#ifndef inline
++#  ifdef __cplusplus
++     /* C++ supports inline keyword */
++#  elif defined(__GNUC__)
++#    define inline __inline__
++#  elif defined(_MSC_VER)
++#    define inline __inline
++#  elif defined(__ICL)
++#    define inline __inline
++#  elif defined(__INTEL_COMPILER)
++     /* Intel compiler supports inline keyword */
++#  elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
++#    define inline __inline
++#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
++     /* C99 supports inline keyword */
++#  elif (__STDC_VERSION__ >= 199901L)
++     /* C99 supports inline keyword */
++#  else
++#    define inline
++#  endif
++#endif
++
++
++/*
++ * C99 restrict keyword
++ *
++ * See also:
++ * - http://cellperformance.beyond3d.com/articles/2006/05/demystifying-the-restrict-keyword.html
++ */
++#ifndef restrict
++#  if (__STDC_VERSION__ >= 199901L)
++     /* C99 */
++#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
++     /* C99 */
++#  elif defined(__GNUC__)
++#    define restrict __restrict__
++#  elif defined(_MSC_VER)
++#    define restrict __restrict
++#  else
++#    define restrict /* */
++#  endif
++#endif
++
++
++/*
++ * C99 __func__ macro
++ */
++#ifndef __func__
++#  if (__STDC_VERSION__ >= 199901L)
++     /* C99 */
++#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
++     /* C99 */
++#  elif defined(__GNUC__)
++#    if __GNUC__ >= 2
++#      define __func__ __FUNCTION__
++#    else
++#      define __func__ "<unknown>"
++#    endif
++#  elif defined(_MSC_VER)
++#    if _MSC_VER >= 1300
++#      define __func__ __FUNCTION__
++#    else
++#      define __func__ "<unknown>"
++#    endif
++#  else
++#    define __func__ "<unknown>"
++#  endif
++#endif
++
++
++/* Simple test case for debugging */
++#if 0
++static inline const char *
++test_c99_compat_h(const void * restrict a,
++                  const void * restrict b)
++{
++   return __func__;
++}
++#endif
++
++
++#endif /* _C99_COMPAT_H_ */
+diff --git a/src/egl/main/eglcompiler.h b/src/egl/main/eglcompiler.h
+index 9823693..2499172 100644
+--- a/src/egl/main/eglcompiler.h
++++ b/src/egl/main/eglcompiler.h
+@@ -31,6 +31,9 @@
+ #define EGLCOMPILER_INCLUDED
+ 
+ 
++#include "c99_compat.h" /* inline, __func__, etc. */
++
++
+ /**
+  * Get standard integer types
+  */
+@@ -62,30 +65,7 @@
+ #endif
+ 
+ 
+-/**
+- * Function inlining
+- */
+-#ifndef inline
+-#  ifdef __cplusplus
+-     /* C++ supports inline keyword */
+-#  elif defined(__GNUC__)
+-#    define inline __inline__
+-#  elif defined(_MSC_VER)
+-#    define inline __inline
+-#  elif defined(__ICL)
+-#    define inline __inline
+-#  elif defined(__INTEL_COMPILER)
+-     /* Intel compiler supports inline keyword */
+-#  elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
+-#    define inline __inline
+-#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
+-     /* C99 supports inline keyword */
+-#  elif (__STDC_VERSION__ >= 199901L)
+-     /* C99 supports inline keyword */
+-#  else
+-#    define inline
+-#  endif
+-#endif
++/* XXX: Use standard `inline` keyword instead */
+ #ifndef INLINE
+ #  define INLINE inline
+ #endif
+@@ -104,21 +84,9 @@
+ #  endif
+ #endif
+ 
+-/**
+- * The __FUNCTION__ gcc variable is generally only used for debugging.
+- * If we're not using gcc, define __FUNCTION__ as a cpp symbol here.
+- * Don't define it if using a newer Windows compiler.
+- */
++/* XXX: Use standard `__func__` instead */
+ #ifndef __FUNCTION__
+-# if (!defined __GNUC__) && (!defined __xlC__) && \
+-      (!defined(_MSC_VER) || _MSC_VER < 1300)
+-#  if (__STDC_VERSION__ >= 199901L) /* C99 */ || \
+-    (defined(__SUNPRO_C) && defined(__C99FEATURES__))
+-#   define __FUNCTION__ __func__
+-#  else
+-#   define __FUNCTION__ "<unknown>"
+-#  endif
+-# endif
++#  define __FUNCTION__ __func__
+ #endif
+ 
+ #endif /* EGLCOMPILER_INCLUDED */
+diff --git a/src/gallium/auxiliary/Makefile.am b/src/gallium/auxiliary/Makefile.am
+index a4eee47..f14279b 100644
+--- a/src/gallium/auxiliary/Makefile.am
++++ b/src/gallium/auxiliary/Makefile.am
+@@ -7,7 +7,10 @@ noinst_LTLIBRARIES = libgallium.la
+ 
+ AM_CFLAGS = \
+ 	-I$(top_srcdir)/src/gallium/auxiliary/util \
+-	$(GALLIUM_CFLAGS)
++	$(GALLIUM_CFLAGS) \
++	$(VISIBILITY_CFLAGS)
++
++AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS)
+ 
+ libgallium_la_SOURCES = \
+ 	$(C_SOURCES) \
+@@ -18,7 +21,7 @@ if HAVE_MESA_LLVM
+ AM_CFLAGS += \
+ 	$(LLVM_CFLAGS)
+ 
+-AM_CXXFLAGS = \
++AM_CXXFLAGS += \
+ 	$(GALLIUM_CFLAGS) \
+ 	$(LLVM_CXXFLAGS)
+ 
+diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+index 4898849..5fb4a11 100644
+--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
++++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+@@ -240,6 +240,7 @@ struct lp_exec_mask {
+    struct lp_build_context *bld;
+ 
+    boolean has_mask;
++   boolean ret_in_main;
+ 
+    LLVMTypeRef int_vec_type;
+ 
+diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+index 0621fb4..413a918 100644
+--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
++++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+@@ -73,6 +73,7 @@ static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context
+ 
+    mask->bld = bld;
+    mask->has_mask = FALSE;
++   mask->ret_in_main = FALSE;
+    mask->cond_stack_size = 0;
+    mask->loop_stack_size = 0;
+    mask->call_stack_size = 0;
+@@ -108,7 +109,7 @@ static void lp_exec_mask_update(struct lp_exec_mask *mask)
+    } else
+       mask->exec_mask = mask->cond_mask;
+ 
+-   if (mask->call_stack_size) {
++   if (mask->call_stack_size || mask->ret_in_main) {
+       mask->exec_mask = LLVMBuildAnd(builder,
+                                      mask->exec_mask,
+                                      mask->ret_mask,
+@@ -117,7 +118,8 @@ static void lp_exec_mask_update(struct lp_exec_mask *mask)
+ 
+    mask->has_mask = (mask->cond_stack_size > 0 ||
+                      mask->loop_stack_size > 0 ||
+-                     mask->call_stack_size > 0);
++                     mask->call_stack_size > 0 ||
++                     mask->ret_in_main);
+ }
+ 
+ static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
+@@ -348,11 +350,23 @@ static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
+    LLVMBuilderRef builder = mask->bld->gallivm->builder;
+    LLVMValueRef exec_mask;
+ 
+-   if (mask->call_stack_size == 0) {
++   if (mask->cond_stack_size == 0 &&
++       mask->loop_stack_size == 0 &&
++       mask->call_stack_size == 0) {
+       /* returning from main() */
+       *pc = -1;
+       return;
+    }
++
++   if (mask->call_stack_size == 0) {
++      /*
++       * This requires special handling since we need to ensure
++       * we don't drop the mask even if we have no call stack
++       * (e.g. after a ret in an if clause after the endif)
++       */
++      mask->ret_in_main = TRUE;
++   }
++
+    exec_mask = LLVMBuildNot(builder,
+                             mask->exec_mask,
+                             "ret");
+diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
+index 1267e79..dc3a5fb 100644
+--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
++++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
+@@ -1569,7 +1569,7 @@ tgsi_text_translate(
+    struct tgsi_token *tokens,
+    uint num_tokens )
+ {
+-   struct translate_ctx ctx;
++   struct translate_ctx ctx = {0};
+ 
+    ctx.text = text;
+    ctx.cur = text;
+diff --git a/src/gallium/drivers/Makefile.am b/src/gallium/drivers/Makefile.am
+index 25d9533..3477fee 100644
+--- a/src/gallium/drivers/Makefile.am
++++ b/src/gallium/drivers/Makefile.am
+@@ -1,6 +1,7 @@
+ AUTOMAKE_OPTIONS = subdir-objects
+ 
+ AM_CPPFLAGS = \
++	-I$(top_srcdir)/include \
+ 	-I$(top_srcdir)/src/gallium/include \
+ 	-I$(top_srcdir)/src/gallium/auxiliary \
+ 	-I$(top_srcdir)/src/gallium/drivers \
+diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
+index 328c0f7..e145391 100644
+--- a/src/gallium/drivers/llvmpipe/lp_scene.c
++++ b/src/gallium/drivers/llvmpipe/lp_scene.c
+@@ -64,6 +64,28 @@ lp_scene_create( struct pipe_context *pipe )
+ 
+    pipe_mutex_init(scene->mutex);
+ 
++#ifdef DEBUG
++   /* Do some scene limit sanity checks here */
++   {
++      size_t maxBins = TILES_X * TILES_Y;
++      size_t maxCommandBytes = sizeof(struct cmd_block) * maxBins;
++      size_t maxCommandPlusData = maxCommandBytes + DATA_BLOCK_SIZE;
++      /* We'll need at least one command block per bin.  Make sure that's
++       * less than the max allowed scene size.
++       */
++      assert(maxCommandBytes < LP_SCENE_MAX_SIZE);
++      /* We'll also need space for at least one other data block */
++      assert(maxCommandPlusData <= LP_SCENE_MAX_SIZE);
++
++      /* Ideally, the size of a cmd_block object will be a power of two
++       * in order to avoid wasting space when we allocate them from
++       * data blocks (which are power of two also).
++       */
++      assert(sizeof(struct cmd_block) ==
++             util_next_power_of_two(sizeof(struct cmd_block)));
++   }
++#endif
++
+    return scene;
+ }
+ 
+diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h
+index b1db61b..801829d 100644
+--- a/src/gallium/drivers/llvmpipe/lp_scene.h
++++ b/src/gallium/drivers/llvmpipe/lp_scene.h
+@@ -49,12 +49,18 @@ struct lp_rast_state;
+ #define TILES_Y (LP_MAX_HEIGHT / TILE_SIZE)
+ 
+ 
+-#define CMD_BLOCK_MAX 128
++/* Commands per command block (ideally so sizeof(cmd_block) is a power of
++ * two in size.)
++ */
++#define CMD_BLOCK_MAX 29
++
++/* Bytes per data block.
++ */
+ #define DATA_BLOCK_SIZE (64 * 1024)
+ 
+ /* Scene temporary storage is clamped to this size:
+  */
+-#define LP_SCENE_MAX_SIZE (4*1024*1024)
++#define LP_SCENE_MAX_SIZE (9*1024*1024)
+ 
+ /* The maximum amount of texture storage referenced by a scene is
+  * clamped ot this size:
+diff --git a/src/gallium/drivers/nv50/nv50_blit.h b/src/gallium/drivers/nv50/nv50_blit.h
+index d409f21..bdd6a63 100644
+--- a/src/gallium/drivers/nv50/nv50_blit.h
++++ b/src/gallium/drivers/nv50/nv50_blit.h
+@@ -180,4 +180,44 @@ nv50_blit_eng2d_get_mask(const struct pipe_blit_info *info)
+    return mask;
+ }
+ 
++#if NOUVEAU_DRIVER == 0xc0
++# define nv50_format_table nvc0_format_table
++#endif
++
++/* return TRUE for formats that can be converted among each other by NVC0_2D */
++static INLINE boolean
++nv50_2d_dst_format_faithful(enum pipe_format format)
++{
++   const uint64_t mask =
++       NV50_ENG2D_SUPPORTED_FORMATS &
++      ~NV50_ENG2D_NOCONVERT_FORMATS;
++   uint8_t id = nv50_format_table[format].rt;
++   return (id >= 0xc0) && (mask & (1ULL << (id - 0xc0)));
++}
++static INLINE boolean
++nv50_2d_src_format_faithful(enum pipe_format format)
++{
++   const uint64_t mask =
++      NV50_ENG2D_SUPPORTED_FORMATS &
++    ~(NV50_ENG2D_LUMINANCE_FORMATS | NV50_ENG2D_INTENSITY_FORMATS);
++   uint8_t id = nv50_format_table[format].rt;
++   return (id >= 0xc0) && (mask & (1ULL << (id - 0xc0)));
++}
++
++static INLINE boolean
++nv50_2d_format_supported(enum pipe_format format)
++{
++   uint8_t id = nv50_format_table[format].rt;
++   return (id >= 0xc0) &&
++      (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0)));
++}
++
++static INLINE boolean
++nv50_2d_dst_format_ops_supported(enum pipe_format format)
++{
++   uint8_t id = nv50_format_table[format].rt;
++   return (id >= 0xc0) &&
++      (NV50_ENG2D_OPERATION_FORMATS & (1ULL << (id - 0xc0)));
++}
++
+ #endif /* __NV50_BLIT_H__ */
+diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
+index a95e96d..f5e7b36 100644
+--- a/src/gallium/drivers/nv50/nv50_state_validate.c
++++ b/src/gallium/drivers/nv50/nv50_state_validate.c
+@@ -9,6 +9,7 @@ nv50_validate_fb(struct nv50_context *nv50)
+    struct pipe_framebuffer_state *fb = &nv50->framebuffer;
+    unsigned i;
+    unsigned ms_mode = NV50_3D_MULTISAMPLE_MODE_MS1;
++   uint32_t array_size = 0xffff, array_mode = 0;
+ 
+    nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+ 
+@@ -23,6 +24,13 @@ nv50_validate_fb(struct nv50_context *nv50)
+       struct nv50_surface *sf = nv50_surface(fb->cbufs[i]);
+       struct nouveau_bo *bo = mt->base.bo;
+ 
++      array_size = MIN2(array_size, sf->depth);
++      if (mt->layout_3d)
++         array_mode = NV50_3D_RT_ARRAY_MODE_MODE_3D; /* 1 << 16 */
++
++      /* can't mix 3D with ARRAY or have RTs of different depth/array_size */
++      assert(mt->layout_3d || !array_mode || array_size == 1);
++
+       BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(i)), 5);
+       PUSH_DATAh(push, bo->offset + sf->offset);
+       PUSH_DATA (push, bo->offset + sf->offset);
+@@ -34,7 +42,7 @@ nv50_validate_fb(struct nv50_context *nv50)
+          PUSH_DATA (push, sf->width);
+          PUSH_DATA (push, sf->height);
+          BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
+-         PUSH_DATA (push, sf->depth);
++         PUSH_DATA (push, array_mode | array_size);
+       } else {
+          PUSH_DATA (push, 0);
+          PUSH_DATA (push, 0);
+@@ -63,7 +71,7 @@ nv50_validate_fb(struct nv50_context *nv50)
+       struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture);
+       struct nv50_surface *sf = nv50_surface(fb->zsbuf);
+       struct nouveau_bo *bo = mt->base.bo;
+-      int unk = mt->base.base.target == PIPE_TEXTURE_2D;
++      int unk = mt->base.base.target == PIPE_TEXTURE_3D || sf->depth == 1;
+ 
+       BEGIN_NV04(push, NV50_3D(ZETA_ADDRESS_HIGH), 5);
+       PUSH_DATAh(push, bo->offset + sf->offset);
+diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
+index 7a0470c..3a780f6 100644
+--- a/src/gallium/drivers/nv50/nv50_surface.c
++++ b/src/gallium/drivers/nv50/nv50_surface.c
+@@ -35,25 +35,22 @@
+ 
+ #include "nv50_context.h"
+ #include "nv50_resource.h"
+-#include "nv50_blit.h"
+ 
+ #include "nv50_defs.xml.h"
+ #include "nv50_texture.xml.h"
+ 
++/* these are used in nv50_blit.h */
+ #define NV50_ENG2D_SUPPORTED_FORMATS 0xff0843e080608409ULL
++#define NV50_ENG2D_NOCONVERT_FORMATS 0x0008402000000000ULL
++#define NV50_ENG2D_LUMINANCE_FORMATS 0x0008402000000000ULL
++#define NV50_ENG2D_INTENSITY_FORMATS 0x0000000000000000ULL
++#define NV50_ENG2D_OPERATION_FORMATS 0x060001c000608000ULL
+ 
+-/* return TRUE for formats that can be converted among each other by NV50_2D */
+-static INLINE boolean
+-nv50_2d_format_faithful(enum pipe_format format)
+-{
+-   uint8_t id = nv50_format_table[format].rt;
+-
+-   return (id >= 0xc0) &&
+-      (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0)));
+-}
++#define NOUVEAU_DRIVER 0x50
++#include "nv50_blit.h"
+ 
+ static INLINE uint8_t
+-nv50_2d_format(enum pipe_format format)
++nv50_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal)
+ {
+    uint8_t id = nv50_format_table[format].rt;
+ 
+@@ -62,6 +59,7 @@ nv50_2d_format(enum pipe_format format)
+     */
+    if ((id >= 0xc0) && (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0))))
+       return id;
++   assert(dst_src_equal);
+ 
+    switch (util_format_get_blocksize(format)) {
+    case 1:
+@@ -78,7 +76,7 @@ nv50_2d_format(enum pipe_format format)
+ static int
+ nv50_2d_texture_set(struct nouveau_pushbuf *push, int dst,
+                     struct nv50_miptree *mt, unsigned level, unsigned layer,
+-                    enum pipe_format pformat)
++                    enum pipe_format pformat, boolean dst_src_pformat_equal)
+ {
+    struct nouveau_bo *bo = mt->base.bo;
+    uint32_t width, height, depth;
+@@ -86,7 +84,7 @@ nv50_2d_texture_set(struct nouveau_pushbuf *push, int dst,
+    uint32_t mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT;
+    uint32_t offset = mt->level[level].offset;
+ 
+-   format = nv50_2d_format(pformat);
++   format = nv50_2d_format(pformat, dst, dst_src_pformat_equal);
+    if (!format) {
+       NOUVEAU_ERR("invalid/unsupported surface format: %s\n",
+                   util_format_name(pformat));
+@@ -155,15 +153,16 @@ nv50_2d_texture_do_copy(struct nouveau_pushbuf *push,
+    const enum pipe_format dfmt = dst->base.base.format;
+    const enum pipe_format sfmt = src->base.base.format;
+    int ret;
++   boolean eqfmt = dfmt == sfmt;
+ 
+    if (!PUSH_SPACE(push, 2 * 16 + 32))
+       return PIPE_ERROR;
+ 
+-   ret = nv50_2d_texture_set(push, 1, dst, dst_level, dz, dfmt);
++   ret = nv50_2d_texture_set(push, 1, dst, dst_level, dz, dfmt, eqfmt);
+    if (ret)
+       return ret;
+ 
+-   ret = nv50_2d_texture_set(push, 0, src, src_level, sz, sfmt);
++   ret = nv50_2d_texture_set(push, 0, src, src_level, sz, sfmt, eqfmt);
+    if (ret)
+       return ret;
+ 
+@@ -243,8 +242,8 @@ nv50_resource_copy_region(struct pipe_context *pipe,
+    }
+ 
+    assert((src->format == dst->format) ||
+-          (nv50_2d_format_faithful(src->format) &&
+-           nv50_2d_format_faithful(dst->format)));
++          (nv50_2d_src_format_faithful(src->format) &&
++           nv50_2d_dst_format_faithful(dst->format)));
+ 
+    BCTX_REFN(nv50->bufctx, 2D, nv04_resource(src), RD);
+    BCTX_REFN(nv50->bufctx, 2D, nv04_resource(dst), WR);
+@@ -936,7 +935,7 @@ nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info)
+    nv50_blit_select_fp(blit, info);
+    nv50_blitctx_pre_blit(blit);
+ 
+-   nv50_blit_set_dst(blit, dst, info->dst.level,  0, info->dst.format);
++   nv50_blit_set_dst(blit, dst, info->dst.level, -1, info->dst.format);
+    nv50_blit_set_src(blit, src, info->src.level, -1, info->src.format,
+                      blit->filter);
+ 
+@@ -977,6 +976,8 @@ nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info)
+ 
+    BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1);
+    PUSH_DATA (push, 0);
++   BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1);
++   PUSH_DATA (push, 0x1);
+ 
+    /* Draw a large triangle in screen coordinates covering the whole
+     * render target, with scissors defining the destination region.
+@@ -1059,7 +1060,8 @@ nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info)
+    int64_t du_dx, dv_dy;
+    int i;
+    uint32_t mode;
+-   const uint32_t mask = nv50_blit_eng2d_get_mask(info);
++   uint32_t mask = nv50_blit_eng2d_get_mask(info);
++   boolean b;
+ 
+    mode = nv50_blit_get_filter(info) ?
+       NV50_2D_BLIT_CONTROL_FILTER_BILINEAR :
+@@ -1070,8 +1072,9 @@ nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info)
+    du_dx = ((int64_t)info->src.box.width << 32) / info->dst.box.width;
+    dv_dy = ((int64_t)info->src.box.height << 32) / info->dst.box.height;
+ 
+-   nv50_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format);
+-   nv50_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format);
++   b = info->dst.format == info->src.format;
++   nv50_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format, b);
++   nv50_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format, b);
+ 
+    if (info->scissor_enable) {
+       BEGIN_NV04(push, NV50_2D(CLIP_X), 5);
+@@ -1094,6 +1097,17 @@ nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info)
+       PUSH_DATA (push, 0xffffffff);
+       BEGIN_NV04(push, NV50_2D(OPERATION), 1);
+       PUSH_DATA (push, NV50_2D_OPERATION_ROP);
++   } else
++   if (info->src.format != info->dst.format) {
++      if (info->src.format == PIPE_FORMAT_R8_UNORM ||
++          info->src.format == PIPE_FORMAT_R16_UNORM ||
++          info->src.format == PIPE_FORMAT_R16_FLOAT ||
++          info->src.format == PIPE_FORMAT_R32_FLOAT) {
++         mask = 0xffff0000; /* also makes condition for OPERATION reset true */
++         BEGIN_NV04(push, NV50_2D(BETA4), 2);
++         PUSH_DATA (push, mask);
++         PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY_PREMULT);
++      }
+    }
+ 
+    if (src->ms_x > dst->ms_x || src->ms_y > dst->ms_y) {
+@@ -1224,10 +1238,25 @@ nv50_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
+       debug_printf("blit: cannot filter array or cube textures in z direction");
+    }
+ 
+-   if (!eng3d && info->dst.format != info->src.format)
+-      if (!nv50_2d_format_faithful(info->dst.format) ||
+-          !nv50_2d_format_faithful(info->src.format))
++   if (!eng3d && info->dst.format != info->src.format) {
++      if (!nv50_2d_dst_format_faithful(info->dst.format) ||
++          !nv50_2d_src_format_faithful(info->src.format)) {
+          eng3d = TRUE;
++      } else
++      if (!nv50_2d_src_format_faithful(info->src.format)) {
++         if (!util_format_is_luminance(info->src.format)) {
++            if (util_format_is_intensity(info->src.format))
++               eng3d = TRUE;
++            else
++            if (!nv50_2d_dst_format_ops_supported(info->dst.format))
++               eng3d = TRUE;
++            else
++               eng3d = !nv50_2d_format_supported(info->src.format);
++         }
++      } else
++      if (util_format_is_luminance_alpha(info->src.format))
++         eng3d = TRUE;
++   }
+ 
+    if (info->src.resource->nr_samples == 8 &&
+        info->dst.resource->nr_samples <= 1)
+diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
+index 1cf1f96..bd3de58 100644
+--- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h
++++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
+@@ -1041,7 +1041,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define NVC0_3D_VIEWPORT_TRANSFORM_EN				0x0000192c
+ 
+ #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL				0x0000193c
+-#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK0			0x00000001
++#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1		0x00000001
+ #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__MASK		0x00000006
+ #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__SHIFT		1
+ #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK0			0x00000000
+diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c
+index 281d740..66154a4 100644
+--- a/src/gallium/drivers/nvc0/nvc0_surface.c
++++ b/src/gallium/drivers/nvc0/nvc0_surface.c
+@@ -36,29 +36,32 @@
+ 
+ #include "nv50/nv50_defs.xml.h"
+ #include "nv50/nv50_texture.xml.h"
+-#include "nv50/nv50_blit.h"
+ 
+-#define NVC0_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL
++/* these are used in nv50_blit.h */
++#define NV50_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL
++#define NV50_ENG2D_NOCONVERT_FORMATS 0x009cc02000000000ULL
++#define NV50_ENG2D_LUMINANCE_FORMATS 0x001cc02000000000ULL
++#define NV50_ENG2D_INTENSITY_FORMATS 0x0080000000000000ULL
++#define NV50_ENG2D_OPERATION_FORMATS 0x060001c000638000ULL
+ 
+-/* return TRUE for formats that can be converted among each other by NVC0_2D */
+-static INLINE boolean
+-nvc0_2d_format_faithful(enum pipe_format format)
+-{
+-   uint8_t id = nvc0_format_table[format].rt;
+-
+-   return (id >= 0xc0) && (NVC0_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0)));
+-}
++#define NOUVEAU_DRIVER 0xc0
++#include "nv50/nv50_blit.h"
+ 
+ static INLINE uint8_t
+-nvc0_2d_format(enum pipe_format format)
++nvc0_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal)
+ {
+    uint8_t id = nvc0_format_table[format].rt;
+ 
++   /* A8_UNORM is treated as I8_UNORM as far as the 2D engine is concerned. */
++   if (!dst && unlikely(format == PIPE_FORMAT_I8_UNORM) && !dst_src_equal)
++      return NV50_SURFACE_FORMAT_A8_UNORM;
++
+    /* Hardware values for color formats range from 0xc0 to 0xff,
+     * but the 2D engine doesn't support all of them.
+     */
+-   if (nvc0_2d_format_faithful(format))
++   if (nv50_2d_format_supported(format))
+       return id;
++   assert(dst_src_equal);
+ 
+    switch (util_format_get_blocksize(format)) {
+    case 1:
+@@ -72,6 +75,7 @@ nvc0_2d_format(enum pipe_format format)
+    case 16:
+       return NV50_SURFACE_FORMAT_RGBA32_FLOAT;
+    default:
++      assert(0);
+       return 0;
+    }
+ }
+@@ -79,7 +83,7 @@ nvc0_2d_format(enum pipe_format format)
+ static int
+ nvc0_2d_texture_set(struct nouveau_pushbuf *push, boolean dst,
+                     struct nv50_miptree *mt, unsigned level, unsigned layer,
+-                    enum pipe_format pformat)
++                    enum pipe_format pformat, boolean dst_src_pformat_equal)
+ {
+    struct nouveau_bo *bo = mt->base.bo;
+    uint32_t width, height, depth;
+@@ -87,7 +91,7 @@ nvc0_2d_texture_set(struct nouveau_pushbuf *push, boolean dst,
+    uint32_t mthd = dst ? NVC0_2D_DST_FORMAT : NVC0_2D_SRC_FORMAT;
+    uint32_t offset = mt->level[level].offset;
+ 
+-   format = nvc0_2d_format(pformat);
++   format = nvc0_2d_format(pformat, dst, dst_src_pformat_equal);
+    if (!format) {
+       NOUVEAU_ERR("invalid/unsupported surface format: %s\n",
+                   util_format_name(pformat));
+@@ -157,15 +161,16 @@ nvc0_2d_texture_do_copy(struct nouveau_pushbuf *push,
+    const enum pipe_format dfmt = dst->base.base.format;
+    const enum pipe_format sfmt = src->base.base.format;
+    int ret;
++   boolean eqfmt = dfmt == sfmt;
+ 
+    if (!PUSH_SPACE(push, 2 * 16 + 32))
+       return PIPE_ERROR;
+ 
+-   ret = nvc0_2d_texture_set(push, TRUE, dst, dst_level, dz, dfmt);
++   ret = nvc0_2d_texture_set(push, TRUE, dst, dst_level, dz, dfmt, eqfmt);
+    if (ret)
+       return ret;
+ 
+-   ret = nvc0_2d_texture_set(push, FALSE, src, src_level, sz, sfmt);
++   ret = nvc0_2d_texture_set(push, FALSE, src, src_level, sz, sfmt, eqfmt);
+    if (ret)
+       return ret;
+ 
+@@ -243,8 +248,8 @@ nvc0_resource_copy_region(struct pipe_context *pipe,
+       return;
+    }
+ 
+-   assert(nvc0_2d_format_faithful(src->format));
+-   assert(nvc0_2d_format_faithful(dst->format));
++   assert(nv50_2d_dst_format_faithful(dst->format));
++   assert(nv50_2d_src_format_faithful(src->format));
+ 
+    BCTX_REFN(nvc0->bufctx, 2D, nv04_resource(src), RD);
+    BCTX_REFN(nvc0->bufctx, 2D, nv04_resource(dst), WR);
+@@ -490,19 +495,19 @@ nvc0_blitter_make_vp(struct nvc0_blitter *blit)
+ {
+    static const uint32_t code_nvc0[] =
+    {
+-      0xfff01c66, 0x06000080, /* vfetch b128 { $r0 $r1 $r2 $r3 } a[0x80] */
+-      0xfff11c26, 0x06000090, /* vfetch b96 { $r4 $r5 $r6 } a[0x90]*/
+-      0x03f01c66, 0x0a7e0070, /* export b128 o[0x70] { $r0 $r1 $r2 $r3 } */
+-      0x13f01c26, 0x0a7e0080, /* export b96 o[0x80] { $r4 $r5 $r6 } */
++      0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */
++      0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */
++      0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */
++      0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */
+       0x00001de7, 0x80000000, /* exit */
+    };
+    static const uint32_t code_nve4[] =
+    {
+       0x00000007, 0x20000000, /* sched */
+-      0xfff01c66, 0x06000080, /* vfetch b128 { $r0 $r1 $r2 $r3 } a[0x80] */
+-      0xfff11c46, 0x06000090, /* vfetch b96 { $r4 $r5 $r6 } a[0x90]*/
+-      0x03f01c66, 0x0a7e0070, /* export b128 o[0x70] { $r0 $r1 $r2 $r3 } */
+-      0x13f01c46, 0x0a7e0080, /* export b96 o[0x80] { $r4 $r5 $r6 } */
++      0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */
++      0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */
++      0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */
++      0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */
+       0x00001de7, 0x80000000, /* exit */
+    };
+ 
+@@ -515,13 +520,13 @@ nvc0_blitter_make_vp(struct nvc0_blitter *blit)
+       blit->vp.code = (uint32_t *)code_nvc0; /* const_cast */
+       blit->vp.code_size = sizeof(code_nvc0);
+    }
+-   blit->vp.max_gpr = 7;
++   blit->vp.max_gpr = 6;
+    blit->vp.vp.edgeflag = PIPE_MAX_ATTRIBS;
+ 
+    blit->vp.hdr[0]  = 0x00020461; /* vertprog magic */
+    blit->vp.hdr[4]  = 0x000ff000; /* no outputs read */
+-   blit->vp.hdr[6]  = 0x0000003f; /* a[0x80], a[0x90] */
+-   blit->vp.hdr[13] = 0x0003f000; /* o[0x70], o[0x80] */
++   blit->vp.hdr[6]  = 0x00000073; /* a[0x80].xy, a[0x90].xyz */
++   blit->vp.hdr[13] = 0x00073000; /* o[0x70].xy, o[0x80].xyz */
+ }
+ 
+ static void
+@@ -820,7 +825,7 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
+    nvc0_blit_select_fp(blit, info);
+    nvc0_blitctx_pre_blit(blit);
+ 
+-   nvc0_blit_set_dst(blit, dst, info->dst.level,  0, info->dst.format);
++   nvc0_blit_set_dst(blit, dst, info->dst.level, -1, info->dst.format);
+    nvc0_blit_set_src(blit, src, info->src.level, -1, info->src.format,
+                      blit->filter);
+ 
+@@ -859,6 +864,8 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
+       z += 0.5f * dz;
+ 
+    IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 0);
++   IMMED_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 0x2 |
++              NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1);
+    BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(0)), 2);
+    PUSH_DATA (push, nvc0->framebuffer.width << 16);
+    PUSH_DATA (push, nvc0->framebuffer.height << 16);
+@@ -925,11 +932,14 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
+    if (info->dst.box.z + info->dst.box.depth - 1)
+       IMMED_NVC0(push, NVC0_3D(LAYER), 0);
+ 
+-   /* re-enable normally constant state */
++   nvc0_blitctx_post_blit(blit);
+ 
+-   IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1);
++   /* restore viewport */
+ 
+-   nvc0_blitctx_post_blit(blit);
++   BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(0)), 2);
++   PUSH_DATA (push, nvc0->framebuffer.width << 16);
++   PUSH_DATA (push, nvc0->framebuffer.height << 16);
++   IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1);
+ }
+ 
+ static void
+@@ -948,7 +958,8 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
+    int64_t du_dx, dv_dy;
+    int i;
+    uint32_t mode;
+-   const uint32_t mask = nv50_blit_eng2d_get_mask(info);
++   uint32_t mask = nv50_blit_eng2d_get_mask(info);
++   boolean b;
+ 
+    mode = nv50_blit_get_filter(info) ?
+       NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR :
+@@ -959,8 +970,9 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
+    du_dx = ((int64_t)info->src.box.width << 32) / info->dst.box.width;
+    dv_dy = ((int64_t)info->src.box.height << 32) / info->dst.box.height;
+ 
+-   nvc0_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format);
+-   nvc0_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format);
++   b = info->dst.format == info->src.format;
++   nvc0_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format, b);
++   nvc0_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format, b);
+ 
+    if (info->scissor_enable) {
+       BEGIN_NVC0(push, NVC0_2D(CLIP_X), 5);
+@@ -981,6 +993,25 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
+       PUSH_DATA (push, 0xffffffff);
+       PUSH_DATA (push, 0xffffffff);
+       IMMED_NVC0(push, NVC0_2D(OPERATION), NVC0_2D_OPERATION_ROP);
++   } else
++   if (info->src.format != info->dst.format) {
++      if (info->src.format == PIPE_FORMAT_R8_UNORM ||
++          info->src.format == PIPE_FORMAT_R8_SNORM ||
++          info->src.format == PIPE_FORMAT_R16_UNORM ||
++          info->src.format == PIPE_FORMAT_R16_SNORM ||
++          info->src.format == PIPE_FORMAT_R16_FLOAT ||
++          info->src.format == PIPE_FORMAT_R32_FLOAT) {
++         mask = 0xffff0000; /* also makes condition for OPERATION reset true */
++         BEGIN_NVC0(push, NVC0_2D(BETA4), 2);
++         PUSH_DATA (push, mask);
++         PUSH_DATA (push, NVC0_2D_OPERATION_SRCCOPY_PREMULT);
++      } else
++      if (info->src.format == PIPE_FORMAT_A8_UNORM) {
++         mask = 0xff000000;
++         BEGIN_NVC0(push, NVC0_2D(BETA4), 2);
++         PUSH_DATA (push, mask);
++         PUSH_DATA (push, NVC0_2D_OPERATION_SRCCOPY_PREMULT);
++      }
+    }
+ 
+    if (src->ms_x > dst->ms_x || src->ms_y > dst->ms_y) {
+@@ -1106,10 +1137,24 @@ nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
+       debug_printf("blit: cannot filter array or cube textures in z direction");
+    }
+ 
+-   if (!eng3d && info->dst.format != info->src.format)
+-      if (!nvc0_2d_format_faithful(info->dst.format) ||
+-          !nvc0_2d_format_faithful(info->src.format))
++   if (!eng3d && info->dst.format != info->src.format) {
++      if (!nv50_2d_dst_format_faithful(info->dst.format)) {
++         eng3d = TRUE;
++      } else
++      if (!nv50_2d_src_format_faithful(info->src.format)) {
++         if (!util_format_is_luminance(info->src.format)) {
++            if (util_format_is_intensity(info->src.format))
++               eng3d = info->src.format != PIPE_FORMAT_I8_UNORM;
++            else
++            if (!nv50_2d_dst_format_ops_supported(info->dst.format))
++               eng3d = TRUE;
++            else
++               eng3d = !nv50_2d_format_supported(info->src.format);
++         }
++      } else
++      if (util_format_is_luminance_alpha(info->src.format))
+          eng3d = TRUE;
++   }
+ 
+    if (info->src.resource->nr_samples == 8 &&
+        info->dst.resource->nr_samples <= 1)
+diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c
+index 734c7f2..74afd6f 100644
+--- a/src/gallium/drivers/r300/compiler/radeon_optimize.c
++++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c
+@@ -708,6 +708,7 @@ static int peephole_mul_omod(
+ 	struct rc_list * writer_list;
+ 	struct rc_variable * var;
+ 	struct peephole_mul_cb_data cb_data;
++	unsigned writemask_sum;
+ 
+ 	for (i = 0; i < 2; i++) {
+ 		unsigned int j;
+@@ -815,10 +816,11 @@ static int peephole_mul_omod(
+ 	}
+ 
+ 	/* Rewrite the instructions */
++	writemask_sum = rc_variable_writemask_sum(writer_list->Item);
+ 	for (var = writer_list->Item; var; var = var->Friend) {
+ 		struct rc_variable * writer = var;
+ 		unsigned conversion_swizzle = rc_make_conversion_swizzle(
+-					writer->Inst->U.I.DstReg.WriteMask,
++					writemask_sum,
+ 					inst_mul->U.I.DstReg.WriteMask);
+ 		writer->Inst->U.I.Omod = omod_op;
+ 		writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File;
+diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
+index a7973a5..80b859f 100644
+--- a/src/gallium/drivers/r600/r600_pipe.c
++++ b/src/gallium/drivers/r600/r600_pipe.c
+@@ -1157,7 +1157,7 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
+ 	 * case were triggering lockup quickly such as :
+ 	 * piglit/bin/depthstencil-render-miplevels 1024 d=s=z24_s8
+ 	 */
+-	rscreen->use_hyperz = debug_get_bool_option("R600_HYPERZ", TRUE);
++	rscreen->use_hyperz = debug_get_bool_option("R600_HYPERZ", FALSE);
+ 	rscreen->use_hyperz = rscreen->info.drm_minor >= 26 ? rscreen->use_hyperz : FALSE;
+ 
+ 	rscreen->global_pool = compute_memory_pool_new(rscreen);
+diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c
+index 0335189..782ad26 100644
+--- a/src/gallium/drivers/r600/r600_query.c
++++ b/src/gallium/drivers/r600/r600_query.c
+@@ -186,10 +186,11 @@ static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *que
+ 	case PIPE_QUERY_PRIMITIVES_GENERATED:
+ 	case PIPE_QUERY_SO_STATISTICS:
+ 	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
++		va += query->buffer.results_end + query->result_size/2;
+ 		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
+ 		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
+-		cs->buf[cs->cdw++] = query->buffer.results_end + query->result_size/2;
+-		cs->buf[cs->cdw++] = 0;
++		cs->buf[cs->cdw++] = va;
++		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
+ 		break;
+ 	case PIPE_QUERY_TIME_ELAPSED:
+ 		va += query->buffer.results_end + query->result_size/2;
+diff --git a/src/gallium/drivers/radeon/Makefile.am b/src/gallium/drivers/radeon/Makefile.am
+index e6eb241..a3a7b74 100644
+--- a/src/gallium/drivers/radeon/Makefile.am
++++ b/src/gallium/drivers/radeon/Makefile.am
+@@ -1,11 +1,14 @@
+ include Makefile.sources
+ include $(top_srcdir)/src/gallium/Automake.inc
+ 
++LIBGALLIUM_LIBS=
++
+ if HAVE_GALLIUM_R600
+ if HAVE_GALLIUM_RADEONSI
+ lib_LTLIBRARIES = libllvmradeon@VERSION@.la
+ libllvmradeon@VERSION@_la_LDFLAGS = -Wl, -shared -avoid-version \
+ 	$(LLVM_LDFLAGS)
++LIBGALLIUM_LIBS += $(top_builddir)/src/gallium/auxiliary/libgallium.la
+ else
+ noinst_LTLIBRARIES = libllvmradeon@VERSION@.la
+ endif
+@@ -26,5 +29,6 @@ libllvmradeon@VERSION@_la_SOURCES = \
+ 	$(C_FILES)
+ 
+ libllvmradeon@VERSION@_la_LIBADD = \
++	$(LIBGALLIUM_LIBS) \
+ 	$(CLOCK_LIB) \
+ 	$(LLVM_LIBS)
+diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
+index 8c35625..93766a3 100644
+--- a/src/gallium/drivers/radeonsi/si_state_draw.c
++++ b/src/gallium/drivers/radeonsi/si_state_draw.c
+@@ -401,6 +401,11 @@ static void si_update_derived_state(struct r600_context *rctx)
+ 	}
+ 
+ 	if (si_pm4_state_changed(rctx, ps) || si_pm4_state_changed(rctx, vs)) {
++		/* XXX: Emitting the PS state even when only the VS changed
++		 * fixes random failures with piglit glsl-max-varyings.
++		 * Not sure why...
++		 */
++		rctx->emitted.named.ps = NULL;
+ 		si_update_spi_map(rctx);
+ 	}
+ }
+diff --git a/src/gallium/drivers/rbug/Makefile.am b/src/gallium/drivers/rbug/Makefile.am
+index 655bfe1..3c1a8b5 100644
+--- a/src/gallium/drivers/rbug/Makefile.am
++++ b/src/gallium/drivers/rbug/Makefile.am
+@@ -30,6 +30,7 @@ noinst_LTLIBRARIES = librbug.la
+ # preprocessor is determined by the ordering of the -I flags.
+ AM_CFLAGS = \
+ 	$(GALLIUM_CFLAGS) \
++	$(VISIBILITY_CFLAGS) \
+ 	-I$(top_srcdir)/src/gallium/drivers \
+ 	-I$(top_srcdir)/include
+ 
+diff --git a/src/gallium/drivers/svga/Makefile.am b/src/gallium/drivers/svga/Makefile.am
+index fdaa3c8..7eacd90 100644
+--- a/src/gallium/drivers/svga/Makefile.am
++++ b/src/gallium/drivers/svga/Makefile.am
+@@ -29,6 +29,8 @@ AM_CPPFLAGS = \
+ 	-I$(top_srcdir)/include \
+ 	$(GALLIUM_CFLAGS)
+ 
++AM_CFLAGS = $(VISIBILITY_CFLAGS)
++
+ #On some systems -std= must be added to CFLAGS to be the last -std=
+ CFLAGS += -std=gnu99
+ 
+diff --git a/src/gallium/drivers/trace/Makefile.am b/src/gallium/drivers/trace/Makefile.am
+index a9e1457..984ead4 100644
+--- a/src/gallium/drivers/trace/Makefile.am
++++ b/src/gallium/drivers/trace/Makefile.am
+@@ -1,7 +1,8 @@
+ include $(top_srcdir)/src/gallium/Automake.inc
+ 
+ AM_CFLAGS = \
+-	$(GALLIUM_CFLAGS)
++	$(GALLIUM_CFLAGS) \
++	$(VISIBILITY_CFLAGS)
+ 
+ noinst_LTLIBRARIES = libtrace.la
+ 
+diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h
+index 5958333..a131969 100644
+--- a/src/gallium/include/pipe/p_compiler.h
++++ b/src/gallium/include/pipe/p_compiler.h
+@@ -29,6 +29,8 @@
+ #define P_COMPILER_H
+ 
+ 
++#include "c99_compat.h" /* inline, __func__, etc. */
++
+ #include "p_config.h"
+ 
+ #include <stdlib.h>
+@@ -90,28 +92,7 @@ typedef unsigned char boolean;
+ #endif
+ #endif
+ 
+-/* Function inlining */
+-#ifndef inline
+-#  ifdef __cplusplus
+-     /* C++ supports inline keyword */
+-#  elif defined(__GNUC__)
+-#    define inline __inline__
+-#  elif defined(_MSC_VER)
+-#    define inline __inline
+-#  elif defined(__ICL)
+-#    define inline __inline
+-#  elif defined(__INTEL_COMPILER)
+-     /* Intel compiler supports inline keyword */
+-#  elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
+-#    define inline __inline
+-#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
+-     /* C99 supports inline keyword */
+-#  elif (__STDC_VERSION__ >= 199901L)
+-     /* C99 supports inline keyword */
+-#  else
+-#    define inline
+-#  endif
+-#endif
++/* XXX: Use standard `inline` keyword instead */
+ #ifndef INLINE
+ #  define INLINE inline
+ #endif
+@@ -127,26 +108,6 @@ typedef unsigned char boolean;
+ #  endif
+ #endif
+ 
+-/*
+- * Define the C99 restrict keyword.
+- *
+- * See also:
+- * - http://cellperformance.beyond3d.com/articles/2006/05/demystifying-the-restrict-keyword.html
+- */
+-#ifndef restrict
+-#  if (__STDC_VERSION__ >= 199901L)
+-     /* C99 */
+-#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
+-     /* C99 */
+-#  elif defined(__GNUC__)
+-#    define restrict __restrict__
+-#  elif defined(_MSC_VER)
+-#    define restrict __restrict
+-#  else
+-#    define restrict /* */
+-#  endif
+-#endif
+-
+ 
+ /* Function visibility */
+ #ifndef PUBLIC
+@@ -160,35 +121,10 @@ typedef unsigned char boolean;
+ #endif
+ 
+ 
+-/* The __FUNCTION__ gcc variable is generally only used for debugging.
+- * If we're not using gcc, define __FUNCTION__ as a cpp symbol here.
+- */
++/* XXX: Use standard `__func__` instead */
+ #ifndef __FUNCTION__
+-# if !defined(__GNUC__)
+-#  if (__STDC_VERSION__ >= 199901L) /* C99 */ || \
+-    (defined(__SUNPRO_C) && defined(__C99FEATURES__))
+-#   define __FUNCTION__ __func__
+-#  else
+-#   define __FUNCTION__ "<unknown>"
+-#  endif
+-# endif
+-# if defined(_MSC_VER) && _MSC_VER < 1300
+-#  define __FUNCTION__ "<unknown>"
+-# endif
++#  define __FUNCTION__ __func__
+ #endif
+-#ifndef __func__
+-#  if (__STDC_VERSION__ >= 199901L) || \
+-      (defined(__SUNPRO_C) && defined(__C99FEATURES__))
+-       /* __func__ is part of C99 */
+-#  elif defined(_MSC_VER)
+-#    if _MSC_VER >= 1300
+-#      define __func__ __FUNCTION__
+-#    else
+-#      define __func__ "<unknown>"
+-#    endif
+-#  endif
+-#endif
+-
+ 
+ 
+ /* This should match linux gcc cdecl semantics everywhere, so that we
+diff --git a/src/gallium/state_trackers/egl/Makefile.am b/src/gallium/state_trackers/egl/Makefile.am
+index e19e9a3..f78b36e 100644
+--- a/src/gallium/state_trackers/egl/Makefile.am
++++ b/src/gallium/state_trackers/egl/Makefile.am
+@@ -27,7 +27,7 @@ include $(top_srcdir)/src/gallium/Automake.inc
+ AM_CFLAGS = $(GALLIUM_CFLAGS)
+ AM_CPPFLAGS = \
+ 	-I$(top_srcdir)/src/egl/main \
+-	-I$(top_srcdir)/src/egl/wayland/wayland-drm/ \
++	-I$(top_builddir)/src/egl/wayland/wayland-drm/ \
+ 	-I$(top_srcdir)/include
+ 
+ noinst_LTLIBRARIES = libegl.la
+diff --git a/src/gallium/state_trackers/xa/Makefile.am b/src/gallium/state_trackers/xa/Makefile.am
+index 5b53ef9..57d55c4 100644
+--- a/src/gallium/state_trackers/xa/Makefile.am
++++ b/src/gallium/state_trackers/xa/Makefile.am
+@@ -24,7 +24,9 @@ include $(top_srcdir)/src/gallium/Automake.inc
+ 
+ AM_CFLAGS = \
+ 	-Wall -pedantic \
+-	$(GALLIUM_CFLAGS)
++	$(GALLIUM_CFLAGS) \
++	$(VISIBILITY_CFLAGS)
++
+ AM_CPPFLAGS = \
+ 	-I$(top_srcdir)/src/gallium/ \
+ 	-I$(top_srcdir)/src/gallium/winsys \
+diff --git a/src/gallium/winsys/svga/drm/Makefile.am b/src/gallium/winsys/svga/drm/Makefile.am
+index 53bbcc2..d7ada3c 100644
+--- a/src/gallium/winsys/svga/drm/Makefile.am
++++ b/src/gallium/winsys/svga/drm/Makefile.am
+@@ -31,6 +31,8 @@ AM_CPPFLAGS = \
+ 	$(GALLIUM_CFLAGS) \
+ 	$(LIBDRM_CFLAGS)
+ 
++AM_CFLAGS = $(VISIBILITY_CFLAGS)
++
+ #On some systems -std= must be added to CFLAGS to be the last -std=
+ CFLAGS += -std=gnu99 -D_FILE_OFFSET_BITS=64
+ 
+diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
+index 02d85b8..dee9709 100644
+--- a/src/glsl/glsl_types.cpp
++++ b/src/glsl/glsl_types.cpp
+@@ -446,6 +446,8 @@ const glsl_type *glsl_type::get_scalar_type() const
+       return int_type;
+    case GLSL_TYPE_FLOAT:
+       return float_type;
++   case GLSL_TYPE_BOOL:
++      return bool_type;
+    default:
+       /* Handle everything else */
+       return type;
+diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
+index d8cafd5..78ce13e 100644
+--- a/src/glsl/ir_validate.cpp
++++ b/src/glsl/ir_validate.cpp
+@@ -695,6 +695,11 @@ check_node_type(ir_instruction *ir, void *data)
+ void
+ validate_ir_tree(exec_list *instructions)
+ {
++   /* We shouldn't have any reason to validate IR in a release build,
++    * and it's half composed of assert()s anyway which wouldn't do
++    * anything.
++    */
++#ifdef DEBUG
+    ir_validate v;
+ 
+    v.run(instructions);
+@@ -704,4 +709,5 @@ validate_ir_tree(exec_list *instructions)
+ 
+       visit_tree(ir, check_node_type, NULL);
+    }
++#endif
+ }
+diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
+index 57e7a9a..cf0420c 100644
+--- a/src/glsl/linker.cpp
++++ b/src/glsl/linker.cpp
+@@ -1067,13 +1067,11 @@ link_intrastage_shaders(void *mem_ctx,
+ 
+    free(linking_shaders);
+ 
+-#ifdef DEBUG
+    /* At this point linked should contain all of the linked IR, so
+     * validate it to make sure nothing went wrong.
+     */
+    if (linked)
+       validate_ir_tree(linked->ir);
+-#endif
+ 
+    /* Make a pass over all variable declarations to ensure that arrays with
+     * unspecified sizes have a size specified.  The size is inferred from the
+diff --git a/src/glx/Makefile.am b/src/glx/Makefile.am
+index 4aa900a..f01709b 100644
+--- a/src/glx/Makefile.am
++++ b/src/glx/Makefile.am
+@@ -39,6 +39,7 @@ AM_CFLAGS = \
+ 	-I$(top_srcdir)/src/mapi/glapi \
+ 	-I$(top_builddir)/src/mapi \
+ 	-I$(top_builddir)/src/mapi/glapi \
++	$(VISIBILITY_CFLAGS) \
+ 	$(SHARED_GLAPI_CFLAGS) \
+ 	$(EXTRA_DEFINES_XF86VIDMODE) \
+ 	-D_REENTRANT \
+diff --git a/src/mapi/glapi/gen/gl_x86-64_asm.py b/src/mapi/glapi/gen/gl_x86-64_asm.py
+index a3548c2..19e0e15 100644
+--- a/src/mapi/glapi/gen/gl_x86-64_asm.py
++++ b/src/mapi/glapi/gen/gl_x86-64_asm.py
+@@ -181,19 +181,6 @@ class PrintGenericStubs(gl_XML.gl_print_base):
+ 
+     def printRealFooter(self):
+         print ''
+-        print '#if defined(GLX_USE_TLS) && defined(__linux__)'
+-        print '	.section ".note.ABI-tag", "a"'
+-        print '	.p2align 2'
+-        print '	.long	1f - 0f   /* name length */'
+-        print '	.long	3f - 2f   /* data length */'
+-        print '	.long	1         /* note length */'
+-        print '0:	.asciz "GNU"      /* vendor name */'
+-        print '1:	.p2align 2'
+-        print '2:	.long	0         /* note data: the ABI tag */'
+-        print '	.long	2,4,20    /* Minimum kernel version w/TLS */'
+-        print '3:	.p2align 2        /* pad out section */'
+-        print '#endif /* GLX_USE_TLS */'
+-        print ''
+         print '#if defined (__ELF__) && defined (__linux__)'
+         print '	.section .note.GNU-stack,"",%progbits'
+         print '#endif'
+diff --git a/src/mapi/glapi/gen/gl_x86_asm.py b/src/mapi/glapi/gen/gl_x86_asm.py
+index 8b0f6ee..919bbc0 100644
+--- a/src/mapi/glapi/gen/gl_x86_asm.py
++++ b/src/mapi/glapi/gen/gl_x86_asm.py
+@@ -189,19 +189,6 @@ class PrintGenericStubs(gl_XML.gl_print_base):
+         print '\t\tALIGNTEXT16'
+         print 'GLNAME(gl_dispatch_functions_end):'
+         print ''
+-        print '#if defined(GLX_USE_TLS) && defined(__linux__)'
+-        print '	.section ".note.ABI-tag", "a"'
+-        print '	.p2align 2'
+-        print '	.long	1f - 0f   /* name length */'
+-        print '	.long	3f - 2f   /* data length */'
+-        print '	.long	1         /* note length */'
+-        print '0:	.asciz "GNU"      /* vendor name */'
+-        print '1:	.p2align 2'
+-        print '2:	.long	0         /* note data: the ABI tag */'
+-        print '	.long	2,4,20    /* Minimum kernel version w/TLS */'
+-        print '3:	.p2align 2        /* pad out section */'
+-        print '#endif /* GLX_USE_TLS */'
+-        print ''
+         print '#if defined (__ELF__) && defined (__linux__)'
+         print '	.section .note.GNU-stack,"",%progbits'
+         print '#endif'
+diff --git a/src/mapi/mapi/entry_x86-64_tls.h b/src/mapi/mapi/entry_x86-64_tls.h
+index 72d4125..36cad00 100644
+--- a/src/mapi/mapi/entry_x86-64_tls.h
++++ b/src/mapi/mapi/entry_x86-64_tls.h
+@@ -28,19 +28,6 @@
+ 
+ #include "u_macros.h"
+ 
+-#ifdef __linux__
+-__asm__(".section .note.ABI-tag, \"a\"\n\t"
+-        ".p2align 2\n\t"
+-        ".long 1f - 0f\n\t"      /* name length */
+-        ".long 3f - 2f\n\t"      /* data length */
+-        ".long 1\n\t"            /* note length */
+-        "0: .asciz \"GNU\"\n\t"  /* vendor name */
+-        "1: .p2align 2\n\t"
+-        "2: .long 0\n\t"         /* note data: the ABI tag */
+-        ".long 2,4,20\n\t"       /* Minimum kernel version w/TLS */
+-        "3: .p2align 2\n\t");    /* pad out section */
+-#endif /* __linux__ */
+-
+ __asm__(".text\n"
+         ".balign 32\n"
+         "x86_64_entry_start:");
+diff --git a/src/mapi/mapi/entry_x86_tls.h b/src/mapi/mapi/entry_x86_tls.h
+index de91812..58d09ca 100644
+--- a/src/mapi/mapi/entry_x86_tls.h
++++ b/src/mapi/mapi/entry_x86_tls.h
+@@ -29,19 +29,6 @@
+ #include <string.h>
+ #include "u_macros.h"
+ 
+-#ifdef __linux__
+-__asm__(".section .note.ABI-tag, \"a\"\n\t"
+-        ".p2align 2\n\t"
+-        ".long 1f - 0f\n\t"      /* name length */
+-        ".long 3f - 2f\n\t"      /* data length */
+-        ".long 1\n\t"            /* note length */
+-        "0: .asciz \"GNU\"\n\t"  /* vendor name */
+-        "1: .p2align 2\n\t"
+-        "2: .long 0\n\t"         /* note data: the ABI tag */
+-        ".long 2,4,20\n\t"       /* Minimum kernel version w/TLS */
+-        "3: .p2align 2\n\t");    /* pad out section */
+-#endif /* __linux__ */
+-
+ __asm__(".text");
+ 
+ __asm__("x86_current_tls:\n\t"
+diff --git a/src/mapi/mapi/u_compiler.h b/src/mapi/mapi/u_compiler.h
+index 2b019ed..f376e97 100644
+--- a/src/mapi/mapi/u_compiler.h
++++ b/src/mapi/mapi/u_compiler.h
+@@ -1,28 +1,10 @@
+ #ifndef _U_COMPILER_H_
+ #define _U_COMPILER_H_
+ 
+-/* Function inlining */
+-#ifndef inline
+-#  ifdef __cplusplus
+-     /* C++ supports inline keyword */
+-#  elif defined(__GNUC__)
+-#    define inline __inline__
+-#  elif defined(_MSC_VER)
+-#    define inline __inline
+-#  elif defined(__ICL)
+-#    define inline __inline
+-#  elif defined(__INTEL_COMPILER)
+-     /* Intel compiler supports inline keyword */
+-#  elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
+-#    define inline __inline
+-#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
+-     /* C99 supports inline keyword */
+-#  elif (__STDC_VERSION__ >= 199901L)
+-     /* C99 supports inline keyword */
+-#  else
+-#    define inline
+-#  endif
+-#endif
++#include "c99_compat.h" /* inline, __func__, etc. */
++
++
++/* XXX: Use standard `inline` keyword instead */
+ #ifndef INLINE
+ #  define INLINE inline
+ #endif
+diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
+index 53d8e54..cde1a06 100644
+--- a/src/mesa/drivers/dri/i965/brw_clear.c
++++ b/src/mesa/drivers/dri/i965/brw_clear.c
+@@ -40,6 +40,8 @@
+ #include "intel_mipmap_tree.h"
+ #include "intel_regions.h"
+ 
++#include "brw_context.h"
++
+ #define FILE_DEBUG_FLAG DEBUG_BLIT
+ 
+ static const char *buffer_names[] = {
+@@ -219,7 +221,8 @@ brw_fast_clear_depth(struct gl_context *ctx)
+ static void
+ brw_clear(struct gl_context *ctx, GLbitfield mask)
+ {
+-   struct intel_context *intel = intel_context(ctx);
++   struct brw_context *brw = brw_context(ctx);
++   struct intel_context *intel = &brw->intel;
+ 
+    if (!_mesa_check_conditional_render(ctx))
+       return;
+@@ -229,6 +232,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
+    }
+ 
+    intel_prepare_render(intel);
++   brw_workaround_depthstencil_alignment(brw);
+ 
+    if (mask & BUFFER_BIT_DEPTH) {
+       if (brw_fast_clear_depth(ctx)) {
+diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
+index 79cc12f..4bcfb95 100644
+--- a/src/mesa/drivers/dri/i965/brw_defines.h
++++ b/src/mesa/drivers/dri/i965/brw_defines.h
+@@ -437,6 +437,7 @@
+ #define BRW_SURFACEFORMAT_B10G10R10A2_SSCALED            0x1B9
+ #define BRW_SURFACEFORMAT_B10G10R10A2_UINT               0x1BA
+ #define BRW_SURFACEFORMAT_B10G10R10A2_SINT               0x1BB
++#define BRW_SURFACEFORMAT_RAW                            0x1FF
+ #define BRW_SURFACE_FORMAT_SHIFT	18
+ #define BRW_SURFACE_FORMAT_MASK		INTEL_MASK(26, 18)
+ 
+@@ -857,6 +858,7 @@ enum brw_message_target {
+    GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,
+ 
+    GEN7_SFID_DATAPORT_DATA_CACHE     = 10,
++   HSW_SFID_DATAPORT_DATA_CACHE_1    = 12,
+ };
+ 
+ #define GEN7_MESSAGE_TARGET_DP_DATA_CACHE     10
+@@ -965,7 +967,44 @@ enum brw_message_target {
+ 
+ /* GEN7 */
+ #define GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE          10
++#define GEN7_DATAPORT_DC_OWORD_BLOCK_READ                           0
++#define GEN7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ                 1
++#define GEN7_DATAPORT_DC_OWORD_DUAL_BLOCK_READ                      2
+ #define GEN7_DATAPORT_DC_DWORD_SCATTERED_READ                       3
++#define GEN7_DATAPORT_DC_BYTE_SCATTERED_READ                        4
++#define GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ                       5
++#define GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP                          6
++#define GEN7_DATAPORT_DC_MEMORY_FENCE                               7
++#define GEN7_DATAPORT_DC_OWORD_BLOCK_WRITE                          8
++#define GEN7_DATAPORT_DC_OWORD_DUAL_BLOCK_WRITE                     10
++#define GEN7_DATAPORT_DC_DWORD_SCATTERED_WRITE                      11
++#define GEN7_DATAPORT_DC_BYTE_SCATTERED_WRITE                       12
++#define GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE                      13
++
++/* HSW */
++#define HSW_DATAPORT_DC_PORT0_OWORD_BLOCK_READ                      0
++#define HSW_DATAPORT_DC_PORT0_UNALIGNED_OWORD_BLOCK_READ            1
++#define HSW_DATAPORT_DC_PORT0_OWORD_DUAL_BLOCK_READ                 2
++#define HSW_DATAPORT_DC_PORT0_DWORD_SCATTERED_READ                  3
++#define HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ                   4
++#define HSW_DATAPORT_DC_PORT0_MEMORY_FENCE                          7
++#define HSW_DATAPORT_DC_PORT0_OWORD_BLOCK_WRITE                     8
++#define HSW_DATAPORT_DC_PORT0_OWORD_DUAL_BLOCK_WRITE                10
++#define HSW_DATAPORT_DC_PORT0_DWORD_SCATTERED_WRITE                 11
++#define HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE                  12
++
++#define HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ                  1
++#define HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP                     2
++#define HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2             3
++#define HSW_DATAPORT_DC_PORT1_MEDIA_BLOCK_READ                      4
++#define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ                    5
++#define HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP                       6
++#define HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2               7
++#define HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE                 9
++#define HSW_DATAPORT_DC_PORT1_MEDIA_BLOCK_WRITE                     10
++#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP                     11
++#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2             12
++#define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE                   13
+ 
+ /* dataport atomic operations. */
+ #define BRW_AOP_AND                   1
+diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
+index b34754a..40cae37 100644
+--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
++++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
+@@ -2539,15 +2539,22 @@ void brw_shader_time_add(struct brw_compile *p,
+    brw_set_src0(p, send, brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+                                       base_mrf, 0));
+ 
++   uint32_t sfid, msg_type;
++   if (intel->is_haswell) {
++      sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
++      msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
++   } else {
++      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
++      msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
++   }
++
+    bool header_present = false;
+    bool eot = false;
+    uint32_t mlen = 2; /* offset, value */
+    uint32_t rlen = 0;
+-   brw_set_message_descriptor(p, send,
+-                              GEN7_SFID_DATAPORT_DATA_CACHE,
+-                              mlen, rlen, header_present, eot);
++   brw_set_message_descriptor(p, send, sfid, mlen, rlen, header_present, eot);
+ 
+-   send->bits3.ud |= 6 << 14; /* untyped atomic op */
++   send->bits3.ud |= msg_type << 14;
+    send->bits3.ud |= 0 << 13; /* no return data */
+    send->bits3.ud |= 1 << 12; /* SIMD8 mode */
+    send->bits3.ud |= BRW_AOP_ADD << 8;
+diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
+index f80219e..4924441 100644
+--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
++++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
+@@ -2295,7 +2295,8 @@ clear_deps_for_inst_src(fs_inst *inst, int dispatch_width, bool *deps,
+ void
+ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
+ {
+-   int write_len = inst->regs_written() * dispatch_width / 8;
++   int reg_size = dispatch_width / 8;
++   int write_len = inst->regs_written() * reg_size;
+    int first_write_grf = inst->dst.reg;
+    bool needs_dep[BRW_MAX_MRF];
+    assert(write_len < (int)sizeof(needs_dep) - 1);
+@@ -2334,14 +2335,19 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
+        * instruction but a MOV that might have left us an outstanding
+        * dependency has more latency than a MOV.
+        */
+-      if (scan_inst->dst.file == GRF &&
+-          scan_inst->dst.reg >= first_write_grf &&
+-          scan_inst->dst.reg < first_write_grf + write_len &&
+-          needs_dep[scan_inst->dst.reg - first_write_grf]) {
+-         inst->insert_before(DEP_RESOLVE_MOV(scan_inst->dst.reg));
+-         needs_dep[scan_inst->dst.reg - first_write_grf] = false;
+-         if (scan_inst_16wide)
+-            needs_dep[scan_inst->dst.reg - first_write_grf + 1] = false;
++      if (scan_inst->dst.file == GRF) {
++         for (int i = 0; i < scan_inst->regs_written(); i++) {
++            int reg = scan_inst->dst.reg + i * reg_size;
++
++            if (reg >= first_write_grf &&
++                reg < first_write_grf + write_len &&
++                needs_dep[reg - first_write_grf]) {
++               inst->insert_before(DEP_RESOLVE_MOV(reg));
++               needs_dep[reg - first_write_grf] = false;
++               if (scan_inst_16wide)
++                  needs_dep[reg - first_write_grf + 1] = false;
++            }
++         }
+       }
+ 
+       /* Clear the flag for registers that actually got read (as expected). */
+@@ -2494,6 +2500,8 @@ fs_visitor::lower_uniform_pull_constant_loads()
+          inst->insert_before(setup2);
+          inst->opcode = FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7;
+          inst->src[1] = payload;
++
++         this->live_intervals_valid = false;
+       } else {
+          /* Before register allocation, we didn't tell the scheduler about the
+           * MRF we use.  We know it's safe to use this MRF because nothing
+diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+index db8f397..4c7991d 100644
+--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
++++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+@@ -190,6 +190,37 @@ fs_visitor::calculate_live_intervals()
+ 	    int reg = inst->src[i].reg;
+ 
+ 	    use[reg] = ip;
++
++            /* In most cases, a register can be written over safely by the
++             * same instruction that is its last use.  For a single
++             * instruction, the sources are dereferenced before writing of the
++             * destination starts (naturally).  This gets more complicated for
++             * simd16, because the instruction:
++             *
++             * mov(16)      g4<1>F      g4<8,8,1>F   g6<8,8,1>F
++             *
++             * is actually decoded in hardware as:
++             *
++             * mov(8)       g4<1>F      g4<8,8,1>F   g6<8,8,1>F
++             * mov(8)       g5<1>F      g5<8,8,1>F   g7<8,8,1>F
++             *
++             * Which is safe.  However, if we have uniform accesses
++             * happening, we get into trouble:
++             *
++             * mov(8)       g4<1>F      g4<0,1,0>F   g6<8,8,1>F
++             * mov(8)       g5<1>F      g4<0,1,0>F   g7<8,8,1>F
++             *
++             * Now our destination for the first instruction overwrote the
++             * second instruction's src0, and we get garbage for those 8
++             * pixels.  There's a similar issue for the pre-gen6
++             * pixel_x/pixel_y, which are registers of 16-bit values and thus
++             * would get stomped by the first decode as well.
++             */
++            if (dispatch_width == 16 && (inst->src[i].smear ||
++                                         (this->pixel_x.reg == reg ||
++                                          this->pixel_y.reg == reg))) {
++               use[reg]++;
++            }
+ 	 }
+       }
+ 
+@@ -264,28 +295,5 @@ fs_visitor::virtual_grf_interferes(int a, int b)
+    int start = MAX2(a_def, b_def);
+    int end = MIN2(a_use, b_use);
+ 
+-   /* If the register is used to store 16 values of less than float
+-    * size (only the case for pixel_[xy]), then we can't allocate
+-    * another dword-sized thing to that register that would be used in
+-    * the same instruction.  This is because when the GPU decodes (for
+-    * example):
+-    *
+-    * (declare (in ) vec4 gl_FragCoord at 0x97766a0)
+-    * add(16)         g6<1>F          g6<8,8,1>UW     0.5F { align1 compr };
+-    *
+-    * it's actually processed as:
+-    * add(8)         g6<1>F          g6<8,8,1>UW     0.5F { align1 };
+-    * add(8)         g7<1>F          g6.8<8,8,1>UW   0.5F { align1 sechalf };
+-    *
+-    * so our second half values in g6 got overwritten in the first
+-    * half.
+-    */
+-   if (dispatch_width == 16 && (this->pixel_x.reg == a ||
+-				this->pixel_x.reg == b ||
+-				this->pixel_y.reg == a ||
+-				this->pixel_y.reg == b)) {
+-      return start <= end;
+-   }
+-
+    return start < end;
+ }
+diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
+index ecc61c4..02ce57b 100644
+--- a/src/mesa/drivers/dri/i965/brw_state.h
++++ b/src/mesa/drivers/dri/i965/brw_state.h
+@@ -216,6 +216,8 @@ void gen7_set_surface_mcs_info(struct brw_context *brw,
+                                bool is_render_target);
+ void gen7_check_surface_setup(uint32_t *surf, bool is_render_target);
+ void gen7_init_vtable_surface_functions(struct brw_context *brw);
++void gen7_create_shader_time_surface(struct brw_context *brw,
++                                     uint32_t *out_offset);
+ 
+ /* brw_wm_sampler_state.c */
+ uint32_t translate_wrap_mode(GLenum wrap, bool using_nearest);
+diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+index 4da7eaa..2aefc0c 100644
+--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
++++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+@@ -137,14 +137,11 @@ const struct brw_tracked_state brw_vs_ubo_surfaces = {
+ static void
+ brw_vs_upload_binding_table(struct brw_context *brw)
+ {
+-   struct intel_context *intel = &brw->intel;
+    uint32_t *bind;
+    int i;
+ 
+    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
+-      intel->vtbl.create_constant_surface(brw, brw->shader_time.bo, 0,
+-                                          brw->shader_time.bo->size,
+-                                          &brw->vs.surf_offset[SURF_INDEX_VS_SHADER_TIME]);
++      gen7_create_shader_time_surface(brw, &brw->vs.surf_offset[SURF_INDEX_VS_SHADER_TIME]);
+ 
+       assert(brw->vs.prog_data->num_surfaces <= SURF_INDEX_VS_SHADER_TIME);
+       brw->vs.prog_data->num_surfaces = SURF_INDEX_VS_SHADER_TIME;
+diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+index 6ec7d71..657a56f 100644
+--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
++++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+@@ -770,7 +770,8 @@ brw_get_texture_swizzle(const struct gl_context *ctx,
+    case GL_RED:
+    case GL_RG:
+    case GL_RGB:
+-      swizzles[3] = SWIZZLE_ONE;
++      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
++         swizzles[3] = SWIZZLE_ONE;
+       break;
+    }
+ 
+@@ -1468,14 +1469,11 @@ const struct brw_tracked_state brw_wm_ubo_surfaces = {
+ static void
+ brw_upload_wm_binding_table(struct brw_context *brw)
+ {
+-   struct intel_context *intel = &brw->intel;
+    uint32_t *bind;
+    int i;
+ 
+    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
+-      intel->vtbl.create_constant_surface(brw, brw->shader_time.bo, 0,
+-                                          brw->shader_time.bo->size,
+-                                          &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]);
++      gen7_create_shader_time_surface(brw, &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]);
+    }
+ 
+    /* Might want to calculate nr_surfaces first, to avoid taking up so much
+diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c
+index d32f636..7ac5d5f 100644
+--- a/src/mesa/drivers/dri/i965/gen6_cc.c
++++ b/src/mesa/drivers/dri/i965/gen6_cc.c
+@@ -126,7 +126,7 @@ gen6_upload_blend_state(struct brw_context *brw)
+           * not read the alpha channel, but will instead use the correct
+           * implicit value for alpha.
+           */
+-         if (!_mesa_base_format_has_channel(rb->_BaseFormat, GL_TEXTURE_ALPHA_TYPE))
++         if (rb && !_mesa_base_format_has_channel(rb->_BaseFormat, GL_TEXTURE_ALPHA_TYPE))
+          {
+             srcRGB = brw_fix_xRGB_alpha(srcRGB);
+             srcA = brw_fix_xRGB_alpha(srcA);
+diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+index 24f1b9c..2913fc6 100644
+--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
++++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+@@ -413,6 +413,46 @@ gen7_create_constant_surface(struct brw_context *brw,
+    gen7_check_surface_setup(surf, false /* is_render_target */);
+ }
+ 
++/**
++ * Create a surface for shader time.
++ */
++void
++gen7_create_shader_time_surface(struct brw_context *brw, uint32_t *out_offset)
++{
++   struct intel_context *intel = &brw->intel;
++   const int w = brw->shader_time.bo->size - 1;
++
++   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
++                                    8 * 4, 32, out_offset);
++   memset(surf, 0, 8 * 4);
++
++   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
++             BRW_SURFACEFORMAT_RAW << BRW_SURFACE_FORMAT_SHIFT |
++             BRW_SURFACE_RC_READ_WRITE;
++
++   surf[1] = brw->shader_time.bo->offset; /* reloc */
++
++   surf[2] = SET_FIELD(w & 0x7f, GEN7_SURFACE_WIDTH) |
++             SET_FIELD((w >> 7) & 0x1fff, GEN7_SURFACE_HEIGHT);
++   surf[3] = SET_FIELD((w >> 20) & 0x7f, BRW_SURFACE_DEPTH);
++
++   /* Unlike texture or renderbuffer surfaces, we only do untyped operations
++    * on the shader_time surface, so there's no need to set HSW channel
++    * overrides.
++    */
++
++   /* Emit relocation to surface contents.  Section 5.1.1 of the gen4
++    * bspec ("Data Cache") says that the data cache does not exist as
++    * a separate cache and is just the sampler cache.
++    */
++   drm_intel_bo_emit_reloc(intel->batch.bo,
++                           *out_offset + 4,
++                           brw->shader_time.bo, 0,
++                           I915_GEM_DOMAIN_SAMPLER, 0);
++
++   gen7_check_surface_setup(surf, false /* is_render_target */);
++}
++
+ static void
+ gen7_update_null_renderbuffer_surface(struct brw_context *brw, unsigned unit)
+ {
+diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
+index 5ec93f1..4173c0f 100644
+--- a/src/mesa/drivers/dri/intel/intel_screen.c
++++ b/src/mesa/drivers/dri/intel/intel_screen.c
+@@ -312,7 +312,7 @@ intel_create_image_from_name(__DRIscreen *screen,
+        cpp = _mesa_get_format_bytes(image->format);
+     image->region = intel_region_alloc_for_handle(intelScreen,
+ 						  cpp, width, height,
+-						  pitch, name, "image");
++						  pitch * cpp, name, "image");
+     if (image->region == NULL) {
+        free(image);
+        return NULL;
+diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.c b/src/mesa/drivers/dri/nouveau/nouveau_driver.c
+index f56b3b2..6c119d5 100644
+--- a/src/mesa/drivers/dri/nouveau/nouveau_driver.c
++++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.c
+@@ -69,7 +69,8 @@ nouveau_flush(struct gl_context *ctx)
+ 		__DRIdri2LoaderExtension *dri2 = screen->dri2.loader;
+ 		__DRIdrawable *drawable = nctx->dri_context->driDrawablePriv;
+ 
+-		dri2->flushFrontBuffer(drawable, drawable->loaderPrivate);
++		if (drawable && drawable->loaderPrivate)
++			dri2->flushFrontBuffer(drawable, drawable->loaderPrivate);
+ 	}
+ }
+ 
+diff --git a/src/mesa/drivers/dri/nouveau/nv10_context.c b/src/mesa/drivers/dri/nouveau/nv10_context.c
+index 7eda4e0..4ffc4ef 100644
+--- a/src/mesa/drivers/dri/nouveau/nv10_context.c
++++ b/src/mesa/drivers/dri/nouveau/nv10_context.c
+@@ -469,7 +469,7 @@ nv10_context_create(struct nouveau_screen *screen, const struct gl_config *visua
+ 		goto fail;
+ 
+ 	/* 3D engine. */
+-	if (context_chipset(ctx) >= 0x17)
++	if (context_chipset(ctx) >= 0x17 && context_chipset(ctx) != 0x1a)
+ 		celsius_class = NV17_3D_CLASS;
+ 	else if (context_chipset(ctx) >= 0x11)
+ 		celsius_class = NV15_3D_CLASS;
+diff --git a/src/mesa/drivers/osmesa/Makefile.am b/src/mesa/drivers/osmesa/Makefile.am
+index c4b178b..2503401 100644
+--- a/src/mesa/drivers/osmesa/Makefile.am
++++ b/src/mesa/drivers/osmesa/Makefile.am
+@@ -24,6 +24,7 @@
+ AM_CPPFLAGS = \
+ 	-I$(top_srcdir)/include \
+ 	-I$(top_srcdir)/src/mapi \
++	-I$(top_builddir)/src/mapi \
+ 	-I$(top_srcdir)/src/mesa/ \
+ 	$(DEFINES) \
+ 	$(API_DEFINES)
+diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h
+index b22b994..8b23665 100644
+--- a/src/mesa/main/compiler.h
++++ b/src/mesa/main/compiler.h
+@@ -48,6 +48,8 @@
+ #include <float.h>
+ #include <stdarg.h>
+ 
++#include "c99_compat.h" /* inline, __func__, etc. */
++
+ 
+ #ifdef __cplusplus
+ extern "C" {
+@@ -111,30 +113,7 @@ extern "C" {
+ 
+ 
+ 
+-/**
+- * Function inlining
+- */
+-#ifndef inline
+-#  ifdef __cplusplus
+-     /* C++ supports inline keyword */
+-#  elif defined(__GNUC__)
+-#    define inline __inline__
+-#  elif defined(_MSC_VER)
+-#    define inline __inline
+-#  elif defined(__ICL)
+-#    define inline __inline
+-#  elif defined(__INTEL_COMPILER)
+-     /* Intel compiler supports inline keyword */
+-#  elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
+-#    define inline __inline
+-#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
+-     /* C99 supports inline keyword */
+-#  elif (__STDC_VERSION__ >= 199901L)
+-     /* C99 supports inline keyword */
+-#  else
+-#    define inline
+-#  endif
+-#endif
++/* XXX: Use standard `inline` keyword instead */
+ #ifndef INLINE
+ #  define INLINE inline
+ #endif
+@@ -177,35 +156,10 @@ extern "C" {
+ #  endif
+ #endif
+ 
+-/**
+- * The __FUNCTION__ gcc variable is generally only used for debugging.
+- * If we're not using gcc, define __FUNCTION__ as a cpp symbol here.
+- * Don't define it if using a newer Windows compiler.
+- */
++/* XXX: Use standard `__func__` instead */
+ #ifndef __FUNCTION__
+-# if !defined(__GNUC__) && !defined(__xlC__) &&	\
+-      (!defined(_MSC_VER) || _MSC_VER < 1300)
+-#  if (__STDC_VERSION__ >= 199901L) /* C99 */ || \
+-    (defined(__SUNPRO_C) && defined(__C99FEATURES__))
+-#   define __FUNCTION__ __func__
+-#  else
+-#   define __FUNCTION__ "<unknown>"
+-#  endif
+-# endif
++#  define __FUNCTION__ __func__
+ #endif
+-#ifndef __func__
+-#  if (__STDC_VERSION__ >= 199901L) || \
+-      (defined(__SUNPRO_C) && defined(__C99FEATURES__))
+-       /* __func__ is part of C99 */
+-#  elif defined(_MSC_VER)
+-#    if _MSC_VER >= 1300
+-#      define __func__ __FUNCTION__
+-#    else
+-#      define __func__ "<unknown>"
+-#    endif
+-#  endif
+-#endif
+-
+ 
+ /**
+  * Either define MESA_BIG_ENDIAN or MESA_LITTLE_ENDIAN, and CPU_TO_LE32.
+@@ -353,8 +307,9 @@ static INLINE GLuint CPU_TO_LE32(GLuint x)
+  * USE_IEEE: Determine if we're using IEEE floating point
+  */
+ #if defined(__i386__) || defined(__386__) || defined(__sparc__) || \
+-    defined(__s390x__) || defined(__powerpc__) || \
++    defined(__s390__) || defined(__s390x__) || defined(__powerpc__) || \
+     defined(__x86_64__) || \
++    defined(__m68k__) || \
+     defined(ia64) || defined(__ia64__) || \
+     defined(__hppa__) || defined(hpux) || \
+     defined(__mips) || defined(_MIPS_ARCH) || \
+diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
+index 257f839..61c1151 100644
+--- a/src/mesa/main/fbobject.c
++++ b/src/mesa/main/fbobject.c
+@@ -3160,7 +3160,9 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
+       }
+    }
+ 
+-   if (!mask) {
++   if (!mask ||
++       (srcX1 - srcX0) == 0 || (srcY1 - srcY0) == 0 ||
++       (dstX1 - dstX0) == 0 || (dstY1 - dstY0) == 0) {
+       return;
+    }
+ 
+diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
+index 5f4e2fa..6fb2f5d 100644
+--- a/src/mesa/main/get.c
++++ b/src/mesa/main/get.c
+@@ -34,6 +34,7 @@
+ #include "state.h"
+ #include "texcompress.h"
+ #include "framebuffer.h"
++#include "samplerobj.h"
+ 
+ /* This is a table driven implemetation of the glGet*v() functions.
+  * The basic idea is that most getters just look up an int somewhere
+@@ -823,7 +824,16 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
+       {
+          struct gl_sampler_object *samp =
+             ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler;
+-         v->value_int = samp ? samp->Name : 0;
++
++         /*
++          * The sampler object may have been deleted on another context,
++          * so we try to lookup the sampler object before returning its Name.
++          */
++         if (samp && _mesa_lookup_samplerobj(ctx, samp->Name)) {
++            v->value_int = samp->Name;
++         } else {
++            v->value_int = 0;
++         }
+       }
+       break;
+    /* GL_ARB_uniform_buffer_object */
+diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
+index 9aab889..15c1c4d 100644
+--- a/src/mesa/main/get_hash_params.py
++++ b/src/mesa/main/get_hash_params.py
+@@ -412,7 +412,7 @@ descriptor=[
+   [ "DEPTH_SCALE", "CONTEXT_FLOAT(Pixel.DepthScale), NO_EXTRA" ],
+   [ "DOUBLEBUFFER", "BUFFER_INT(Visual.doubleBufferMode), NO_EXTRA" ],
+   [ "DRAW_BUFFER", "BUFFER_ENUM(ColorDrawBuffer[0]), NO_EXTRA" ],
+-  [ "EDGE_FLAG", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ],
++  [ "EDGE_FLAG", "LOC_CUSTOM, TYPE_BOOLEAN, 0, extra_flush_current" ],
+   [ "FEEDBACK_BUFFER_SIZE", "CONTEXT_INT(Feedback.BufferSize), NO_EXTRA" ],
+   [ "FEEDBACK_BUFFER_TYPE", "CONTEXT_ENUM(Feedback.Type), NO_EXTRA" ],
+   [ "FOG_INDEX", "CONTEXT_FLOAT(Fog.Index), NO_EXTRA" ],
+diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
+index 3369623..8f906ae 100644
+--- a/src/mesa/main/mtypes.h
++++ b/src/mesa/main/mtypes.h
+@@ -1274,6 +1274,7 @@ struct gl_texture_object
+    GLfloat Priority;		/**< in [0,1] */
+    GLint BaseLevel;		/**< min mipmap level, OpenGL 1.2 */
+    GLint MaxLevel;		/**< max mipmap level, OpenGL 1.2 */
++   GLint ImmutableLevels;       /**< ES 3.0 / ARB_texture_view */
+    GLint _MaxLevel;		/**< actual max mipmap level (q in the spec) */
+    GLfloat _MaxLambda;		/**< = _MaxLevel - BaseLevel (q - b in spec) */
+    GLint CropRect[4];           /**< GL_OES_draw_texture */
+diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c
+index 319a444..5cff329 100644
+--- a/src/mesa/main/samplerobj.c
++++ b/src/mesa/main/samplerobj.c
+@@ -40,7 +40,7 @@
+ #include "main/samplerobj.h"
+ 
+ 
+-static struct gl_sampler_object *
++struct gl_sampler_object *
+ _mesa_lookup_samplerobj(struct gl_context *ctx, GLuint name)
+ {
+    if (name == 0)
+@@ -206,9 +206,19 @@ _mesa_DeleteSamplers(GLsizei count, const GLuint *samplers)
+ 
+    for (i = 0; i < count; i++) {
+       if (samplers[i]) {
++         GLuint j;
+          struct gl_sampler_object *sampObj =
+             _mesa_lookup_samplerobj(ctx, samplers[i]);
++   
+          if (sampObj) {
++            /* If the sampler is currently bound, unbind it. */
++            for (j = 0; j < ctx->Const.MaxCombinedTextureImageUnits; j++) {
++               if (ctx->Texture.Unit[j].Sampler == sampObj) {
++                  FLUSH_VERTICES(ctx, _NEW_TEXTURE);
++                  _mesa_reference_sampler_object(ctx, &ctx->Texture.Unit[j].Sampler, NULL);
++               }
++            }
++
+             /* The ID is immediately freed for re-use */
+             _mesa_HashRemove(ctx->Shared->SamplerObjects, samplers[i]);
+             /* But the object exists until its reference count goes to zero */
+diff --git a/src/mesa/main/samplerobj.h b/src/mesa/main/samplerobj.h
+index 3114257..69e3899 100644
+--- a/src/mesa/main/samplerobj.h
++++ b/src/mesa/main/samplerobj.h
+@@ -62,6 +62,8 @@ _mesa_reference_sampler_object(struct gl_context *ctx,
+       _mesa_reference_sampler_object_(ctx, ptr, samp);
+ }
+ 
++extern struct gl_sampler_object *
++_mesa_lookup_samplerobj(struct gl_context *ctx, GLuint name);
+ 
+ extern struct gl_sampler_object *
+ _mesa_new_sampler_object(struct gl_context *ctx, GLuint name);
+diff --git a/src/mesa/main/tests/hash_table/Makefile.am b/src/mesa/main/tests/hash_table/Makefile.am
+index 272c63a..f63841d 100644
+--- a/src/mesa/main/tests/hash_table/Makefile.am
++++ b/src/mesa/main/tests/hash_table/Makefile.am
+@@ -19,6 +19,7 @@
+ #  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ AM_CPPFLAGS = \
++	-I$(top_srcdir)/include \
+ 	-I$(top_srcdir)/src/mesa/main \
+ 	$(API_DEFINES) $(DEFINES) $(INCLUDE_DIRS)
+ 
+diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
+index 7299a4b..74b09ef 100644
+--- a/src/mesa/main/texgetimage.c
++++ b/src/mesa/main/texgetimage.c
+@@ -518,6 +518,7 @@ get_tex_rgba(struct gl_context *ctx, GLuint dimensions,
+    if (type_needs_clamping(type)) {
+       /* the returned image type can't have negative values */
+       if (dataType == GL_FLOAT ||
++          dataType == GL_HALF_FLOAT ||
+           dataType == GL_SIGNED_NORMALIZED ||
+           format == GL_LUMINANCE ||
+           format == GL_LUMINANCE_ALPHA) {
+diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
+index 1b9525b..1b91b89 100644
+--- a/src/mesa/main/teximage.c
++++ b/src/mesa/main/teximage.c
+@@ -1362,6 +1362,7 @@ _mesa_legal_texture_dimensions(struct gl_context *ctx, GLenum target,
+          return GL_FALSE;
+       return GL_TRUE;
+ 
++   case GL_TEXTURE_CUBE_MAP:
+    case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+@@ -3438,19 +3439,21 @@ copyteximage(struct gl_context *ctx, GLuint dims,
+          _mesa_init_teximage_fields(ctx, texImage, width, height, 1,
+                                     border, internalFormat, texFormat);
+ 
+-         /* Allocate texture memory (no pixel data yet) */
+-         ctx->Driver.AllocTextureImageBuffer(ctx, texImage);
++         if (width && height) {
++            /* Allocate texture memory (no pixel data yet) */
++            ctx->Driver.AllocTextureImageBuffer(ctx, texImage);
+ 
+-         if (_mesa_clip_copytexsubimage(ctx, &dstX, &dstY, &srcX, &srcY,
+-                                        &width, &height)) {
+-            struct gl_renderbuffer *srcRb =
+-               get_copy_tex_image_source(ctx, texImage->TexFormat);
++            if (_mesa_clip_copytexsubimage(ctx, &dstX, &dstY, &srcX, &srcY,
++                                           &width, &height)) {
++               struct gl_renderbuffer *srcRb =
++                  get_copy_tex_image_source(ctx, texImage->TexFormat);
+ 
+-            ctx->Driver.CopyTexSubImage(ctx, dims, texImage, dstX, dstY, dstZ,
+-                                        srcRb, srcX, srcY, width, height);
+-         }
++               ctx->Driver.CopyTexSubImage(ctx, dims, texImage, dstX, dstY, dstZ,
++                                           srcRb, srcX, srcY, width, height);
++            }
+ 
+-         check_gen_mipmap(ctx, target, texObj, level);
++            check_gen_mipmap(ctx, target, texObj, level);
++         }
+ 
+          _mesa_update_fbo_texture(ctx, texObj, face, level);
+ 
+diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
+index 6f18ec6..dd67baa 100644
+--- a/src/mesa/main/texparam.c
++++ b/src/mesa/main/texparam.c
+@@ -1432,6 +1432,12 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
+          *params = (GLfloat) obj->Immutable;
+          break;
+ 
++      case GL_TEXTURE_IMMUTABLE_LEVELS:
++         if (!_mesa_is_gles3(ctx))
++            goto invalid_pname;
++         *params = (GLfloat) obj->ImmutableLevels;
++         break;
++
+       case GL_REQUIRED_TEXTURE_IMAGE_UNITS_OES:
+          if (!_mesa_is_gles(ctx) || !ctx->Extensions.OES_EGL_image_external)
+             goto invalid_pname;
+@@ -1609,6 +1615,12 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
+          *params = (GLint) obj->Immutable;
+          break;
+ 
++      case GL_TEXTURE_IMMUTABLE_LEVELS:
++         if (!_mesa_is_gles3(ctx))
++            goto invalid_pname;
++         *params = obj->ImmutableLevels;
++         break;
++
+       case GL_REQUIRED_TEXTURE_IMAGE_UNITS_OES:
+          if (!_mesa_is_gles(ctx) || !ctx->Extensions.OES_EGL_image_external)
+             goto invalid_pname;
+diff --git a/src/mesa/main/texstorage.c b/src/mesa/main/texstorage.c
+index 00f19ba..675fd74 100644
+--- a/src/mesa/main/texstorage.c
++++ b/src/mesa/main/texstorage.c
+@@ -397,6 +397,7 @@ texstorage(GLuint dims, GLenum target, GLsizei levels, GLenum internalformat,
+       }
+ 
+       texObj->Immutable = GL_TRUE;
++      texObj->ImmutableLevels = levels;
+    }
+ }
+ 
+diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
+index efb386e..f5b5c41 100644
+--- a/src/mesa/vbo/vbo_save_draw.c
++++ b/src/mesa/vbo/vbo_save_draw.c
+@@ -253,7 +253,7 @@ vbo_save_playback_vertex_list(struct gl_context *ctx, void *data)
+    struct vbo_save_context *save = &vbo_context(ctx)->save;
+    GLboolean remap_vertex_store = GL_FALSE;
+ 
+-   if (save->vertex_store->buffer) {
++   if (save->vertex_store && save->vertex_store->buffer) {
+       /* The vertex store is currently mapped but we're about to replay
+        * a display list.  This can happen when a nested display list is
+        * being build with GL_COMPILE_AND_EXECUTE.
diff --git a/mesa.spec b/mesa.spec
index cdf5126..7d9f01b 100644
--- a/mesa.spec
+++ b/mesa.spec
@@ -47,8 +47,8 @@
 
 Summary: Mesa graphics libraries
 Name: mesa
-Version: 9.1
-Release: 6%{?dist}
+Version: 9.1.1
+Release: 1%{?dist}
 License: MIT
 Group: System Environment/Libraries
 URL: http://www.mesa3d.org
@@ -64,7 +64,7 @@ Source3: make-git-snapshot.sh
 Source4: Mesa-MLAA-License-Clarification-Email.txt
 
 # git diff-tree -p mesa-9.1..origin/9.1 > `git describe origin/9.1`.patch
-Patch0: mesa-9.1-53-gd0ccb5b.patch
+Patch0: mesa-9.1.1-53-g3cff41c.patch
 
 Patch1: nv50-fix-build.patch
 Patch2: intel-revert-gl3.patch
@@ -592,6 +592,9 @@ rm -rf $RPM_BUILD_ROOT
 %endif
 
 %changelog
+* Sat Apr 27 2013 Dave Airlie <airlied at redhat.com> 9.1.1-1
+- rebase to Mesa 9.1.1 + fixes from git
+
 * Thu Apr 11 2013 Dave Airlie <airlied at redhat.com> 9.1-6
 - enable glx tls for glamor to work properly
 
diff --git a/sources b/sources
index 9dba37c..7ecd241 100644
--- a/sources
+++ b/sources
@@ -1 +1 @@
-d3891e02215422e120271d976ff1947e  MesaLib-9.1.tar.bz2
+6ea2bdc3b7ecfb4257b39814b4182580  MesaLib-9.1.1.tar.bz2