[kernel/f14/master] nouveau: misc fixes from upstream + NVAF support

Ben Skeggs bskeggs at fedoraproject.org
Mon Sep 6 05:59:34 UTC 2010


commit 1001ab34f7e74ce2704af0746e23abe4a84ece32
Author: Ben Skeggs <bskeggs at redhat.com>
Date:   Mon Sep 6 15:55:25 2010 +1000

    nouveau: misc fixes from upstream + NVAF support
    
    This looks far far worse than it is... The reason:
     - NVAF support came after nv50_grctx.c was de-magiced (i.e. lots of
       comments explaining what all the hexified numbers are for)
     - nv50_grctx.c de-magic depended on an earlier commit which changed
       all gpu object accessors from 32-bit indices to byte offsets
    
    All in all, it was easier to pull in those big scary-looking (but
    harmless) commits than to risk messing up a backport.

 drm-nouveau-race-fix.patch |   14 +-
 drm-nouveau-updates.patch  | 7586 ++++++++++++++++++++++++++++++++++++++++----
 kernel.spec                |    5 +-
 3 files changed, 6980 insertions(+), 625 deletions(-)
---
diff --git a/drm-nouveau-race-fix.patch b/drm-nouveau-race-fix.patch
index 8b6f4df..bf6cc4f 100644
--- a/drm-nouveau-race-fix.patch
+++ b/drm-nouveau-race-fix.patch
@@ -1,7 +1,7 @@
-From 6b33cff9cd4f6f26df99e1486d53c91698d133b4 Mon Sep 17 00:00:00 2001
+From 4733f633c4bfb0672d5bd88a8d19a03e27a3c1d0 Mon Sep 17 00:00:00 2001
 From: Ben Skeggs <bskeggs at redhat.com>
 Date: Fri, 23 Jul 2010 09:06:52 +1000
-Subject: [PATCH 5/5] drm-nouveau-race-fix
+Subject: [PATCH 2/2] drm-nouveau-race-fix
 
 drm/nouveau: fix race condition when under memory pressure
 
@@ -30,7 +30,7 @@ Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
  3 files changed, 42 insertions(+), 10 deletions(-)
 
 diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
-index f1357f1..f552c76 100644
+index 553a01d..5e62d1b 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
 @@ -36,6 +36,21 @@
@@ -56,10 +56,10 @@ index f1357f1..f552c76 100644
  nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
  {
 diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
-index c001574..922ef43 100644
+index 2eb622b..70a16f3 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h
 +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
-@@ -1165,6 +1165,7 @@ extern u16 nouveau_bo_rd16(struct nouveau_bo *nvbo, unsigned index);
+@@ -1167,6 +1167,7 @@ extern u16 nouveau_bo_rd16(struct nouveau_bo *nvbo, unsigned index);
  extern void nouveau_bo_wr16(struct nouveau_bo *nvbo, unsigned index, u16 val);
  extern u32 nouveau_bo_rd32(struct nouveau_bo *nvbo, unsigned index);
  extern void nouveau_bo_wr32(struct nouveau_bo *nvbo, unsigned index, u32 val);
@@ -68,7 +68,7 @@ index c001574..922ef43 100644
  /* nouveau_fence.c */
  struct nouveau_fence;
 diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
-index 547f2c2..a915dcd 100644
+index 62ac673..613f878 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
 @@ -361,16 +361,11 @@ validate_list(struct nouveau_channel *chan, struct list_head *list,
@@ -137,5 +137,5 @@ index 547f2c2..a915dcd 100644
  	ret = nouveau_gem_pushbuf_validate(chan, file_priv, bo, req->buffers,
  					   req->nr_buffers, &op, &do_reloc);
 -- 
-1.7.2
+1.7.2.2
 
diff --git a/drm-nouveau-updates.patch b/drm-nouveau-updates.patch
index 08baed1..82ded47 100644
--- a/drm-nouveau-updates.patch
+++ b/drm-nouveau-updates.patch
@@ -1,7 +1,7 @@
-From 0983b95112413e5239b09f1f1a4500c0bccfe3b7 Mon Sep 17 00:00:00 2001
+From c156fa3c71b6581b34526a9b2b649c3f4d57dd3e Mon Sep 17 00:00:00 2001
 From: Ben Skeggs <bskeggs at redhat.com>
 Date: Tue, 1 Jun 2010 15:32:24 +1000
-Subject: [PATCH 4/4] drm-nouveau-updates
+Subject: [PATCH 1/2] drm-nouveau-updates
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit
@@ -656,73 +656,268 @@ drm/nouveau: Don't try DDC on the dummy I2C channel.
 Signed-off-by: Francisco Jerez <currojerez at riseup.net>
 
 drm/nv50: fix minor thinko from nvc0 changes
+
+drm/nouveau: check for error when allocating/mapping dummy page
+
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nouveau: remove warning about unknown tmds table revisions
+
+This message is apparently confusing people, and is being blamed for some
+modesetting issues.  Let's remove the message, and instead replace it
+with an unconditional printout of the table revision.
+
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nouveau: punt some more log messages to debug level
+
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nv50-nvc0: ramht_size is meant to be in bytes, not entries
+
+Fixes an infinite loop that can happen in RAMHT lookup.
+
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nouveau: Add TV-out quirk for an MSI nForce2 IGP.
+
+The blob also thinks there's a TV connected, so hardware bug...
+
+Signed-off-by: Francisco Jerez <currojerez at riseup.net>
+
+drm/nouveau: Workaround missing GPIO tables on an Apple iMac G4 NV18.
+
+This should fix the reported TV-out load detection false positives
+(fdo bug 29455).
+
+Reported-by: Vlado Plaga <rechner at vlado-do.de>
+Signed-off-by: Francisco Jerez <currojerez at riseup.net>
+
+drm/nvc0: fix thinko in instmem suspend/resume
+
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nv50: calculate vram reordering block size
+
+Will be used at a later point when we plug in an alternative VRAM memory
+manager for GeForce 8+ boards.
+
+Based on pscnv code to do the same.
+
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+Signed-off-by: Marcin Kościelnicki <koriakin at 0x04.net>
+
+drm/nv50: add dcb type 14 to enum to prevent compiler complaint
+
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nouveau: Use a helper function to match PCI device/subsystem IDs.
+
+Signed-off-by: Francisco Jerez <currojerez at riseup.net>
+
+drm/nv30: Apply modesetting to the correct slave encoder
+
+Signed-off-by: Patrice Mandin <patmandin at gmail.com>
+Reviewed-by: Francisco Jerez <currojerez at riseup.net>
+
+drm/nouveau: Fix backlight control on PPC machines with an internal TMDS panel.
+
+This commit fixes fdo bug 29685.
+
+Reported-by: Vlado Plaga <rechner at vlado-do.de>
+Signed-off-by: Francisco Jerez <currojerez at riseup.net>
+
+drm/nouveau: Fix TMDS on some DCB1.5 boards.
+
+The TMDS output of an nv11 was being detected as LVDS, because it uses
+DCB type 2 for TMDS instead of type 4.
+
+Reported-by: Bertrand VIEILLE <Vieille.Bertrand at free.fr>
+Signed-off-by: Francisco Jerez <currojerez at riseup.net>
+
+drm/nv20: Don't use pushbuf calls on the original nv20.
+
+The "return" command is buggy on the original nv20, it jumps back to
+the caller address as expected, but it doesn't clear the subroutine
+active bit making the subsequent pushbuf calls fail with a "stack"
+overflow.
+
+Signed-off-by: Francisco Jerez <currojerez at riseup.net>
+
+drm/nouveau: Fix suspend on some nv4x AGP cards.
+
+On some nv4x cards (specifically, the ones that use an internal
+PCIE->AGP bridge) the AGP controller state isn't preserved after a
+suspend/resume cycle, and the AGP control registers have moved from
+0x18xx to 0x100xx, so the FW check in nouveau_mem_reset_agp() doesn't
+quite work. Check "dev->agp->mode" instead.
+
+Signed-off-by: Francisco Jerez <currojerez at riseup.net>
+
+drm/nv20: Use the nv30 CRTC bandwidth calculation code.
+
+nv2x CRTC FIFOs are as large as in nv3x (4kB it seems), and the FIFO
+control registers have the same layout: we can make them share the
+same implementation.
+
+Previously we were using the nv1x code, but the calculated FIFO
+watermarks are usually too low for nv2x and they cause horrible
+scanout artifacts. They've gone unnoticed until now because we've been
+leaving one of the bandwidth regs uninitialized (CRE 47, which
+contains the most significant bits of FFLWM), so everything seemed to
+work fine except in some cases after a cold boot, depending on the
+memory bandwidth and pixel clocks used.
+
+Signed-off-by: Francisco Jerez <currojerez at riseup.net>
+
+drm/nv50: add new accelerated bo move function
+
+Hopefully this one will be better able to cope with moving tiled buffers
+around without getting them all scrambled as a result.
+
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nouveau: move check for no-op bo move before memcpy fallback
+
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nouveau: remove second map of notifier bo
+
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nouveau: require explicit unmap of kmapped bos
+
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nv17-nv4x: Fix analog load detection false positive on rare occasions.
+
+On some boards the residual current that DAC outputs can draw when they're
+disconnected can be high enough to give a false load detection
+positive (I've only seen it in the S-video luma output of some cards,
+but just to be sure). The output line capacitance is limited and
+sampling twice should fix it reliably.
+
+Signed-off-by: Francisco Jerez <currojerez at riseup.net>
+
+drm/nv40: Try to set up CRE_LCD even if it has unknown bits set.
+
+They don't seem to do anything useful, and we really want to program
+CRE_LCD if we aren't lucky enough to find the right CRTC binding
+already set.
+
+Signed-off-by: Francisco Jerez <currojerez at riseup.net>
+
+drm/nouveau: have nv_mask return original register value
+
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nv50: initialize ramht_refs list for faked 0 channel
+
+We need it for PFIFO_INTR_CACHE_ERROR interrupt handling,
+because nouveau_fifo_swmthd looks for matching gpuobj in
+ramht_refs list.
+It fixes kernel panic in nouveau_gpuobj_ref_find.
+
+Signed-off-by: Marcin Slusarz <marcin.slusarz at gmail.com>
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nouveau: move ramht code out of nouveau_object.c, nothing to see here
+
+Reviewed-by: Francisco Jerez <currojerez at riseup.net>
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nouveau: modify object accessors, offset in bytes rather than dwords
+
+Reviewed-by: Francisco Jerez <currojerez at riseup.net>
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nv50: demagic grctx, and add NVAF support
+
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+Signed-off-by: Marcin Kościelnicki <koriakin at 0x04.net>
+
+drm/nv50: move vm trap to nv50_fb.c
+
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nv50: report BAR access faults
+
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
 ---
  drivers/gpu/drm/drm_crtc_helper.c           |   22 +-
  drivers/gpu/drm/i2c/ch7006_drv.c            |   22 +-
  drivers/gpu/drm/i2c/ch7006_priv.h           |    2 +-
  drivers/gpu/drm/nouveau/Makefile            |   12 +-
- drivers/gpu/drm/nouveau/nouveau_acpi.c      |   38 ++-
- drivers/gpu/drm/nouveau/nouveau_bios.c      |  820 ++++++++++++++++++++++-----
- drivers/gpu/drm/nouveau/nouveau_bios.h      |    5 +-
- drivers/gpu/drm/nouveau/nouveau_bo.c        |   15 +-
- drivers/gpu/drm/nouveau/nouveau_calc.c      |    4 +-
- drivers/gpu/drm/nouveau/nouveau_channel.c   |    5 -
- drivers/gpu/drm/nouveau/nouveau_connector.c |  417 ++++++--------
+ drivers/gpu/drm/nouveau/nouveau_acpi.c      |   38 +-
+ drivers/gpu/drm/nouveau/nouveau_bios.c      |  910 ++++++--
+ drivers/gpu/drm/nouveau/nouveau_bios.h      |    6 +-
+ drivers/gpu/drm/nouveau/nouveau_bo.c        |  223 ++-
+ drivers/gpu/drm/nouveau/nouveau_calc.c      |   10 +-
+ drivers/gpu/drm/nouveau/nouveau_channel.c   |    6 +-
+ drivers/gpu/drm/nouveau/nouveau_connector.c |  417 ++--
  drivers/gpu/drm/nouveau/nouveau_connector.h |    7 +-
- drivers/gpu/drm/nouveau/nouveau_dma.c       |    8 +-
- drivers/gpu/drm/nouveau/nouveau_dp.c        |  128 +++--
+ drivers/gpu/drm/nouveau/nouveau_dma.c       |    7 -
+ drivers/gpu/drm/nouveau/nouveau_dp.c        |  128 +-
  drivers/gpu/drm/nouveau/nouveau_drv.c       |   39 +-
- drivers/gpu/drm/nouveau/nouveau_drv.h       |  178 ++++---
+ drivers/gpu/drm/nouveau/nouveau_drv.h       |  203 +-
  drivers/gpu/drm/nouveau/nouveau_encoder.h   |   16 +-
  drivers/gpu/drm/nouveau/nouveau_fbcon.c     |    4 +-
  drivers/gpu/drm/nouveau/nouveau_fence.c     |   35 +-
- drivers/gpu/drm/nouveau/nouveau_gem.c       |   11 +-
- drivers/gpu/drm/nouveau/nouveau_grctx.c     |  160 ------
- drivers/gpu/drm/nouveau/nouveau_hw.c        |   11 +-
- drivers/gpu/drm/nouveau/nouveau_i2c.c       |   83 ++-
+ drivers/gpu/drm/nouveau/nouveau_gem.c       |   15 +-
+ drivers/gpu/drm/nouveau/nouveau_grctx.c     |  160 --
+ drivers/gpu/drm/nouveau/nouveau_grctx.h     |    2 +-
+ drivers/gpu/drm/nouveau/nouveau_hw.c        |   13 +-
+ drivers/gpu/drm/nouveau/nouveau_i2c.c       |   83 +-
  drivers/gpu/drm/nouveau/nouveau_i2c.h       |   11 +-
- drivers/gpu/drm/nouveau/nouveau_irq.c       |    6 +-
- drivers/gpu/drm/nouveau/nouveau_mem.c       |  327 +++---------
+ drivers/gpu/drm/nouveau/nouveau_irq.c       |   70 +-
+ drivers/gpu/drm/nouveau/nouveau_mem.c       |  404 ++---
  drivers/gpu/drm/nouveau/nouveau_notifier.c  |   30 +-
- drivers/gpu/drm/nouveau/nouveau_object.c    |  105 +---
- drivers/gpu/drm/nouveau/nouveau_reg.h       |  109 +++--
- drivers/gpu/drm/nouveau/nouveau_sgdma.c     |   46 +--
- drivers/gpu/drm/nouveau/nouveau_state.c     |  340 +++++++----
- drivers/gpu/drm/nouveau/nv04_crtc.c         |    8 +
- drivers/gpu/drm/nouveau/nv04_dac.c          |   50 ++-
- drivers/gpu/drm/nouveau/nv04_dfp.c          |   85 +++-
- drivers/gpu/drm/nouveau/nv04_display.c      |   90 ++--
- drivers/gpu/drm/nouveau/nv04_fifo.c         |   20 +-
+ drivers/gpu/drm/nouveau/nouveau_object.c    |  325 +--
+ drivers/gpu/drm/nouveau/nouveau_ramht.c     |  160 ++
+ drivers/gpu/drm/nouveau/nouveau_ramht.h     |   31 +
+ drivers/gpu/drm/nouveau/nouveau_reg.h       |  109 +-
+ drivers/gpu/drm/nouveau/nouveau_sgdma.c     |  108 +-
+ drivers/gpu/drm/nouveau/nouveau_state.c     |  340 ++-
+ drivers/gpu/drm/nouveau/nv04_crtc.c         |   11 +-
+ drivers/gpu/drm/nouveau/nv04_dac.c          |   60 +-
+ drivers/gpu/drm/nouveau/nv04_dfp.c          |  145 +-
+ drivers/gpu/drm/nouveau/nv04_display.c      |   90 +-
+ drivers/gpu/drm/nouveau/nv04_fifo.c         |   28 +-
  drivers/gpu/drm/nouveau/nv04_graph.c        |    5 +-
  drivers/gpu/drm/nouveau/nv04_instmem.c      |   21 +-
  drivers/gpu/drm/nouveau/nv04_mc.c           |    4 +
- drivers/gpu/drm/nouveau/nv04_tv.c           |  133 ++---
+ drivers/gpu/drm/nouveau/nv04_tv.c           |  133 +-
  drivers/gpu/drm/nouveau/nv10_fifo.c         |   10 -
- drivers/gpu/drm/nouveau/nv10_gpio.c         |   92 +++
- drivers/gpu/drm/nouveau/nv10_graph.c        |  175 ++++--
- drivers/gpu/drm/nouveau/nv17_gpio.c         |   92 ---
- drivers/gpu/drm/nouveau/nv17_tv.c           |   65 ++-
- drivers/gpu/drm/nouveau/nv20_graph.c        |   96 ++--
- drivers/gpu/drm/nouveau/nv30_fb.c           |   95 +++
+ drivers/gpu/drm/nouveau/nv10_gpio.c         |   92 +
+ drivers/gpu/drm/nouveau/nv10_graph.c        |  175 +-
+ drivers/gpu/drm/nouveau/nv17_gpio.c         |   92 -
+ drivers/gpu/drm/nouveau/nv17_tv.c           |   82 +-
+ drivers/gpu/drm/nouveau/nv20_graph.c        |  564 +++---
+ drivers/gpu/drm/nouveau/nv30_fb.c           |   95 +
  drivers/gpu/drm/nouveau/nv40_fifo.c         |    8 -
- drivers/gpu/drm/nouveau/nv40_graph.c        |   58 +--
+ drivers/gpu/drm/nouveau/nv40_graph.c        |   62 +-
+ drivers/gpu/drm/nouveau/nv40_grctx.c        |    6 +-
  drivers/gpu/drm/nouveau/nv40_mc.c           |    2 +-
- drivers/gpu/drm/nouveau/nv50_crtc.c         |   65 +--
+ drivers/gpu/drm/nouveau/nv50_crtc.c         |   67 +-
  drivers/gpu/drm/nouveau/nv50_dac.c          |   43 +-
- drivers/gpu/drm/nouveau/nv50_display.c      |  424 +++++++++------
+ drivers/gpu/drm/nouveau/nv50_display.c      |  435 +++--
  drivers/gpu/drm/nouveau/nv50_display.h      |    6 +-
- drivers/gpu/drm/nouveau/nv50_fifo.c         |  126 ++---
- drivers/gpu/drm/nouveau/nv50_gpio.c         |   35 ++
- drivers/gpu/drm/nouveau/nv50_graph.c        |   86 ++--
- drivers/gpu/drm/nouveau/nv50_instmem.c      |   67 +--
- drivers/gpu/drm/nouveau/nv50_sor.c          |  105 ++--
- drivers/gpu/drm/nouveau/nvc0_fb.c           |   38 ++
- drivers/gpu/drm/nouveau/nvc0_fifo.c         |   95 +++
- drivers/gpu/drm/nouveau/nvc0_graph.c        |   74 +++
- drivers/gpu/drm/nouveau/nvc0_instmem.c      |  231 ++++++++
+ drivers/gpu/drm/nouveau/nv50_fb.c           |   39 +
+ drivers/gpu/drm/nouveau/nv50_fifo.c         |  335 ++--
+ drivers/gpu/drm/nouveau/nv50_gpio.c         |   35 +
+ drivers/gpu/drm/nouveau/nv50_graph.c        |  104 +-
+ drivers/gpu/drm/nouveau/nv50_grctx.c        | 3305 +++++++++++++++++----------
+ drivers/gpu/drm/nouveau/nv50_instmem.c      |   81 +-
+ drivers/gpu/drm/nouveau/nv50_sor.c          |  105 +-
+ drivers/gpu/drm/nouveau/nvc0_fb.c           |   38 +
+ drivers/gpu/drm/nouveau/nvc0_fifo.c         |   95 +
+ drivers/gpu/drm/nouveau/nvc0_graph.c        |   74 +
+ drivers/gpu/drm/nouveau/nvc0_instmem.c      |  234 ++
  drivers/gpu/drm/nouveau/nvreg.h             |   22 -
- 64 files changed, 3285 insertions(+), 2257 deletions(-)
+ 70 files changed, 6456 insertions(+), 4215 deletions(-)
  delete mode 100644 drivers/gpu/drm/nouveau/nouveau_grctx.c
+ create mode 100644 drivers/gpu/drm/nouveau/nouveau_ramht.c
+ create mode 100644 drivers/gpu/drm/nouveau/nouveau_ramht.h
  create mode 100644 drivers/gpu/drm/nouveau/nv10_gpio.c
  delete mode 100644 drivers/gpu/drm/nouveau/nv17_gpio.c
  create mode 100644 drivers/gpu/drm/nouveau/nv30_fb.c
@@ -857,7 +1052,7 @@ index 9487123..17667b7 100644
  
  	struct ch7006_state state;
 diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile
-index acd31ed..e9b06e4 100644
+index acd31ed..d6cfbf2 100644
 --- a/drivers/gpu/drm/nouveau/Makefile
 +++ b/drivers/gpu/drm/nouveau/Makefile
 @@ -9,20 +9,20 @@ nouveau-y := nouveau_drv.o nouveau_state.o nouveau_channel.o nouveau_mem.o \
@@ -865,7 +1060,7 @@ index acd31ed..e9b06e4 100644
               nouveau_hw.o nouveau_calc.o nouveau_bios.o nouveau_i2c.o \
               nouveau_display.o nouveau_connector.o nouveau_fbcon.o \
 -             nouveau_dp.o nouveau_grctx.o \
-+             nouveau_dp.o \
++             nouveau_dp.o nouveau_ramht.o \
               nv04_timer.o \
               nv04_mc.o nv40_mc.o nv50_mc.o \
 -             nv04_fb.o nv10_fb.o nv40_fb.o nv50_fb.o \
@@ -955,7 +1150,7 @@ index d4bcca8..c17a055 100644
 +	return 0;
 +}
 diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
-index e492919..eb77710 100644
+index e492919..8fa3396 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_bios.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
 @@ -28,6 +28,8 @@
@@ -2043,7 +2238,52 @@ index e492919..eb77710 100644
  	{ NULL                                , 0   , NULL                            }
  };
  
-@@ -4068,7 +4561,7 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
+@@ -3376,27 +3869,10 @@ static int call_lvds_manufacturer_script(struct drm_device *dev, struct dcb_entr
+ 	}
+ #ifdef __powerpc__
+ 	/* Powerbook specific quirks */
+-	if ((dev->pci_device & 0xffff) == 0x0179 ||
+-	    (dev->pci_device & 0xffff) == 0x0189 ||
+-	    (dev->pci_device & 0xffff) == 0x0329) {
+-		if (script == LVDS_RESET) {
+-			nv_write_tmds(dev, dcbent->or, 0, 0x02, 0x72);
+-
+-		} else if (script == LVDS_PANEL_ON) {
+-			bios_wr32(bios, NV_PBUS_DEBUG_DUALHEAD_CTL,
+-				  bios_rd32(bios, NV_PBUS_DEBUG_DUALHEAD_CTL)
+-				  | (1 << 31));
+-			bios_wr32(bios, NV_PCRTC_GPIO_EXT,
+-				  bios_rd32(bios, NV_PCRTC_GPIO_EXT) | 1);
+-
+-		} else if (script == LVDS_PANEL_OFF) {
+-			bios_wr32(bios, NV_PBUS_DEBUG_DUALHEAD_CTL,
+-				  bios_rd32(bios, NV_PBUS_DEBUG_DUALHEAD_CTL)
+-				  & ~(1 << 31));
+-			bios_wr32(bios, NV_PCRTC_GPIO_EXT,
+-				  bios_rd32(bios, NV_PCRTC_GPIO_EXT) & ~3);
+-		}
+-	}
++	if (script == LVDS_RESET &&
++	    (dev->pci_device == 0x0179 || dev->pci_device == 0x0189 ||
++	     dev->pci_device == 0x0329))
++		nv_write_tmds(dev, dcbent->or, 0, 0x02, 0x72);
+ #endif
+ 
+ 	return 0;
+@@ -3888,11 +4364,8 @@ int nouveau_bios_parse_lvds_table(struct drm_device *dev, int pxclk, bool *dl, b
+ 	 *
+ 	 * For the moment, a quirk will do :)
+ 	 */
+-	if ((dev->pdev->device == 0x01d7) &&
+-	    (dev->pdev->subsystem_vendor == 0x1028) &&
+-	    (dev->pdev->subsystem_device == 0x01c2)) {
++	if (nv_match_device(dev, 0x01d7, 0x1028, 0x01c2))
+ 		bios->fp.duallink_transition_clk = 80000;
+-	}
+ 
+ 	/* set dual_link flag for EDID case */
+ 	if (pxclk && (chip_version < 0x25 || chip_version > 0x28))
+@@ -4068,7 +4541,7 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
  					  bios->display.script_table_ptr,
  					  table[2], table[3], table[0] >= 0x21);
  	if (!otable) {
@@ -2052,7 +2292,34 @@ index e492919..eb77710 100644
  		return 1;
  	}
  
-@@ -4125,7 +4618,7 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
+@@ -4094,7 +4567,7 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
+ 			return 1;
+ 		}
+ 
+-		NV_TRACE(dev, "0x%04X: parsing output script 0\n", script);
++		NV_DEBUG_KMS(dev, "0x%04X: parsing output script 0\n", script);
+ 		nouveau_bios_run_init_table(dev, script, dcbent);
+ 	} else
+ 	if (pxclk == -1) {
+@@ -4104,7 +4577,7 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
+ 			return 1;
+ 		}
+ 
+-		NV_TRACE(dev, "0x%04X: parsing output script 1\n", script);
++		NV_DEBUG_KMS(dev, "0x%04X: parsing output script 1\n", script);
+ 		nouveau_bios_run_init_table(dev, script, dcbent);
+ 	} else
+ 	if (pxclk == -2) {
+@@ -4117,7 +4590,7 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
+ 			return 1;
+ 		}
+ 
+-		NV_TRACE(dev, "0x%04X: parsing output script 2\n", script);
++		NV_DEBUG_KMS(dev, "0x%04X: parsing output script 2\n", script);
+ 		nouveau_bios_run_init_table(dev, script, dcbent);
+ 	} else
+ 	if (pxclk > 0) {
+@@ -4125,11 +4598,11 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
  		if (script)
  			script = clkcmptable(bios, script, pxclk);
  		if (!script) {
@@ -2061,7 +2328,21 @@ index e492919..eb77710 100644
  			return 1;
  		}
  
-@@ -4484,7 +4977,7 @@ int get_pll_limits(struct drm_device *dev, uint32_t limit_match, struct pll_lims
+-		NV_TRACE(dev, "0x%04X: parsing clock script 0\n", script);
++		NV_DEBUG_KMS(dev, "0x%04X: parsing clock script 0\n", script);
+ 		nouveau_bios_run_init_table(dev, script, dcbent);
+ 	} else
+ 	if (pxclk < 0) {
+@@ -4141,7 +4614,7 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
+ 			return 1;
+ 		}
+ 
+-		NV_TRACE(dev, "0x%04X: parsing clock script 1\n", script);
++		NV_DEBUG_KMS(dev, "0x%04X: parsing clock script 1\n", script);
+ 		nouveau_bios_run_init_table(dev, script, dcbent);
+ 	}
+ 
+@@ -4484,7 +4957,7 @@ int get_pll_limits(struct drm_device *dev, uint32_t limit_match, struct pll_lims
  		pll_lim->min_p = record[12];
  		pll_lim->max_p = record[13];
  		/* where did this go to?? */
@@ -2070,7 +2351,32 @@ index e492919..eb77710 100644
  			pll_lim->refclk = 27000;
  		else
  			pll_lim->refclk = 100000;
-@@ -5151,10 +5644,14 @@ static int parse_bmp_structure(struct drm_device *dev, struct nvbios *bios, unsi
+@@ -4864,19 +5337,17 @@ static int parse_bit_tmds_tbl_entry(struct drm_device *dev, struct nvbios *bios,
+ 	}
+ 
+ 	tmdstableptr = ROM16(bios->data[bitentry->offset]);
+-
+-	if (tmdstableptr == 0x0) {
++	if (!tmdstableptr) {
+ 		NV_ERROR(dev, "Pointer to TMDS table invalid\n");
+ 		return -EINVAL;
+ 	}
+ 
++	NV_INFO(dev, "TMDS table version %d.%d\n",
++		bios->data[tmdstableptr] >> 4, bios->data[tmdstableptr] & 0xf);
++
+ 	/* nv50+ has v2.0, but we don't parse it atm */
+-	if (bios->data[tmdstableptr] != 0x11) {
+-		NV_WARN(dev,
+-			"TMDS table revision %d.%d not currently supported\n",
+-			bios->data[tmdstableptr] >> 4, bios->data[tmdstableptr] & 0xf);
++	if (bios->data[tmdstableptr] != 0x11)
+ 		return -ENOSYS;
+-	}
+ 
+ 	/*
+ 	 * These two scripts are odd: they don't seem to get run even when
+@@ -5151,10 +5622,14 @@ static int parse_bmp_structure(struct drm_device *dev, struct nvbios *bios, unsi
  	bios->legacy.i2c_indices.crt = bios->data[legacy_i2c_offset];
  	bios->legacy.i2c_indices.tv = bios->data[legacy_i2c_offset + 1];
  	bios->legacy.i2c_indices.panel = bios->data[legacy_i2c_offset + 2];
@@ -2089,7 +2395,39 @@ index e492919..eb77710 100644
  
  	if (bmplength > 74) {
  		bios->fmaxvco = ROM32(bmp[67]);
-@@ -5506,7 +6003,7 @@ static void fabricate_vga_output(struct dcb_table *dcb, int i2c, int heads)
+@@ -5312,6 +5787,20 @@ parse_dcb_gpio_table(struct nvbios *bios)
+ 			gpio->line = tvdac_gpio[1] >> 4;
+ 			gpio->invert = tvdac_gpio[0] & 2;
+ 		}
++	} else {
++		/*
++		 * No systematic way to store GPIO info on pre-v2.2
++		 * DCBs, try to match the PCI device IDs.
++		 */
++
++		/* Apple iMac G4 NV18 */
++		if (nv_match_device(dev, 0x0189, 0x10de, 0x0010)) {
++			struct dcb_gpio_entry *gpio = new_gpio_entry(bios);
++
++			gpio->tag = DCB_GPIO_TVDAC0;
++			gpio->line = 4;
++		}
++
+ 	}
+ 
+ 	if (!gpio_table_ptr)
+@@ -5387,9 +5876,7 @@ apply_dcb_connector_quirks(struct nvbios *bios, int idx)
+ 	struct drm_device *dev = bios->dev;
+ 
+ 	/* Gigabyte NX85T */
+-	if ((dev->pdev->device == 0x0421) &&
+-	    (dev->pdev->subsystem_vendor == 0x1458) &&
+-	    (dev->pdev->subsystem_device == 0x344c)) {
++	if (nv_match_device(dev, 0x0421, 0x1458, 0x344c)) {
+ 		if (cte->type == DCB_CONNECTOR_HDMI_1)
+ 			cte->type = DCB_CONNECTOR_DVI_I;
+ 	}
+@@ -5506,7 +5993,7 @@ static void fabricate_vga_output(struct dcb_table *dcb, int i2c, int heads)
  	entry->i2c_index = i2c;
  	entry->heads = heads;
  	entry->location = DCB_LOC_ON_CHIP;
@@ -2098,7 +2436,7 @@ index e492919..eb77710 100644
  }
  
  static void fabricate_dvi_i_output(struct dcb_table *dcb, bool twoHeads)
-@@ -5589,9 +6086,12 @@ parse_dcb20_entry(struct drm_device *dev, struct dcb_table *dcb,
+@@ -5589,9 +6076,12 @@ parse_dcb20_entry(struct drm_device *dev, struct dcb_table *dcb,
  			if (conf & 0x4 || conf & 0x8)
  				entry->lvdsconf.use_power_scripts = true;
  		} else {
@@ -2112,7 +2450,7 @@ index e492919..eb77710 100644
  		}
  		if (conf & mask) {
  			/*
-@@ -5631,7 +6131,13 @@ parse_dcb20_entry(struct drm_device *dev, struct dcb_table *dcb,
+@@ -5631,9 +6121,15 @@ parse_dcb20_entry(struct drm_device *dev, struct dcb_table *dcb,
  		}
  		break;
  	case OUTPUT_TMDS:
@@ -2125,9 +2463,41 @@ index e492919..eb77710 100644
 +			entry->tmdsconf.slave_addr = (conf & 0x00000070) >> 4;
 +
  		break;
- 	case 0xe:
+-	case 0xe:
++	case OUTPUT_EOL:
  		/* weird g80 mobile type that "nv" treats as a terminator */
-@@ -5706,13 +6212,6 @@ parse_dcb15_entry(struct drm_device *dev, struct dcb_table *dcb,
+ 		dcb->entries--;
+ 		return false;
+@@ -5670,22 +6166,14 @@ parse_dcb15_entry(struct drm_device *dev, struct dcb_table *dcb,
+ 		entry->type = OUTPUT_TV;
+ 		break;
+ 	case 2:
+-	case 3:
+-		entry->type = OUTPUT_LVDS;
+-		break;
+ 	case 4:
+-		switch ((conn & 0x000000f0) >> 4) {
+-		case 0:
+-			entry->type = OUTPUT_TMDS;
+-			break;
+-		case 1:
++		if (conn & 0x10)
+ 			entry->type = OUTPUT_LVDS;
+-			break;
+-		default:
+-			NV_ERROR(dev, "Unknown DCB subtype 4/%d\n",
+-				 (conn & 0x000000f0) >> 4);
+-			return false;
+-		}
++		else
++			entry->type = OUTPUT_TMDS;
++		break;
++	case 3:
++		entry->type = OUTPUT_LVDS;
+ 		break;
+ 	default:
+ 		NV_ERROR(dev, "Unknown DCB type %d\n", conn & 0x0000000f);
+@@ -5706,13 +6194,6 @@ parse_dcb15_entry(struct drm_device *dev, struct dcb_table *dcb,
  	case OUTPUT_TV:
  		entry->tvconf.has_component_output = false;
  		break;
@@ -2141,7 +2511,7 @@ index e492919..eb77710 100644
  	case OUTPUT_LVDS:
  		if ((conn & 0x00003f00) != 0x10)
  			entry->lvdsconf.use_straps_for_mode = true;
-@@ -5793,6 +6292,31 @@ void merge_like_dcb_entries(struct drm_device *dev, struct dcb_table *dcb)
+@@ -5793,6 +6274,29 @@ void merge_like_dcb_entries(struct drm_device *dev, struct dcb_table *dcb)
  	dcb->entries = newentries;
  }
  
@@ -2160,9 +2530,7 @@ index e492919..eb77710 100644
 +	 * nasty problems until this is sorted (assuming it's not a
 +	 * VBIOS bug).
 +	 */
-+	if ((dev->pdev->device == 0x040d) &&
-+	    (dev->pdev->subsystem_vendor == 0x1028) &&
-+	    (dev->pdev->subsystem_device == 0x019b)) {
++	if (nv_match_device(dev, 0x040d, 0x1028, 0x019b)) {
 +		if (*conn == 0x02026312 && *conf == 0x00000020)
 +			return false;
 +	}
@@ -2173,7 +2541,7 @@ index e492919..eb77710 100644
  static int
  parse_dcb_table(struct drm_device *dev, struct nvbios *bios, bool twoHeads)
  {
-@@ -5903,6 +6427,19 @@ parse_dcb_table(struct drm_device *dev, struct nvbios *bios, bool twoHeads)
+@@ -5903,6 +6407,19 @@ parse_dcb_table(struct drm_device *dev, struct nvbios *bios, bool twoHeads)
  		dcb->i2c_table = &bios->data[i2ctabptr];
  		if (dcb->version >= 0x30)
  			dcb->i2c_default_indices = dcb->i2c_table[4];
@@ -2193,7 +2561,7 @@ index e492919..eb77710 100644
  	}
  
  	if (entries > DCB_MAX_NUM_ENTRIES)
-@@ -5926,6 +6463,9 @@ parse_dcb_table(struct drm_device *dev, struct nvbios *bios, bool twoHeads)
+@@ -5926,6 +6443,9 @@ parse_dcb_table(struct drm_device *dev, struct nvbios *bios, bool twoHeads)
  		if ((connection & 0x0000000f) == 0x0000000f)
  			continue;
  
@@ -2203,7 +2571,7 @@ index e492919..eb77710 100644
  		NV_TRACEWARN(dev, "Raw DCB entry %d: %08x %08x\n",
  			     dcb->entries, connection, config);
  
-@@ -6181,9 +6721,8 @@ nouveau_run_vbios_init(struct drm_device *dev)
+@@ -6181,9 +6701,8 @@ nouveau_run_vbios_init(struct drm_device *dev)
  	struct nvbios *bios = &dev_priv->vbios;
  	int i, ret = 0;
  
@@ -2215,7 +2583,7 @@ index e492919..eb77710 100644
  
  	if (bios->major_version < 5)	/* BMP only */
  		load_nv17_hw_sequencer_ucode(dev, bios);
-@@ -6216,8 +6755,6 @@ nouveau_run_vbios_init(struct drm_device *dev)
+@@ -6216,8 +6735,6 @@ nouveau_run_vbios_init(struct drm_device *dev)
  		}
  	}
  
@@ -2224,7 +2592,7 @@ index e492919..eb77710 100644
  	return ret;
  }
  
-@@ -6238,7 +6775,6 @@ static bool
+@@ -6238,7 +6755,6 @@ static bool
  nouveau_bios_posted(struct drm_device *dev)
  {
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -2232,7 +2600,7 @@ index e492919..eb77710 100644
  	unsigned htotal;
  
  	if (dev_priv->chipset >= NV_50) {
-@@ -6248,13 +6784,12 @@ nouveau_bios_posted(struct drm_device *dev)
+@@ -6248,13 +6764,12 @@ nouveau_bios_posted(struct drm_device *dev)
  		return true;
  	}
  
@@ -2247,7 +2615,7 @@ index e492919..eb77710 100644
  	return (htotal != 0);
  }
  
-@@ -6263,8 +6798,6 @@ nouveau_bios_init(struct drm_device *dev)
+@@ -6263,8 +6778,6 @@ nouveau_bios_init(struct drm_device *dev)
  {
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
  	struct nvbios *bios = &dev_priv->vbios;
@@ -2256,7 +2624,7 @@ index e492919..eb77710 100644
  	int ret;
  
  	if (!NVInitVBIOS(dev))
-@@ -6284,40 +6817,27 @@ nouveau_bios_init(struct drm_device *dev)
+@@ -6284,40 +6797,27 @@ nouveau_bios_init(struct drm_device *dev)
  	if (!bios->major_version)	/* we don't run version 0 bios */
  		return 0;
  
@@ -2300,7 +2668,7 @@ index e492919..eb77710 100644
  	/* allow subsequent scripts to execute */
  	bios->execute = true;
 diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.h b/drivers/gpu/drm/nouveau/nouveau_bios.h
-index adf4ec2..fd14dfd 100644
+index adf4ec2..c1de2f3 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_bios.h
 +++ b/drivers/gpu/drm/nouveau/nouveau_bios.h
 @@ -81,6 +81,7 @@ struct dcb_connector_table_entry {
@@ -2311,7 +2679,15 @@ index adf4ec2..fd14dfd 100644
  };
  
  struct dcb_connector_table {
-@@ -117,6 +118,7 @@ struct dcb_entry {
+@@ -94,6 +95,7 @@ enum dcb_type {
+ 	OUTPUT_TMDS = 2,
+ 	OUTPUT_LVDS = 3,
+ 	OUTPUT_DP = 6,
++	OUTPUT_EOL = 14, /* DCB 4.0+, appears to be end-of-list */
+ 	OUTPUT_ANY = -1
+ };
+ 
+@@ -117,6 +119,7 @@ struct dcb_entry {
  		struct {
  			struct sor_conf sor;
  			bool use_straps_for_mode;
@@ -2319,7 +2695,7 @@ index adf4ec2..fd14dfd 100644
  			bool use_power_scripts;
  		} lvdsconf;
  		struct {
-@@ -129,6 +131,7 @@ struct dcb_entry {
+@@ -129,6 +132,7 @@ struct dcb_entry {
  		} dpconf;
  		struct {
  			struct sor_conf sor;
@@ -2327,7 +2703,7 @@ index adf4ec2..fd14dfd 100644
  		} tmdsconf;
  	};
  	bool i2c_upper_default;
-@@ -249,8 +252,6 @@ struct nvbios {
+@@ -249,8 +253,6 @@ struct nvbios {
  
  	struct {
  		int crtchead;
@@ -2337,10 +2713,18 @@ index adf4ec2..fd14dfd 100644
  
  	struct {
 diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
-index 6f3c195..f1357f1 100644
+index 6f3c195..553a01d 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
-@@ -51,9 +51,6 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
+@@ -43,17 +43,12 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
+ 	struct drm_device *dev = dev_priv->dev;
+ 	struct nouveau_bo *nvbo = nouveau_bo(bo);
+ 
+-	ttm_bo_kunmap(&nvbo->kmap);
+-
+ 	if (unlikely(nvbo->gem))
+ 		DRM_ERROR("bo %p still attached to GEM object\n", bo);
+ 
  	if (nvbo->tile)
  		nv10_mem_expire_tiling(dev, nvbo->tile, NULL);
  
@@ -2350,7 +2734,7 @@ index 6f3c195..f1357f1 100644
  	kfree(nvbo);
  }
  
-@@ -166,9 +163,6 @@ nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan,
+@@ -166,9 +161,6 @@ nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan,
  	}
  	nvbo->channel = NULL;
  
@@ -2360,7 +2744,17 @@ index 6f3c195..f1357f1 100644
  	*pnvbo = nvbo;
  	return 0;
  }
-@@ -461,9 +455,9 @@ nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
+@@ -296,7 +288,8 @@ nouveau_bo_map(struct nouveau_bo *nvbo)
+ void
+ nouveau_bo_unmap(struct nouveau_bo *nvbo)
+ {
+-	ttm_bo_kunmap(&nvbo->kmap);
++	if (nvbo)
++		ttm_bo_kunmap(&nvbo->kmap);
+ }
+ 
+ u16
+@@ -461,18 +454,20 @@ nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
  		return ret;
  
  	ret = ttm_bo_move_accel_cleanup(&nvbo->bo, fence, NULL,
@@ -2373,92 +2767,384 @@ index 6f3c195..f1357f1 100644
  	nouveau_fence_unref((void *)&fence);
  	return ret;
  }
-@@ -711,8 +705,7 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
- 		return ret;
- 
- 	/* Software copy if the card isn't up and running yet. */
--	if (dev_priv->init_state != NOUVEAU_CARD_INIT_DONE ||
--	    !dev_priv->channel) {
-+	if (!dev_priv->channel) {
- 		ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
- 		goto out;
- 	}
-diff --git a/drivers/gpu/drm/nouveau/nouveau_calc.c b/drivers/gpu/drm/nouveau/nouveau_calc.c
-index 88f9bc0..ca85da7 100644
---- a/drivers/gpu/drm/nouveau/nouveau_calc.c
-+++ b/drivers/gpu/drm/nouveau/nouveau_calc.c
-@@ -200,7 +200,7 @@ nv04_update_arb(struct drm_device *dev, int VClk, int bpp,
- 	struct nv_sim_state sim_data;
- 	int MClk = nouveau_hw_get_clock(dev, MPLL);
- 	int NVClk = nouveau_hw_get_clock(dev, NVPLL);
--	uint32_t cfg1 = nvReadFB(dev, NV_PFB_CFG1);
-+	uint32_t cfg1 = nvReadFB(dev, NV04_PFB_CFG1);
- 
- 	sim_data.pclk_khz = VClk;
- 	sim_data.mclk_khz = MClk;
-@@ -218,7 +218,7 @@ nv04_update_arb(struct drm_device *dev, int VClk, int bpp,
- 		sim_data.mem_latency = 3;
- 		sim_data.mem_page_miss = 10;
- 	} else {
--		sim_data.memory_type = nvReadFB(dev, NV_PFB_CFG0) & 0x1;
-+		sim_data.memory_type = nvReadFB(dev, NV04_PFB_CFG0) & 0x1;
- 		sim_data.memory_width = (nvReadEXTDEV(dev, NV_PEXTDEV_BOOT_0) & 0x10) ? 128 : 64;
- 		sim_data.mem_latency = cfg1 & 0xf;
- 		sim_data.mem_page_miss = ((cfg1 >> 4) & 0xf) + ((cfg1 >> 31) & 0x1);
-diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c
-index 1fc57ef..e952c3b 100644
---- a/drivers/gpu/drm/nouveau/nouveau_channel.c
-+++ b/drivers/gpu/drm/nouveau/nouveau_channel.c
-@@ -257,9 +257,7 @@ nouveau_channel_free(struct nouveau_channel *chan)
- 	nouveau_debugfs_channel_fini(chan);
  
- 	/* Give outstanding push buffers a chance to complete */
--	spin_lock_irqsave(&chan->fence.lock, flags);
- 	nouveau_fence_update(chan);
--	spin_unlock_irqrestore(&chan->fence.lock, flags);
- 	if (chan->fence.sequence != chan->fence.sequence_ack) {
- 		struct nouveau_fence *fence = NULL;
+ static inline uint32_t
+-nouveau_bo_mem_ctxdma(struct nouveau_bo *nvbo, struct nouveau_channel *chan,
+-		      struct ttm_mem_reg *mem)
++nouveau_bo_mem_ctxdma(struct ttm_buffer_object *bo,
++		      struct nouveau_channel *chan, struct ttm_mem_reg *mem)
+ {
+-	if (chan == nouveau_bdev(nvbo->bo.bdev)->channel) {
++	struct nouveau_bo *nvbo = nouveau_bo(bo);
++
++	if (nvbo->no_vm) {
+ 		if (mem->mem_type == TTM_PL_TT)
+ 			return NvDmaGART;
+ 		return NvDmaVRAM;
+@@ -484,86 +479,181 @@ nouveau_bo_mem_ctxdma(struct nouveau_bo *nvbo, struct nouveau_channel *chan,
+ }
  
-@@ -368,8 +366,6 @@ nouveau_ioctl_fifo_alloc(struct drm_device *dev, void *data,
- 	struct nouveau_channel *chan;
+ static int
+-nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
+-		     bool no_wait_reserve, bool no_wait_gpu,
+-		     struct ttm_mem_reg *new_mem)
++nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
++		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+ {
+-	struct nouveau_bo *nvbo = nouveau_bo(bo);
+ 	struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+-	struct ttm_mem_reg *old_mem = &bo->mem;
+-	struct nouveau_channel *chan;
+-	uint64_t src_offset, dst_offset;
+-	uint32_t page_count;
++	struct nouveau_bo *nvbo = nouveau_bo(bo);
++	u64 length = (new_mem->num_pages << PAGE_SHIFT);
++	u64 src_offset, dst_offset;
  	int ret;
  
--	NOUVEAU_CHECK_INITIALISED_WITH_RETURN;
+-	chan = nvbo->channel;
+-	if (!chan || nvbo->tile_flags || nvbo->no_vm)
+-		chan = dev_priv->channel;
 -
- 	if (dev_priv->engine.graph.accel_blocked)
- 		return -ENODEV;
- 
-@@ -418,7 +414,6 @@ nouveau_ioctl_fifo_free(struct drm_device *dev, void *data,
- 	struct drm_nouveau_channel_free *cfree = data;
- 	struct nouveau_channel *chan;
- 
--	NOUVEAU_CHECK_INITIALISED_WITH_RETURN;
- 	NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(cfree->channel, file_priv, chan);
- 
- 	nouveau_channel_free(chan);
-diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
-index 149ed22..a1473ff 100644
---- a/drivers/gpu/drm/nouveau/nouveau_connector.c
-+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
-@@ -37,12 +37,6 @@
- #include "nouveau_connector.h"
- #include "nouveau_hw.h"
- 
--static inline struct drm_encoder_slave_funcs *
--get_slave_funcs(struct nouveau_encoder *enc)
--{
--	return to_encoder_slave(to_drm_encoder(enc))->slave_funcs;
--}
+ 	src_offset = old_mem->mm_node->start << PAGE_SHIFT;
+ 	dst_offset = new_mem->mm_node->start << PAGE_SHIFT;
+-	if (chan != dev_priv->channel) {
+-		if (old_mem->mem_type == TTM_PL_TT)
+-			src_offset += dev_priv->vm_gart_base;
+-		else
++	if (!nvbo->no_vm) {
++		if (old_mem->mem_type == TTM_PL_VRAM)
+ 			src_offset += dev_priv->vm_vram_base;
 -
- static struct nouveau_encoder *
- find_encoder_by_type(struct drm_connector *connector, int type)
- {
-@@ -102,60 +96,12 @@ nouveau_connector_destroy(struct drm_connector *drm_connector)
- 	kfree(drm_connector);
- }
+-		if (new_mem->mem_type == TTM_PL_TT)
+-			dst_offset += dev_priv->vm_gart_base;
+ 		else
++			src_offset += dev_priv->vm_gart_base;
++
++		if (new_mem->mem_type == TTM_PL_VRAM)
+ 			dst_offset += dev_priv->vm_vram_base;
++		else
++			dst_offset += dev_priv->vm_gart_base;
+ 	}
  
--static void
--nouveau_connector_ddc_prepare(struct drm_connector *connector, int *flags)
+ 	ret = RING_SPACE(chan, 3);
+ 	if (ret)
+ 		return ret;
+-	BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_DMA_SOURCE, 2);
+-	OUT_RING(chan, nouveau_bo_mem_ctxdma(nvbo, chan, old_mem));
+-	OUT_RING(chan, nouveau_bo_mem_ctxdma(nvbo, chan, new_mem));
+ 
+-	if (dev_priv->card_type >= NV_50) {
+-		ret = RING_SPACE(chan, 4);
++	BEGIN_RING(chan, NvSubM2MF, 0x0184, 2);
++	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, old_mem));
++	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, new_mem));
++
++	while (length) {
++		u32 amount, stride, height;
++
++		amount  = min(length, (u64)(16 * 1024 * 1024));
++		stride  = 64 * 4;
++		height  = amount / stride;
++
++		if (new_mem->mem_type == TTM_PL_VRAM && nvbo->tile_flags) {
++			ret = RING_SPACE(chan, 8);
++			if (ret)
++				return ret;
++
++			BEGIN_RING(chan, NvSubM2MF, 0x0200, 7);
++			OUT_RING  (chan, 0);
++			OUT_RING  (chan, 0x20);
++			OUT_RING  (chan, stride);
++			OUT_RING  (chan, height);
++			OUT_RING  (chan, 1);
++			OUT_RING  (chan, 0);
++			OUT_RING  (chan, 0);
++		} else {
++			ret = RING_SPACE(chan, 2);
++			if (ret)
++				return ret;
++
++			BEGIN_RING(chan, NvSubM2MF, 0x0200, 1);
++			OUT_RING  (chan, 1);
++		}
++		if (old_mem->mem_type == TTM_PL_VRAM && nvbo->tile_flags) {
++			ret = RING_SPACE(chan, 8);
++			if (ret)
++				return ret;
++
++			BEGIN_RING(chan, NvSubM2MF, 0x021c, 7);
++			OUT_RING  (chan, 0);
++			OUT_RING  (chan, 0x20);
++			OUT_RING  (chan, stride);
++			OUT_RING  (chan, height);
++			OUT_RING  (chan, 1);
++			OUT_RING  (chan, 0);
++			OUT_RING  (chan, 0);
++		} else {
++			ret = RING_SPACE(chan, 2);
++			if (ret)
++				return ret;
++
++			BEGIN_RING(chan, NvSubM2MF, 0x021c, 1);
++			OUT_RING  (chan, 1);
++		}
++
++		ret = RING_SPACE(chan, 14);
+ 		if (ret)
+ 			return ret;
+-		BEGIN_RING(chan, NvSubM2MF, 0x0200, 1);
+-		OUT_RING(chan, 1);
+-		BEGIN_RING(chan, NvSubM2MF, 0x021c, 1);
+-		OUT_RING(chan, 1);
++
++		BEGIN_RING(chan, NvSubM2MF, 0x0238, 2);
++		OUT_RING  (chan, upper_32_bits(src_offset));
++		OUT_RING  (chan, upper_32_bits(dst_offset));
++		BEGIN_RING(chan, NvSubM2MF, 0x030c, 8);
++		OUT_RING  (chan, lower_32_bits(src_offset));
++		OUT_RING  (chan, lower_32_bits(dst_offset));
++		OUT_RING  (chan, stride);
++		OUT_RING  (chan, stride);
++		OUT_RING  (chan, stride);
++		OUT_RING  (chan, height);
++		OUT_RING  (chan, 0x00000101);
++		OUT_RING  (chan, 0x00000000);
++		BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_NOP, 1);
++		OUT_RING  (chan, 0);
++
++		length -= amount;
++		src_offset += amount;
++		dst_offset += amount;
+ 	}
+ 
++	return 0;
++}
++
++static int
++nv04_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
++		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
++{
++	u32 src_offset = old_mem->mm_node->start << PAGE_SHIFT;
++	u32 dst_offset = new_mem->mm_node->start << PAGE_SHIFT;
++	u32 page_count = new_mem->num_pages;
++	int ret;
++
++	ret = RING_SPACE(chan, 3);
++	if (ret)
++		return ret;
++
++	BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_DMA_SOURCE, 2);
++	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, old_mem));
++	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, new_mem));
++
+ 	page_count = new_mem->num_pages;
+ 	while (page_count) {
+ 		int line_count = (page_count > 2047) ? 2047 : page_count;
+ 
+-		if (dev_priv->card_type >= NV_50) {
+-			ret = RING_SPACE(chan, 3);
+-			if (ret)
+-				return ret;
+-			BEGIN_RING(chan, NvSubM2MF, 0x0238, 2);
+-			OUT_RING(chan, upper_32_bits(src_offset));
+-			OUT_RING(chan, upper_32_bits(dst_offset));
+-		}
+ 		ret = RING_SPACE(chan, 11);
+ 		if (ret)
+ 			return ret;
++
+ 		BEGIN_RING(chan, NvSubM2MF,
+ 				 NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
+-		OUT_RING(chan, lower_32_bits(src_offset));
+-		OUT_RING(chan, lower_32_bits(dst_offset));
+-		OUT_RING(chan, PAGE_SIZE); /* src_pitch */
+-		OUT_RING(chan, PAGE_SIZE); /* dst_pitch */
+-		OUT_RING(chan, PAGE_SIZE); /* line_length */
+-		OUT_RING(chan, line_count);
+-		OUT_RING(chan, (1<<8)|(1<<0));
+-		OUT_RING(chan, 0);
++		OUT_RING  (chan, src_offset);
++		OUT_RING  (chan, dst_offset);
++		OUT_RING  (chan, PAGE_SIZE); /* src_pitch */
++		OUT_RING  (chan, PAGE_SIZE); /* dst_pitch */
++		OUT_RING  (chan, PAGE_SIZE); /* line_length */
++		OUT_RING  (chan, line_count);
++		OUT_RING  (chan, 0x00000101);
++		OUT_RING  (chan, 0x00000000);
+ 		BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_NOP, 1);
+-		OUT_RING(chan, 0);
++		OUT_RING  (chan, 0);
+ 
+ 		page_count -= line_count;
+ 		src_offset += (PAGE_SIZE * line_count);
+ 		dst_offset += (PAGE_SIZE * line_count);
+ 	}
+ 
++	return 0;
++}
++
++static int
++nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
++		     bool no_wait_reserve, bool no_wait_gpu,
++		     struct ttm_mem_reg *new_mem)
++{
++	struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
++	struct nouveau_bo *nvbo = nouveau_bo(bo);
++	struct nouveau_channel *chan;
++	int ret;
++
++	chan = nvbo->channel;
++	if (!chan || nvbo->no_vm)
++		chan = dev_priv->channel;
++
++	if (dev_priv->card_type < NV_50)
++		ret = nv04_bo_move_m2mf(chan, bo, &bo->mem, new_mem);
++	else
++		ret = nv50_bo_move_m2mf(chan, bo, &bo->mem, new_mem);
++	if (ret)
++		return ret;
++
+ 	return nouveau_bo_move_accel_cleanup(chan, nvbo, evict, no_wait_reserve, no_wait_gpu, new_mem);
+ }
+ 
+@@ -710,13 +800,6 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
+ 	if (ret)
+ 		return ret;
+ 
+-	/* Software copy if the card isn't up and running yet. */
+-	if (dev_priv->init_state != NOUVEAU_CARD_INIT_DONE ||
+-	    !dev_priv->channel) {
+-		ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
+-		goto out;
+-	}
+-
+ 	/* Fake bo copy. */
+ 	if (old_mem->mem_type == TTM_PL_SYSTEM && !bo->ttm) {
+ 		BUG_ON(bo->mem.mm_node != NULL);
+@@ -725,6 +808,12 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
+ 		goto out;
+ 	}
+ 
++	/* Software copy if the card isn't up and running yet. */
++	if (!dev_priv->channel) {
++		ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
++		goto out;
++	}
++
+ 	/* Hardware assisted copy. */
+ 	if (new_mem->mem_type == TTM_PL_SYSTEM)
+ 		ret = nouveau_bo_move_flipd(bo, evict, intr, no_wait_reserve, no_wait_gpu, new_mem);
+diff --git a/drivers/gpu/drm/nouveau/nouveau_calc.c b/drivers/gpu/drm/nouveau/nouveau_calc.c
+index 88f9bc0..23d9896 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_calc.c
++++ b/drivers/gpu/drm/nouveau/nouveau_calc.c
+@@ -200,7 +200,7 @@ nv04_update_arb(struct drm_device *dev, int VClk, int bpp,
+ 	struct nv_sim_state sim_data;
+ 	int MClk = nouveau_hw_get_clock(dev, MPLL);
+ 	int NVClk = nouveau_hw_get_clock(dev, NVPLL);
+-	uint32_t cfg1 = nvReadFB(dev, NV_PFB_CFG1);
++	uint32_t cfg1 = nvReadFB(dev, NV04_PFB_CFG1);
+ 
+ 	sim_data.pclk_khz = VClk;
+ 	sim_data.mclk_khz = MClk;
+@@ -218,7 +218,7 @@ nv04_update_arb(struct drm_device *dev, int VClk, int bpp,
+ 		sim_data.mem_latency = 3;
+ 		sim_data.mem_page_miss = 10;
+ 	} else {
+-		sim_data.memory_type = nvReadFB(dev, NV_PFB_CFG0) & 0x1;
++		sim_data.memory_type = nvReadFB(dev, NV04_PFB_CFG0) & 0x1;
+ 		sim_data.memory_width = (nvReadEXTDEV(dev, NV_PEXTDEV_BOOT_0) & 0x10) ? 128 : 64;
+ 		sim_data.mem_latency = cfg1 & 0xf;
+ 		sim_data.mem_page_miss = ((cfg1 >> 4) & 0xf) + ((cfg1 >> 31) & 0x1);
+@@ -234,7 +234,7 @@ nv04_update_arb(struct drm_device *dev, int VClk, int bpp,
+ }
+ 
+ static void
+-nv30_update_arb(int *burst, int *lwm)
++nv20_update_arb(int *burst, int *lwm)
+ {
+ 	unsigned int fifo_size, burst_size, graphics_lwm;
+ 
+@@ -251,14 +251,14 @@ nouveau_calc_arb(struct drm_device *dev, int vclk, int bpp, int *burst, int *lwm
+ {
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+ 
+-	if (dev_priv->card_type < NV_30)
++	if (dev_priv->card_type < NV_20)
+ 		nv04_update_arb(dev, vclk, bpp, burst, lwm);
+ 	else if ((dev->pci_device & 0xfff0) == 0x0240 /*CHIPSET_C51*/ ||
+ 		 (dev->pci_device & 0xfff0) == 0x03d0 /*CHIPSET_C512*/) {
+ 		*burst = 128;
+ 		*lwm = 0x0480;
+ 	} else
+-		nv30_update_arb(burst, lwm);
++		nv20_update_arb(burst, lwm);
+ }
+ 
+ static int
+diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c
+index 1fc57ef..9a31023 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_channel.c
++++ b/drivers/gpu/drm/nouveau/nouveau_channel.c
+@@ -257,9 +257,7 @@ nouveau_channel_free(struct nouveau_channel *chan)
+ 	nouveau_debugfs_channel_fini(chan);
+ 
+ 	/* Give outstanding push buffers a chance to complete */
+-	spin_lock_irqsave(&chan->fence.lock, flags);
+ 	nouveau_fence_update(chan);
+-	spin_unlock_irqrestore(&chan->fence.lock, flags);
+ 	if (chan->fence.sequence != chan->fence.sequence_ack) {
+ 		struct nouveau_fence *fence = NULL;
+ 
+@@ -311,6 +309,7 @@ nouveau_channel_free(struct nouveau_channel *chan)
+ 	/* Release the channel's resources */
+ 	nouveau_gpuobj_ref_del(dev, &chan->pushbuf);
+ 	if (chan->pushbuf_bo) {
++		nouveau_bo_unmap(chan->pushbuf_bo);
+ 		nouveau_bo_unpin(chan->pushbuf_bo);
+ 		nouveau_bo_ref(NULL, &chan->pushbuf_bo);
+ 	}
+@@ -368,8 +367,6 @@ nouveau_ioctl_fifo_alloc(struct drm_device *dev, void *data,
+ 	struct nouveau_channel *chan;
+ 	int ret;
+ 
+-	NOUVEAU_CHECK_INITIALISED_WITH_RETURN;
+-
+ 	if (dev_priv->engine.graph.accel_blocked)
+ 		return -ENODEV;
+ 
+@@ -418,7 +415,6 @@ nouveau_ioctl_fifo_free(struct drm_device *dev, void *data,
+ 	struct drm_nouveau_channel_free *cfree = data;
+ 	struct nouveau_channel *chan;
+ 
+-	NOUVEAU_CHECK_INITIALISED_WITH_RETURN;
+ 	NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(cfree->channel, file_priv, chan);
+ 
+ 	nouveau_channel_free(chan);
+diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
+index 149ed22..a1473ff 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
++++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
+@@ -37,12 +37,6 @@
+ #include "nouveau_connector.h"
+ #include "nouveau_hw.h"
+ 
+-static inline struct drm_encoder_slave_funcs *
+-get_slave_funcs(struct nouveau_encoder *enc)
+-{
+-	return to_encoder_slave(to_drm_encoder(enc))->slave_funcs;
+-}
+-
+ static struct nouveau_encoder *
+ find_encoder_by_type(struct drm_connector *connector, int type)
+ {
+@@ -102,60 +96,12 @@ nouveau_connector_destroy(struct drm_connector *drm_connector)
+ 	kfree(drm_connector);
+ }
+ 
+-static void
+-nouveau_connector_ddc_prepare(struct drm_connector *connector, int *flags)
 -{
 -	struct drm_nouveau_private *dev_priv = connector->dev->dev_private;
 -
@@ -3076,24 +3762,23 @@ index 4ef38ab..0d2e668 100644
  
  #endif /* __NOUVEAU_CONNECTOR_H__ */
 diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c
-index 65c441a..2e3c6ca 100644
+index 65c441a..2d00699 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_dma.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
-@@ -92,11 +92,9 @@ nouveau_dma_init(struct nouveau_channel *chan)
+@@ -91,13 +91,6 @@ nouveau_dma_init(struct nouveau_channel *chan)
+ 	if (ret)
  		return ret;
  
- 	/* Map M2MF notifier object - fbcon. */
+-	/* Map M2MF notifier object - fbcon. */
 -	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
 -		ret = nouveau_bo_map(chan->notifier_bo);
 -		if (ret)
 -			return ret;
 -	}
-+	ret = nouveau_bo_map(chan->notifier_bo);
-+	if (ret)
-+		return ret;
- 
+-
  	/* Insert NOPS for NOUVEAU_DMA_SKIPS */
  	ret = RING_SPACE(chan, NOUVEAU_DMA_SKIPS);
+ 	if (ret)
 diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c
 index deeb21c..8a1b188 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_dp.c
@@ -3380,7 +4065,7 @@ index 2737704..a8d3d17 100644
  	nouveau_unregister_dsm_handler();
  }
 diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
-index c697191..c001574 100644
+index c697191..2eb622b 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h
 +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
 @@ -123,14 +123,6 @@ nvbo_kmap_obj_iovirtual(struct nouveau_bo *nvbo)
@@ -3398,7 +4083,11 @@ index c697191..c001574 100644
  enum nouveau_flags {
  	NV_NFORCE   = 0x10000000,
  	NV_NFORCE2  = 0x20000000
-@@ -149,7 +141,7 @@ struct nouveau_gpuobj {
+@@ -146,10 +138,11 @@ enum nouveau_flags {
+ #define NVOBJ_FLAG_ZERO_FREE		(1 << 2)
+ #define NVOBJ_FLAG_FAKE			(1 << 3)
+ struct nouveau_gpuobj {
++	struct drm_device *dev;
  	struct list_head list;
  
  	struct nouveau_channel *im_channel;
@@ -3407,7 +4096,7 @@ index c697191..c001574 100644
  	struct nouveau_bo *im_backing;
  	uint32_t im_backing_start;
  	uint32_t *im_backing_suspend;
-@@ -196,7 +188,7 @@ struct nouveau_channel {
+@@ -196,7 +189,7 @@ struct nouveau_channel {
  		struct list_head pending;
  		uint32_t sequence;
  		uint32_t sequence_ack;
@@ -3416,7 +4105,7 @@ index c697191..c001574 100644
  	} fence;
  
  	/* DMA push buffer */
-@@ -206,7 +198,7 @@ struct nouveau_channel {
+@@ -206,7 +199,7 @@ struct nouveau_channel {
  
  	/* Notifier memory */
  	struct nouveau_bo *notifier_bo;
@@ -3425,7 +4114,7 @@ index c697191..c001574 100644
  
  	/* PFIFO context */
  	struct nouveau_gpuobj_ref *ramfc;
-@@ -224,7 +216,7 @@ struct nouveau_channel {
+@@ -224,7 +217,7 @@ struct nouveau_channel {
  
  	/* Objects */
  	struct nouveau_gpuobj_ref *ramin; /* Private instmem */
@@ -3434,7 +4123,7 @@ index c697191..c001574 100644
  	struct nouveau_gpuobj_ref *ramht; /* Hash table */
  	struct list_head           ramht_refs; /* Objects referenced by RAMHT */
  
-@@ -277,8 +269,7 @@ struct nouveau_instmem_engine {
+@@ -277,8 +270,7 @@ struct nouveau_instmem_engine {
  	void	(*clear)(struct drm_device *, struct nouveau_gpuobj *);
  	int	(*bind)(struct drm_device *, struct nouveau_gpuobj *);
  	int	(*unbind)(struct drm_device *, struct nouveau_gpuobj *);
@@ -3444,7 +4133,7 @@ index c697191..c001574 100644
  };
  
  struct nouveau_mc_engine {
-@@ -303,10 +294,11 @@ struct nouveau_fb_engine {
+@@ -303,10 +295,11 @@ struct nouveau_fb_engine {
  };
  
  struct nouveau_fifo_engine {
@@ -3458,7 +4147,7 @@ index c697191..c001574 100644
  	int  (*init)(struct drm_device *);
  	void (*takedown)(struct drm_device *);
  
-@@ -339,10 +331,11 @@ struct nouveau_pgraph_object_class {
+@@ -339,10 +332,11 @@ struct nouveau_pgraph_object_class {
  struct nouveau_pgraph_engine {
  	struct nouveau_pgraph_object_class *grclass;
  	bool accel_blocked;
@@ -3472,7 +4161,7 @@ index c697191..c001574 100644
  	int  (*init)(struct drm_device *);
  	void (*takedown)(struct drm_device *);
  
-@@ -358,6 +351,24 @@ struct nouveau_pgraph_engine {
+@@ -358,6 +352,24 @@ struct nouveau_pgraph_engine {
  				  uint32_t size, uint32_t pitch);
  };
  
@@ -3497,7 +4186,7 @@ index c697191..c001574 100644
  struct nouveau_engine {
  	struct nouveau_instmem_engine instmem;
  	struct nouveau_mc_engine      mc;
-@@ -365,6 +376,8 @@ struct nouveau_engine {
+@@ -365,6 +377,8 @@ struct nouveau_engine {
  	struct nouveau_fb_engine      fb;
  	struct nouveau_pgraph_engine  graph;
  	struct nouveau_fifo_engine    fifo;
@@ -3506,7 +4195,7 @@ index c697191..c001574 100644
  };
  
  struct nouveau_pll_vals {
-@@ -397,7 +410,7 @@ enum nv04_fp_display_regs {
+@@ -397,7 +411,7 @@ enum nv04_fp_display_regs {
  
  struct nv04_crtc_reg {
  	unsigned char MiscOutReg;     /* */
@@ -3515,7 +4204,7 @@ index c697191..c001574 100644
  	uint8_t CR58[0x10];
  	uint8_t Sequencer[5];
  	uint8_t Graphics[9];
-@@ -496,15 +509,11 @@ enum nouveau_card_type {
+@@ -496,15 +510,11 @@ enum nouveau_card_type {
  	NV_30      = 0x30,
  	NV_40      = 0x40,
  	NV_50      = 0x50,
@@ -3532,7 +4221,7 @@ index c697191..c001574 100644
  
  	/* the card type, takes NV_* as values */
  	enum nouveau_card_type card_type;
-@@ -528,13 +537,9 @@ struct drm_nouveau_private {
+@@ -528,13 +538,9 @@ struct drm_nouveau_private {
  		struct ttm_global_reference mem_global_ref;
  		struct ttm_bo_global_ref bo_global_ref;
  		struct ttm_bo_device bdev;
@@ -3546,7 +4235,15 @@ index c697191..c001574 100644
  	int fifo_alloc_count;
  	struct nouveau_channel *fifos[NOUVEAU_MAX_CHANNEL_NR];
  
-@@ -595,11 +600,7 @@ struct drm_nouveau_private {
+@@ -579,6 +585,7 @@ struct drm_nouveau_private {
+ 	/* VRAM/fb configuration */
+ 	uint64_t vram_size;
+ 	uint64_t vram_sys_base;
++	u32 vram_rblock_size;
+ 
+ 	uint64_t fb_phys;
+ 	uint64_t fb_available_size;
+@@ -595,11 +602,7 @@ struct drm_nouveau_private {
  	struct nouveau_gpuobj *vm_vram_pt[NV50_VM_VRAM_NR];
  	int vm_vram_pt_nr;
  
@@ -3559,7 +4256,7 @@ index c697191..c001574 100644
  
  	struct list_head gpuobj_list;
  
-@@ -618,6 +619,11 @@ struct drm_nouveau_private {
+@@ -618,6 +621,11 @@ struct drm_nouveau_private {
  	struct backlight_device *backlight;
  
  	struct nouveau_channel *evo;
@@ -3571,7 +4268,7 @@ index c697191..c001574 100644
  
  	struct {
  		struct dentry *channel_root;
-@@ -652,14 +658,6 @@ nouveau_bo_ref(struct nouveau_bo *ref, struct nouveau_bo **pnvbo)
+@@ -652,14 +660,6 @@ nouveau_bo_ref(struct nouveau_bo *ref, struct nouveau_bo **pnvbo)
  	return 0;
  }
  
@@ -3586,7 +4283,7 @@ index c697191..c001574 100644
  #define NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(id, cl, ch) do {    \
  	struct drm_nouveau_private *nv = dev->dev_private;       \
  	if (!nouveau_channel_owner(dev, (cl), (id))) {           \
-@@ -682,7 +680,6 @@ extern int nouveau_tv_disable;
+@@ -682,7 +682,6 @@ extern int nouveau_tv_disable;
  extern char *nouveau_tv_norm;
  extern int nouveau_reg_debug;
  extern char *nouveau_vbios;
@@ -3594,7 +4291,7 @@ index c697191..c001574 100644
  extern int nouveau_ignorelid;
  extern int nouveau_nofbaccel;
  extern int nouveau_noaccel;
-@@ -707,17 +704,10 @@ extern bool nouveau_wait_for_idle(struct drm_device *);
+@@ -707,17 +706,10 @@ extern bool nouveau_wait_for_idle(struct drm_device *);
  extern int  nouveau_card_init(struct drm_device *);
  
  /* nouveau_mem.c */
@@ -3613,7 +4310,7 @@ index c697191..c001574 100644
  extern void nouveau_mem_close(struct drm_device *);
  extern struct nouveau_tile_reg *nv10_mem_set_tiling(struct drm_device *dev,
  						    uint32_t addr,
-@@ -857,11 +847,13 @@ void nouveau_register_dsm_handler(void);
+@@ -857,11 +849,13 @@ void nouveau_register_dsm_handler(void);
  void nouveau_unregister_dsm_handler(void);
  int nouveau_acpi_get_bios_chunk(uint8_t *bios, int offset, int len);
  bool nouveau_acpi_rom_supported(struct pci_dev *pdev);
@@ -3627,7 +4324,7 @@ index c697191..c001574 100644
  #endif
  
  /* nouveau_backlight.c */
-@@ -924,6 +916,10 @@ extern void nv10_fb_takedown(struct drm_device *);
+@@ -924,15 +918,23 @@ extern void nv10_fb_takedown(struct drm_device *);
  extern void nv10_fb_set_region_tiling(struct drm_device *, int, uint32_t,
  				      uint32_t, uint32_t);
  
@@ -3638,18 +4335,21 @@ index c697191..c001574 100644
  /* nv40_fb.c */
  extern int  nv40_fb_init(struct drm_device *);
  extern void nv40_fb_takedown(struct drm_device *);
-@@ -934,6 +930,10 @@ extern void nv40_fb_set_region_tiling(struct drm_device *, int, uint32_t,
+ extern void nv40_fb_set_region_tiling(struct drm_device *, int, uint32_t,
+ 				      uint32_t, uint32_t);
+-
+ /* nv50_fb.c */
  extern int  nv50_fb_init(struct drm_device *);
  extern void nv50_fb_takedown(struct drm_device *);
- 
++extern void nv50_fb_vm_trap(struct drm_device *, int display, const char *);
++
 +/* nvc0_fb.c */
 +extern int  nvc0_fb_init(struct drm_device *);
 +extern void nvc0_fb_takedown(struct drm_device *);
-+
+ 
  /* nv04_fifo.c */
  extern int  nv04_fifo_init(struct drm_device *);
- extern void nv04_fifo_disable(struct drm_device *);
-@@ -971,6 +971,20 @@ extern void nv50_fifo_destroy_context(struct nouveau_channel *);
+@@ -971,6 +973,20 @@ extern void nv50_fifo_destroy_context(struct nouveau_channel *);
  extern int  nv50_fifo_load_context(struct nouveau_channel *);
  extern int  nv50_fifo_unload_context(struct drm_device *);
  
@@ -3670,7 +4370,7 @@ index c697191..c001574 100644
  /* nv04_graph.c */
  extern struct nouveau_pgraph_object_class nv04_graph_grclass[];
  extern int  nv04_graph_init(struct drm_device *);
-@@ -1035,11 +1049,15 @@ extern int  nv50_graph_unload_context(struct drm_device *);
+@@ -1035,11 +1051,15 @@ extern int  nv50_graph_unload_context(struct drm_device *);
  extern void nv50_graph_context_switch(struct drm_device *);
  extern int  nv50_grctx_init(struct nouveau_grctx *);
  
@@ -3691,7 +4391,7 @@ index c697191..c001574 100644
  
  /* nv04_instmem.c */
  extern int  nv04_instmem_init(struct drm_device *);
-@@ -1051,8 +1069,7 @@ extern int  nv04_instmem_populate(struct drm_device *, struct nouveau_gpuobj *,
+@@ -1051,8 +1071,7 @@ extern int  nv04_instmem_populate(struct drm_device *, struct nouveau_gpuobj *,
  extern void nv04_instmem_clear(struct drm_device *, struct nouveau_gpuobj *);
  extern int  nv04_instmem_bind(struct drm_device *, struct nouveau_gpuobj *);
  extern int  nv04_instmem_unbind(struct drm_device *, struct nouveau_gpuobj *);
@@ -3701,7 +4401,7 @@ index c697191..c001574 100644
  
  /* nv50_instmem.c */
  extern int  nv50_instmem_init(struct drm_device *);
-@@ -1064,8 +1081,21 @@ extern int  nv50_instmem_populate(struct drm_device *, struct nouveau_gpuobj *,
+@@ -1064,8 +1083,21 @@ extern int  nv50_instmem_populate(struct drm_device *, struct nouveau_gpuobj *,
  extern void nv50_instmem_clear(struct drm_device *, struct nouveau_gpuobj *);
  extern int  nv50_instmem_bind(struct drm_device *, struct nouveau_gpuobj *);
  extern int  nv50_instmem_unbind(struct drm_device *, struct nouveau_gpuobj *);
@@ -3725,7 +4425,7 @@ index c697191..c001574 100644
  
  /* nv04_mc.c */
  extern int  nv04_mc_init(struct drm_device *);
-@@ -1088,13 +1118,14 @@ extern long nouveau_compat_ioctl(struct file *file, unsigned int cmd,
+@@ -1088,13 +1120,14 @@ extern long nouveau_compat_ioctl(struct file *file, unsigned int cmd,
  				 unsigned long arg);
  
  /* nv04_dac.c */
@@ -3742,7 +4442,7 @@ index c697191..c001574 100644
  extern int nv04_dfp_get_bound_head(struct drm_device *dev, struct dcb_entry *dcbent);
  extern void nv04_dfp_bind_head(struct drm_device *dev, struct dcb_entry *dcbent,
  			       int head, bool dl);
-@@ -1103,15 +1134,17 @@ extern void nv04_dfp_update_fp_control(struct drm_encoder *encoder, int mode);
+@@ -1103,15 +1136,17 @@ extern void nv04_dfp_update_fp_control(struct drm_encoder *encoder, int mode);
  
  /* nv04_tv.c */
  extern int nv04_tv_identify(struct drm_device *dev, int i2c_index);
@@ -3763,7 +4463,7 @@ index c697191..c001574 100644
  
  /* nv04_crtc.c */
  extern int nv04_crtc_create(struct drm_device *, int index);
-@@ -1147,7 +1180,6 @@ extern int nouveau_fence_wait(void *obj, void *arg, bool lazy, bool intr);
+@@ -1147,7 +1182,6 @@ extern int nouveau_fence_wait(void *obj, void *arg, bool lazy, bool intr);
  extern int nouveau_fence_flush(void *obj, void *arg);
  extern void nouveau_fence_unref(void **obj);
  extern void *nouveau_fence_ref(void *obj);
@@ -3771,7 +4471,7 @@ index c697191..c001574 100644
  
  /* nouveau_gem.c */
  extern int nouveau_gem_new(struct drm_device *, struct nouveau_channel *,
-@@ -1167,13 +1199,15 @@ extern int nouveau_gem_ioctl_cpu_fini(struct drm_device *, void *,
+@@ -1167,13 +1201,15 @@ extern int nouveau_gem_ioctl_cpu_fini(struct drm_device *, void *,
  extern int nouveau_gem_ioctl_info(struct drm_device *, void *,
  				  struct drm_file *);
  
@@ -3790,21 +4490,56 @@ index c697191..c001574 100644
  
  /* nv50_calc. */
  int nv50_calc_pll(struct drm_device *, struct pll_lims *, int clk,
-@@ -1220,6 +1254,14 @@ static inline void nv_wr32(struct drm_device *dev, unsigned reg, u32 val)
+@@ -1220,6 +1256,13 @@ static inline void nv_wr32(struct drm_device *dev, unsigned reg, u32 val)
  	iowrite32_native(val, dev_priv->mmio + reg);
  }
  
-+static inline void nv_mask(struct drm_device *dev, u32 reg, u32 mask, u32 val)
++static inline u32 nv_mask(struct drm_device *dev, u32 reg, u32 mask, u32 val)
 +{
 +	u32 tmp = nv_rd32(dev, reg);
-+	tmp &= ~mask;
-+	tmp |= val;
-+	nv_wr32(dev, reg, tmp);
++	nv_wr32(dev, reg, (tmp & ~mask) | val);
++	return tmp;
 +}
 +
  static inline u8 nv_rd08(struct drm_device *dev, unsigned reg)
  {
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
+@@ -1249,17 +1292,8 @@ static inline void nv_wi32(struct drm_device *dev, unsigned offset, u32 val)
+ }
+ 
+ /* object access */
+-static inline u32 nv_ro32(struct drm_device *dev, struct nouveau_gpuobj *obj,
+-				unsigned index)
+-{
+-	return nv_ri32(dev, obj->im_pramin->start + index * 4);
+-}
+-
+-static inline void nv_wo32(struct drm_device *dev, struct nouveau_gpuobj *obj,
+-				unsigned index, u32 val)
+-{
+-	nv_wi32(dev, obj->im_pramin->start + index * 4, val);
+-}
++extern u32 nv_ro32(struct nouveau_gpuobj *, u32 offset);
++extern void nv_wo32(struct nouveau_gpuobj *, u32 offset, u32 val);
+ 
+ /*
+  * Logging
+@@ -1346,6 +1380,15 @@ nv_two_reg_pll(struct drm_device *dev)
+ 	return false;
+ }
+ 
++static inline bool
++nv_match_device(struct drm_device *dev, unsigned device,
++		unsigned sub_vendor, unsigned sub_device)
++{
++	return dev->pdev->device == device &&
++		dev->pdev->subsystem_vendor == sub_vendor &&
++		dev->pdev->subsystem_device == sub_device;
++}
++
+ #define NV_SW                                                        0x0000506e
+ #define NV_SW_DMA_SEMAPHORE                                          0x00000060
+ #define NV_SW_SEMAPHORE_OFFSET                                       0x00000064
 diff --git a/drivers/gpu/drm/nouveau/nouveau_encoder.h b/drivers/gpu/drm/nouveau/nouveau_encoder.h
 index e1df820..7c82d68 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_encoder.h
@@ -3988,7 +4723,7 @@ index faddf53..6b208ff 100644
  }
  
 diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
-index 69c76cf..547f2c2 100644
+index 69c76cf..62ac673 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
 @@ -137,8 +137,6 @@ nouveau_gem_ioctl_new(struct drm_device *dev, void *data,
@@ -4012,6 +4747,24 @@ index 69c76cf..547f2c2 100644
  	NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(req->channel, file_priv, chan);
  
  	req->vram_available = dev_priv->fb_aper_free;
+@@ -650,7 +647,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
+ 				      push[i].length);
+ 		}
+ 	} else
+-	if (dev_priv->card_type >= NV_20) {
++	if (dev_priv->chipset >= 0x25) {
+ 		ret = RING_SPACE(chan, req->nr_push * 2);
+ 		if (ret) {
+ 			NV_ERROR(dev, "cal_space: %d\n", ret);
+@@ -725,7 +722,7 @@ out_next:
+ 		req->suffix0 = 0x00000000;
+ 		req->suffix1 = 0x00000000;
+ 	} else
+-	if (dev_priv->card_type >= NV_20) {
++	if (dev_priv->chipset >= 0x25) {
+ 		req->suffix0 = 0x00020000;
+ 		req->suffix1 = 0x00000000;
+ 	} else {
 @@ -760,8 +757,6 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
  	bool no_wait = !!(req->flags & NOUVEAU_GEM_CPU_PREP_NOWAIT);
  	int ret = -EINVAL;
@@ -4205,37 +4958,53 @@ index f731c5f..0000000
 -		nv_wo32(dev, ctx, le32_to_cpu(cv->data[i].offset),
 -			le32_to_cpu(cv->data[i].value));
 -}
+diff --git a/drivers/gpu/drm/nouveau/nouveau_grctx.h b/drivers/gpu/drm/nouveau/nouveau_grctx.h
+index 5d39c4c..4a8ad13 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_grctx.h
++++ b/drivers/gpu/drm/nouveau/nouveau_grctx.h
+@@ -126,7 +126,7 @@ gr_def(struct nouveau_grctx *ctx, uint32_t reg, uint32_t val)
+ 	reg = (reg - 0x00400000) / 4;
+ 	reg = (reg - ctx->ctxprog_reg) + ctx->ctxvals_base;
+ 
+-	nv_wo32(ctx->dev, ctx->data, reg, val);
++	nv_wo32(ctx->data, reg * 4, val);
+ }
+ #endif
+ 
 diff --git a/drivers/gpu/drm/nouveau/nouveau_hw.c b/drivers/gpu/drm/nouveau/nouveau_hw.c
-index 7855b35..7b61368 100644
+index 7855b35..f8ec49b 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_hw.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_hw.c
-@@ -865,8 +865,12 @@ nv_save_state_ext(struct drm_device *dev, int head,
+@@ -865,8 +865,13 @@ nv_save_state_ext(struct drm_device *dev, int head,
  	rd_cio_state(dev, head, regp, NV_CIO_CRE_FF_INDEX);
  	rd_cio_state(dev, head, regp, NV_CIO_CRE_FFLWM__INDEX);
  	rd_cio_state(dev, head, regp, NV_CIO_CRE_21);
 -	if (dev_priv->card_type >= NV_30)
 +
-+	if (dev_priv->card_type >= NV_30) {
++	if (dev_priv->card_type >= NV_20)
  		rd_cio_state(dev, head, regp, NV_CIO_CRE_47);
++
++	if (dev_priv->card_type >= NV_30)
 +		rd_cio_state(dev, head, regp, 0x9f);
-+	}
 +
  	rd_cio_state(dev, head, regp, NV_CIO_CRE_49);
  	rd_cio_state(dev, head, regp, NV_CIO_CRE_HCUR_ADDR0_INDEX);
  	rd_cio_state(dev, head, regp, NV_CIO_CRE_HCUR_ADDR1_INDEX);
-@@ -971,8 +975,11 @@ nv_load_state_ext(struct drm_device *dev, int head,
+@@ -971,9 +976,13 @@ nv_load_state_ext(struct drm_device *dev, int head,
  	wr_cio_state(dev, head, regp, NV_CIO_CRE_ENH_INDEX);
  	wr_cio_state(dev, head, regp, NV_CIO_CRE_FF_INDEX);
  	wr_cio_state(dev, head, regp, NV_CIO_CRE_FFLWM__INDEX);
 -	if (dev_priv->card_type >= NV_30)
 +
-+	if (dev_priv->card_type >= NV_30) {
++	if (dev_priv->card_type >= NV_20)
  		wr_cio_state(dev, head, regp, NV_CIO_CRE_47);
-+		wr_cio_state(dev, head, regp, 0x9f);
-+	}
  
++	if (dev_priv->card_type >= NV_30)
++		wr_cio_state(dev, head, regp, 0x9f);
++
  	wr_cio_state(dev, head, regp, NV_CIO_CRE_49);
  	wr_cio_state(dev, head, regp, NV_CIO_CRE_HCUR_ADDR0_INDEX);
+ 	wr_cio_state(dev, head, regp, NV_CIO_CRE_HCUR_ADDR1_INDEX);
 diff --git a/drivers/gpu/drm/nouveau/nouveau_i2c.c b/drivers/gpu/drm/nouveau/nouveau_i2c.c
 index 316a3c7..8461485 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_i2c.c
@@ -4388,7 +5157,7 @@ index c8eaf7a..f71cb32 100644
  
  #endif /* __NOUVEAU_I2C_H__ */
 diff --git a/drivers/gpu/drm/nouveau/nouveau_irq.c b/drivers/gpu/drm/nouveau/nouveau_irq.c
-index 53360f1..794b0ee 100644
+index 53360f1..b8658a0 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_irq.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_irq.c
 @@ -49,7 +49,7 @@ nouveau_irq_preinstall(struct drm_device *dev)
@@ -4400,7 +5169,22 @@ index 53360f1..794b0ee 100644
  		INIT_WORK(&dev_priv->irq_work, nv50_display_irq_handler_bh);
  		INIT_WORK(&dev_priv->hpd_work, nv50_display_irq_hotplug_bh);
  		INIT_LIST_HEAD(&dev_priv->vbl_waiting);
-@@ -586,11 +586,11 @@ nouveau_pgraph_irq_handler(struct drm_device *dev)
+@@ -226,6 +226,14 @@ nouveau_fifo_irq_handler(struct drm_device *dev)
+ 			nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, 1);
+ 		}
+ 
++		if (dev_priv->card_type == NV_50) {
++			if (status & 0x00000010) {
++				nv50_fb_vm_trap(dev, 1, "PFIFO_BAR_FAULT");
++				status &= ~0x00000010;
++				nv_wr32(dev, 0x002100, 0x00000010);
++			}
++		}
++
+ 		if (status) {
+ 			NV_INFO(dev, "PFIFO_INTR 0x%08x - Ch %d\n",
+ 				status, chid);
+@@ -586,11 +594,11 @@ nouveau_pgraph_irq_handler(struct drm_device *dev)
  		}
  
  		if (status & NV_PGRAPH_INTR_CONTEXT_SWITCH) {
@@ -4414,8 +5198,140 @@ index 53360f1..794b0ee 100644
  		}
  
  		if (status) {
+@@ -605,40 +613,6 @@ nouveau_pgraph_irq_handler(struct drm_device *dev)
+ 	nv_wr32(dev, NV03_PMC_INTR_0, NV_PMC_INTR_0_PGRAPH_PENDING);
+ }
+ 
+-static void
+-nv50_pfb_vm_trap(struct drm_device *dev, int display, const char *name)
+-{
+-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	uint32_t trap[6];
+-	int i, ch;
+-	uint32_t idx = nv_rd32(dev, 0x100c90);
+-	if (idx & 0x80000000) {
+-		idx &= 0xffffff;
+-		if (display) {
+-			for (i = 0; i < 6; i++) {
+-				nv_wr32(dev, 0x100c90, idx | i << 24);
+-				trap[i] = nv_rd32(dev, 0x100c94);
+-			}
+-			for (ch = 0; ch < dev_priv->engine.fifo.channels; ch++) {
+-				struct nouveau_channel *chan = dev_priv->fifos[ch];
+-
+-				if (!chan || !chan->ramin)
+-					continue;
+-
+-				if (trap[1] == chan->ramin->instance >> 12)
+-					break;
+-			}
+-			NV_INFO(dev, "%s - VM: Trapped %s at %02x%04x%04x status %08x %08x channel %d\n",
+-					name, (trap[5]&0x100?"read":"write"),
+-					trap[5]&0xff, trap[4]&0xffff,
+-					trap[3]&0xffff, trap[0], trap[2], ch);
+-		}
+-		nv_wr32(dev, 0x100c90, idx | 0x80000000);
+-	} else if (display) {
+-		NV_INFO(dev, "%s - no VM fault?\n", name);
+-	}
+-}
+-
+ static struct nouveau_enum_names nv50_mp_exec_error_names[] =
+ {
+ 	{ 3, "STACK_UNDERFLOW" },
+@@ -711,7 +685,7 @@ nv50_pgraph_tp_trap(struct drm_device *dev, int type, uint32_t ustatus_old,
+ 		tps++;
+ 		switch (type) {
+ 		case 6: /* texture error... unknown for now */
+-			nv50_pfb_vm_trap(dev, display, name);
++			nv50_fb_vm_trap(dev, display, name);
+ 			if (display) {
+ 				NV_ERROR(dev, "magic set %d:\n", i);
+ 				for (r = ustatus_addr + 4; r <= ustatus_addr + 0x10; r += 4)
+@@ -734,7 +708,7 @@ nv50_pgraph_tp_trap(struct drm_device *dev, int type, uint32_t ustatus_old,
+ 			uint32_t e1c = nv_rd32(dev, ustatus_addr + 0x14);
+ 			uint32_t e20 = nv_rd32(dev, ustatus_addr + 0x18);
+ 			uint32_t e24 = nv_rd32(dev, ustatus_addr + 0x1c);
+-			nv50_pfb_vm_trap(dev, display, name);
++			nv50_fb_vm_trap(dev, display, name);
+ 			/* 2d engine destination */
+ 			if (ustatus & 0x00000010) {
+ 				if (display) {
+@@ -817,7 +791,7 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
+ 
+ 		/* Known to be triggered by screwed up NOTIFY and COND... */
+ 		if (ustatus & 0x00000001) {
+-			nv50_pfb_vm_trap(dev, display, "PGRAPH_TRAP_DISPATCH_FAULT");
++			nv50_fb_vm_trap(dev, display, "PGRAPH_TRAP_DISPATCH_FAULT");
+ 			nv_wr32(dev, 0x400500, 0);
+ 			if (nv_rd32(dev, 0x400808) & 0x80000000) {
+ 				if (display) {
+@@ -842,7 +816,7 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
+ 			ustatus &= ~0x00000001;
+ 		}
+ 		if (ustatus & 0x00000002) {
+-			nv50_pfb_vm_trap(dev, display, "PGRAPH_TRAP_DISPATCH_QUERY");
++			nv50_fb_vm_trap(dev, display, "PGRAPH_TRAP_DISPATCH_QUERY");
+ 			nv_wr32(dev, 0x400500, 0);
+ 			if (nv_rd32(dev, 0x40084c) & 0x80000000) {
+ 				if (display) {
+@@ -884,15 +858,15 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
+ 			NV_INFO(dev, "PGRAPH_TRAP_M2MF - no ustatus?\n");
+ 		}
+ 		if (ustatus & 0x00000001) {
+-			nv50_pfb_vm_trap(dev, display, "PGRAPH_TRAP_M2MF_NOTIFY");
++			nv50_fb_vm_trap(dev, display, "PGRAPH_TRAP_M2MF_NOTIFY");
+ 			ustatus &= ~0x00000001;
+ 		}
+ 		if (ustatus & 0x00000002) {
+-			nv50_pfb_vm_trap(dev, display, "PGRAPH_TRAP_M2MF_IN");
++			nv50_fb_vm_trap(dev, display, "PGRAPH_TRAP_M2MF_IN");
+ 			ustatus &= ~0x00000002;
+ 		}
+ 		if (ustatus & 0x00000004) {
+-			nv50_pfb_vm_trap(dev, display, "PGRAPH_TRAP_M2MF_OUT");
++			nv50_fb_vm_trap(dev, display, "PGRAPH_TRAP_M2MF_OUT");
+ 			ustatus &= ~0x00000004;
+ 		}
+ 		NV_INFO (dev, "PGRAPH_TRAP_M2MF - %08x %08x %08x %08x\n",
+@@ -917,7 +891,7 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
+ 			NV_INFO(dev, "PGRAPH_TRAP_VFETCH - no ustatus?\n");
+ 		}
+ 		if (ustatus & 0x00000001) {
+-			nv50_pfb_vm_trap(dev, display, "PGRAPH_TRAP_VFETCH_FAULT");
++			nv50_fb_vm_trap(dev, display, "PGRAPH_TRAP_VFETCH_FAULT");
+ 			NV_INFO (dev, "PGRAPH_TRAP_VFETCH_FAULT - %08x %08x %08x %08x\n",
+ 					nv_rd32(dev, 0x400c00),
+ 					nv_rd32(dev, 0x400c08),
+@@ -939,7 +913,7 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
+ 			NV_INFO(dev, "PGRAPH_TRAP_STRMOUT - no ustatus?\n");
+ 		}
+ 		if (ustatus & 0x00000001) {
+-			nv50_pfb_vm_trap(dev, display, "PGRAPH_TRAP_STRMOUT_FAULT");
++			nv50_fb_vm_trap(dev, display, "PGRAPH_TRAP_STRMOUT_FAULT");
+ 			NV_INFO (dev, "PGRAPH_TRAP_STRMOUT_FAULT - %08x %08x %08x %08x\n",
+ 					nv_rd32(dev, 0x401804),
+ 					nv_rd32(dev, 0x401808),
+@@ -964,7 +938,7 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
+ 			NV_INFO(dev, "PGRAPH_TRAP_CCACHE - no ustatus?\n");
+ 		}
+ 		if (ustatus & 0x00000001) {
+-			nv50_pfb_vm_trap(dev, display, "PGRAPH_TRAP_CCACHE_FAULT");
++			nv50_fb_vm_trap(dev, display, "PGRAPH_TRAP_CCACHE_FAULT");
+ 			NV_INFO (dev, "PGRAPH_TRAP_CCACHE_FAULT - %08x %08x %08x %08x %08x %08x %08x\n",
+ 					nv_rd32(dev, 0x405800),
+ 					nv_rd32(dev, 0x405804),
+@@ -986,7 +960,7 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
+ 	 * remaining, so try to handle it anyway. Perhaps related to that
+ 	 * unknown DMA slot on tesla? */
+ 	if (status & 0x20) {
+-		nv50_pfb_vm_trap(dev, display, "PGRAPH_TRAP_UNKC04");
++		nv50_fb_vm_trap(dev, display, "PGRAPH_TRAP_UNKC04");
+ 		ustatus = nv_rd32(dev, 0x402000) & 0x7fffffff;
+ 		if (display)
+ 			NV_INFO(dev, "PGRAPH_TRAP_UNKC04 - Unhandled ustatus 0x%08x\n", ustatus);
 diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
-index c1fd42b..db442c1 100644
+index c1fd42b..ee799c2 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
 @@ -35,162 +35,6 @@
@@ -4589,7 +5505,15 @@ index c1fd42b..db442c1 100644
  	while (size) {
  		unsigned offset_h = upper_32_bits(phys);
  		unsigned offset_l = lower_32_bits(phys);
-@@ -331,36 +174,12 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size,
+@@ -326,41 +169,18 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size,
+ 			virt  += (end - pte);
+ 
+ 			while (pte < end) {
+-				nv_wo32(dev, pgt, pte++, offset_l);
+-				nv_wo32(dev, pgt, pte++, offset_h);
++				nv_wo32(pgt, (pte * 4) + 0, offset_l);
++				nv_wo32(pgt, (pte * 4) + 4, offset_h);
++				pte += 2;
  			}
  		}
  	}
@@ -4631,7 +5555,7 @@ index c1fd42b..db442c1 100644
  	return 0;
  }
  
-@@ -374,7 +193,6 @@ nv50_mem_vm_unbind(struct drm_device *dev, uint64_t virt, uint32_t size)
+@@ -374,7 +194,6 @@ nv50_mem_vm_unbind(struct drm_device *dev, uint64_t virt, uint32_t size)
  	virt -= dev_priv->vm_vram_base;
  	pages = (size >> 16) << 1;
  
@@ -4639,10 +5563,13 @@ index c1fd42b..db442c1 100644
  	while (pages) {
  		pgt = dev_priv->vm_vram_pt[virt >> 29];
  		pte = (virt & 0x1ffe0000ULL) >> 15;
-@@ -388,57 +206,19 @@ nv50_mem_vm_unbind(struct drm_device *dev, uint64_t virt, uint32_t size)
- 		while (pte < end)
- 			nv_wo32(dev, pgt, pte++, 0);
- 	}
+@@ -385,60 +204,24 @@ nv50_mem_vm_unbind(struct drm_device *dev, uint64_t virt, uint32_t size)
+ 		pages -= (end - pte);
+ 		virt  += (end - pte) << 15;
+ 
+-		while (pte < end)
+-			nv_wo32(dev, pgt, pte++, 0);
+-	}
 -	dev_priv->engine.instmem.finish_access(dev);
 -
 -	nv_wr32(dev, 0x100c80, 0x00050001);
@@ -4664,7 +5591,11 @@ index c1fd42b..db442c1 100644
 -		NV_ERROR(dev, "timeout: (0x100c80 & 1) == 0 (2)\n");
 -		NV_ERROR(dev, "0x100c80 = 0x%08x\n", nv_rd32(dev, 0x100c80));
 -		return;
--	}
++		while (pte < end) {
++			nv_wo32(pgt, (pte * 4), 0);
++			pte++;
++		}
+ 	}
 +	dev_priv->engine.instmem.flush(dev);
  
 -	nv_wr32(dev, 0x100c80, 0x00060001);
@@ -4704,7 +5635,7 @@ index c1fd42b..db442c1 100644
  {
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
  
-@@ -449,8 +229,7 @@ void nouveau_mem_close(struct drm_device *dev)
+@@ -449,8 +232,7 @@ void nouveau_mem_close(struct drm_device *dev)
  
  	nouveau_ttm_global_release(dev_priv);
  
@@ -4714,7 +5645,7 @@ index c1fd42b..db442c1 100644
  		struct drm_agp_mem *entry, *tempe;
  
  		/* Remove AGP resources, but leave dev->agp
-@@ -470,29 +249,29 @@ void nouveau_mem_close(struct drm_device *dev)
+@@ -470,29 +252,29 @@ void nouveau_mem_close(struct drm_device *dev)
  		dev->agp->enabled = 0;
  	}
  
@@ -4752,34 +5683,106 @@ index c1fd42b..db442c1 100644
  		return 4 * 1024 * 1024;
  	}
  
-@@ -536,12 +315,22 @@ nouveau_mem_detect(struct drm_device *dev)
- 	} else
- 	if (dev_priv->flags & (NV_NFORCE | NV_NFORCE2)) {
- 		dev_priv->vram_size = nouveau_mem_detect_nforce(dev);
--	} else {
--		dev_priv->vram_size  = nv_rd32(dev, NV04_FIFO_DATA);
--		dev_priv->vram_size &= NV10_FIFO_DATA_RAM_AMOUNT_MB_MASK;
--		if (dev_priv->chipset == 0xaa || dev_priv->chipset == 0xac)
-+	} else
-+	if (dev_priv->card_type < NV_50) {
-+		dev_priv->vram_size  = nv_rd32(dev, NV04_PFB_FIFO_DATA);
-+		dev_priv->vram_size &= NV10_PFB_FIFO_DATA_RAM_AMOUNT_MB_MASK;
-+	} else
-+	if (dev_priv->card_type < NV_C0) {
-+		dev_priv->vram_size = nv_rd32(dev, NV04_PFB_FIFO_DATA);
-+		dev_priv->vram_size |= (dev_priv->vram_size & 0xff) << 32;
-+		dev_priv->vram_size &= 0xffffffff00ll;
-+		if (dev_priv->chipset == 0xaa || dev_priv->chipset == 0xac) {
- 			dev_priv->vram_sys_base = nv_rd32(dev, 0x100e10);
- 			dev_priv->vram_sys_base <<= 12;
-+		}
-+	} else {
-+		dev_priv->vram_size  = nv_rd32(dev, 0x10f20c) << 20;
+@@ -525,7 +307,61 @@ nouveau_mem_detect_nforce(struct drm_device *dev)
+ 	return 0;
+ }
+ 
+-/* returns the amount of FB ram in bytes */
++static void
++nv50_vram_preinit(struct drm_device *dev)
++{
++	struct drm_nouveau_private *dev_priv = dev->dev_private;
++	int i, parts, colbits, rowbitsa, rowbitsb, banks;
++	u64 rowsize, predicted;
++	u32 r0, r4, rt, ru;
++
++	r0 = nv_rd32(dev, 0x100200);
++	r4 = nv_rd32(dev, 0x100204);
++	rt = nv_rd32(dev, 0x100250);
++	ru = nv_rd32(dev, 0x001540);
++	NV_DEBUG(dev, "memcfg 0x%08x 0x%08x 0x%08x 0x%08x\n", r0, r4, rt, ru);
++
++	for (i = 0, parts = 0; i < 8; i++) {
++		if (ru & (0x00010000 << i))
++			parts++;
++	}
++
++	colbits  =  (r4 & 0x0000f000) >> 12;
++	rowbitsa = ((r4 & 0x000f0000) >> 16) + 8;
++	rowbitsb = ((r4 & 0x00f00000) >> 20) + 8;
++	banks    = ((r4 & 0x01000000) ? 8 : 4);
++
++	rowsize = parts * banks * (1 << colbits) * 8;
++	predicted = rowsize << rowbitsa;
++	if (r0 & 0x00000004)
++		predicted += rowsize << rowbitsb;
++
++	if (predicted != dev_priv->vram_size) {
++		NV_WARN(dev, "memory controller reports %dMiB VRAM\n",
++			(u32)(dev_priv->vram_size >> 20));
++		NV_WARN(dev, "we calculated %dMiB VRAM\n",
++			(u32)(predicted >> 20));
++	}
++
++	dev_priv->vram_rblock_size = rowsize >> 12;
++	if (rt & 1)
++		dev_priv->vram_rblock_size *= 3;
++
++	NV_DEBUG(dev, "rblock %lld bytes\n",
++		 (u64)dev_priv->vram_rblock_size << 12);
++}
++
++static void
++nvaa_vram_preinit(struct drm_device *dev)
++{
++	struct drm_nouveau_private *dev_priv = dev->dev_private;
++
++	/* To our knowledge, there's no large scale reordering of pages
++	 * that occurs on IGP chipsets.
++	 */
++	dev_priv->vram_rblock_size = 1;
++}
++
+ int
+ nouveau_mem_detect(struct drm_device *dev)
+ {
+@@ -536,12 +372,31 @@ nouveau_mem_detect(struct drm_device *dev)
+ 	} else
+ 	if (dev_priv->flags & (NV_NFORCE | NV_NFORCE2)) {
+ 		dev_priv->vram_size = nouveau_mem_detect_nforce(dev);
+-	} else {
+-		dev_priv->vram_size  = nv_rd32(dev, NV04_FIFO_DATA);
+-		dev_priv->vram_size &= NV10_FIFO_DATA_RAM_AMOUNT_MB_MASK;
+-		if (dev_priv->chipset == 0xaa || dev_priv->chipset == 0xac)
++	} else
++	if (dev_priv->card_type < NV_50) {
++		dev_priv->vram_size  = nv_rd32(dev, NV04_PFB_FIFO_DATA);
++		dev_priv->vram_size &= NV10_PFB_FIFO_DATA_RAM_AMOUNT_MB_MASK;
++	} else
++	if (dev_priv->card_type < NV_C0) {
++		dev_priv->vram_size = nv_rd32(dev, NV04_PFB_FIFO_DATA);
++		dev_priv->vram_size |= (dev_priv->vram_size & 0xff) << 32;
++		dev_priv->vram_size &= 0xffffffff00ll;
++
++		switch (dev_priv->chipset) {
++		case 0xaa:
++		case 0xac:
++		case 0xaf:
+ 			dev_priv->vram_sys_base = nv_rd32(dev, 0x100e10);
+ 			dev_priv->vram_sys_base <<= 12;
++			nvaa_vram_preinit(dev);
++			break;
++		default:
++			nv50_vram_preinit(dev);
++			break;
++		}
++	} else {
++		dev_priv->vram_size  = nv_rd32(dev, 0x10f20c) << 20;
 +		dev_priv->vram_size *= nv_rd32(dev, 0x121c74);
  	}
  
  	NV_INFO(dev, "Detected %dMiB VRAM\n", (int)(dev_priv->vram_size >> 20));
-@@ -555,18 +344,36 @@ nouveau_mem_detect(struct drm_device *dev)
+@@ -555,18 +410,37 @@ nouveau_mem_detect(struct drm_device *dev)
  	return -ENOMEM;
  }
  
@@ -4796,7 +5799,8 @@ index c1fd42b..db442c1 100644
 +	/* First of all, disable fast writes, otherwise if it's
 +	 * already enabled in the AGP bridge and we disable the card's
 +	 * AGP controller we might be locking ourselves out of it. */
-+	if (nv_rd32(dev, NV04_PBUS_PCI_NV_19) & PCI_AGP_COMMAND_FW) {
++	if ((nv_rd32(dev, NV04_PBUS_PCI_NV_19) |
++	     dev->agp->mode) & PCI_AGP_COMMAND_FW) {
 +		struct drm_agp_info info;
 +		struct drm_agp_mode mode;
 +
@@ -4822,7 +5826,7 @@ index c1fd42b..db442c1 100644
  
  	/* power cycle pgraph, if enabled */
  	pmc_enable = nv_rd32(dev, NV03_PMC_ENABLE);
-@@ -578,11 +385,12 @@ static void nouveau_mem_reset_agp(struct drm_device *dev)
+@@ -578,11 +452,12 @@ static void nouveau_mem_reset_agp(struct drm_device *dev)
  	}
  
  	/* and restore (gives effect of resetting AGP) */
@@ -4837,7 +5841,7 @@ index c1fd42b..db442c1 100644
  int
  nouveau_mem_init_agp(struct drm_device *dev)
  {
-@@ -592,11 +400,6 @@ nouveau_mem_init_agp(struct drm_device *dev)
+@@ -592,11 +467,6 @@ nouveau_mem_init_agp(struct drm_device *dev)
  	struct drm_agp_mode mode;
  	int ret;
  
@@ -4849,7 +5853,7 @@ index c1fd42b..db442c1 100644
  	if (!dev->agp->acquired) {
  		ret = drm_agp_acquire(dev);
  		if (ret) {
-@@ -605,6 +408,8 @@ nouveau_mem_init_agp(struct drm_device *dev)
+@@ -605,6 +475,8 @@ nouveau_mem_init_agp(struct drm_device *dev)
  		}
  	}
  
@@ -4858,7 +5862,7 @@ index c1fd42b..db442c1 100644
  	ret = drm_agp_info(dev, &info);
  	if (ret) {
  		NV_ERROR(dev, "Unable to get AGP info: %d\n", ret);
-@@ -659,8 +464,6 @@ nouveau_mem_init(struct drm_device *dev)
+@@ -659,8 +531,6 @@ nouveau_mem_init(struct drm_device *dev)
  		return ret;
  	}
  
@@ -4867,7 +5871,7 @@ index c1fd42b..db442c1 100644
  	spin_lock_init(&dev_priv->tile.lock);
  
  	dev_priv->fb_available_size = dev_priv->vram_size;
-@@ -692,7 +495,7 @@ nouveau_mem_init(struct drm_device *dev)
+@@ -692,7 +562,7 @@ nouveau_mem_init(struct drm_device *dev)
  
  	/* GART */
  #if !defined(__powerpc__) && !defined(__ia64__)
@@ -4970,60 +5974,160 @@ index 9537f3e..3ec181f 100644
  
  	ret = nouveau_notifier_alloc(chan, na->handle, na->size, &na->offset);
 diff --git a/drivers/gpu/drm/nouveau/nouveau_object.c b/drivers/gpu/drm/nouveau/nouveau_object.c
-index e7c100b..4bf6b33 100644
+index e7c100b..6aedc3b 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_object.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_object.c
-@@ -132,7 +132,6 @@ nouveau_ramht_insert(struct drm_device *dev, struct nouveau_gpuobj_ref *ref)
- 		}
- 	}
+@@ -34,6 +34,7 @@
+ #include "drm.h"
+ #include "nouveau_drv.h"
+ #include "nouveau_drm.h"
++#include "nouveau_ramht.h"
  
--	instmem->prepare_access(dev, true);
- 	co = ho = nouveau_ramht_hash_handle(dev, chan->id, ref->handle);
- 	do {
- 		if (!nouveau_ramht_entry_valid(dev, ramht, co)) {
-@@ -143,7 +142,7 @@ nouveau_ramht_insert(struct drm_device *dev, struct nouveau_gpuobj_ref *ref)
- 			nv_wo32(dev, ramht, (co + 4)/4, ctx);
+ /* NVidia uses context objects to drive drawing operations.
  
- 			list_add_tail(&ref->list, &chan->ramht_refs);
+@@ -65,141 +66,6 @@
+    The key into the hash table depends on the object handle and channel id and
+    is given as:
+ */
+-static uint32_t
+-nouveau_ramht_hash_handle(struct drm_device *dev, int channel, uint32_t handle)
+-{
+-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	uint32_t hash = 0;
+-	int i;
+-
+-	NV_DEBUG(dev, "ch%d handle=0x%08x\n", channel, handle);
+-
+-	for (i = 32; i > 0; i -= dev_priv->ramht_bits) {
+-		hash ^= (handle & ((1 << dev_priv->ramht_bits) - 1));
+-		handle >>= dev_priv->ramht_bits;
+-	}
+-
+-	if (dev_priv->card_type < NV_50)
+-		hash ^= channel << (dev_priv->ramht_bits - 4);
+-	hash <<= 3;
+-
+-	NV_DEBUG(dev, "hash=0x%08x\n", hash);
+-	return hash;
+-}
+-
+-static int
+-nouveau_ramht_entry_valid(struct drm_device *dev, struct nouveau_gpuobj *ramht,
+-			  uint32_t offset)
+-{
+-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	uint32_t ctx = nv_ro32(dev, ramht, (offset + 4)/4);
+-
+-	if (dev_priv->card_type < NV_40)
+-		return ((ctx & NV_RAMHT_CONTEXT_VALID) != 0);
+-	return (ctx != 0);
+-}
+-
+-static int
+-nouveau_ramht_insert(struct drm_device *dev, struct nouveau_gpuobj_ref *ref)
+-{
+-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	struct nouveau_instmem_engine *instmem = &dev_priv->engine.instmem;
+-	struct nouveau_channel *chan = ref->channel;
+-	struct nouveau_gpuobj *ramht = chan->ramht ? chan->ramht->gpuobj : NULL;
+-	uint32_t ctx, co, ho;
+-
+-	if (!ramht) {
+-		NV_ERROR(dev, "No hash table!\n");
+-		return -EINVAL;
+-	}
+-
+-	if (dev_priv->card_type < NV_40) {
+-		ctx = NV_RAMHT_CONTEXT_VALID | (ref->instance >> 4) |
+-		      (chan->id << NV_RAMHT_CONTEXT_CHANNEL_SHIFT) |
+-		      (ref->gpuobj->engine << NV_RAMHT_CONTEXT_ENGINE_SHIFT);
+-	} else
+-	if (dev_priv->card_type < NV_50) {
+-		ctx = (ref->instance >> 4) |
+-		      (chan->id << NV40_RAMHT_CONTEXT_CHANNEL_SHIFT) |
+-		      (ref->gpuobj->engine << NV40_RAMHT_CONTEXT_ENGINE_SHIFT);
+-	} else {
+-		if (ref->gpuobj->engine == NVOBJ_ENGINE_DISPLAY) {
+-			ctx = (ref->instance << 10) | 2;
+-		} else {
+-			ctx = (ref->instance >> 4) |
+-			      ((ref->gpuobj->engine <<
+-				NV40_RAMHT_CONTEXT_ENGINE_SHIFT));
+-		}
+-	}
+-
+-	instmem->prepare_access(dev, true);
+-	co = ho = nouveau_ramht_hash_handle(dev, chan->id, ref->handle);
+-	do {
+-		if (!nouveau_ramht_entry_valid(dev, ramht, co)) {
+-			NV_DEBUG(dev,
+-				 "insert ch%d 0x%08x: h=0x%08x, c=0x%08x\n",
+-				 chan->id, co, ref->handle, ctx);
+-			nv_wo32(dev, ramht, (co + 0)/4, ref->handle);
+-			nv_wo32(dev, ramht, (co + 4)/4, ctx);
+-
+-			list_add_tail(&ref->list, &chan->ramht_refs);
 -			instmem->finish_access(dev);
-+			instmem->flush(dev);
- 			return 0;
- 		}
- 		NV_DEBUG(dev, "collision ch%d 0x%08x: h=0x%08x\n",
-@@ -153,7 +152,6 @@ nouveau_ramht_insert(struct drm_device *dev, struct nouveau_gpuobj_ref *ref)
- 		if (co >= dev_priv->ramht_size)
- 			co = 0;
- 	} while (co != ho);
+-			return 0;
+-		}
+-		NV_DEBUG(dev, "collision ch%d 0x%08x: h=0x%08x\n",
+-			 chan->id, co, nv_ro32(dev, ramht, co/4));
+-
+-		co += 8;
+-		if (co >= dev_priv->ramht_size)
+-			co = 0;
+-	} while (co != ho);
 -	instmem->finish_access(dev);
- 
- 	NV_ERROR(dev, "RAMHT space exhausted. ch=%d\n", chan->id);
- 	return -ENOMEM;
-@@ -173,7 +171,6 @@ nouveau_ramht_remove(struct drm_device *dev, struct nouveau_gpuobj_ref *ref)
- 		return;
- 	}
- 
+-
+-	NV_ERROR(dev, "RAMHT space exhausted. ch=%d\n", chan->id);
+-	return -ENOMEM;
+-}
+-
+-static void
+-nouveau_ramht_remove(struct drm_device *dev, struct nouveau_gpuobj_ref *ref)
+-{
+-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	struct nouveau_instmem_engine *instmem = &dev_priv->engine.instmem;
+-	struct nouveau_channel *chan = ref->channel;
+-	struct nouveau_gpuobj *ramht = chan->ramht ? chan->ramht->gpuobj : NULL;
+-	uint32_t co, ho;
+-
+-	if (!ramht) {
+-		NV_ERROR(dev, "No hash table!\n");
+-		return;
+-	}
+-
 -	instmem->prepare_access(dev, true);
- 	co = ho = nouveau_ramht_hash_handle(dev, chan->id, ref->handle);
- 	do {
- 		if (nouveau_ramht_entry_valid(dev, ramht, co) &&
-@@ -186,7 +183,7 @@ nouveau_ramht_remove(struct drm_device *dev, struct nouveau_gpuobj_ref *ref)
- 			nv_wo32(dev, ramht, (co + 4)/4, 0x00000000);
- 
- 			list_del(&ref->list);
+-	co = ho = nouveau_ramht_hash_handle(dev, chan->id, ref->handle);
+-	do {
+-		if (nouveau_ramht_entry_valid(dev, ramht, co) &&
+-		    (ref->handle == nv_ro32(dev, ramht, (co/4)))) {
+-			NV_DEBUG(dev,
+-				 "remove ch%d 0x%08x: h=0x%08x, c=0x%08x\n",
+-				 chan->id, co, ref->handle,
+-				 nv_ro32(dev, ramht, (co + 4)));
+-			nv_wo32(dev, ramht, (co + 0)/4, 0x00000000);
+-			nv_wo32(dev, ramht, (co + 4)/4, 0x00000000);
+-
+-			list_del(&ref->list);
 -			instmem->finish_access(dev);
-+			instmem->flush(dev);
- 			return;
- 		}
- 
-@@ -195,7 +192,6 @@ nouveau_ramht_remove(struct drm_device *dev, struct nouveau_gpuobj_ref *ref)
- 			co = 0;
- 	} while (co != ho);
- 	list_del(&ref->list);
+-			return;
+-		}
+-
+-		co += 8;
+-		if (co >= dev_priv->ramht_size)
+-			co = 0;
+-	} while (co != ho);
+-	list_del(&ref->list);
 -	instmem->finish_access(dev);
+-
+-	NV_ERROR(dev, "RAMHT entry not found. ch=%d, handle=0x%08x\n",
+-		 chan->id, ref->handle);
+-}
  
- 	NV_ERROR(dev, "RAMHT entry not found. ch=%d, handle=0x%08x\n",
- 		 chan->id, ref->handle);
-@@ -209,7 +205,7 @@ nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan,
+ int
+ nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan,
+@@ -209,7 +75,7 @@ nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan,
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
  	struct nouveau_engine *engine = &dev_priv->engine;
  	struct nouveau_gpuobj *gpuobj;
@@ -5032,7 +6136,15 @@ index e7c100b..4bf6b33 100644
  	int ret;
  
  	NV_DEBUG(dev, "ch%d size=%u align=%d flags=0x%08x\n",
-@@ -233,25 +229,12 @@ nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan,
+@@ -222,6 +88,7 @@ nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan,
+ 	if (!gpuobj)
+ 		return -ENOMEM;
+ 	NV_DEBUG(dev, "gpuobj %p\n", gpuobj);
++	gpuobj->dev = dev;
+ 	gpuobj->flags = flags;
+ 	gpuobj->im_channel = chan;
+ 
+@@ -233,25 +100,12 @@ nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan,
  	 * available.
  	 */
  	if (chan) {
@@ -5050,18 +6162,18 @@ index e7c100b..4bf6b33 100644
  		NV_DEBUG(dev, "global heap\n");
 -		pramin = dev_priv->ramin_heap;
 -	}
--
++		pramin = &dev_priv->ramin_heap;
+ 
 -	if (!pramin) {
 -		NV_ERROR(dev, "No PRAMIN heap!\n");
 -		return -EINVAL;
 -	}
-+		pramin = &dev_priv->ramin_heap;
- 
+-
 -	if (!chan) {
  		ret = engine->instmem.populate(dev, gpuobj, &size);
  		if (ret) {
  			nouveau_gpuobj_del(dev, &gpuobj);
-@@ -260,9 +243,10 @@ nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan,
+@@ -260,9 +114,10 @@ nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan,
  	}
  
  	/* Allocate a chunk of the PRAMIN aperture */
@@ -5075,31 +6187,33 @@ index e7c100b..4bf6b33 100644
  	if (!gpuobj->im_pramin) {
  		nouveau_gpuobj_del(dev, &gpuobj);
  		return -ENOMEM;
-@@ -279,10 +263,9 @@ nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan,
+@@ -279,10 +134,9 @@ nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan,
  	if (gpuobj->flags & NVOBJ_FLAG_ZERO_ALLOC) {
  		int i;
  
 -		engine->instmem.prepare_access(dev, true);
  		for (i = 0; i < gpuobj->im_pramin->size; i += 4)
- 			nv_wo32(dev, gpuobj, i/4, 0);
+-			nv_wo32(dev, gpuobj, i/4, 0);
 -		engine->instmem.finish_access(dev);
++			nv_wo32(gpuobj, i, 0);
 +		engine->instmem.flush(dev);
  	}
  
  	*gpuobj_ret = gpuobj;
-@@ -370,10 +353,9 @@ nouveau_gpuobj_del(struct drm_device *dev, struct nouveau_gpuobj **pgpuobj)
+@@ -370,10 +224,9 @@ nouveau_gpuobj_del(struct drm_device *dev, struct nouveau_gpuobj **pgpuobj)
  	}
  
  	if (gpuobj->im_pramin && (gpuobj->flags & NVOBJ_FLAG_ZERO_FREE)) {
 -		engine->instmem.prepare_access(dev, true);
  		for (i = 0; i < gpuobj->im_pramin->size; i += 4)
- 			nv_wo32(dev, gpuobj, i/4, 0);
+-			nv_wo32(dev, gpuobj, i/4, 0);
 -		engine->instmem.finish_access(dev);
++			nv_wo32(gpuobj, i, 0);
 +		engine->instmem.flush(dev);
  	}
  
  	if (gpuobj->dtor)
-@@ -386,7 +368,7 @@ nouveau_gpuobj_del(struct drm_device *dev, struct nouveau_gpuobj **pgpuobj)
+@@ -386,7 +239,7 @@ nouveau_gpuobj_del(struct drm_device *dev, struct nouveau_gpuobj **pgpuobj)
  		if (gpuobj->flags & NVOBJ_FLAG_FAKE)
  			kfree(gpuobj->im_pramin);
  		else
@@ -5108,7 +6222,14 @@ index e7c100b..4bf6b33 100644
  	}
  
  	list_del(&gpuobj->list);
-@@ -589,7 +571,7 @@ nouveau_gpuobj_new_fake(struct drm_device *dev, uint32_t p_offset,
+@@ -583,13 +436,14 @@ nouveau_gpuobj_new_fake(struct drm_device *dev, uint32_t p_offset,
+ 	if (!gpuobj)
+ 		return -ENOMEM;
+ 	NV_DEBUG(dev, "gpuobj %p\n", gpuobj);
++	gpuobj->dev = dev;
+ 	gpuobj->im_channel = NULL;
+ 	gpuobj->flags      = flags | NVOBJ_FLAG_FAKE;
+ 
  	list_add_tail(&gpuobj->list, &dev_priv->gpuobj_list);
  
  	if (p_offset != ~0) {
@@ -5117,19 +6238,20 @@ index e7c100b..4bf6b33 100644
  					    GFP_KERNEL);
  		if (!gpuobj->im_pramin) {
  			nouveau_gpuobj_del(dev, &gpuobj);
-@@ -605,10 +587,9 @@ nouveau_gpuobj_new_fake(struct drm_device *dev, uint32_t p_offset,
+@@ -605,10 +459,9 @@ nouveau_gpuobj_new_fake(struct drm_device *dev, uint32_t p_offset,
  	}
  
  	if (gpuobj->flags & NVOBJ_FLAG_ZERO_ALLOC) {
 -		dev_priv->engine.instmem.prepare_access(dev, true);
  		for (i = 0; i < gpuobj->im_pramin->size; i += 4)
- 			nv_wo32(dev, gpuobj, i/4, 0);
+-			nv_wo32(dev, gpuobj, i/4, 0);
 -		dev_priv->engine.instmem.finish_access(dev);
++			nv_wo32(gpuobj, i, 0);
 +		dev_priv->engine.instmem.flush(dev);
  	}
  
  	if (pref) {
-@@ -696,8 +677,6 @@ nouveau_gpuobj_dma_new(struct nouveau_channel *chan, int class,
+@@ -696,8 +549,6 @@ nouveau_gpuobj_dma_new(struct nouveau_channel *chan, int class,
  		return ret;
  	}
  
@@ -5138,8 +6260,43 @@ index e7c100b..4bf6b33 100644
  	if (dev_priv->card_type < NV_50) {
  		uint32_t frame, adjust, pte_flags = 0;
  
-@@ -734,7 +713,7 @@ nouveau_gpuobj_dma_new(struct nouveau_channel *chan, int class,
- 		nv_wo32(dev, *gpuobj, 5, flags5);
+@@ -706,14 +557,12 @@ nouveau_gpuobj_dma_new(struct nouveau_channel *chan, int class,
+ 		adjust = offset &  0x00000fff;
+ 		frame  = offset & ~0x00000fff;
+ 
+-		nv_wo32(dev, *gpuobj, 0, ((1<<12) | (1<<13) |
+-				(adjust << 20) |
+-				 (access << 14) |
+-				 (target << 16) |
+-				  class));
+-		nv_wo32(dev, *gpuobj, 1, size - 1);
+-		nv_wo32(dev, *gpuobj, 2, frame | pte_flags);
+-		nv_wo32(dev, *gpuobj, 3, frame | pte_flags);
++		nv_wo32(*gpuobj,  0, ((1<<12) | (1<<13) | (adjust << 20) |
++				      (access << 14) | (target << 16) |
++				      class));
++		nv_wo32(*gpuobj,  4, size - 1);
++		nv_wo32(*gpuobj,  8, frame | pte_flags);
++		nv_wo32(*gpuobj, 12, frame | pte_flags);
+ 	} else {
+ 		uint64_t limit = offset + size - 1;
+ 		uint32_t flags0, flags5;
+@@ -726,15 +575,15 @@ nouveau_gpuobj_dma_new(struct nouveau_channel *chan, int class,
+ 			flags5 = 0x00080000;
+ 		}
+ 
+-		nv_wo32(dev, *gpuobj, 0, flags0 | class);
+-		nv_wo32(dev, *gpuobj, 1, lower_32_bits(limit));
+-		nv_wo32(dev, *gpuobj, 2, lower_32_bits(offset));
+-		nv_wo32(dev, *gpuobj, 3, ((upper_32_bits(limit) & 0xff) << 24) |
+-					(upper_32_bits(offset) & 0xff));
+-		nv_wo32(dev, *gpuobj, 5, flags5);
++		nv_wo32(*gpuobj,  0, flags0 | class);
++		nv_wo32(*gpuobj,  4, lower_32_bits(limit));
++		nv_wo32(*gpuobj,  8, lower_32_bits(offset));
++		nv_wo32(*gpuobj, 12, ((upper_32_bits(limit) & 0xff) << 24) |
++				      (upper_32_bits(offset) & 0xff));
++		nv_wo32(*gpuobj, 20, flags5);
  	}
  
 -	instmem->finish_access(dev);
@@ -5147,15 +6304,40 @@ index e7c100b..4bf6b33 100644
  
  	(*gpuobj)->engine = NVOBJ_ENGINE_SW;
  	(*gpuobj)->class  = class;
-@@ -849,7 +828,6 @@ nouveau_gpuobj_gr_new(struct nouveau_channel *chan, int class,
+@@ -849,32 +698,31 @@ nouveau_gpuobj_gr_new(struct nouveau_channel *chan, int class,
  		return ret;
  	}
  
 -	dev_priv->engine.instmem.prepare_access(dev, true);
  	if (dev_priv->card_type >= NV_50) {
- 		nv_wo32(dev, *gpuobj, 0, class);
- 		nv_wo32(dev, *gpuobj, 5, 0x00010000);
-@@ -874,7 +852,7 @@ nouveau_gpuobj_gr_new(struct nouveau_channel *chan, int class,
+-		nv_wo32(dev, *gpuobj, 0, class);
+-		nv_wo32(dev, *gpuobj, 5, 0x00010000);
++		nv_wo32(*gpuobj,  0, class);
++		nv_wo32(*gpuobj, 20, 0x00010000);
+ 	} else {
+ 		switch (class) {
+ 		case NV_CLASS_NULL:
+-			nv_wo32(dev, *gpuobj, 0, 0x00001030);
+-			nv_wo32(dev, *gpuobj, 1, 0xFFFFFFFF);
++			nv_wo32(*gpuobj, 0, 0x00001030);
++			nv_wo32(*gpuobj, 4, 0xFFFFFFFF);
+ 			break;
+ 		default:
+ 			if (dev_priv->card_type >= NV_40) {
+-				nv_wo32(dev, *gpuobj, 0, class);
++				nv_wo32(*gpuobj, 0, class);
+ #ifdef __BIG_ENDIAN
+-				nv_wo32(dev, *gpuobj, 2, 0x01000000);
++				nv_wo32(*gpuobj, 8, 0x01000000);
+ #endif
+ 			} else {
+ #ifdef __BIG_ENDIAN
+-				nv_wo32(dev, *gpuobj, 0, class | 0x00080000);
++				nv_wo32(*gpuobj, 0, class | 0x00080000);
+ #else
+-				nv_wo32(dev, *gpuobj, 0, class);
++				nv_wo32(*gpuobj, 0, class);
+ #endif
  			}
  		}
  	}
@@ -5164,7 +6346,7 @@ index e7c100b..4bf6b33 100644
  
  	(*gpuobj)->engine = NVOBJ_ENGINE_GR;
  	(*gpuobj)->class  = class;
-@@ -920,6 +898,7 @@ nouveau_gpuobj_channel_init_pramin(struct nouveau_channel *chan)
+@@ -920,6 +768,7 @@ nouveau_gpuobj_channel_init_pramin(struct nouveau_channel *chan)
  	base = 0;
  
  	/* PGRAPH context */
@@ -5172,7 +6354,7 @@ index e7c100b..4bf6b33 100644
  
  	if (dev_priv->card_type == NV_50) {
  		/* Various fixed table thingos */
-@@ -930,12 +909,8 @@ nouveau_gpuobj_channel_init_pramin(struct nouveau_channel *chan)
+@@ -930,12 +779,8 @@ nouveau_gpuobj_channel_init_pramin(struct nouveau_channel *chan)
  		size += 0x8000;
  		/* RAMFC */
  		size += 0x1000;
@@ -5185,7 +6367,7 @@ index e7c100b..4bf6b33 100644
  	ret = nouveau_gpuobj_new_ref(dev, NULL, NULL, 0, size, 0x1000, 0,
  				     &chan->ramin);
  	if (ret) {
-@@ -944,8 +919,7 @@ nouveau_gpuobj_channel_init_pramin(struct nouveau_channel *chan)
+@@ -944,8 +789,7 @@ nouveau_gpuobj_channel_init_pramin(struct nouveau_channel *chan)
  	}
  	pramin = chan->ramin->gpuobj;
  
@@ -5195,7 +6377,7 @@ index e7c100b..4bf6b33 100644
  	if (ret) {
  		NV_ERROR(dev, "Error creating PRAMIN heap: %d\n", ret);
  		nouveau_gpuobj_ref_del(dev, &chan->ramin);
-@@ -969,15 +943,11 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
+@@ -969,15 +813,11 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
  
  	NV_DEBUG(dev, "ch%d vram=0x%08x tt=0x%08x\n", chan->id, vram_h, tt_h);
  
@@ -5216,7 +6398,7 @@ index e7c100b..4bf6b33 100644
  	}
  
  	/* NV50 VM
-@@ -988,17 +958,13 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
+@@ -988,50 +828,42 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
  	if (dev_priv->card_type >= NV_50) {
  		uint32_t vm_offset, pde;
  
@@ -5233,9 +6415,14 @@ index e7c100b..4bf6b33 100644
  			return ret;
 -		}
  		for (i = 0; i < 0x4000; i += 8) {
- 			nv_wo32(dev, chan->vm_pd, (i+0)/4, 0x00000000);
- 			nv_wo32(dev, chan->vm_pd, (i+4)/4, 0xdeadcafe);
-@@ -1008,10 +974,8 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
+-			nv_wo32(dev, chan->vm_pd, (i+0)/4, 0x00000000);
+-			nv_wo32(dev, chan->vm_pd, (i+4)/4, 0xdeadcafe);
++			nv_wo32(chan->vm_pd, i + 0, 0x00000000);
++			nv_wo32(chan->vm_pd, i + 4, 0xdeadcafe);
+ 		}
+ 
+-		pde = (dev_priv->vm_gart_base / (512*1024*1024)) * 2;
++		pde = (dev_priv->vm_gart_base / (512*1024*1024)) * 8;
  		ret = nouveau_gpuobj_ref_add(dev, NULL, 0,
  					     dev_priv->gart_info.sg_ctxdma,
  					     &chan->vm_gart_pt);
@@ -5244,10 +6431,15 @@ index e7c100b..4bf6b33 100644
 +		if (ret)
  			return ret;
 -		}
- 		nv_wo32(dev, chan->vm_pd, pde++,
- 			    chan->vm_gart_pt->instance | 0x03);
- 		nv_wo32(dev, chan->vm_pd, pde++, 0x00000000);
-@@ -1021,17 +985,15 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
+-		nv_wo32(dev, chan->vm_pd, pde++,
+-			    chan->vm_gart_pt->instance | 0x03);
+-		nv_wo32(dev, chan->vm_pd, pde++, 0x00000000);
++		nv_wo32(chan->vm_pd, pde + 0, chan->vm_gart_pt->instance | 3);
++		nv_wo32(chan->vm_pd, pde + 4, 0x00000000);
+ 
+-		pde = (dev_priv->vm_vram_base / (512*1024*1024)) * 2;
++		pde = (dev_priv->vm_vram_base / (512*1024*1024)) * 8;
+ 		for (i = 0; i < dev_priv->vm_vram_pt_nr; i++) {
  			ret = nouveau_gpuobj_ref_add(dev, NULL, 0,
  						     dev_priv->vm_vram_pt[i],
  						     &chan->vm_vram_pt[i]);
@@ -5257,9 +6449,13 @@ index e7c100b..4bf6b33 100644
  				return ret;
 -			}
  
- 			nv_wo32(dev, chan->vm_pd, pde++,
- 				    chan->vm_vram_pt[i]->instance | 0x61);
- 			nv_wo32(dev, chan->vm_pd, pde++, 0x00000000);
+-			nv_wo32(dev, chan->vm_pd, pde++,
+-				    chan->vm_vram_pt[i]->instance | 0x61);
+-			nv_wo32(dev, chan->vm_pd, pde++, 0x00000000);
++			nv_wo32(chan->vm_pd, pde + 0,
++				chan->vm_vram_pt[i]->instance | 0x61);
++			nv_wo32(chan->vm_pd, pde + 4, 0x00000000);
++			pde += 8;
  		}
  
 -		instmem->finish_access(dev);
@@ -5267,7 +6463,7 @@ index e7c100b..4bf6b33 100644
  	}
  
  	/* RAMHT */
-@@ -1130,8 +1092,8 @@ nouveau_gpuobj_channel_takedown(struct nouveau_channel *chan)
+@@ -1130,8 +962,8 @@ nouveau_gpuobj_channel_takedown(struct nouveau_channel *chan)
  	for (i = 0; i < dev_priv->vm_vram_pt_nr; i++)
  		nouveau_gpuobj_ref_del(dev, &chan->vm_vram_pt[i]);
  
@@ -5278,30 +6474,34 @@ index e7c100b..4bf6b33 100644
  	if (chan->ramin)
  		nouveau_gpuobj_ref_del(dev, &chan->ramin);
  
-@@ -1164,10 +1126,8 @@ nouveau_gpuobj_suspend(struct drm_device *dev)
+@@ -1164,10 +996,8 @@ nouveau_gpuobj_suspend(struct drm_device *dev)
  			return -ENOMEM;
  		}
  
 -		dev_priv->engine.instmem.prepare_access(dev, false);
- 		for (i = 0; i < gpuobj->im_pramin->size / 4; i++)
- 			gpuobj->im_backing_suspend[i] = nv_ro32(dev, gpuobj, i);
+-		for (i = 0; i < gpuobj->im_pramin->size / 4; i++)
+-			gpuobj->im_backing_suspend[i] = nv_ro32(dev, gpuobj, i);
 -		dev_priv->engine.instmem.finish_access(dev);
++		for (i = 0; i < gpuobj->im_pramin->size; i += 4)
++			gpuobj->im_backing_suspend[i/4] = nv_ro32(gpuobj, i);
  	}
  
  	return 0;
-@@ -1212,10 +1172,9 @@ nouveau_gpuobj_resume(struct drm_device *dev)
+@@ -1212,10 +1042,9 @@ nouveau_gpuobj_resume(struct drm_device *dev)
  		if (!gpuobj->im_backing_suspend)
  			continue;
  
 -		dev_priv->engine.instmem.prepare_access(dev, true);
- 		for (i = 0; i < gpuobj->im_pramin->size / 4; i++)
- 			nv_wo32(dev, gpuobj, i, gpuobj->im_backing_suspend[i]);
+-		for (i = 0; i < gpuobj->im_pramin->size / 4; i++)
+-			nv_wo32(dev, gpuobj, i, gpuobj->im_backing_suspend[i]);
 -		dev_priv->engine.instmem.finish_access(dev);
++		for (i = 0; i < gpuobj->im_pramin->size; i += 4)
++			nv_wo32(gpuobj, i, gpuobj->im_backing_suspend[i/4]);
 +		dev_priv->engine.instmem.flush(dev);
  	}
  
  	nouveau_gpuobj_suspend_cleanup(dev);
-@@ -1232,7 +1191,6 @@ int nouveau_ioctl_grobj_alloc(struct drm_device *dev, void *data,
+@@ -1232,7 +1061,6 @@ int nouveau_ioctl_grobj_alloc(struct drm_device *dev, void *data,
  	struct nouveau_channel *chan;
  	int ret;
  
@@ -5309,7 +6509,7 @@ index e7c100b..4bf6b33 100644
  	NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(init->channel, file_priv, chan);
  
  	if (init->handle == ~0)
-@@ -1283,7 +1241,6 @@ int nouveau_ioctl_gpuobj_free(struct drm_device *dev, void *data,
+@@ -1283,7 +1111,6 @@ int nouveau_ioctl_gpuobj_free(struct drm_device *dev, void *data,
  	struct nouveau_channel *chan;
  	int ret;
  
@@ -5317,123 +6517,344 @@ index e7c100b..4bf6b33 100644
  	NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(objfree->channel, file_priv, chan);
  
  	ret = nouveau_gpuobj_ref_find(chan, objfree->handle, &ref);
-diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h
-index 6ca80a3..21a6e45 100644
---- a/drivers/gpu/drm/nouveau/nouveau_reg.h
-+++ b/drivers/gpu/drm/nouveau/nouveau_reg.h
-@@ -1,19 +1,64 @@
- 
-+#define NV04_PFB_BOOT_0						0x00100000
-+#	define NV04_PFB_BOOT_0_RAM_AMOUNT			0x00000003
-+#	define NV04_PFB_BOOT_0_RAM_AMOUNT_32MB			0x00000000
-+#	define NV04_PFB_BOOT_0_RAM_AMOUNT_4MB			0x00000001
-+#	define NV04_PFB_BOOT_0_RAM_AMOUNT_8MB			0x00000002
-+#	define NV04_PFB_BOOT_0_RAM_AMOUNT_16MB			0x00000003
-+#	define NV04_PFB_BOOT_0_RAM_WIDTH_128			0x00000004
-+#	define NV04_PFB_BOOT_0_RAM_TYPE				0x00000028
-+#	define NV04_PFB_BOOT_0_RAM_TYPE_SGRAM_8MBIT		0x00000000
-+#	define NV04_PFB_BOOT_0_RAM_TYPE_SGRAM_16MBIT		0x00000008
-+#	define NV04_PFB_BOOT_0_RAM_TYPE_SGRAM_16MBIT_4BANK	0x00000010
-+#	define NV04_PFB_BOOT_0_RAM_TYPE_SDRAM_16MBIT		0x00000018
-+#	define NV04_PFB_BOOT_0_RAM_TYPE_SDRAM_64MBIT		0x00000020
-+#	define NV04_PFB_BOOT_0_RAM_TYPE_SDRAM_64MBITX16		0x00000028
-+#	define NV04_PFB_BOOT_0_UMA_ENABLE			0x00000100
-+#	define NV04_PFB_BOOT_0_UMA_SIZE				0x0000f000
-+#define NV04_PFB_DEBUG_0					0x00100080
-+#	define NV04_PFB_DEBUG_0_PAGE_MODE			0x00000001
-+#	define NV04_PFB_DEBUG_0_REFRESH_OFF			0x00000010
-+#	define NV04_PFB_DEBUG_0_REFRESH_COUNTX64		0x00003f00
-+#	define NV04_PFB_DEBUG_0_REFRESH_SLOW_CLK		0x00004000
-+#	define NV04_PFB_DEBUG_0_SAFE_MODE			0x00008000
-+#	define NV04_PFB_DEBUG_0_ALOM_ENABLE			0x00010000
-+#	define NV04_PFB_DEBUG_0_CASOE				0x00100000
-+#	define NV04_PFB_DEBUG_0_CKE_INVERT			0x10000000
-+#	define NV04_PFB_DEBUG_0_REFINC				0x20000000
-+#	define NV04_PFB_DEBUG_0_SAVE_POWER_OFF			0x40000000
-+#define NV04_PFB_CFG0						0x00100200
-+#	define NV04_PFB_CFG0_SCRAMBLE				0x20000000
-+#define NV04_PFB_CFG1						0x00100204
-+#define NV04_PFB_FIFO_DATA					0x0010020c
-+#	define NV10_PFB_FIFO_DATA_RAM_AMOUNT_MB_MASK		0xfff00000
-+#	define NV10_PFB_FIFO_DATA_RAM_AMOUNT_MB_SHIFT		20
-+#define NV10_PFB_REFCTRL					0x00100210
-+#	define NV10_PFB_REFCTRL_VALID_1				(1 << 31)
-+#define NV04_PFB_PAD						0x0010021c
-+#	define NV04_PFB_PAD_CKE_NORMAL				(1 << 0)
-+#define NV10_PFB_TILE(i)                              (0x00100240 + (i*16))
-+#define NV10_PFB_TILE__SIZE					8
-+#define NV10_PFB_TLIMIT(i)                            (0x00100244 + (i*16))
-+#define NV10_PFB_TSIZE(i)                             (0x00100248 + (i*16))
-+#define NV10_PFB_TSTATUS(i)                           (0x0010024c + (i*16))
-+#define NV04_PFB_REF						0x001002d0
-+#	define NV04_PFB_REF_CMD_REFRESH				(1 << 0)
-+#define NV04_PFB_PRE						0x001002d4
-+#	define NV04_PFB_PRE_CMD_PRECHARGE			(1 << 0)
-+#define NV10_PFB_CLOSE_PAGE2					0x0010033c
-+#define NV04_PFB_SCRAMBLE(i)                         (0x00100400 + 4 * (i))
-+#define NV40_PFB_TILE(i)                              (0x00100600 + (i*16))
-+#define NV40_PFB_TILE__SIZE_0					12
-+#define NV40_PFB_TILE__SIZE_1					15
-+#define NV40_PFB_TLIMIT(i)                            (0x00100604 + (i*16))
-+#define NV40_PFB_TSIZE(i)                             (0x00100608 + (i*16))
-+#define NV40_PFB_TSTATUS(i)                           (0x0010060c + (i*16))
-+#define NV40_PFB_UNK_800					0x00100800
- 
--#define NV03_BOOT_0                                        0x00100000
--#    define NV03_BOOT_0_RAM_AMOUNT                         0x00000003
--#    define NV03_BOOT_0_RAM_AMOUNT_8MB                     0x00000000
--#    define NV03_BOOT_0_RAM_AMOUNT_2MB                     0x00000001
--#    define NV03_BOOT_0_RAM_AMOUNT_4MB                     0x00000002
--#    define NV03_BOOT_0_RAM_AMOUNT_8MB_SDRAM               0x00000003
--#    define NV04_BOOT_0_RAM_AMOUNT_32MB                    0x00000000
--#    define NV04_BOOT_0_RAM_AMOUNT_4MB                     0x00000001
--#    define NV04_BOOT_0_RAM_AMOUNT_8MB                     0x00000002
--#    define NV04_BOOT_0_RAM_AMOUNT_16MB                    0x00000003
--
--#define NV04_FIFO_DATA                                     0x0010020c
--#    define NV10_FIFO_DATA_RAM_AMOUNT_MB_MASK              0xfff00000
--#    define NV10_FIFO_DATA_RAM_AMOUNT_MB_SHIFT             20
-+#define NV_PEXTDEV_BOOT_0					0x00101000
-+#define NV_PEXTDEV_BOOT_0_RAMCFG				0x0000003c
-+#	define NV_PEXTDEV_BOOT_0_STRAP_FP_IFACE_12BIT		(8 << 12)
-+#define NV_PEXTDEV_BOOT_3					0x0010100c
- 
- #define NV_RAMIN                                           0x00700000
- 
-@@ -131,23 +176,6 @@
- #define NV04_PTIMER_TIME_1                                 0x00009410
- #define NV04_PTIMER_ALARM_0                                0x00009420
+@@ -1293,3 +1120,17 @@ int nouveau_ioctl_gpuobj_free(struct drm_device *dev, void *data,
  
--#define NV04_PFB_CFG0                                      0x00100200
--#define NV04_PFB_CFG1                                      0x00100204
--#define NV40_PFB_020C                                      0x0010020C
--#define NV10_PFB_TILE(i)                                   (0x00100240 + (i*16))
--#define NV10_PFB_TILE__SIZE                                8
--#define NV10_PFB_TLIMIT(i)                                 (0x00100244 + (i*16))
--#define NV10_PFB_TSIZE(i)                                  (0x00100248 + (i*16))
--#define NV10_PFB_TSTATUS(i)                                (0x0010024C + (i*16))
--#define NV10_PFB_CLOSE_PAGE2                               0x0010033C
--#define NV40_PFB_TILE(i)                                   (0x00100600 + (i*16))
--#define NV40_PFB_TILE__SIZE_0                              12
--#define NV40_PFB_TILE__SIZE_1                              15
--#define NV40_PFB_TLIMIT(i)                                 (0x00100604 + (i*16))
--#define NV40_PFB_TSIZE(i)                                  (0x00100608 + (i*16))
--#define NV40_PFB_TSTATUS(i)                                (0x0010060C + (i*16))
--#define NV40_PFB_UNK_800					0x00100800
--
- #define NV04_PGRAPH_DEBUG_0                                0x00400080
- #define NV04_PGRAPH_DEBUG_1                                0x00400084
- #define NV04_PGRAPH_DEBUG_2                                0x00400088
-@@ -192,28 +220,21 @@
- #    define NV_PGRAPH_INTR_ERROR                              (1<<20)
- #define NV10_PGRAPH_CTX_CONTROL                            0x00400144
- #define NV10_PGRAPH_CTX_USER                               0x00400148
--#define NV10_PGRAPH_CTX_SWITCH1                            0x0040014C
--#define NV10_PGRAPH_CTX_SWITCH2                            0x00400150
--#define NV10_PGRAPH_CTX_SWITCH3                            0x00400154
--#define NV10_PGRAPH_CTX_SWITCH4                            0x00400158
--#define NV10_PGRAPH_CTX_SWITCH5                            0x0040015C
-+#define NV10_PGRAPH_CTX_SWITCH(i)                         (0x0040014C + 0x4*(i))
+ 	return 0;
+ }
++
++u32
++nv_ro32(struct nouveau_gpuobj *gpuobj, u32 offset)
++{
++	struct drm_device *dev = gpuobj->dev;
++	return nv_ri32(dev, gpuobj->im_pramin->start + offset);
++}
++
++void
++nv_wo32(struct nouveau_gpuobj *gpuobj, u32 offset, u32 val)
++{
++	struct drm_device *dev = gpuobj->dev;
++	nv_wi32(dev, gpuobj->im_pramin->start + offset, val);
++}
+diff --git a/drivers/gpu/drm/nouveau/nouveau_ramht.c b/drivers/gpu/drm/nouveau/nouveau_ramht.c
+new file mode 100644
+index 0000000..e5cc93c
+--- /dev/null
++++ b/drivers/gpu/drm/nouveau/nouveau_ramht.c
+@@ -0,0 +1,160 @@
++/*
++ * Copyright 2010 Red Hat Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * Authors: Ben Skeggs
++ */
++
++#include "drmP.h"
++
++#include "nouveau_drv.h"
++#include "nouveau_ramht.h"
++
++static uint32_t
++nouveau_ramht_hash_handle(struct drm_device *dev, int channel, uint32_t handle)
++{
++	struct drm_nouveau_private *dev_priv = dev->dev_private;
++	uint32_t hash = 0;
++	int i;
++
++	NV_DEBUG(dev, "ch%d handle=0x%08x\n", channel, handle);
++
++	for (i = 32; i > 0; i -= dev_priv->ramht_bits) {
++		hash ^= (handle & ((1 << dev_priv->ramht_bits) - 1));
++		handle >>= dev_priv->ramht_bits;
++	}
++
++	if (dev_priv->card_type < NV_50)
++		hash ^= channel << (dev_priv->ramht_bits - 4);
++	hash <<= 3;
++
++	NV_DEBUG(dev, "hash=0x%08x\n", hash);
++	return hash;
++}
++
++static int
++nouveau_ramht_entry_valid(struct drm_device *dev, struct nouveau_gpuobj *ramht,
++			  uint32_t offset)
++{
++	struct drm_nouveau_private *dev_priv = dev->dev_private;
++	uint32_t ctx = nv_ro32(ramht, offset + 4);
++
++	if (dev_priv->card_type < NV_40)
++		return ((ctx & NV_RAMHT_CONTEXT_VALID) != 0);
++	return (ctx != 0);
++}
++
++int
++nouveau_ramht_insert(struct drm_device *dev, struct nouveau_gpuobj_ref *ref)
++{
++	struct drm_nouveau_private *dev_priv = dev->dev_private;
++	struct nouveau_instmem_engine *instmem = &dev_priv->engine.instmem;
++	struct nouveau_channel *chan = ref->channel;
++	struct nouveau_gpuobj *ramht = chan->ramht ? chan->ramht->gpuobj : NULL;
++	uint32_t ctx, co, ho;
++
++	if (!ramht) {
++		NV_ERROR(dev, "No hash table!\n");
++		return -EINVAL;
++	}
++
++	if (dev_priv->card_type < NV_40) {
++		ctx = NV_RAMHT_CONTEXT_VALID | (ref->instance >> 4) |
++		      (chan->id << NV_RAMHT_CONTEXT_CHANNEL_SHIFT) |
++		      (ref->gpuobj->engine << NV_RAMHT_CONTEXT_ENGINE_SHIFT);
++	} else
++	if (dev_priv->card_type < NV_50) {
++		ctx = (ref->instance >> 4) |
++		      (chan->id << NV40_RAMHT_CONTEXT_CHANNEL_SHIFT) |
++		      (ref->gpuobj->engine << NV40_RAMHT_CONTEXT_ENGINE_SHIFT);
++	} else {
++		if (ref->gpuobj->engine == NVOBJ_ENGINE_DISPLAY) {
++			ctx = (ref->instance << 10) | 2;
++		} else {
++			ctx = (ref->instance >> 4) |
++			      ((ref->gpuobj->engine <<
++				NV40_RAMHT_CONTEXT_ENGINE_SHIFT));
++		}
++	}
++
++	co = ho = nouveau_ramht_hash_handle(dev, chan->id, ref->handle);
++	do {
++		if (!nouveau_ramht_entry_valid(dev, ramht, co)) {
++			NV_DEBUG(dev,
++				 "insert ch%d 0x%08x: h=0x%08x, c=0x%08x\n",
++				 chan->id, co, ref->handle, ctx);
++			nv_wo32(ramht, co + 0, ref->handle);
++			nv_wo32(ramht, co + 4, ctx);
++
++			list_add_tail(&ref->list, &chan->ramht_refs);
++			instmem->flush(dev);
++			return 0;
++		}
++		NV_DEBUG(dev, "collision ch%d 0x%08x: h=0x%08x\n",
++			 chan->id, co, nv_ro32(ramht, co));
++
++		co += 8;
++		if (co >= dev_priv->ramht_size)
++			co = 0;
++	} while (co != ho);
++
++	NV_ERROR(dev, "RAMHT space exhausted. ch=%d\n", chan->id);
++	return -ENOMEM;
++}
++
++void
++nouveau_ramht_remove(struct drm_device *dev, struct nouveau_gpuobj_ref *ref)
++{
++	struct drm_nouveau_private *dev_priv = dev->dev_private;
++	struct nouveau_instmem_engine *instmem = &dev_priv->engine.instmem;
++	struct nouveau_channel *chan = ref->channel;
++	struct nouveau_gpuobj *ramht = chan->ramht ? chan->ramht->gpuobj : NULL;
++	uint32_t co, ho;
++
++	if (!ramht) {
++		NV_ERROR(dev, "No hash table!\n");
++		return;
++	}
++
++	co = ho = nouveau_ramht_hash_handle(dev, chan->id, ref->handle);
++	do {
++		if (nouveau_ramht_entry_valid(dev, ramht, co) &&
++		    (ref->handle == nv_ro32(ramht, co))) {
++			NV_DEBUG(dev,
++				 "remove ch%d 0x%08x: h=0x%08x, c=0x%08x\n",
++				 chan->id, co, ref->handle,
++				 nv_ro32(ramht, co + 4));
++			nv_wo32(ramht, co + 0, 0x00000000);
++			nv_wo32(ramht, co + 4, 0x00000000);
++
++			list_del(&ref->list);
++			instmem->flush(dev);
++			return;
++		}
++
++		co += 8;
++		if (co >= dev_priv->ramht_size)
++			co = 0;
++	} while (co != ho);
++	list_del(&ref->list);
++
++	NV_ERROR(dev, "RAMHT entry not found. ch=%d, handle=0x%08x\n",
++		 chan->id, ref->handle);
++}
+diff --git a/drivers/gpu/drm/nouveau/nouveau_ramht.h b/drivers/gpu/drm/nouveau/nouveau_ramht.h
+new file mode 100644
+index 0000000..e10455c
+--- /dev/null
++++ b/drivers/gpu/drm/nouveau/nouveau_ramht.h
+@@ -0,0 +1,31 @@
++/*
++ * Copyright 2010 Red Hat Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * Authors: Ben Skeggs
++ */
++
++#ifndef __NOUVEAU_RAMHT_H__
++#define __NOUVEAU_RAMHT_H__
++
++extern int nouveau_ramht_insert(struct drm_device *, struct nouveau_gpuobj_ref *);
++extern void nouveau_ramht_remove(struct drm_device *, struct nouveau_gpuobj_ref *);
++
++#endif
+diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h
+index 6ca80a3..21a6e45 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_reg.h
++++ b/drivers/gpu/drm/nouveau/nouveau_reg.h
+@@ -1,19 +1,64 @@
+ 
++#define NV04_PFB_BOOT_0						0x00100000
++#	define NV04_PFB_BOOT_0_RAM_AMOUNT			0x00000003
++#	define NV04_PFB_BOOT_0_RAM_AMOUNT_32MB			0x00000000
++#	define NV04_PFB_BOOT_0_RAM_AMOUNT_4MB			0x00000001
++#	define NV04_PFB_BOOT_0_RAM_AMOUNT_8MB			0x00000002
++#	define NV04_PFB_BOOT_0_RAM_AMOUNT_16MB			0x00000003
++#	define NV04_PFB_BOOT_0_RAM_WIDTH_128			0x00000004
++#	define NV04_PFB_BOOT_0_RAM_TYPE				0x00000028
++#	define NV04_PFB_BOOT_0_RAM_TYPE_SGRAM_8MBIT		0x00000000
++#	define NV04_PFB_BOOT_0_RAM_TYPE_SGRAM_16MBIT		0x00000008
++#	define NV04_PFB_BOOT_0_RAM_TYPE_SGRAM_16MBIT_4BANK	0x00000010
++#	define NV04_PFB_BOOT_0_RAM_TYPE_SDRAM_16MBIT		0x00000018
++#	define NV04_PFB_BOOT_0_RAM_TYPE_SDRAM_64MBIT		0x00000020
++#	define NV04_PFB_BOOT_0_RAM_TYPE_SDRAM_64MBITX16		0x00000028
++#	define NV04_PFB_BOOT_0_UMA_ENABLE			0x00000100
++#	define NV04_PFB_BOOT_0_UMA_SIZE				0x0000f000
++#define NV04_PFB_DEBUG_0					0x00100080
++#	define NV04_PFB_DEBUG_0_PAGE_MODE			0x00000001
++#	define NV04_PFB_DEBUG_0_REFRESH_OFF			0x00000010
++#	define NV04_PFB_DEBUG_0_REFRESH_COUNTX64		0x00003f00
++#	define NV04_PFB_DEBUG_0_REFRESH_SLOW_CLK		0x00004000
++#	define NV04_PFB_DEBUG_0_SAFE_MODE			0x00008000
++#	define NV04_PFB_DEBUG_0_ALOM_ENABLE			0x00010000
++#	define NV04_PFB_DEBUG_0_CASOE				0x00100000
++#	define NV04_PFB_DEBUG_0_CKE_INVERT			0x10000000
++#	define NV04_PFB_DEBUG_0_REFINC				0x20000000
++#	define NV04_PFB_DEBUG_0_SAVE_POWER_OFF			0x40000000
++#define NV04_PFB_CFG0						0x00100200
++#	define NV04_PFB_CFG0_SCRAMBLE				0x20000000
++#define NV04_PFB_CFG1						0x00100204
++#define NV04_PFB_FIFO_DATA					0x0010020c
++#	define NV10_PFB_FIFO_DATA_RAM_AMOUNT_MB_MASK		0xfff00000
++#	define NV10_PFB_FIFO_DATA_RAM_AMOUNT_MB_SHIFT		20
++#define NV10_PFB_REFCTRL					0x00100210
++#	define NV10_PFB_REFCTRL_VALID_1				(1 << 31)
++#define NV04_PFB_PAD						0x0010021c
++#	define NV04_PFB_PAD_CKE_NORMAL				(1 << 0)
++#define NV10_PFB_TILE(i)                              (0x00100240 + (i*16))
++#define NV10_PFB_TILE__SIZE					8
++#define NV10_PFB_TLIMIT(i)                            (0x00100244 + (i*16))
++#define NV10_PFB_TSIZE(i)                             (0x00100248 + (i*16))
++#define NV10_PFB_TSTATUS(i)                           (0x0010024c + (i*16))
++#define NV04_PFB_REF						0x001002d0
++#	define NV04_PFB_REF_CMD_REFRESH				(1 << 0)
++#define NV04_PFB_PRE						0x001002d4
++#	define NV04_PFB_PRE_CMD_PRECHARGE			(1 << 0)
++#define NV10_PFB_CLOSE_PAGE2					0x0010033c
++#define NV04_PFB_SCRAMBLE(i)                         (0x00100400 + 4 * (i))
++#define NV40_PFB_TILE(i)                              (0x00100600 + (i*16))
++#define NV40_PFB_TILE__SIZE_0					12
++#define NV40_PFB_TILE__SIZE_1					15
++#define NV40_PFB_TLIMIT(i)                            (0x00100604 + (i*16))
++#define NV40_PFB_TSIZE(i)                             (0x00100608 + (i*16))
++#define NV40_PFB_TSTATUS(i)                           (0x0010060c + (i*16))
++#define NV40_PFB_UNK_800					0x00100800
+ 
+-#define NV03_BOOT_0                                        0x00100000
+-#    define NV03_BOOT_0_RAM_AMOUNT                         0x00000003
+-#    define NV03_BOOT_0_RAM_AMOUNT_8MB                     0x00000000
+-#    define NV03_BOOT_0_RAM_AMOUNT_2MB                     0x00000001
+-#    define NV03_BOOT_0_RAM_AMOUNT_4MB                     0x00000002
+-#    define NV03_BOOT_0_RAM_AMOUNT_8MB_SDRAM               0x00000003
+-#    define NV04_BOOT_0_RAM_AMOUNT_32MB                    0x00000000
+-#    define NV04_BOOT_0_RAM_AMOUNT_4MB                     0x00000001
+-#    define NV04_BOOT_0_RAM_AMOUNT_8MB                     0x00000002
+-#    define NV04_BOOT_0_RAM_AMOUNT_16MB                    0x00000003
+-
+-#define NV04_FIFO_DATA                                     0x0010020c
+-#    define NV10_FIFO_DATA_RAM_AMOUNT_MB_MASK              0xfff00000
+-#    define NV10_FIFO_DATA_RAM_AMOUNT_MB_SHIFT             20
++#define NV_PEXTDEV_BOOT_0					0x00101000
++#define NV_PEXTDEV_BOOT_0_RAMCFG				0x0000003c
++#	define NV_PEXTDEV_BOOT_0_STRAP_FP_IFACE_12BIT		(8 << 12)
++#define NV_PEXTDEV_BOOT_3					0x0010100c
+ 
+ #define NV_RAMIN                                           0x00700000
+ 
+@@ -131,23 +176,6 @@
+ #define NV04_PTIMER_TIME_1                                 0x00009410
+ #define NV04_PTIMER_ALARM_0                                0x00009420
+ 
+-#define NV04_PFB_CFG0                                      0x00100200
+-#define NV04_PFB_CFG1                                      0x00100204
+-#define NV40_PFB_020C                                      0x0010020C
+-#define NV10_PFB_TILE(i)                                   (0x00100240 + (i*16))
+-#define NV10_PFB_TILE__SIZE                                8
+-#define NV10_PFB_TLIMIT(i)                                 (0x00100244 + (i*16))
+-#define NV10_PFB_TSIZE(i)                                  (0x00100248 + (i*16))
+-#define NV10_PFB_TSTATUS(i)                                (0x0010024C + (i*16))
+-#define NV10_PFB_CLOSE_PAGE2                               0x0010033C
+-#define NV40_PFB_TILE(i)                                   (0x00100600 + (i*16))
+-#define NV40_PFB_TILE__SIZE_0                              12
+-#define NV40_PFB_TILE__SIZE_1                              15
+-#define NV40_PFB_TLIMIT(i)                                 (0x00100604 + (i*16))
+-#define NV40_PFB_TSIZE(i)                                  (0x00100608 + (i*16))
+-#define NV40_PFB_TSTATUS(i)                                (0x0010060C + (i*16))
+-#define NV40_PFB_UNK_800					0x00100800
+-
+ #define NV04_PGRAPH_DEBUG_0                                0x00400080
+ #define NV04_PGRAPH_DEBUG_1                                0x00400084
+ #define NV04_PGRAPH_DEBUG_2                                0x00400088
+@@ -192,28 +220,21 @@
+ #    define NV_PGRAPH_INTR_ERROR                              (1<<20)
+ #define NV10_PGRAPH_CTX_CONTROL                            0x00400144
+ #define NV10_PGRAPH_CTX_USER                               0x00400148
+-#define NV10_PGRAPH_CTX_SWITCH1                            0x0040014C
+-#define NV10_PGRAPH_CTX_SWITCH2                            0x00400150
+-#define NV10_PGRAPH_CTX_SWITCH3                            0x00400154
+-#define NV10_PGRAPH_CTX_SWITCH4                            0x00400158
+-#define NV10_PGRAPH_CTX_SWITCH5                            0x0040015C
++#define NV10_PGRAPH_CTX_SWITCH(i)                         (0x0040014C + 0x4*(i))
  #define NV04_PGRAPH_CTX_SWITCH1                            0x00400160
 -#define NV10_PGRAPH_CTX_CACHE1                             0x00400160
 +#define NV10_PGRAPH_CTX_CACHE(i, j)                       (0x00400160	\
@@ -5479,7 +6900,7 @@ index 6ca80a3..21a6e45 100644
  #define NV50_SOR_DP_CTRL_LANE_MASK                                   0x001f0000
  #define NV50_SOR_DP_CTRL_LANE_0_ENABLED                              0x00010000
 diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
-index 1d6ee8b..491767f 100644
+index 1d6ee8b..630988a 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
 @@ -97,7 +97,6 @@ nouveau_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem)
@@ -5490,7 +6911,24 @@ index 1d6ee8b..491767f 100644
  	pte = nouveau_sgdma_pte(nvbe->dev, mem->mm_node->start << PAGE_SHIFT);
  	nvbe->pte_start = pte;
  	for (i = 0; i < nvbe->nr_pages; i++) {
-@@ -116,24 +115,11 @@ nouveau_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem)
+@@ -106,34 +105,23 @@ nouveau_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem)
+ 		uint32_t offset_h = upper_32_bits(dma_offset);
+ 
+ 		for (j = 0; j < PAGE_SIZE / NV_CTXDMA_PAGE_SIZE; j++) {
+-			if (dev_priv->card_type < NV_50)
+-				nv_wo32(dev, gpuobj, pte++, offset_l | 3);
+-			else {
+-				nv_wo32(dev, gpuobj, pte++, offset_l | 0x21);
+-				nv_wo32(dev, gpuobj, pte++, offset_h & 0xff);
++			if (dev_priv->card_type < NV_50) {
++				nv_wo32(gpuobj, (pte * 4) + 0, offset_l | 3);
++				pte += 1;
++			} else {
++				nv_wo32(gpuobj, (pte * 4) + 0, offset_l | 0x21);
++				nv_wo32(gpuobj, (pte * 4) + 4, offset_h & 0xff);
++				pte += 2;
+ 			}
+ 
  			dma_offset += NV_CTXDMA_PAGE_SIZE;
  		}
  	}
@@ -5518,7 +6956,7 @@ index 1d6ee8b..491767f 100644
  	}
  
  	nvbe->bound = true;
-@@ -154,7 +140,6 @@ nouveau_sgdma_unbind(struct ttm_backend *be)
+@@ -154,40 +142,28 @@ nouveau_sgdma_unbind(struct ttm_backend *be)
  	if (!nvbe->bound)
  		return 0;
  
@@ -5526,7 +6964,22 @@ index 1d6ee8b..491767f 100644
  	pte = nvbe->pte_start;
  	for (i = 0; i < nvbe->nr_pages; i++) {
  		dma_addr_t dma_offset = dev_priv->gart_info.sg_dummy_bus;
-@@ -170,24 +155,11 @@ nouveau_sgdma_unbind(struct ttm_backend *be)
+ 
+ 		for (j = 0; j < PAGE_SIZE / NV_CTXDMA_PAGE_SIZE; j++) {
+-			if (dev_priv->card_type < NV_50)
+-				nv_wo32(dev, gpuobj, pte++, dma_offset | 3);
+-			else {
+-				nv_wo32(dev, gpuobj, pte++, dma_offset | 0x21);
+-				nv_wo32(dev, gpuobj, pte++, 0x00000000);
++			if (dev_priv->card_type < NV_50) {
++				nv_wo32(gpuobj, (pte * 4) + 0, dma_offset | 3);
++				pte += 1;
++			} else {
++				nv_wo32(gpuobj, (pte * 4), dma_offset | 0x21);
++				nv_wo32(gpuobj, (pte * 4) + 4, 0x00000000);
++				pte += 2;
+ 			}
+ 
  			dma_offset += NV_CTXDMA_PAGE_SIZE;
  		}
  	}
@@ -5554,16 +7007,64 @@ index 1d6ee8b..491767f 100644
  	}
  
  	nvbe->bound = false;
-@@ -272,7 +244,6 @@ nouveau_sgdma_init(struct drm_device *dev)
- 		pci_map_page(dev->pdev, dev_priv->gart_info.sg_dummy_page, 0,
+@@ -242,6 +218,7 @@ int
+ nouveau_sgdma_init(struct drm_device *dev)
+ {
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
++	struct pci_dev *pdev = dev->pdev;
+ 	struct nouveau_gpuobj *gpuobj = NULL;
+ 	uint32_t aper_size, obj_size;
+ 	int i, ret;
+@@ -267,34 +244,42 @@ nouveau_sgdma_init(struct drm_device *dev)
+ 
+ 	dev_priv->gart_info.sg_dummy_page =
+ 		alloc_page(GFP_KERNEL|__GFP_DMA32);
++	if (!dev_priv->gart_info.sg_dummy_page) {
++		nouveau_gpuobj_del(dev, &gpuobj);
++		return -ENOMEM;
++	}
++
+ 	set_bit(PG_locked, &dev_priv->gart_info.sg_dummy_page->flags);
+ 	dev_priv->gart_info.sg_dummy_bus =
+-		pci_map_page(dev->pdev, dev_priv->gart_info.sg_dummy_page, 0,
++		pci_map_page(pdev, dev_priv->gart_info.sg_dummy_page, 0,
  			     PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
++	if (pci_dma_mapping_error(pdev, dev_priv->gart_info.sg_dummy_bus)) {
++		nouveau_gpuobj_del(dev, &gpuobj);
++		return -EFAULT;
++	}
  
 -	dev_priv->engine.instmem.prepare_access(dev, true);
  	if (dev_priv->card_type < NV_50) {
  		/* Maybe use NV_DMA_TARGET_AGP for PCIE? NVIDIA do this, and
  		 * confirmed to work on c51.  Perhaps means NV_DMA_TARGET_PCIE
-@@ -294,7 +265,7 @@ nouveau_sgdma_init(struct drm_device *dev)
- 			nv_wo32(dev, gpuobj, (i+4)/4, 0);
+ 		 * on those cards? */
+-		nv_wo32(dev, gpuobj, 0, NV_CLASS_DMA_IN_MEMORY |
+-				       (1 << 12) /* PT present */ |
+-				       (0 << 13) /* PT *not* linear */ |
+-				       (NV_DMA_ACCESS_RW  << 14) |
+-				       (NV_DMA_TARGET_PCI << 16));
+-		nv_wo32(dev, gpuobj, 1, aper_size - 1);
++		nv_wo32(gpuobj, 0, NV_CLASS_DMA_IN_MEMORY |
++				   (1 << 12) /* PT present */ |
++				   (0 << 13) /* PT *not* linear */ |
++				   (NV_DMA_ACCESS_RW  << 14) |
++				   (NV_DMA_TARGET_PCI << 16));
++		nv_wo32(gpuobj, 4, aper_size - 1);
+ 		for (i = 2; i < 2 + (aper_size >> 12); i++) {
+-			nv_wo32(dev, gpuobj, i,
+-				    dev_priv->gart_info.sg_dummy_bus | 3);
++			nv_wo32(gpuobj, i * 4,
++				dev_priv->gart_info.sg_dummy_bus | 3);
+ 		}
+ 	} else {
+ 		for (i = 0; i < obj_size; i += 8) {
+-			nv_wo32(dev, gpuobj, (i+0)/4,
+-				    dev_priv->gart_info.sg_dummy_bus | 0x21);
+-			nv_wo32(dev, gpuobj, (i+4)/4, 0);
++			nv_wo32(gpuobj, i + 0,
++				dev_priv->gart_info.sg_dummy_bus | 0x21);
++			nv_wo32(gpuobj, i + 4, 0);
  		}
  	}
 -	dev_priv->engine.instmem.finish_access(dev);
@@ -5571,18 +7072,20 @@ index 1d6ee8b..491767f 100644
  
  	dev_priv->gart_info.type      = NOUVEAU_GART_SGDMA;
  	dev_priv->gart_info.aper_base = 0;
-@@ -325,14 +296,11 @@ nouveau_sgdma_get_page(struct drm_device *dev, uint32_t offset, uint32_t *page)
+@@ -325,14 +310,11 @@ nouveau_sgdma_get_page(struct drm_device *dev, uint32_t offset, uint32_t *page)
  {
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
  	struct nouveau_gpuobj *gpuobj = dev_priv->gart_info.sg_ctxdma;
 -	struct nouveau_instmem_engine *instmem = &dev_priv->engine.instmem;
  	int pte;
  
- 	pte = (offset >> NV_CTXDMA_PAGE_SHIFT);
+-	pte = (offset >> NV_CTXDMA_PAGE_SHIFT);
++	pte = (offset >> NV_CTXDMA_PAGE_SHIFT) << 2;
  	if (dev_priv->card_type < NV_50) {
 -		instmem->prepare_access(dev, false);
- 		*page = nv_ro32(dev, gpuobj, (pte + 2)) & ~NV_CTXDMA_PAGE_MASK;
+-		*page = nv_ro32(dev, gpuobj, (pte + 2)) & ~NV_CTXDMA_PAGE_MASK;
 -		instmem->finish_access(dev);
++		*page = nv_ro32(gpuobj, (pte + 8)) & ~NV_CTXDMA_PAGE_MASK;
  		return 0;
  	}
  
@@ -6196,7 +7699,7 @@ index b02a231..989322b 100644
  	default:
  		NV_ERROR(dev, "unknown parameter %lld\n", setparam->param);
 diff --git a/drivers/gpu/drm/nouveau/nv04_crtc.c b/drivers/gpu/drm/nouveau/nv04_crtc.c
-index eba687f..08c7e07 100644
+index eba687f..291a4cb 100644
 --- a/drivers/gpu/drm/nouveau/nv04_crtc.c
 +++ b/drivers/gpu/drm/nouveau/nv04_crtc.c
 @@ -157,6 +157,7 @@ nv_crtc_dpms(struct drm_crtc *crtc, int mode)
@@ -6228,8 +7731,25 @@ index eba687f..08c7e07 100644
  	regp->crtc_830 = mode->crtc_vdisplay - 3;
  	regp->crtc_834 = mode->crtc_vdisplay - 1;
  
+@@ -710,6 +718,7 @@ static void nv_crtc_destroy(struct drm_crtc *crtc)
+ 
+ 	drm_crtc_cleanup(crtc);
+ 
++	nouveau_bo_unmap(nv_crtc->cursor.nvbo);
+ 	nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
+ 	kfree(nv_crtc);
+ }
+@@ -820,7 +829,7 @@ nv04_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
+ 	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_FF_INDEX);
+ 	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_FFLWM__INDEX);
+ 
+-	if (dev_priv->card_type >= NV_30) {
++	if (dev_priv->card_type >= NV_20) {
+ 		regp->CRTC[NV_CIO_CRE_47] = arb_lwm >> 8;
+ 		crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_47);
+ 	}
 diff --git a/drivers/gpu/drm/nouveau/nv04_dac.c b/drivers/gpu/drm/nouveau/nv04_dac.c
-index 1cb19e3..ea36270 100644
+index 1cb19e3..9cc560c 100644
 --- a/drivers/gpu/drm/nouveau/nv04_dac.c
 +++ b/drivers/gpu/drm/nouveau/nv04_dac.c
 @@ -220,6 +220,7 @@ uint32_t nv17_dac_sample_load(struct drm_encoder *encoder)
@@ -6271,7 +7791,16 @@ index 1cb19e3..ea36270 100644
  	/* nv driver and nv31 use 0xfffffeee, nv34 and 6600 use 0xfffffece */
  	routput = (saved_routput & 0xfffffece) | head << 8;
  
-@@ -304,8 +304,8 @@ uint32_t nv17_dac_sample_load(struct drm_encoder *encoder)
+@@ -291,6 +291,8 @@ uint32_t nv17_dac_sample_load(struct drm_encoder *encoder)
+ 	msleep(5);
+ 
+ 	sample = NVReadRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + regoffset);
++	/* do it again just in case it's a residual current */
++	sample &= NVReadRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + regoffset);
+ 
+ 	temp = NVReadRAMDAC(dev, head, NV_PRAMDAC_TEST_CONTROL);
+ 	NVWriteRAMDAC(dev, head, NV_PRAMDAC_TEST_CONTROL,
+@@ -304,8 +306,8 @@ uint32_t nv17_dac_sample_load(struct drm_encoder *encoder)
  		nvWriteMC(dev, NV_PBUS_POWERCTRL_4, saved_powerctrl_4);
  	nvWriteMC(dev, NV_PBUS_POWERCTRL_2, saved_powerctrl_2);
  
@@ -6282,7 +7811,7 @@ index 1cb19e3..ea36270 100644
  
  	return sample;
  }
-@@ -315,9 +315,12 @@ nv17_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector)
+@@ -315,9 +317,12 @@ nv17_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector)
  {
  	struct drm_device *dev = encoder->dev;
  	struct dcb_entry *dcb = nouveau_encoder(encoder)->dcb;
@@ -6297,7 +7826,7 @@ index 1cb19e3..ea36270 100644
  		NV_INFO(dev, "Load detected on output %c\n",
  			'@' + ffs(dcb->or));
  		return connector_status_connected;
-@@ -330,6 +333,9 @@ static bool nv04_dac_mode_fixup(struct drm_encoder *encoder,
+@@ -330,6 +335,9 @@ static bool nv04_dac_mode_fixup(struct drm_encoder *encoder,
  				struct drm_display_mode *mode,
  				struct drm_display_mode *adjusted_mode)
  {
@@ -6307,7 +7836,24 @@ index 1cb19e3..ea36270 100644
  	return true;
  }
  
-@@ -428,6 +434,17 @@ void nv04_dac_update_dacclk(struct drm_encoder *encoder, bool enable)
+@@ -344,15 +352,9 @@ static void nv04_dac_prepare(struct drm_encoder *encoder)
+ 	helper->dpms(encoder, DRM_MODE_DPMS_OFF);
+ 
+ 	nv04_dfp_disable(dev, head);
+-
+-	/* Some NV4x have unknown values (0x3f, 0x50, 0x54, 0x6b, 0x79, 0x7f)
+-	 * at LCD__INDEX which we don't alter
+-	 */
+-	if (!(crtcstate[head].CRTC[NV_CIO_CRE_LCD__INDEX] & 0x44))
+-		crtcstate[head].CRTC[NV_CIO_CRE_LCD__INDEX] = 0;
++	crtcstate[head].CRTC[NV_CIO_CRE_LCD__INDEX] = 0;
+ }
+ 
+-
+ static void nv04_dac_mode_set(struct drm_encoder *encoder,
+ 			      struct drm_display_mode *mode,
+ 			      struct drm_display_mode *adjusted_mode)
+@@ -428,6 +430,17 @@ void nv04_dac_update_dacclk(struct drm_encoder *encoder, bool enable)
  	}
  }
  
@@ -6325,7 +7871,7 @@ index 1cb19e3..ea36270 100644
  static void nv04_dac_dpms(struct drm_encoder *encoder, int mode)
  {
  	struct drm_device *dev = encoder->dev;
-@@ -501,11 +518,13 @@ static const struct drm_encoder_funcs nv04_dac_funcs = {
+@@ -501,11 +514,13 @@ static const struct drm_encoder_funcs nv04_dac_funcs = {
  	.destroy = nv04_dac_destroy,
  };
  
@@ -6341,7 +7887,7 @@ index 1cb19e3..ea36270 100644
  
  	nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL);
  	if (!nv_encoder)
-@@ -527,5 +546,6 @@ int nv04_dac_create(struct drm_device *dev, struct dcb_entry *entry)
+@@ -527,5 +542,6 @@ int nv04_dac_create(struct drm_device *dev, struct dcb_entry *entry)
  	encoder->possible_crtcs = entry->heads;
  	encoder->possible_clones = 0;
  
@@ -6349,7 +7895,7 @@ index 1cb19e3..ea36270 100644
  	return 0;
  }
 diff --git a/drivers/gpu/drm/nouveau/nv04_dfp.c b/drivers/gpu/drm/nouveau/nv04_dfp.c
-index 41634d4..a5dcf76 100644
+index 41634d4..e331b4f 100644
 --- a/drivers/gpu/drm/nouveau/nv04_dfp.c
 +++ b/drivers/gpu/drm/nouveau/nv04_dfp.c
 @@ -34,6 +34,8 @@
@@ -6398,30 +7944,118 @@ index 41634d4..a5dcf76 100644
  static bool nv04_dfp_mode_fixup(struct drm_encoder *encoder,
  				struct drm_display_mode *mode,
  				struct drm_display_mode *adjusted_mode)
-@@ -413,10 +445,6 @@ static void nv04_dfp_commit(struct drm_encoder *encoder)
+@@ -221,26 +253,21 @@ static void nv04_dfp_prepare(struct drm_encoder *encoder)
+ 
+ 	nv04_dfp_prepare_sel_clk(dev, nv_encoder, head);
+ 
+-	/* Some NV4x have unknown values (0x3f, 0x50, 0x54, 0x6b, 0x79, 0x7f)
+-	 * at LCD__INDEX which we don't alter
+-	 */
+-	if (!(*cr_lcd & 0x44)) {
+-		*cr_lcd = 0x3;
+-
+-		if (nv_two_heads(dev)) {
+-			if (nv_encoder->dcb->location == DCB_LOC_ON_CHIP)
+-				*cr_lcd |= head ? 0x0 : 0x8;
+-			else {
+-				*cr_lcd |= (nv_encoder->dcb->or << 4) & 0x30;
+-				if (nv_encoder->dcb->type == OUTPUT_LVDS)
+-					*cr_lcd |= 0x30;
+-				if ((*cr_lcd & 0x30) == (*cr_lcd_oth & 0x30)) {
+-					/* avoid being connected to both crtcs */
+-					*cr_lcd_oth &= ~0x30;
+-					NVWriteVgaCrtc(dev, head ^ 1,
+-						       NV_CIO_CRE_LCD__INDEX,
+-						       *cr_lcd_oth);
+-				}
++	*cr_lcd = 0x3;
++
++	if (nv_two_heads(dev)) {
++		if (nv_encoder->dcb->location == DCB_LOC_ON_CHIP)
++			*cr_lcd |= head ? 0x0 : 0x8;
++		else {
++			*cr_lcd |= (nv_encoder->dcb->or << 4) & 0x30;
++			if (nv_encoder->dcb->type == OUTPUT_LVDS)
++				*cr_lcd |= 0x30;
++			if ((*cr_lcd & 0x30) == (*cr_lcd_oth & 0x30)) {
++				/* avoid being connected to both crtcs */
++				*cr_lcd_oth &= ~0x30;
++				NVWriteVgaCrtc(dev, head ^ 1,
++					       NV_CIO_CRE_LCD__INDEX,
++					       *cr_lcd_oth);
+ 			}
+ 		}
+ 	}
+@@ -412,10 +439,7 @@ static void nv04_dfp_commit(struct drm_encoder *encoder)
+ 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
  	struct dcb_entry *dcbe = nv_encoder->dcb;
  	int head = nouveau_crtc(encoder->crtc)->index;
- 
+-
 -	NV_INFO(dev, "Output %s is running on CRTC %d using output %c\n",
 -		drm_get_connector_name(&nouveau_encoder_connector_get(nv_encoder)->base),
 -		nv_crtc->index, '@' + ffs(nv_encoder->dcb->or));
--
++	struct drm_encoder *slave_encoder;
+ 
  	if (dcbe->type == OUTPUT_TMDS)
  		run_tmds_table(dev, dcbe, head, nv_encoder->mode.clock);
- 	else if (dcbe->type == OUTPUT_LVDS)
-@@ -433,6 +461,11 @@ static void nv04_dfp_commit(struct drm_encoder *encoder)
+@@ -433,6 +457,12 @@ static void nv04_dfp_commit(struct drm_encoder *encoder)
  	else
  		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + nv04_dac_output_offset(encoder), 0x00100000);
  
 +	/* Init external transmitters */
-+	if (get_tmds_slave(encoder))
-+		get_slave_funcs(get_tmds_slave(encoder))->mode_set(
-+			encoder, &nv_encoder->mode, &nv_encoder->mode);
++	slave_encoder = get_tmds_slave(encoder);
++	if (slave_encoder)
++		get_slave_funcs(slave_encoder)->mode_set(
++			slave_encoder, &nv_encoder->mode, &nv_encoder->mode);
 +
  	helper->dpms(encoder, DRM_MODE_DPMS_ON);
  
  	NV_INFO(dev, "Output %s is running on CRTC %d using output %c\n",
-@@ -554,10 +587,42 @@ static void nv04_dfp_destroy(struct drm_encoder *encoder)
+@@ -440,6 +470,27 @@ static void nv04_dfp_commit(struct drm_encoder *encoder)
+ 		nv_crtc->index, '@' + ffs(nv_encoder->dcb->or));
+ }
+ 
++static void nv04_dfp_update_backlight(struct drm_encoder *encoder, int mode)
++{
++#ifdef __powerpc__
++	struct drm_device *dev = encoder->dev;
++
++	/* BIOS scripts usually take care of the backlight, thanks
++	 * Apple for your consistency.
++	 */
++	if (dev->pci_device == 0x0179 || dev->pci_device == 0x0189 ||
++	    dev->pci_device == 0x0329) {
++		if (mode == DRM_MODE_DPMS_ON) {
++			nv_mask(dev, NV_PBUS_DEBUG_DUALHEAD_CTL, 0, 1 << 31);
++			nv_mask(dev, NV_PCRTC_GPIO_EXT, 3, 1);
++		} else {
++			nv_mask(dev, NV_PBUS_DEBUG_DUALHEAD_CTL, 1 << 31, 0);
++			nv_mask(dev, NV_PCRTC_GPIO_EXT, 3, 0);
++		}
++	}
++#endif
++}
++
+ static inline bool is_powersaving_dpms(int mode)
+ {
+ 	return (mode != DRM_MODE_DPMS_ON);
+@@ -487,6 +538,7 @@ static void nv04_lvds_dpms(struct drm_encoder *encoder, int mode)
+ 					 LVDS_PANEL_OFF, 0);
+ 	}
+ 
++	nv04_dfp_update_backlight(encoder, mode);
+ 	nv04_dfp_update_fp_control(encoder, mode);
+ 
+ 	if (mode == DRM_MODE_DPMS_ON)
+@@ -510,6 +562,7 @@ static void nv04_tmds_dpms(struct drm_encoder *encoder, int mode)
+ 	NV_INFO(dev, "Setting dpms mode %d on tmds encoder (output %d)\n",
+ 		     mode, nv_encoder->dcb->index);
+ 
++	nv04_dfp_update_backlight(encoder, mode);
+ 	nv04_dfp_update_fp_control(encoder, mode);
+ }
+ 
+@@ -554,10 +607,42 @@ static void nv04_dfp_destroy(struct drm_encoder *encoder)
  
  	NV_DEBUG_KMS(encoder->dev, "\n");
  
@@ -6464,7 +8098,7 @@ index 41634d4..a5dcf76 100644
  static const struct drm_encoder_helper_funcs nv04_lvds_helper_funcs = {
  	.dpms = nv04_lvds_dpms,
  	.save = nv04_dfp_save,
-@@ -584,11 +649,12 @@ static const struct drm_encoder_funcs nv04_dfp_funcs = {
+@@ -584,11 +669,12 @@ static const struct drm_encoder_funcs nv04_dfp_funcs = {
  	.destroy = nv04_dfp_destroy,
  };
  
@@ -6479,7 +8113,7 @@ index 41634d4..a5dcf76 100644
  	int type;
  
  	switch (entry->type) {
-@@ -613,11 +679,16 @@ int nv04_dfp_create(struct drm_device *dev, struct dcb_entry *entry)
+@@ -613,11 +699,16 @@ int nv04_dfp_create(struct drm_device *dev, struct dcb_entry *entry)
  	nv_encoder->dcb = entry;
  	nv_encoder->or = ffs(entry->or) - 1;
  
@@ -6691,9 +8325,24 @@ index c7898b4..9e28cf7 100644
  }
  
 diff --git a/drivers/gpu/drm/nouveau/nv04_fifo.c b/drivers/gpu/drm/nouveau/nv04_fifo.c
-index 66fe559..06cedd9 100644
+index 66fe559..bbb87ef 100644
 --- a/drivers/gpu/drm/nouveau/nv04_fifo.c
 +++ b/drivers/gpu/drm/nouveau/nv04_fifo.c
+@@ -38,10 +38,10 @@
+ #define NV04_RAMFC_ENGINE                                        0x14
+ #define NV04_RAMFC_PULL1_ENGINE                                  0x18
+ 
+-#define RAMFC_WR(offset, val) nv_wo32(dev, chan->ramfc->gpuobj, \
+-					 NV04_RAMFC_##offset/4, (val))
+-#define RAMFC_RD(offset)      nv_ro32(dev, chan->ramfc->gpuobj, \
+-					 NV04_RAMFC_##offset/4)
++#define RAMFC_WR(offset, val) nv_wo32(chan->ramfc->gpuobj, \
++				      NV04_RAMFC_##offset, (val))
++#define RAMFC_RD(offset)      nv_ro32(chan->ramfc->gpuobj, \
++				      NV04_RAMFC_##offset)
+ 
+ void
+ nv04_fifo_disable(struct drm_device *dev)
 @@ -112,6 +112,12 @@ nv04_fifo_channel_id(struct drm_device *dev)
  			NV03_PFIFO_CACHE1_PUSH1_CHID_MASK;
  }
@@ -7544,7 +9193,7 @@ index 2e58c33..0000000
 -	return 0;
 -}
 diff --git a/drivers/gpu/drm/nouveau/nv17_tv.c b/drivers/gpu/drm/nouveau/nv17_tv.c
-index 74c8803..44fefb0 100644
+index 74c8803..703c188 100644
 --- a/drivers/gpu/drm/nouveau/nv17_tv.c
 +++ b/drivers/gpu/drm/nouveau/nv17_tv.c
 @@ -37,6 +37,7 @@ static uint32_t nv42_tv_sample_load(struct drm_encoder *encoder)
@@ -7577,7 +9226,7 @@ index 74c8803..44fefb0 100644
  
  	NVWriteRAMDAC(dev, head, NV_PRAMDAC_FP_HTOTAL, 1343);
  	NVWriteRAMDAC(dev, head, NV_PRAMDAC_FP_HSYNC_START, 1047);
-@@ -110,12 +111,27 @@ static uint32_t nv42_tv_sample_load(struct drm_encoder *encoder)
+@@ -110,12 +111,31 @@ static uint32_t nv42_tv_sample_load(struct drm_encoder *encoder)
  	NVWriteRAMDAC(dev, head, NV_PRAMDAC_FP_HSYNC_END, fp_hsync_end);
  	NVWriteRAMDAC(dev, head, NV_PRAMDAC_FP_HSYNC_START, fp_hsync_start);
  	NVWriteRAMDAC(dev, head, NV_PRAMDAC_FP_HTOTAL, fp_htotal);
@@ -7593,10 +9242,14 @@ index 74c8803..44fefb0 100644
 +get_tv_detect_quirks(struct drm_device *dev, uint32_t *pin_mask)
 +{
 +	/* Zotac FX5200 */
-+	if (dev->pdev->device == 0x0322 &&
-+	    dev->pdev->subsystem_vendor == 0x19da &&
-+	    (dev->pdev->subsystem_device == 0x1035 ||
-+	     dev->pdev->subsystem_device == 0x2035)) {
++	if (nv_match_device(dev, 0x0322, 0x19da, 0x1035) ||
++	    nv_match_device(dev, 0x0322, 0x19da, 0x2035)) {
++		*pin_mask = 0xc;
++		return false;
++	}
++
++	/* MSI nForce2 IGP */
++	if (nv_match_device(dev, 0x01f0, 0x1462, 0x5710)) {
 +		*pin_mask = 0xc;
 +		return false;
 +	}
@@ -7607,7 +9260,7 @@ index 74c8803..44fefb0 100644
  static enum drm_connector_status
  nv17_tv_detect(struct drm_encoder *encoder, struct drm_connector *connector)
  {
-@@ -124,12 +140,20 @@ nv17_tv_detect(struct drm_encoder *encoder, struct drm_connector *connector)
+@@ -124,12 +144,20 @@ nv17_tv_detect(struct drm_encoder *encoder, struct drm_connector *connector)
  	struct drm_mode_config *conf = &dev->mode_config;
  	struct nv17_tv_encoder *tv_enc = to_tv_enc(encoder);
  	struct dcb_entry *dcb = tv_enc->base.dcb;
@@ -7633,7 +9286,7 @@ index 74c8803..44fefb0 100644
  
  	switch (tv_enc->pin_mask) {
  	case 0x2:
-@@ -154,7 +178,9 @@ nv17_tv_detect(struct drm_encoder *encoder, struct drm_connector *connector)
+@@ -154,7 +182,9 @@ nv17_tv_detect(struct drm_encoder *encoder, struct drm_connector *connector)
  					 conf->tv_subconnector_property,
  					 tv_enc->subconnector);
  
@@ -7644,7 +9297,7 @@ index 74c8803..44fefb0 100644
  		NV_INFO(dev, "Load detected on output %c\n",
  			'@' + ffs(dcb->or));
  		return connector_status_connected;
-@@ -296,6 +322,9 @@ static bool nv17_tv_mode_fixup(struct drm_encoder *encoder,
+@@ -296,6 +326,9 @@ static bool nv17_tv_mode_fixup(struct drm_encoder *encoder,
  {
  	struct nv17_tv_norm_params *tv_norm = get_tv_norm(encoder);
  
@@ -7654,7 +9307,7 @@ index 74c8803..44fefb0 100644
  	if (tv_norm->kind == CTV_ENC_MODE)
  		adjusted_mode->clock = tv_norm->ctv_enc_mode.mode.clock;
  	else
-@@ -307,6 +336,8 @@ static bool nv17_tv_mode_fixup(struct drm_encoder *encoder,
+@@ -307,6 +340,8 @@ static bool nv17_tv_mode_fixup(struct drm_encoder *encoder,
  static void  nv17_tv_dpms(struct drm_encoder *encoder, int mode)
  {
  	struct drm_device *dev = encoder->dev;
@@ -7663,7 +9316,7 @@ index 74c8803..44fefb0 100644
  	struct nv17_tv_state *regs = &to_tv_enc(encoder)->state;
  	struct nv17_tv_norm_params *tv_norm = get_tv_norm(encoder);
  
-@@ -331,8 +362,8 @@ static void  nv17_tv_dpms(struct drm_encoder *encoder, int mode)
+@@ -331,8 +366,8 @@ static void  nv17_tv_dpms(struct drm_encoder *encoder, int mode)
  
  	nv_load_ptv(dev, regs, 200);
  
@@ -7674,7 +9327,27 @@ index 74c8803..44fefb0 100644
  
  	nv04_dac_update_dacclk(encoder, mode == DRM_MODE_DPMS_ON);
  }
-@@ -744,8 +775,10 @@ static struct drm_encoder_funcs nv17_tv_funcs = {
+@@ -373,15 +408,10 @@ static void nv17_tv_prepare(struct drm_encoder *encoder)
+ 
+ 	}
+ 
+-	/* Some NV4x have unknown values (0x3f, 0x50, 0x54, 0x6b, 0x79, 0x7f)
+-	 * at LCD__INDEX which we don't alter
+-	 */
+-	if (!(*cr_lcd & 0x44)) {
+-		if (tv_norm->kind == CTV_ENC_MODE)
+-			*cr_lcd = 0x1 | (head ? 0x0 : 0x8);
+-		else
+-			*cr_lcd = 0;
+-	}
++	if (tv_norm->kind == CTV_ENC_MODE)
++		*cr_lcd = 0x1 | (head ? 0x0 : 0x8);
++	else
++		*cr_lcd = 0;
+ 
+ 	/* Set the DACCLK register */
+ 	dacclk = (NVReadRAMDAC(dev, 0, dacclk_off) & ~0x30) | 0x1;
+@@ -744,8 +774,10 @@ static struct drm_encoder_funcs nv17_tv_funcs = {
  	.destroy = nv17_tv_destroy,
  };
  
@@ -7686,7 +9359,7 @@ index 74c8803..44fefb0 100644
  	struct drm_encoder *encoder;
  	struct nv17_tv_encoder *tv_enc = NULL;
  
-@@ -774,5 +807,7 @@ int nv17_tv_create(struct drm_device *dev, struct dcb_entry *entry)
+@@ -774,5 +806,7 @@ int nv17_tv_create(struct drm_device *dev, struct dcb_entry *entry)
  	encoder->possible_crtcs = entry->heads;
  	encoder->possible_clones = 0;
  
@@ -7695,9 +9368,573 @@ index 74c8803..44fefb0 100644
  	return 0;
  }
 diff --git a/drivers/gpu/drm/nouveau/nv20_graph.c b/drivers/gpu/drm/nouveau/nv20_graph.c
-index d6fc0a8..191c15c 100644
+index d6fc0a8..cc876ef 100644
 --- a/drivers/gpu/drm/nouveau/nv20_graph.c
 +++ b/drivers/gpu/drm/nouveau/nv20_graph.c
+@@ -37,49 +37,49 @@ nv20_graph_context_init(struct drm_device *dev, struct nouveau_gpuobj *ctx)
+ {
+ 	int i;
+ 
+-	nv_wo32(dev, ctx, 0x033c/4, 0xffff0000);
+-	nv_wo32(dev, ctx, 0x03a0/4, 0x0fff0000);
+-	nv_wo32(dev, ctx, 0x03a4/4, 0x0fff0000);
+-	nv_wo32(dev, ctx, 0x047c/4, 0x00000101);
+-	nv_wo32(dev, ctx, 0x0490/4, 0x00000111);
+-	nv_wo32(dev, ctx, 0x04a8/4, 0x44400000);
++	nv_wo32(ctx, 0x033c, 0xffff0000);
++	nv_wo32(ctx, 0x03a0, 0x0fff0000);
++	nv_wo32(ctx, 0x03a4, 0x0fff0000);
++	nv_wo32(ctx, 0x047c, 0x00000101);
++	nv_wo32(ctx, 0x0490, 0x00000111);
++	nv_wo32(ctx, 0x04a8, 0x44400000);
+ 	for (i = 0x04d4; i <= 0x04e0; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00030303);
++		nv_wo32(ctx, i, 0x00030303);
+ 	for (i = 0x04f4; i <= 0x0500; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00080000);
++		nv_wo32(ctx, i, 0x00080000);
+ 	for (i = 0x050c; i <= 0x0518; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x01012000);
++		nv_wo32(ctx, i, 0x01012000);
+ 	for (i = 0x051c; i <= 0x0528; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x000105b8);
++		nv_wo32(ctx, i, 0x000105b8);
+ 	for (i = 0x052c; i <= 0x0538; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00080008);
++		nv_wo32(ctx, i, 0x00080008);
+ 	for (i = 0x055c; i <= 0x0598; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x07ff0000);
+-	nv_wo32(dev, ctx, 0x05a4/4, 0x4b7fffff);
+-	nv_wo32(dev, ctx, 0x05fc/4, 0x00000001);
+-	nv_wo32(dev, ctx, 0x0604/4, 0x00004000);
+-	nv_wo32(dev, ctx, 0x0610/4, 0x00000001);
+-	nv_wo32(dev, ctx, 0x0618/4, 0x00040000);
+-	nv_wo32(dev, ctx, 0x061c/4, 0x00010000);
++		nv_wo32(ctx, i, 0x07ff0000);
++	nv_wo32(ctx, 0x05a4, 0x4b7fffff);
++	nv_wo32(ctx, 0x05fc, 0x00000001);
++	nv_wo32(ctx, 0x0604, 0x00004000);
++	nv_wo32(ctx, 0x0610, 0x00000001);
++	nv_wo32(ctx, 0x0618, 0x00040000);
++	nv_wo32(ctx, 0x061c, 0x00010000);
+ 	for (i = 0x1c1c; i <= 0x248c; i += 16) {
+-		nv_wo32(dev, ctx, (i + 0)/4, 0x10700ff9);
+-		nv_wo32(dev, ctx, (i + 4)/4, 0x0436086c);
+-		nv_wo32(dev, ctx, (i + 8)/4, 0x000c001b);
++		nv_wo32(ctx, (i + 0), 0x10700ff9);
++		nv_wo32(ctx, (i + 4), 0x0436086c);
++		nv_wo32(ctx, (i + 8), 0x000c001b);
+ 	}
+-	nv_wo32(dev, ctx, 0x281c/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x2830/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x285c/4, 0x40000000);
+-	nv_wo32(dev, ctx, 0x2860/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x2864/4, 0x3f000000);
+-	nv_wo32(dev, ctx, 0x286c/4, 0x40000000);
+-	nv_wo32(dev, ctx, 0x2870/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x2878/4, 0xbf800000);
+-	nv_wo32(dev, ctx, 0x2880/4, 0xbf800000);
+-	nv_wo32(dev, ctx, 0x34a4/4, 0x000fe000);
+-	nv_wo32(dev, ctx, 0x3530/4, 0x000003f8);
+-	nv_wo32(dev, ctx, 0x3540/4, 0x002fe000);
++	nv_wo32(ctx, 0x281c, 0x3f800000);
++	nv_wo32(ctx, 0x2830, 0x3f800000);
++	nv_wo32(ctx, 0x285c, 0x40000000);
++	nv_wo32(ctx, 0x2860, 0x3f800000);
++	nv_wo32(ctx, 0x2864, 0x3f000000);
++	nv_wo32(ctx, 0x286c, 0x40000000);
++	nv_wo32(ctx, 0x2870, 0x3f800000);
++	nv_wo32(ctx, 0x2878, 0xbf800000);
++	nv_wo32(ctx, 0x2880, 0xbf800000);
++	nv_wo32(ctx, 0x34a4, 0x000fe000);
++	nv_wo32(ctx, 0x3530, 0x000003f8);
++	nv_wo32(ctx, 0x3540, 0x002fe000);
+ 	for (i = 0x355c; i <= 0x3578; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x001c527c);
++		nv_wo32(ctx, i, 0x001c527c);
+ }
+ 
+ static void
+@@ -87,58 +87,58 @@ nv25_graph_context_init(struct drm_device *dev, struct nouveau_gpuobj *ctx)
+ {
+ 	int i;
+ 
+-	nv_wo32(dev, ctx, 0x035c/4, 0xffff0000);
+-	nv_wo32(dev, ctx, 0x03c0/4, 0x0fff0000);
+-	nv_wo32(dev, ctx, 0x03c4/4, 0x0fff0000);
+-	nv_wo32(dev, ctx, 0x049c/4, 0x00000101);
+-	nv_wo32(dev, ctx, 0x04b0/4, 0x00000111);
+-	nv_wo32(dev, ctx, 0x04c8/4, 0x00000080);
+-	nv_wo32(dev, ctx, 0x04cc/4, 0xffff0000);
+-	nv_wo32(dev, ctx, 0x04d0/4, 0x00000001);
+-	nv_wo32(dev, ctx, 0x04e4/4, 0x44400000);
+-	nv_wo32(dev, ctx, 0x04fc/4, 0x4b800000);
++	nv_wo32(ctx, 0x035c, 0xffff0000);
++	nv_wo32(ctx, 0x03c0, 0x0fff0000);
++	nv_wo32(ctx, 0x03c4, 0x0fff0000);
++	nv_wo32(ctx, 0x049c, 0x00000101);
++	nv_wo32(ctx, 0x04b0, 0x00000111);
++	nv_wo32(ctx, 0x04c8, 0x00000080);
++	nv_wo32(ctx, 0x04cc, 0xffff0000);
++	nv_wo32(ctx, 0x04d0, 0x00000001);
++	nv_wo32(ctx, 0x04e4, 0x44400000);
++	nv_wo32(ctx, 0x04fc, 0x4b800000);
+ 	for (i = 0x0510; i <= 0x051c; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00030303);
++		nv_wo32(ctx, i, 0x00030303);
+ 	for (i = 0x0530; i <= 0x053c; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00080000);
++		nv_wo32(ctx, i, 0x00080000);
+ 	for (i = 0x0548; i <= 0x0554; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x01012000);
++		nv_wo32(ctx, i, 0x01012000);
+ 	for (i = 0x0558; i <= 0x0564; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x000105b8);
++		nv_wo32(ctx, i, 0x000105b8);
+ 	for (i = 0x0568; i <= 0x0574; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00080008);
++		nv_wo32(ctx, i, 0x00080008);
+ 	for (i = 0x0598; i <= 0x05d4; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x07ff0000);
+-	nv_wo32(dev, ctx, 0x05e0/4, 0x4b7fffff);
+-	nv_wo32(dev, ctx, 0x0620/4, 0x00000080);
+-	nv_wo32(dev, ctx, 0x0624/4, 0x30201000);
+-	nv_wo32(dev, ctx, 0x0628/4, 0x70605040);
+-	nv_wo32(dev, ctx, 0x062c/4, 0xb0a09080);
+-	nv_wo32(dev, ctx, 0x0630/4, 0xf0e0d0c0);
+-	nv_wo32(dev, ctx, 0x0664/4, 0x00000001);
+-	nv_wo32(dev, ctx, 0x066c/4, 0x00004000);
+-	nv_wo32(dev, ctx, 0x0678/4, 0x00000001);
+-	nv_wo32(dev, ctx, 0x0680/4, 0x00040000);
+-	nv_wo32(dev, ctx, 0x0684/4, 0x00010000);
++		nv_wo32(ctx, i, 0x07ff0000);
++	nv_wo32(ctx, 0x05e0, 0x4b7fffff);
++	nv_wo32(ctx, 0x0620, 0x00000080);
++	nv_wo32(ctx, 0x0624, 0x30201000);
++	nv_wo32(ctx, 0x0628, 0x70605040);
++	nv_wo32(ctx, 0x062c, 0xb0a09080);
++	nv_wo32(ctx, 0x0630, 0xf0e0d0c0);
++	nv_wo32(ctx, 0x0664, 0x00000001);
++	nv_wo32(ctx, 0x066c, 0x00004000);
++	nv_wo32(ctx, 0x0678, 0x00000001);
++	nv_wo32(ctx, 0x0680, 0x00040000);
++	nv_wo32(ctx, 0x0684, 0x00010000);
+ 	for (i = 0x1b04; i <= 0x2374; i += 16) {
+-		nv_wo32(dev, ctx, (i + 0)/4, 0x10700ff9);
+-		nv_wo32(dev, ctx, (i + 4)/4, 0x0436086c);
+-		nv_wo32(dev, ctx, (i + 8)/4, 0x000c001b);
++		nv_wo32(ctx, (i + 0), 0x10700ff9);
++		nv_wo32(ctx, (i + 4), 0x0436086c);
++		nv_wo32(ctx, (i + 8), 0x000c001b);
+ 	}
+-	nv_wo32(dev, ctx, 0x2704/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x2718/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x2744/4, 0x40000000);
+-	nv_wo32(dev, ctx, 0x2748/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x274c/4, 0x3f000000);
+-	nv_wo32(dev, ctx, 0x2754/4, 0x40000000);
+-	nv_wo32(dev, ctx, 0x2758/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x2760/4, 0xbf800000);
+-	nv_wo32(dev, ctx, 0x2768/4, 0xbf800000);
+-	nv_wo32(dev, ctx, 0x308c/4, 0x000fe000);
+-	nv_wo32(dev, ctx, 0x3108/4, 0x000003f8);
+-	nv_wo32(dev, ctx, 0x3468/4, 0x002fe000);
++	nv_wo32(ctx, 0x2704, 0x3f800000);
++	nv_wo32(ctx, 0x2718, 0x3f800000);
++	nv_wo32(ctx, 0x2744, 0x40000000);
++	nv_wo32(ctx, 0x2748, 0x3f800000);
++	nv_wo32(ctx, 0x274c, 0x3f000000);
++	nv_wo32(ctx, 0x2754, 0x40000000);
++	nv_wo32(ctx, 0x2758, 0x3f800000);
++	nv_wo32(ctx, 0x2760, 0xbf800000);
++	nv_wo32(ctx, 0x2768, 0xbf800000);
++	nv_wo32(ctx, 0x308c, 0x000fe000);
++	nv_wo32(ctx, 0x3108, 0x000003f8);
++	nv_wo32(ctx, 0x3468, 0x002fe000);
+ 	for (i = 0x3484; i <= 0x34a0; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x001c527c);
++		nv_wo32(ctx, i, 0x001c527c);
+ }
+ 
+ static void
+@@ -146,49 +146,49 @@ nv2a_graph_context_init(struct drm_device *dev, struct nouveau_gpuobj *ctx)
+ {
+ 	int i;
+ 
+-	nv_wo32(dev, ctx, 0x033c/4, 0xffff0000);
+-	nv_wo32(dev, ctx, 0x03a0/4, 0x0fff0000);
+-	nv_wo32(dev, ctx, 0x03a4/4, 0x0fff0000);
+-	nv_wo32(dev, ctx, 0x047c/4, 0x00000101);
+-	nv_wo32(dev, ctx, 0x0490/4, 0x00000111);
+-	nv_wo32(dev, ctx, 0x04a8/4, 0x44400000);
++	nv_wo32(ctx, 0x033c, 0xffff0000);
++	nv_wo32(ctx, 0x03a0, 0x0fff0000);
++	nv_wo32(ctx, 0x03a4, 0x0fff0000);
++	nv_wo32(ctx, 0x047c, 0x00000101);
++	nv_wo32(ctx, 0x0490, 0x00000111);
++	nv_wo32(ctx, 0x04a8, 0x44400000);
+ 	for (i = 0x04d4; i <= 0x04e0; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00030303);
++		nv_wo32(ctx, i, 0x00030303);
+ 	for (i = 0x04f4; i <= 0x0500; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00080000);
++		nv_wo32(ctx, i, 0x00080000);
+ 	for (i = 0x050c; i <= 0x0518; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x01012000);
++		nv_wo32(ctx, i, 0x01012000);
+ 	for (i = 0x051c; i <= 0x0528; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x000105b8);
++		nv_wo32(ctx, i, 0x000105b8);
+ 	for (i = 0x052c; i <= 0x0538; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00080008);
++		nv_wo32(ctx, i, 0x00080008);
+ 	for (i = 0x055c; i <= 0x0598; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x07ff0000);
+-	nv_wo32(dev, ctx, 0x05a4/4, 0x4b7fffff);
+-	nv_wo32(dev, ctx, 0x05fc/4, 0x00000001);
+-	nv_wo32(dev, ctx, 0x0604/4, 0x00004000);
+-	nv_wo32(dev, ctx, 0x0610/4, 0x00000001);
+-	nv_wo32(dev, ctx, 0x0618/4, 0x00040000);
+-	nv_wo32(dev, ctx, 0x061c/4, 0x00010000);
++		nv_wo32(ctx, i, 0x07ff0000);
++	nv_wo32(ctx, 0x05a4, 0x4b7fffff);
++	nv_wo32(ctx, 0x05fc, 0x00000001);
++	nv_wo32(ctx, 0x0604, 0x00004000);
++	nv_wo32(ctx, 0x0610, 0x00000001);
++	nv_wo32(ctx, 0x0618, 0x00040000);
++	nv_wo32(ctx, 0x061c, 0x00010000);
+ 	for (i = 0x1a9c; i <= 0x22fc; i += 16) { /*XXX: check!! */
+-		nv_wo32(dev, ctx, (i + 0)/4, 0x10700ff9);
+-		nv_wo32(dev, ctx, (i + 4)/4, 0x0436086c);
+-		nv_wo32(dev, ctx, (i + 8)/4, 0x000c001b);
++		nv_wo32(ctx, (i + 0), 0x10700ff9);
++		nv_wo32(ctx, (i + 4), 0x0436086c);
++		nv_wo32(ctx, (i + 8), 0x000c001b);
+ 	}
+-	nv_wo32(dev, ctx, 0x269c/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x26b0/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x26dc/4, 0x40000000);
+-	nv_wo32(dev, ctx, 0x26e0/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x26e4/4, 0x3f000000);
+-	nv_wo32(dev, ctx, 0x26ec/4, 0x40000000);
+-	nv_wo32(dev, ctx, 0x26f0/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x26f8/4, 0xbf800000);
+-	nv_wo32(dev, ctx, 0x2700/4, 0xbf800000);
+-	nv_wo32(dev, ctx, 0x3024/4, 0x000fe000);
+-	nv_wo32(dev, ctx, 0x30a0/4, 0x000003f8);
+-	nv_wo32(dev, ctx, 0x33fc/4, 0x002fe000);
++	nv_wo32(ctx, 0x269c, 0x3f800000);
++	nv_wo32(ctx, 0x26b0, 0x3f800000);
++	nv_wo32(ctx, 0x26dc, 0x40000000);
++	nv_wo32(ctx, 0x26e0, 0x3f800000);
++	nv_wo32(ctx, 0x26e4, 0x3f000000);
++	nv_wo32(ctx, 0x26ec, 0x40000000);
++	nv_wo32(ctx, 0x26f0, 0x3f800000);
++	nv_wo32(ctx, 0x26f8, 0xbf800000);
++	nv_wo32(ctx, 0x2700, 0xbf800000);
++	nv_wo32(ctx, 0x3024, 0x000fe000);
++	nv_wo32(ctx, 0x30a0, 0x000003f8);
++	nv_wo32(ctx, 0x33fc, 0x002fe000);
+ 	for (i = 0x341c; i <= 0x3438; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x001c527c);
++		nv_wo32(ctx, i, 0x001c527c);
+ }
+ 
+ static void
+@@ -196,57 +196,57 @@ nv30_31_graph_context_init(struct drm_device *dev, struct nouveau_gpuobj *ctx)
+ {
+ 	int i;
+ 
+-	nv_wo32(dev, ctx, 0x0410/4, 0x00000101);
+-	nv_wo32(dev, ctx, 0x0424/4, 0x00000111);
+-	nv_wo32(dev, ctx, 0x0428/4, 0x00000060);
+-	nv_wo32(dev, ctx, 0x0444/4, 0x00000080);
+-	nv_wo32(dev, ctx, 0x0448/4, 0xffff0000);
+-	nv_wo32(dev, ctx, 0x044c/4, 0x00000001);
+-	nv_wo32(dev, ctx, 0x0460/4, 0x44400000);
+-	nv_wo32(dev, ctx, 0x048c/4, 0xffff0000);
++	nv_wo32(ctx, 0x0410, 0x00000101);
++	nv_wo32(ctx, 0x0424, 0x00000111);
++	nv_wo32(ctx, 0x0428, 0x00000060);
++	nv_wo32(ctx, 0x0444, 0x00000080);
++	nv_wo32(ctx, 0x0448, 0xffff0000);
++	nv_wo32(ctx, 0x044c, 0x00000001);
++	nv_wo32(ctx, 0x0460, 0x44400000);
++	nv_wo32(ctx, 0x048c, 0xffff0000);
+ 	for (i = 0x04e0; i < 0x04e8; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x0fff0000);
+-	nv_wo32(dev, ctx, 0x04ec/4, 0x00011100);
++		nv_wo32(ctx, i, 0x0fff0000);
++	nv_wo32(ctx, 0x04ec, 0x00011100);
+ 	for (i = 0x0508; i < 0x0548; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x07ff0000);
+-	nv_wo32(dev, ctx, 0x0550/4, 0x4b7fffff);
+-	nv_wo32(dev, ctx, 0x058c/4, 0x00000080);
+-	nv_wo32(dev, ctx, 0x0590/4, 0x30201000);
+-	nv_wo32(dev, ctx, 0x0594/4, 0x70605040);
+-	nv_wo32(dev, ctx, 0x0598/4, 0xb8a89888);
+-	nv_wo32(dev, ctx, 0x059c/4, 0xf8e8d8c8);
+-	nv_wo32(dev, ctx, 0x05b0/4, 0xb0000000);
++		nv_wo32(ctx, i, 0x07ff0000);
++	nv_wo32(ctx, 0x0550, 0x4b7fffff);
++	nv_wo32(ctx, 0x058c, 0x00000080);
++	nv_wo32(ctx, 0x0590, 0x30201000);
++	nv_wo32(ctx, 0x0594, 0x70605040);
++	nv_wo32(ctx, 0x0598, 0xb8a89888);
++	nv_wo32(ctx, 0x059c, 0xf8e8d8c8);
++	nv_wo32(ctx, 0x05b0, 0xb0000000);
+ 	for (i = 0x0600; i < 0x0640; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00010588);
++		nv_wo32(ctx, i, 0x00010588);
+ 	for (i = 0x0640; i < 0x0680; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00030303);
++		nv_wo32(ctx, i, 0x00030303);
+ 	for (i = 0x06c0; i < 0x0700; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x0008aae4);
++		nv_wo32(ctx, i, 0x0008aae4);
+ 	for (i = 0x0700; i < 0x0740; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x01012000);
++		nv_wo32(ctx, i, 0x01012000);
+ 	for (i = 0x0740; i < 0x0780; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00080008);
+-	nv_wo32(dev, ctx, 0x085c/4, 0x00040000);
+-	nv_wo32(dev, ctx, 0x0860/4, 0x00010000);
++		nv_wo32(ctx, i, 0x00080008);
++	nv_wo32(ctx, 0x085c, 0x00040000);
++	nv_wo32(ctx, 0x0860, 0x00010000);
+ 	for (i = 0x0864; i < 0x0874; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00040004);
++		nv_wo32(ctx, i, 0x00040004);
+ 	for (i = 0x1f18; i <= 0x3088 ; i += 16) {
+-		nv_wo32(dev, ctx, i/4 + 0, 0x10700ff9);
+-		nv_wo32(dev, ctx, i/4 + 1, 0x0436086c);
+-		nv_wo32(dev, ctx, i/4 + 2, 0x000c001b);
++		nv_wo32(ctx, i + 0, 0x10700ff9); /* offsets are in bytes */
++		nv_wo32(ctx, i + 4, 0x0436086c);
++		nv_wo32(ctx, i + 8, 0x000c001b);
+ 	}
+ 	for (i = 0x30b8; i < 0x30c8; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x0000ffff);
+-	nv_wo32(dev, ctx, 0x344c/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x3808/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x381c/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x3848/4, 0x40000000);
+-	nv_wo32(dev, ctx, 0x384c/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x3850/4, 0x3f000000);
+-	nv_wo32(dev, ctx, 0x3858/4, 0x40000000);
+-	nv_wo32(dev, ctx, 0x385c/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x3864/4, 0xbf800000);
+-	nv_wo32(dev, ctx, 0x386c/4, 0xbf800000);
++		nv_wo32(ctx, i, 0x0000ffff);
++	nv_wo32(ctx, 0x344c, 0x3f800000);
++	nv_wo32(ctx, 0x3808, 0x3f800000);
++	nv_wo32(ctx, 0x381c, 0x3f800000);
++	nv_wo32(ctx, 0x3848, 0x40000000);
++	nv_wo32(ctx, 0x384c, 0x3f800000);
++	nv_wo32(ctx, 0x3850, 0x3f000000);
++	nv_wo32(ctx, 0x3858, 0x40000000);
++	nv_wo32(ctx, 0x385c, 0x3f800000);
++	nv_wo32(ctx, 0x3864, 0xbf800000);
++	nv_wo32(ctx, 0x386c, 0xbf800000);
+ }
+ 
+ static void
+@@ -254,57 +254,57 @@ nv34_graph_context_init(struct drm_device *dev, struct nouveau_gpuobj *ctx)
+ {
+ 	int i;
+ 
+-	nv_wo32(dev, ctx, 0x040c/4, 0x01000101);
+-	nv_wo32(dev, ctx, 0x0420/4, 0x00000111);
+-	nv_wo32(dev, ctx, 0x0424/4, 0x00000060);
+-	nv_wo32(dev, ctx, 0x0440/4, 0x00000080);
+-	nv_wo32(dev, ctx, 0x0444/4, 0xffff0000);
+-	nv_wo32(dev, ctx, 0x0448/4, 0x00000001);
+-	nv_wo32(dev, ctx, 0x045c/4, 0x44400000);
+-	nv_wo32(dev, ctx, 0x0480/4, 0xffff0000);
++	nv_wo32(ctx, 0x040c, 0x01000101);
++	nv_wo32(ctx, 0x0420, 0x00000111);
++	nv_wo32(ctx, 0x0424, 0x00000060);
++	nv_wo32(ctx, 0x0440, 0x00000080);
++	nv_wo32(ctx, 0x0444, 0xffff0000);
++	nv_wo32(ctx, 0x0448, 0x00000001);
++	nv_wo32(ctx, 0x045c, 0x44400000);
++	nv_wo32(ctx, 0x0480, 0xffff0000);
+ 	for (i = 0x04d4; i < 0x04dc; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x0fff0000);
+-	nv_wo32(dev, ctx, 0x04e0/4, 0x00011100);
++		nv_wo32(ctx, i, 0x0fff0000);
++	nv_wo32(ctx, 0x04e0, 0x00011100);
+ 	for (i = 0x04fc; i < 0x053c; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x07ff0000);
+-	nv_wo32(dev, ctx, 0x0544/4, 0x4b7fffff);
+-	nv_wo32(dev, ctx, 0x057c/4, 0x00000080);
+-	nv_wo32(dev, ctx, 0x0580/4, 0x30201000);
+-	nv_wo32(dev, ctx, 0x0584/4, 0x70605040);
+-	nv_wo32(dev, ctx, 0x0588/4, 0xb8a89888);
+-	nv_wo32(dev, ctx, 0x058c/4, 0xf8e8d8c8);
+-	nv_wo32(dev, ctx, 0x05a0/4, 0xb0000000);
++		nv_wo32(ctx, i, 0x07ff0000);
++	nv_wo32(ctx, 0x0544, 0x4b7fffff);
++	nv_wo32(ctx, 0x057c, 0x00000080);
++	nv_wo32(ctx, 0x0580, 0x30201000);
++	nv_wo32(ctx, 0x0584, 0x70605040);
++	nv_wo32(ctx, 0x0588, 0xb8a89888);
++	nv_wo32(ctx, 0x058c, 0xf8e8d8c8);
++	nv_wo32(ctx, 0x05a0, 0xb0000000);
+ 	for (i = 0x05f0; i < 0x0630; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00010588);
++		nv_wo32(ctx, i, 0x00010588);
+ 	for (i = 0x0630; i < 0x0670; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00030303);
++		nv_wo32(ctx, i, 0x00030303);
+ 	for (i = 0x06b0; i < 0x06f0; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x0008aae4);
++		nv_wo32(ctx, i, 0x0008aae4);
+ 	for (i = 0x06f0; i < 0x0730; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x01012000);
++		nv_wo32(ctx, i, 0x01012000);
+ 	for (i = 0x0730; i < 0x0770; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00080008);
+-	nv_wo32(dev, ctx, 0x0850/4, 0x00040000);
+-	nv_wo32(dev, ctx, 0x0854/4, 0x00010000);
++		nv_wo32(ctx, i, 0x00080008);
++	nv_wo32(ctx, 0x0850, 0x00040000);
++	nv_wo32(ctx, 0x0854, 0x00010000);
+ 	for (i = 0x0858; i < 0x0868; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00040004);
++		nv_wo32(ctx, i, 0x00040004);
+ 	for (i = 0x15ac; i <= 0x271c ; i += 16) {
+-		nv_wo32(dev, ctx, i/4 + 0, 0x10700ff9);
+-		nv_wo32(dev, ctx, i/4 + 1, 0x0436086c);
+-		nv_wo32(dev, ctx, i/4 + 2, 0x000c001b);
++		nv_wo32(ctx, i + 0, 0x10700ff9); /* offsets are in bytes */
++		nv_wo32(ctx, i + 4, 0x0436086c);
++		nv_wo32(ctx, i + 8, 0x000c001b);
+ 	}
+ 	for (i = 0x274c; i < 0x275c; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x0000ffff);
+-	nv_wo32(dev, ctx, 0x2ae0/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x2e9c/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x2eb0/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x2edc/4, 0x40000000);
+-	nv_wo32(dev, ctx, 0x2ee0/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x2ee4/4, 0x3f000000);
+-	nv_wo32(dev, ctx, 0x2eec/4, 0x40000000);
+-	nv_wo32(dev, ctx, 0x2ef0/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x2ef8/4, 0xbf800000);
+-	nv_wo32(dev, ctx, 0x2f00/4, 0xbf800000);
++		nv_wo32(ctx, i, 0x0000ffff);
++	nv_wo32(ctx, 0x2ae0, 0x3f800000);
++	nv_wo32(ctx, 0x2e9c, 0x3f800000);
++	nv_wo32(ctx, 0x2eb0, 0x3f800000);
++	nv_wo32(ctx, 0x2edc, 0x40000000);
++	nv_wo32(ctx, 0x2ee0, 0x3f800000);
++	nv_wo32(ctx, 0x2ee4, 0x3f000000);
++	nv_wo32(ctx, 0x2eec, 0x40000000);
++	nv_wo32(ctx, 0x2ef0, 0x3f800000);
++	nv_wo32(ctx, 0x2ef8, 0xbf800000);
++	nv_wo32(ctx, 0x2f00, 0xbf800000);
+ }
+ 
+ static void
+@@ -312,57 +312,57 @@ nv35_36_graph_context_init(struct drm_device *dev, struct nouveau_gpuobj *ctx)
+ {
+ 	int i;
+ 
+-	nv_wo32(dev, ctx, 0x040c/4, 0x00000101);
+-	nv_wo32(dev, ctx, 0x0420/4, 0x00000111);
+-	nv_wo32(dev, ctx, 0x0424/4, 0x00000060);
+-	nv_wo32(dev, ctx, 0x0440/4, 0x00000080);
+-	nv_wo32(dev, ctx, 0x0444/4, 0xffff0000);
+-	nv_wo32(dev, ctx, 0x0448/4, 0x00000001);
+-	nv_wo32(dev, ctx, 0x045c/4, 0x44400000);
+-	nv_wo32(dev, ctx, 0x0488/4, 0xffff0000);
++	nv_wo32(ctx, 0x040c, 0x00000101);
++	nv_wo32(ctx, 0x0420, 0x00000111);
++	nv_wo32(ctx, 0x0424, 0x00000060);
++	nv_wo32(ctx, 0x0440, 0x00000080);
++	nv_wo32(ctx, 0x0444, 0xffff0000);
++	nv_wo32(ctx, 0x0448, 0x00000001);
++	nv_wo32(ctx, 0x045c, 0x44400000);
++	nv_wo32(ctx, 0x0488, 0xffff0000);
+ 	for (i = 0x04dc; i < 0x04e4; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x0fff0000);
+-	nv_wo32(dev, ctx, 0x04e8/4, 0x00011100);
++		nv_wo32(ctx, i, 0x0fff0000);
++	nv_wo32(ctx, 0x04e8, 0x00011100);
+ 	for (i = 0x0504; i < 0x0544; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x07ff0000);
+-	nv_wo32(dev, ctx, 0x054c/4, 0x4b7fffff);
+-	nv_wo32(dev, ctx, 0x0588/4, 0x00000080);
+-	nv_wo32(dev, ctx, 0x058c/4, 0x30201000);
+-	nv_wo32(dev, ctx, 0x0590/4, 0x70605040);
+-	nv_wo32(dev, ctx, 0x0594/4, 0xb8a89888);
+-	nv_wo32(dev, ctx, 0x0598/4, 0xf8e8d8c8);
+-	nv_wo32(dev, ctx, 0x05ac/4, 0xb0000000);
++		nv_wo32(ctx, i, 0x07ff0000);
++	nv_wo32(ctx, 0x054c, 0x4b7fffff);
++	nv_wo32(ctx, 0x0588, 0x00000080);
++	nv_wo32(ctx, 0x058c, 0x30201000);
++	nv_wo32(ctx, 0x0590, 0x70605040);
++	nv_wo32(ctx, 0x0594, 0xb8a89888);
++	nv_wo32(ctx, 0x0598, 0xf8e8d8c8);
++	nv_wo32(ctx, 0x05ac, 0xb0000000);
+ 	for (i = 0x0604; i < 0x0644; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00010588);
++		nv_wo32(ctx, i, 0x00010588);
+ 	for (i = 0x0644; i < 0x0684; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00030303);
++		nv_wo32(ctx, i, 0x00030303);
+ 	for (i = 0x06c4; i < 0x0704; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x0008aae4);
++		nv_wo32(ctx, i, 0x0008aae4);
+ 	for (i = 0x0704; i < 0x0744; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x01012000);
++		nv_wo32(ctx, i, 0x01012000);
+ 	for (i = 0x0744; i < 0x0784; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00080008);
+-	nv_wo32(dev, ctx, 0x0860/4, 0x00040000);
+-	nv_wo32(dev, ctx, 0x0864/4, 0x00010000);
++		nv_wo32(ctx, i, 0x00080008);
++	nv_wo32(ctx, 0x0860, 0x00040000);
++	nv_wo32(ctx, 0x0864, 0x00010000);
+ 	for (i = 0x0868; i < 0x0878; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x00040004);
++		nv_wo32(ctx, i, 0x00040004);
+ 	for (i = 0x1f1c; i <= 0x308c ; i += 16) {
+-		nv_wo32(dev, ctx, i/4 + 0, 0x10700ff9);
+-		nv_wo32(dev, ctx, i/4 + 1, 0x0436086c);
+-		nv_wo32(dev, ctx, i/4 + 2, 0x000c001b);
++		nv_wo32(ctx, i + 0, 0x10700ff9);
++		nv_wo32(ctx, i + 4, 0x0436086c);
++		nv_wo32(ctx, i + 8, 0x000c001b);
+ 	}
+ 	for (i = 0x30bc; i < 0x30cc; i += 4)
+-		nv_wo32(dev, ctx, i/4, 0x0000ffff);
+-	nv_wo32(dev, ctx, 0x3450/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x380c/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x3820/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x384c/4, 0x40000000);
+-	nv_wo32(dev, ctx, 0x3850/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x3854/4, 0x3f000000);
+-	nv_wo32(dev, ctx, 0x385c/4, 0x40000000);
+-	nv_wo32(dev, ctx, 0x3860/4, 0x3f800000);
+-	nv_wo32(dev, ctx, 0x3868/4, 0xbf800000);
+-	nv_wo32(dev, ctx, 0x3870/4, 0xbf800000);
++		nv_wo32(ctx, i, 0x0000ffff);
++	nv_wo32(ctx, 0x3450, 0x3f800000);
++	nv_wo32(ctx, 0x380c, 0x3f800000);
++	nv_wo32(ctx, 0x3820, 0x3f800000);
++	nv_wo32(ctx, 0x384c, 0x40000000);
++	nv_wo32(ctx, 0x3850, 0x3f800000);
++	nv_wo32(ctx, 0x3854, 0x3f000000);
++	nv_wo32(ctx, 0x385c, 0x40000000);
++	nv_wo32(ctx, 0x3860, 0x3f800000);
++	nv_wo32(ctx, 0x3868, 0xbf800000);
++	nv_wo32(ctx, 0x3870, 0xbf800000);
+ }
+ 
+ int
 @@ -370,68 +370,54 @@ nv20_graph_create_context(struct nouveau_channel *chan)
  {
  	struct drm_device *dev = chan->dev;
@@ -7705,7 +9942,8 @@ index d6fc0a8..191c15c 100644
 +	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
  	void (*ctx_init)(struct drm_device *, struct nouveau_gpuobj *);
 -	unsigned int ctx_size;
- 	unsigned int idoffs = 0x28/4;
+-	unsigned int idoffs = 0x28/4;
++	unsigned int idoffs = 0x28;
  	int ret;
  
  	switch (dev_priv->chipset) {
@@ -7762,15 +10000,17 @@ index d6fc0a8..191c15c 100644
  	ctx_init(dev, chan->ramin_grctx->gpuobj);
  
  	/* nv20: nv_wo32(dev, chan->ramin_grctx->gpuobj, 10, chan->id<<24); */
- 	nv_wo32(dev, chan->ramin_grctx->gpuobj, idoffs,
- 					(chan->id << 24) | 0x1); /* CTX_USER */
+-	nv_wo32(dev, chan->ramin_grctx->gpuobj, idoffs,
+-					(chan->id << 24) | 0x1); /* CTX_USER */
++	nv_wo32(chan->ramin_grctx->gpuobj, idoffs,
++		(chan->id << 24) | 0x1); /* CTX_USER */
  
 -	nv_wo32(dev, dev_priv->ctx_table->gpuobj, chan->id,
 -			chan->ramin_grctx->instance >> 4);
 -
 -	dev_priv->engine.instmem.finish_access(dev);
-+	nv_wo32(dev, pgraph->ctx_table->gpuobj, chan->id,
-+		     chan->ramin_grctx->instance >> 4);
++	nv_wo32(pgraph->ctx_table->gpuobj, chan->id * 4,
++		chan->ramin_grctx->instance >> 4);
  	return 0;
  }
  
@@ -7786,7 +10026,7 @@ index d6fc0a8..191c15c 100644
 -	dev_priv->engine.instmem.prepare_access(dev, true);
 -	nv_wo32(dev, dev_priv->ctx_table->gpuobj, chan->id, 0);
 -	dev_priv->engine.instmem.finish_access(dev);
-+	nv_wo32(dev, pgraph->ctx_table->gpuobj, chan->id, 0);
++	nv_wo32(pgraph->ctx_table->gpuobj, chan->id * 4, 0);
  }
  
  int
@@ -8059,7 +10299,7 @@ index 500ccfd..2b67f18 100644
  	nv40_fifo_do_load_context(dev, pfifo->channels - 1);
  	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1,
 diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c b/drivers/gpu/drm/nouveau/nv40_graph.c
-index 704a25d..ef550ce 100644
+index 704a25d..f7b59ad 100644
 --- a/drivers/gpu/drm/nouveau/nv40_graph.c
 +++ b/drivers/gpu/drm/nouveau/nv40_graph.c
 @@ -58,6 +58,7 @@ nv40_graph_create_context(struct nouveau_channel *chan)
@@ -8085,14 +10325,16 @@ index 704a25d..ef550ce 100644
 -	} else {
 -		nouveau_grctx_vals_load(dev, chan->ramin_grctx->gpuobj);
 -	}
+-	nv_wo32(dev, chan->ramin_grctx->gpuobj, 0,
+-		     chan->ramin_grctx->gpuobj->im_pramin->start);
+-	dev_priv->engine.instmem.finish_access(dev);
 +	ctx.dev = chan->dev;
 +	ctx.mode = NOUVEAU_GRCTX_VALS;
 +	ctx.data = chan->ramin_grctx->gpuobj;
 +	nv40_grctx_init(&ctx);
 +
- 	nv_wo32(dev, chan->ramin_grctx->gpuobj, 0,
- 		     chan->ramin_grctx->gpuobj->im_pramin->start);
--	dev_priv->engine.instmem.finish_access(dev);
++	nv_wo32(chan->ramin_grctx->gpuobj, 0,
++		chan->ramin_grctx->gpuobj->im_pramin->start);
  	return 0;
  }
  
@@ -8160,6 +10402,27 @@ index 704a25d..ef550ce 100644
  }
  
  struct nouveau_pgraph_object_class nv40_graph_grclass[] = {
+diff --git a/drivers/gpu/drm/nouveau/nv40_grctx.c b/drivers/gpu/drm/nouveau/nv40_grctx.c
+index 9b5c974..ce58509 100644
+--- a/drivers/gpu/drm/nouveau/nv40_grctx.c
++++ b/drivers/gpu/drm/nouveau/nv40_grctx.c
+@@ -596,13 +596,13 @@ nv40_graph_construct_shader(struct nouveau_grctx *ctx)
+ 
+ 	offset += 0x0280/4;
+ 	for (i = 0; i < 16; i++, offset += 2)
+-		nv_wo32(dev, obj, offset, 0x3f800000);
++		nv_wo32(obj, offset * 4, 0x3f800000);
+ 
+ 	for (vs = 0; vs < vs_nr; vs++, offset += vs_len) {
+ 		for (i = 0; i < vs_nr_b0 * 6; i += 6)
+-			nv_wo32(dev, obj, offset + b0_offset + i, 0x00000001);
++			nv_wo32(obj, (offset + b0_offset + i) * 4, 0x00000001);
+ 		for (i = 0; i < vs_nr_b1 * 4; i += 4)
+-			nv_wo32(dev, obj, offset + b1_offset + i, 0x3f800000);
++			nv_wo32(obj, (offset + b1_offset + i) * 4, 0x3f800000);
+ 	}
+ }
+ 
 diff --git a/drivers/gpu/drm/nouveau/nv40_mc.c b/drivers/gpu/drm/nouveau/nv40_mc.c
 index 2a3495e..e4e72c1 100644
 --- a/drivers/gpu/drm/nouveau/nv40_mc.c
@@ -8174,7 +10437,7 @@ index 2a3495e..e4e72c1 100644
  		nv_wr32(dev, NV40_PMC_1704, 0);
  		nv_wr32(dev, NV40_PMC_1708, 0);
 diff --git a/drivers/gpu/drm/nouveau/nv50_crtc.c b/drivers/gpu/drm/nouveau/nv50_crtc.c
-index b4e4a3b..a438e56 100644
+index b4e4a3b..2423c92 100644
 --- a/drivers/gpu/drm/nouveau/nv50_crtc.c
 +++ b/drivers/gpu/drm/nouveau/nv50_crtc.c
 @@ -264,11 +264,16 @@ nv50_crtc_set_scale(struct nouveau_crtc *nv_crtc, int scaling_mode, bool update)
@@ -8224,7 +10487,17 @@ index b4e4a3b..a438e56 100644
  	}
  
  	return 0;
-@@ -440,47 +457,15 @@ nv50_crtc_prepare(struct drm_crtc *crtc)
+@@ -321,7 +338,9 @@ nv50_crtc_destroy(struct drm_crtc *crtc)
+ 
+ 	nv50_cursor_fini(nv_crtc);
+ 
++	nouveau_bo_unmap(nv_crtc->lut.nvbo);
+ 	nouveau_bo_ref(NULL, &nv_crtc->lut.nvbo);
++	nouveau_bo_unmap(nv_crtc->cursor.nvbo);
+ 	nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
+ 	kfree(nv_crtc->mode);
+ 	kfree(nv_crtc);
+@@ -440,47 +459,15 @@ nv50_crtc_prepare(struct drm_crtc *crtc)
  {
  	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
  	struct drm_device *dev = crtc->dev;
@@ -8272,7 +10545,7 @@ index b4e4a3b..a438e56 100644
  	struct drm_device *dev = crtc->dev;
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
  	struct nouveau_channel *evo = dev_priv->evo;
-@@ -491,20 +476,14 @@ nv50_crtc_commit(struct drm_crtc *crtc)
+@@ -491,20 +478,14 @@ nv50_crtc_commit(struct drm_crtc *crtc)
  
  	nv50_crtc_blank(nv_crtc, false);
  
@@ -8406,30 +10679,43 @@ index 1fd9537..1bc0859 100644
  }
  
 diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
-index 580a5d1..612fa6d 100644
+index 580a5d1..435d2b7 100644
 --- a/drivers/gpu/drm/nouveau/nv50_display.c
 +++ b/drivers/gpu/drm/nouveau/nv50_display.c
-@@ -71,14 +71,16 @@ nv50_evo_dmaobj_new(struct nouveau_channel *evo, uint32_t class, uint32_t name,
+@@ -42,6 +42,7 @@ nv50_evo_channel_del(struct nouveau_channel **pchan)
+ 	*pchan = NULL;
+ 
+ 	nouveau_gpuobj_channel_takedown(chan);
++	nouveau_bo_unmap(chan->pushbuf_bo);
+ 	nouveau_bo_ref(NULL, &chan->pushbuf_bo);
+ 
+ 	if (chan->user)
+@@ -71,14 +72,16 @@ nv50_evo_dmaobj_new(struct nouveau_channel *evo, uint32_t class, uint32_t name,
  		return ret;
  	}
  
 -	dev_priv->engine.instmem.prepare_access(dev, true);
- 	nv_wo32(dev, obj, 0, (tile_flags << 22) | (magic_flags << 16) | class);
- 	nv_wo32(dev, obj, 1, limit);
- 	nv_wo32(dev, obj, 2, offset);
- 	nv_wo32(dev, obj, 3, 0x00000000);
- 	nv_wo32(dev, obj, 4, 0x00000000);
+-	nv_wo32(dev, obj, 0, (tile_flags << 22) | (magic_flags << 16) | class);
+-	nv_wo32(dev, obj, 1, limit);
+-	nv_wo32(dev, obj, 2, offset);
+-	nv_wo32(dev, obj, 3, 0x00000000);
+-	nv_wo32(dev, obj, 4, 0x00000000);
 -	nv_wo32(dev, obj, 5, 0x00010000);
 -	dev_priv->engine.instmem.finish_access(dev);
++	nv_wo32(obj,  0, (tile_flags << 22) | (magic_flags << 16) | class);
++	nv_wo32(obj,  4, limit);
++	nv_wo32(obj,  8, offset);
++	nv_wo32(obj, 12, 0x00000000);
++	nv_wo32(obj, 16, 0x00000000);
 +	if (dev_priv->card_type < NV_C0)
-+		nv_wo32(dev, obj, 5, 0x00010000);
++		nv_wo32(obj, 20, 0x00010000);
 +	else
-+		nv_wo32(dev, obj, 5, 0x00020000);
++		nv_wo32(obj, 20, 0x00020000);
 +	dev_priv->engine.instmem.flush(dev);
  
  	return 0;
  }
-@@ -110,8 +112,8 @@ nv50_evo_channel_new(struct drm_device *dev, struct nouveau_channel **pchan)
+@@ -110,8 +113,8 @@ nv50_evo_channel_new(struct drm_device *dev, struct nouveau_channel **pchan)
  		return ret;
  	}
  
@@ -8440,7 +10726,7 @@ index 580a5d1..612fa6d 100644
  	if (ret) {
  		NV_ERROR(dev, "Error initialising EVO PRAMIN heap: %d\n", ret);
  		nv50_evo_channel_del(pchan);
-@@ -179,13 +181,25 @@ nv50_evo_channel_new(struct drm_device *dev, struct nouveau_channel **pchan)
+@@ -179,13 +182,25 @@ nv50_evo_channel_new(struct drm_device *dev, struct nouveau_channel **pchan)
  }
  
  int
@@ -8467,7 +10753,7 @@ index 580a5d1..612fa6d 100644
  	uint64_t start;
  	int ret, i;
  
-@@ -366,26 +380,13 @@ nv50_display_init(struct drm_device *dev)
+@@ -366,26 +381,13 @@ nv50_display_init(struct drm_device *dev)
  					     NV50_PDISPLAY_INTR_EN_CLK_UNK40));
  
  	/* enable hotplug interrupts */
@@ -8495,7 +10781,7 @@ index 580a5d1..612fa6d 100644
  	}
  
  	return 0;
-@@ -465,6 +466,7 @@ int nv50_display_create(struct drm_device *dev)
+@@ -465,6 +467,7 @@ int nv50_display_create(struct drm_device *dev)
  {
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
  	struct dcb_table *dcb = &dev_priv->vbios.dcb;
@@ -8503,7 +10789,7 @@ index 580a5d1..612fa6d 100644
  	int ret, i;
  
  	NV_DEBUG_KMS(dev, "\n");
-@@ -507,14 +509,18 @@ int nv50_display_create(struct drm_device *dev)
+@@ -507,14 +510,18 @@ int nv50_display_create(struct drm_device *dev)
  			continue;
  		}
  
@@ -8524,7 +10810,7 @@ index 580a5d1..612fa6d 100644
  			break;
  		default:
  			NV_WARN(dev, "DCB encoder %d unknown\n", entry->type);
-@@ -522,11 +528,13 @@ int nv50_display_create(struct drm_device *dev)
+@@ -522,11 +529,13 @@ int nv50_display_create(struct drm_device *dev)
  		}
  	}
  
@@ -8543,7 +10829,7 @@ index 580a5d1..612fa6d 100644
  	}
  
  	ret = nv50_display_init(dev);
-@@ -538,7 +546,8 @@ int nv50_display_create(struct drm_device *dev)
+@@ -538,7 +547,8 @@ int nv50_display_create(struct drm_device *dev)
  	return 0;
  }
  
@@ -8553,7 +10839,7 @@ index 580a5d1..612fa6d 100644
  {
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
  
-@@ -548,135 +557,30 @@ int nv50_display_destroy(struct drm_device *dev)
+@@ -548,135 +558,30 @@ int nv50_display_destroy(struct drm_device *dev)
  
  	nv50_display_disable(dev);
  	nv50_evo_channel_del(&dev_priv->evo);
@@ -8696,7 +10982,7 @@ index 580a5d1..612fa6d 100644
  	case OUTPUT_LVDS:
  		script = (mc >> 8) & 0xf;
  		if (bios->fp_no_ddc) {
-@@ -767,17 +671,88 @@ nv50_display_vblank_handler(struct drm_device *dev, uint32_t intr)
+@@ -767,17 +672,88 @@ nv50_display_vblank_handler(struct drm_device *dev, uint32_t intr)
  static void
  nv50_display_unk10_handler(struct drm_device *dev)
  {
@@ -8791,7 +11077,7 @@ index 580a5d1..612fa6d 100644
  ack:
  	nv_wr32(dev, NV50_PDISPLAY_INTR_1, NV50_PDISPLAY_INTR_1_CLK_UNK10);
  	nv_wr32(dev, 0x610030, 0x80000000);
-@@ -817,33 +792,103 @@ nv50_display_unk20_dp_hack(struct drm_device *dev, struct dcb_entry *dcb)
+@@ -817,33 +793,103 @@ nv50_display_unk20_dp_hack(struct drm_device *dev, struct dcb_entry *dcb)
  static void
  nv50_display_unk20_handler(struct drm_device *dev)
  {
@@ -8913,7 +11199,7 @@ index 580a5d1..612fa6d 100644
  		tmp = nv_rd32(dev, NV50_PDISPLAY_SOR_CLK_CTRL2(or));
  		tmp &= ~0x00000f0f;
  		if (script & 0x0100)
-@@ -853,24 +898,61 @@ nv50_display_unk20_handler(struct drm_device *dev)
+@@ -853,24 +899,61 @@ nv50_display_unk20_handler(struct drm_device *dev)
  		nv_wr32(dev, NV50_PDISPLAY_DAC_CLK_CTRL2(or), 0);
  	}
  
@@ -9000,11 +11286,58 @@ index 581d405..c551f0b 100644
  int nv50_crtc_blank(struct nouveau_crtc *, bool blank);
  int nv50_crtc_set_clock(struct drm_device *, int head, int pclk);
  
+diff --git a/drivers/gpu/drm/nouveau/nv50_fb.c b/drivers/gpu/drm/nouveau/nv50_fb.c
+index 32611bd..ad267c5 100644
+--- a/drivers/gpu/drm/nouveau/nv50_fb.c
++++ b/drivers/gpu/drm/nouveau/nv50_fb.c
+@@ -36,3 +36,42 @@ void
+ nv50_fb_takedown(struct drm_device *dev)
+ {
+ }
++
++void
++nv50_fb_vm_trap(struct drm_device *dev, int display, const char *name)
++{
++	struct drm_nouveau_private *dev_priv = dev->dev_private;
++	u32 trap[6], idx, chinst;
++	int i, ch;
++
++	idx = nv_rd32(dev, 0x100c90);
++	if (!(idx & 0x80000000))
++		return;
++	idx &= 0x00ffffff;
++
++	for (i = 0; i < 6; i++) {
++		nv_wr32(dev, 0x100c90, idx | i << 24);
++		trap[i] = nv_rd32(dev, 0x100c94);
++	}
++	nv_wr32(dev, 0x100c90, idx | 0x80000000);
++
++	if (!display)
++		return;
++
++	chinst = (trap[2] << 16) | trap[1];
++	for (ch = 0; ch < dev_priv->engine.fifo.channels; ch++) {
++		struct nouveau_channel *chan = dev_priv->fifos[ch];
++
++		if (!chan || !chan->ramin)
++			continue;
++
++		if (chinst == chan->ramin->instance >> 12)
++			break;
++	}
++
++	NV_INFO(dev, "%s - VM: Trapped %s at %02x%04x%04x status %08x "
++		     "channel %d (0x%08x)\n",
++		name, (trap[5] & 0x100 ? "read" : "write"),
++		trap[5] & 0xff, trap[4] & 0xffff, trap[3] & 0xffff,
++		trap[0], ch, chinst);
++}
 diff --git a/drivers/gpu/drm/nouveau/nv50_fifo.c b/drivers/gpu/drm/nouveau/nv50_fifo.c
-index e20c0e2..fb0281a 100644
+index e20c0e2..38dbcda 100644
 --- a/drivers/gpu/drm/nouveau/nv50_fifo.c
 +++ b/drivers/gpu/drm/nouveau/nv50_fifo.c
-@@ -28,41 +28,33 @@
+@@ -28,41 +28,35 @@
  #include "drm.h"
  #include "nouveau_drv.h"
  
@@ -9035,8 +11368,12 @@ index e20c0e2..fb0281a 100644
  	/* We never schedule channel 0 or 127 */
 -	dev_priv->engine.instmem.prepare_access(dev, true);
  	for (i = 1, nr = 0; i < 127; i++) {
- 		if (dev_priv->fifos[i] && dev_priv->fifos[i]->ramfc)
- 			nv_wo32(dev, cur->gpuobj, nr++, i);
+-		if (dev_priv->fifos[i] && dev_priv->fifos[i]->ramfc)
+-			nv_wo32(dev, cur->gpuobj, nr++, i);
++		if (dev_priv->fifos[i] && dev_priv->fifos[i]->ramfc) {
++			nv_wo32(cur->gpuobj, (nr * 4), i);
++			nr++;
++		}
  	}
 -	dev_priv->engine.instmem.finish_access(dev);
 +	dev_priv->engine.instmem.flush(dev);
@@ -9053,7 +11390,7 @@ index e20c0e2..fb0281a 100644
  {
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
  	struct nouveau_channel *chan = dev_priv->fifos[channel];
-@@ -70,37 +62,28 @@ nv50_fifo_channel_enable(struct drm_device *dev, int channel, bool nt)
+@@ -70,37 +64,28 @@ nv50_fifo_channel_enable(struct drm_device *dev, int channel, bool nt)
  
  	NV_DEBUG(dev, "ch%d\n", channel);
  
@@ -9097,7 +11434,7 @@ index e20c0e2..fb0281a 100644
  }
  
  static void
-@@ -133,12 +116,12 @@ nv50_fifo_init_context_table(struct drm_device *dev)
+@@ -133,12 +118,12 @@ nv50_fifo_init_context_table(struct drm_device *dev)
  
  	for (i = 0; i < NV50_PFIFO_CTX_TABLE__SIZE; i++) {
  		if (dev_priv->fifos[i])
@@ -9113,7 +11450,7 @@ index e20c0e2..fb0281a 100644
  }
  
  static void
-@@ -162,41 +145,38 @@ nv50_fifo_init_regs(struct drm_device *dev)
+@@ -162,41 +147,38 @@ nv50_fifo_init_regs(struct drm_device *dev)
  	nv_wr32(dev, 0x3270, 0);
  
  	/* Enable dummy channels setup by nv50_instmem.c */
@@ -9167,7 +11504,7 @@ index e20c0e2..fb0281a 100644
  		return ret;
  	}
  
-@@ -216,18 +196,15 @@ void
+@@ -216,18 +198,15 @@ void
  nv50_fifo_takedown(struct drm_device *dev)
  {
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -9190,7 +11527,7 @@ index e20c0e2..fb0281a 100644
  }
  
  int
-@@ -248,7 +225,7 @@ nv50_fifo_create_context(struct nouveau_channel *chan)
+@@ -248,7 +227,7 @@ nv50_fifo_create_context(struct nouveau_channel *chan)
  
  	NV_DEBUG(dev, "ch%d\n", chan->id);
  
@@ -9199,31 +11536,50 @@ index e20c0e2..fb0281a 100644
  		uint32_t ramin_poffset = chan->ramin->gpuobj->im_pramin->start;
  		uint32_t ramin_voffset = chan->ramin->gpuobj->im_backing_start;
  
-@@ -281,10 +258,10 @@ nv50_fifo_create_context(struct nouveau_channel *chan)
+@@ -281,39 +260,31 @@ nv50_fifo_create_context(struct nouveau_channel *chan)
  
  	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
  
 -	dev_priv->engine.instmem.prepare_access(dev, true);
 -
- 	nv_wo32(dev, ramfc, 0x48/4, chan->pushbuf->instance >> 4);
+-	nv_wo32(dev, ramfc, 0x48/4, chan->pushbuf->instance >> 4);
 -	nv_wo32(dev, ramfc, 0x80/4, (0xc << 24) | (chan->ramht->instance >> 4));
-+	nv_wo32(dev, ramfc, 0x80/4, (0 << 27) /* 4KiB */ |
-+				    (4 << 24) /* SEARCH_FULL */ |
-+				    (chan->ramht->instance >> 4));
- 	nv_wo32(dev, ramfc, 0x44/4, 0x2101ffff);
- 	nv_wo32(dev, ramfc, 0x60/4, 0x7fffffff);
- 	nv_wo32(dev, ramfc, 0x40/4, 0x00000000);
-@@ -295,7 +272,7 @@ nv50_fifo_create_context(struct nouveau_channel *chan)
- 				    chan->dma.ib_base * 4);
- 	nv_wo32(dev, ramfc, 0x54/4, drm_order(chan->dma.ib_max + 1) << 16);
- 
+-	nv_wo32(dev, ramfc, 0x44/4, 0x2101ffff);
+-	nv_wo32(dev, ramfc, 0x60/4, 0x7fffffff);
+-	nv_wo32(dev, ramfc, 0x40/4, 0x00000000);
+-	nv_wo32(dev, ramfc, 0x7c/4, 0x30000001);
+-	nv_wo32(dev, ramfc, 0x78/4, 0x00000000);
+-	nv_wo32(dev, ramfc, 0x3c/4, 0x403f6078);
+-	nv_wo32(dev, ramfc, 0x50/4, chan->pushbuf_base +
+-				    chan->dma.ib_base * 4);
+-	nv_wo32(dev, ramfc, 0x54/4, drm_order(chan->dma.ib_max + 1) << 16);
+-
 -	if (!IS_G80) {
+-		nv_wo32(dev, chan->ramin->gpuobj, 0, chan->id);
+-		nv_wo32(dev, chan->ramin->gpuobj, 1,
+-						chan->ramfc->instance >> 8);
+-
+-		nv_wo32(dev, ramfc, 0x88/4, chan->cache->instance >> 10);
+-		nv_wo32(dev, ramfc, 0x98/4, chan->ramin->instance >> 12);
++	nv_wo32(ramfc, 0x48, chan->pushbuf->instance >> 4);
++	nv_wo32(ramfc, 0x80, (0 << 27) /* 4KiB */ |
++			     (4 << 24) /* SEARCH_FULL */ |
++			     (chan->ramht->instance >> 4));
++	nv_wo32(ramfc, 0x44, 0x2101ffff);
++	nv_wo32(ramfc, 0x60, 0x7fffffff);
++	nv_wo32(ramfc, 0x40, 0x00000000);
++	nv_wo32(ramfc, 0x7c, 0x30000001);
++	nv_wo32(ramfc, 0x78, 0x00000000);
++	nv_wo32(ramfc, 0x3c, 0x403f6078);
++	nv_wo32(ramfc, 0x50, chan->pushbuf_base + chan->dma.ib_base * 4);
++	nv_wo32(ramfc, 0x54, drm_order(chan->dma.ib_max + 1) << 16);
++
 +	if (dev_priv->chipset != 0x50) {
- 		nv_wo32(dev, chan->ramin->gpuobj, 0, chan->id);
- 		nv_wo32(dev, chan->ramin->gpuobj, 1,
- 						chan->ramfc->instance >> 8);
-@@ -304,16 +281,10 @@ nv50_fifo_create_context(struct nouveau_channel *chan)
- 		nv_wo32(dev, ramfc, 0x98/4, chan->ramin->instance >> 12);
++		nv_wo32(chan->ramin->gpuobj, 0, chan->id);
++		nv_wo32(chan->ramin->gpuobj, 4, chan->ramfc->instance >> 8);
++
++		nv_wo32(ramfc, 0x88, chan->cache->instance >> 10);
++		nv_wo32(ramfc, 0x98, chan->ramin->instance >> 12);
  	}
  
 -	dev_priv->engine.instmem.finish_access(dev);
@@ -9257,26 +11613,106 @@ index e20c0e2..fb0281a 100644
  
  	nouveau_gpuobj_ref_del(dev, &ramfc);
  	nouveau_gpuobj_ref_del(dev, &chan->cache);
-@@ -349,8 +321,6 @@ nv50_fifo_load_context(struct nouveau_channel *chan)
+@@ -349,63 +321,59 @@ nv50_fifo_load_context(struct nouveau_channel *chan)
  
  	NV_DEBUG(dev, "ch%d\n", chan->id);
  
 -	dev_priv->engine.instmem.prepare_access(dev, false);
 -
- 	nv_wr32(dev, 0x3330, nv_ro32(dev, ramfc, 0x00/4));
- 	nv_wr32(dev, 0x3334, nv_ro32(dev, ramfc, 0x04/4));
- 	nv_wr32(dev, 0x3240, nv_ro32(dev, ramfc, 0x08/4));
-@@ -396,7 +366,7 @@ nv50_fifo_load_context(struct nouveau_channel *chan)
+-	nv_wr32(dev, 0x3330, nv_ro32(dev, ramfc, 0x00/4));
+-	nv_wr32(dev, 0x3334, nv_ro32(dev, ramfc, 0x04/4));
+-	nv_wr32(dev, 0x3240, nv_ro32(dev, ramfc, 0x08/4));
+-	nv_wr32(dev, 0x3320, nv_ro32(dev, ramfc, 0x0c/4));
+-	nv_wr32(dev, 0x3244, nv_ro32(dev, ramfc, 0x10/4));
+-	nv_wr32(dev, 0x3328, nv_ro32(dev, ramfc, 0x14/4));
+-	nv_wr32(dev, 0x3368, nv_ro32(dev, ramfc, 0x18/4));
+-	nv_wr32(dev, 0x336c, nv_ro32(dev, ramfc, 0x1c/4));
+-	nv_wr32(dev, 0x3370, nv_ro32(dev, ramfc, 0x20/4));
+-	nv_wr32(dev, 0x3374, nv_ro32(dev, ramfc, 0x24/4));
+-	nv_wr32(dev, 0x3378, nv_ro32(dev, ramfc, 0x28/4));
+-	nv_wr32(dev, 0x337c, nv_ro32(dev, ramfc, 0x2c/4));
+-	nv_wr32(dev, 0x3228, nv_ro32(dev, ramfc, 0x30/4));
+-	nv_wr32(dev, 0x3364, nv_ro32(dev, ramfc, 0x34/4));
+-	nv_wr32(dev, 0x32a0, nv_ro32(dev, ramfc, 0x38/4));
+-	nv_wr32(dev, 0x3224, nv_ro32(dev, ramfc, 0x3c/4));
+-	nv_wr32(dev, 0x324c, nv_ro32(dev, ramfc, 0x40/4));
+-	nv_wr32(dev, 0x2044, nv_ro32(dev, ramfc, 0x44/4));
+-	nv_wr32(dev, 0x322c, nv_ro32(dev, ramfc, 0x48/4));
+-	nv_wr32(dev, 0x3234, nv_ro32(dev, ramfc, 0x4c/4));
+-	nv_wr32(dev, 0x3340, nv_ro32(dev, ramfc, 0x50/4));
+-	nv_wr32(dev, 0x3344, nv_ro32(dev, ramfc, 0x54/4));
+-	nv_wr32(dev, 0x3280, nv_ro32(dev, ramfc, 0x58/4));
+-	nv_wr32(dev, 0x3254, nv_ro32(dev, ramfc, 0x5c/4));
+-	nv_wr32(dev, 0x3260, nv_ro32(dev, ramfc, 0x60/4));
+-	nv_wr32(dev, 0x3264, nv_ro32(dev, ramfc, 0x64/4));
+-	nv_wr32(dev, 0x3268, nv_ro32(dev, ramfc, 0x68/4));
+-	nv_wr32(dev, 0x326c, nv_ro32(dev, ramfc, 0x6c/4));
+-	nv_wr32(dev, 0x32e4, nv_ro32(dev, ramfc, 0x70/4));
+-	nv_wr32(dev, 0x3248, nv_ro32(dev, ramfc, 0x74/4));
+-	nv_wr32(dev, 0x2088, nv_ro32(dev, ramfc, 0x78/4));
+-	nv_wr32(dev, 0x2058, nv_ro32(dev, ramfc, 0x7c/4));
+-	nv_wr32(dev, 0x2210, nv_ro32(dev, ramfc, 0x80/4));
+-
+-	cnt = nv_ro32(dev, ramfc, 0x84/4);
++	nv_wr32(dev, 0x3330, nv_ro32(ramfc, 0x00));
++	nv_wr32(dev, 0x3334, nv_ro32(ramfc, 0x04));
++	nv_wr32(dev, 0x3240, nv_ro32(ramfc, 0x08));
++	nv_wr32(dev, 0x3320, nv_ro32(ramfc, 0x0c));
++	nv_wr32(dev, 0x3244, nv_ro32(ramfc, 0x10));
++	nv_wr32(dev, 0x3328, nv_ro32(ramfc, 0x14));
++	nv_wr32(dev, 0x3368, nv_ro32(ramfc, 0x18));
++	nv_wr32(dev, 0x336c, nv_ro32(ramfc, 0x1c));
++	nv_wr32(dev, 0x3370, nv_ro32(ramfc, 0x20));
++	nv_wr32(dev, 0x3374, nv_ro32(ramfc, 0x24));
++	nv_wr32(dev, 0x3378, nv_ro32(ramfc, 0x28));
++	nv_wr32(dev, 0x337c, nv_ro32(ramfc, 0x2c));
++	nv_wr32(dev, 0x3228, nv_ro32(ramfc, 0x30));
++	nv_wr32(dev, 0x3364, nv_ro32(ramfc, 0x34));
++	nv_wr32(dev, 0x32a0, nv_ro32(ramfc, 0x38));
++	nv_wr32(dev, 0x3224, nv_ro32(ramfc, 0x3c));
++	nv_wr32(dev, 0x324c, nv_ro32(ramfc, 0x40));
++	nv_wr32(dev, 0x2044, nv_ro32(ramfc, 0x44));
++	nv_wr32(dev, 0x322c, nv_ro32(ramfc, 0x48));
++	nv_wr32(dev, 0x3234, nv_ro32(ramfc, 0x4c));
++	nv_wr32(dev, 0x3340, nv_ro32(ramfc, 0x50));
++	nv_wr32(dev, 0x3344, nv_ro32(ramfc, 0x54));
++	nv_wr32(dev, 0x3280, nv_ro32(ramfc, 0x58));
++	nv_wr32(dev, 0x3254, nv_ro32(ramfc, 0x5c));
++	nv_wr32(dev, 0x3260, nv_ro32(ramfc, 0x60));
++	nv_wr32(dev, 0x3264, nv_ro32(ramfc, 0x64));
++	nv_wr32(dev, 0x3268, nv_ro32(ramfc, 0x68));
++	nv_wr32(dev, 0x326c, nv_ro32(ramfc, 0x6c));
++	nv_wr32(dev, 0x32e4, nv_ro32(ramfc, 0x70));
++	nv_wr32(dev, 0x3248, nv_ro32(ramfc, 0x74));
++	nv_wr32(dev, 0x2088, nv_ro32(ramfc, 0x78));
++	nv_wr32(dev, 0x2058, nv_ro32(ramfc, 0x7c));
++	nv_wr32(dev, 0x2210, nv_ro32(ramfc, 0x80));
++
++	cnt = nv_ro32(ramfc, 0x84);
+ 	for (ptr = 0; ptr < cnt; ptr++) {
+ 		nv_wr32(dev, NV40_PFIFO_CACHE1_METHOD(ptr),
+-			nv_ro32(dev, cache, (ptr * 2) + 0));
++			nv_ro32(cache, (ptr * 8) + 0));
+ 		nv_wr32(dev, NV40_PFIFO_CACHE1_DATA(ptr),
+-			nv_ro32(dev, cache, (ptr * 2) + 1));
++			nv_ro32(cache, (ptr * 8) + 4));
+ 	}
+ 	nv_wr32(dev, NV03_PFIFO_CACHE1_PUT, cnt << 2);
  	nv_wr32(dev, NV03_PFIFO_CACHE1_GET, 0);
  
  	/* guessing that all the 0x34xx regs aren't on NV50 */
 -	if (!IS_G80) {
+-		nv_wr32(dev, 0x340c, nv_ro32(dev, ramfc, 0x88/4));
+-		nv_wr32(dev, 0x3400, nv_ro32(dev, ramfc, 0x8c/4));
+-		nv_wr32(dev, 0x3404, nv_ro32(dev, ramfc, 0x90/4));
+-		nv_wr32(dev, 0x3408, nv_ro32(dev, ramfc, 0x94/4));
+-		nv_wr32(dev, 0x3410, nv_ro32(dev, ramfc, 0x98/4));
 +	if (dev_priv->chipset != 0x50) {
- 		nv_wr32(dev, 0x340c, nv_ro32(dev, ramfc, 0x88/4));
- 		nv_wr32(dev, 0x3400, nv_ro32(dev, ramfc, 0x8c/4));
- 		nv_wr32(dev, 0x3404, nv_ro32(dev, ramfc, 0x90/4));
-@@ -404,8 +374,6 @@ nv50_fifo_load_context(struct nouveau_channel *chan)
- 		nv_wr32(dev, 0x3410, nv_ro32(dev, ramfc, 0x98/4));
++		nv_wr32(dev, 0x340c, nv_ro32(ramfc, 0x88));
++		nv_wr32(dev, 0x3400, nv_ro32(ramfc, 0x8c));
++		nv_wr32(dev, 0x3404, nv_ro32(ramfc, 0x90));
++		nv_wr32(dev, 0x3408, nv_ro32(ramfc, 0x94));
++		nv_wr32(dev, 0x3410, nv_ro32(ramfc, 0x98));
  	}
  
 -	dev_priv->engine.instmem.finish_access(dev);
@@ -9284,26 +11720,110 @@ index e20c0e2..fb0281a 100644
  	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, chan->id | (1<<16));
  	return 0;
  }
-@@ -434,8 +402,6 @@ nv50_fifo_unload_context(struct drm_device *dev)
+@@ -434,64 +402,63 @@ nv50_fifo_unload_context(struct drm_device *dev)
  	ramfc = chan->ramfc->gpuobj;
  	cache = chan->cache->gpuobj;
  
 -	dev_priv->engine.instmem.prepare_access(dev, true);
 -
- 	nv_wo32(dev, ramfc, 0x00/4, nv_rd32(dev, 0x3330));
- 	nv_wo32(dev, ramfc, 0x04/4, nv_rd32(dev, 0x3334));
- 	nv_wo32(dev, ramfc, 0x08/4, nv_rd32(dev, 0x3240));
-@@ -482,7 +448,7 @@ nv50_fifo_unload_context(struct drm_device *dev)
+-	nv_wo32(dev, ramfc, 0x00/4, nv_rd32(dev, 0x3330));
+-	nv_wo32(dev, ramfc, 0x04/4, nv_rd32(dev, 0x3334));
+-	nv_wo32(dev, ramfc, 0x08/4, nv_rd32(dev, 0x3240));
+-	nv_wo32(dev, ramfc, 0x0c/4, nv_rd32(dev, 0x3320));
+-	nv_wo32(dev, ramfc, 0x10/4, nv_rd32(dev, 0x3244));
+-	nv_wo32(dev, ramfc, 0x14/4, nv_rd32(dev, 0x3328));
+-	nv_wo32(dev, ramfc, 0x18/4, nv_rd32(dev, 0x3368));
+-	nv_wo32(dev, ramfc, 0x1c/4, nv_rd32(dev, 0x336c));
+-	nv_wo32(dev, ramfc, 0x20/4, nv_rd32(dev, 0x3370));
+-	nv_wo32(dev, ramfc, 0x24/4, nv_rd32(dev, 0x3374));
+-	nv_wo32(dev, ramfc, 0x28/4, nv_rd32(dev, 0x3378));
+-	nv_wo32(dev, ramfc, 0x2c/4, nv_rd32(dev, 0x337c));
+-	nv_wo32(dev, ramfc, 0x30/4, nv_rd32(dev, 0x3228));
+-	nv_wo32(dev, ramfc, 0x34/4, nv_rd32(dev, 0x3364));
+-	nv_wo32(dev, ramfc, 0x38/4, nv_rd32(dev, 0x32a0));
+-	nv_wo32(dev, ramfc, 0x3c/4, nv_rd32(dev, 0x3224));
+-	nv_wo32(dev, ramfc, 0x40/4, nv_rd32(dev, 0x324c));
+-	nv_wo32(dev, ramfc, 0x44/4, nv_rd32(dev, 0x2044));
+-	nv_wo32(dev, ramfc, 0x48/4, nv_rd32(dev, 0x322c));
+-	nv_wo32(dev, ramfc, 0x4c/4, nv_rd32(dev, 0x3234));
+-	nv_wo32(dev, ramfc, 0x50/4, nv_rd32(dev, 0x3340));
+-	nv_wo32(dev, ramfc, 0x54/4, nv_rd32(dev, 0x3344));
+-	nv_wo32(dev, ramfc, 0x58/4, nv_rd32(dev, 0x3280));
+-	nv_wo32(dev, ramfc, 0x5c/4, nv_rd32(dev, 0x3254));
+-	nv_wo32(dev, ramfc, 0x60/4, nv_rd32(dev, 0x3260));
+-	nv_wo32(dev, ramfc, 0x64/4, nv_rd32(dev, 0x3264));
+-	nv_wo32(dev, ramfc, 0x68/4, nv_rd32(dev, 0x3268));
+-	nv_wo32(dev, ramfc, 0x6c/4, nv_rd32(dev, 0x326c));
+-	nv_wo32(dev, ramfc, 0x70/4, nv_rd32(dev, 0x32e4));
+-	nv_wo32(dev, ramfc, 0x74/4, nv_rd32(dev, 0x3248));
+-	nv_wo32(dev, ramfc, 0x78/4, nv_rd32(dev, 0x2088));
+-	nv_wo32(dev, ramfc, 0x7c/4, nv_rd32(dev, 0x2058));
+-	nv_wo32(dev, ramfc, 0x80/4, nv_rd32(dev, 0x2210));
++	nv_wo32(ramfc, 0x00, nv_rd32(dev, 0x3330));
++	nv_wo32(ramfc, 0x04, nv_rd32(dev, 0x3334));
++	nv_wo32(ramfc, 0x08, nv_rd32(dev, 0x3240));
++	nv_wo32(ramfc, 0x0c, nv_rd32(dev, 0x3320));
++	nv_wo32(ramfc, 0x10, nv_rd32(dev, 0x3244));
++	nv_wo32(ramfc, 0x14, nv_rd32(dev, 0x3328));
++	nv_wo32(ramfc, 0x18, nv_rd32(dev, 0x3368));
++	nv_wo32(ramfc, 0x1c, nv_rd32(dev, 0x336c));
++	nv_wo32(ramfc, 0x20, nv_rd32(dev, 0x3370));
++	nv_wo32(ramfc, 0x24, nv_rd32(dev, 0x3374));
++	nv_wo32(ramfc, 0x28, nv_rd32(dev, 0x3378));
++	nv_wo32(ramfc, 0x2c, nv_rd32(dev, 0x337c));
++	nv_wo32(ramfc, 0x30, nv_rd32(dev, 0x3228));
++	nv_wo32(ramfc, 0x34, nv_rd32(dev, 0x3364));
++	nv_wo32(ramfc, 0x38, nv_rd32(dev, 0x32a0));
++	nv_wo32(ramfc, 0x3c, nv_rd32(dev, 0x3224));
++	nv_wo32(ramfc, 0x40, nv_rd32(dev, 0x324c));
++	nv_wo32(ramfc, 0x44, nv_rd32(dev, 0x2044));
++	nv_wo32(ramfc, 0x48, nv_rd32(dev, 0x322c));
++	nv_wo32(ramfc, 0x4c, nv_rd32(dev, 0x3234));
++	nv_wo32(ramfc, 0x50, nv_rd32(dev, 0x3340));
++	nv_wo32(ramfc, 0x54, nv_rd32(dev, 0x3344));
++	nv_wo32(ramfc, 0x58, nv_rd32(dev, 0x3280));
++	nv_wo32(ramfc, 0x5c, nv_rd32(dev, 0x3254));
++	nv_wo32(ramfc, 0x60, nv_rd32(dev, 0x3260));
++	nv_wo32(ramfc, 0x64, nv_rd32(dev, 0x3264));
++	nv_wo32(ramfc, 0x68, nv_rd32(dev, 0x3268));
++	nv_wo32(ramfc, 0x6c, nv_rd32(dev, 0x326c));
++	nv_wo32(ramfc, 0x70, nv_rd32(dev, 0x32e4));
++	nv_wo32(ramfc, 0x74, nv_rd32(dev, 0x3248));
++	nv_wo32(ramfc, 0x78, nv_rd32(dev, 0x2088));
++	nv_wo32(ramfc, 0x7c, nv_rd32(dev, 0x2058));
++	nv_wo32(ramfc, 0x80, nv_rd32(dev, 0x2210));
+ 
+ 	put = (nv_rd32(dev, NV03_PFIFO_CACHE1_PUT) & 0x7ff) >> 2;
+ 	get = (nv_rd32(dev, NV03_PFIFO_CACHE1_GET) & 0x7ff) >> 2;
+ 	ptr = 0;
+ 	while (put != get) {
+-		nv_wo32(dev, cache, ptr++,
+-			    nv_rd32(dev, NV40_PFIFO_CACHE1_METHOD(get)));
+-		nv_wo32(dev, cache, ptr++,
+-			    nv_rd32(dev, NV40_PFIFO_CACHE1_DATA(get)));
++		nv_wo32(cache, ptr + 0,
++			nv_rd32(dev, NV40_PFIFO_CACHE1_METHOD(get)));
++		nv_wo32(cache, ptr + 4,
++			nv_rd32(dev, NV40_PFIFO_CACHE1_DATA(get)));
+ 		get = (get + 1) & 0x1ff;
++		ptr += 8;
  	}
  
  	/* guessing that all the 0x34xx regs aren't on NV50 */
 -	if (!IS_G80) {
+-		nv_wo32(dev, ramfc, 0x84/4, ptr >> 1);
+-		nv_wo32(dev, ramfc, 0x88/4, nv_rd32(dev, 0x340c));
+-		nv_wo32(dev, ramfc, 0x8c/4, nv_rd32(dev, 0x3400));
+-		nv_wo32(dev, ramfc, 0x90/4, nv_rd32(dev, 0x3404));
+-		nv_wo32(dev, ramfc, 0x94/4, nv_rd32(dev, 0x3408));
+-		nv_wo32(dev, ramfc, 0x98/4, nv_rd32(dev, 0x3410));
 +	if (dev_priv->chipset != 0x50) {
- 		nv_wo32(dev, ramfc, 0x84/4, ptr >> 1);
- 		nv_wo32(dev, ramfc, 0x88/4, nv_rd32(dev, 0x340c));
- 		nv_wo32(dev, ramfc, 0x8c/4, nv_rd32(dev, 0x3400));
-@@ -491,7 +457,7 @@ nv50_fifo_unload_context(struct drm_device *dev)
- 		nv_wo32(dev, ramfc, 0x98/4, nv_rd32(dev, 0x3410));
++		nv_wo32(ramfc, 0x84, ptr >> 3);
++		nv_wo32(ramfc, 0x88, nv_rd32(dev, 0x340c));
++		nv_wo32(ramfc, 0x8c, nv_rd32(dev, 0x3400));
++		nv_wo32(ramfc, 0x90, nv_rd32(dev, 0x3404));
++		nv_wo32(ramfc, 0x94, nv_rd32(dev, 0x3408));
++		nv_wo32(ramfc, 0x98, nv_rd32(dev, 0x3410));
  	}
  
 -	dev_priv->engine.instmem.finish_access(dev);
@@ -9355,7 +11875,7 @@ index bb47ad7..b2fab2b 100644
 +	return 0;
 +}
 diff --git a/drivers/gpu/drm/nouveau/nv50_graph.c b/drivers/gpu/drm/nouveau/nv50_graph.c
-index b203d06..1413028 100644
+index b203d06..17a8d78 100644
 --- a/drivers/gpu/drm/nouveau/nv50_graph.c
 +++ b/drivers/gpu/drm/nouveau/nv50_graph.c
 @@ -30,8 +30,6 @@
@@ -9446,23 +11966,21 @@ index b203d06..1413028 100644
  	int hdr, ret;
  
  	NV_DEBUG(dev, "ch%d\n", chan->id);
-@@ -223,10 +217,9 @@ nv50_graph_create_context(struct nouveau_channel *chan)
+@@ -223,32 +217,25 @@ nv50_graph_create_context(struct nouveau_channel *chan)
  				     NVOBJ_FLAG_ZERO_FREE, &chan->ramin_grctx);
  	if (ret)
  		return ret;
 -	ctx = chan->ramin_grctx->gpuobj;
-+	obj = chan->ramin_grctx->gpuobj;
- 
+-
 -	hdr = IS_G80 ? 0x200 : 0x20;
 -	dev_priv->engine.instmem.prepare_access(dev, true);
-+	hdr = (dev_priv->chipset == 0x50) ? 0x200 : 0x20;
- 	nv_wo32(dev, ramin, (hdr + 0x00)/4, 0x00190002);
- 	nv_wo32(dev, ramin, (hdr + 0x04)/4, chan->ramin_grctx->instance +
- 					   pgraph->grctx_size - 1);
-@@ -234,21 +227,15 @@ nv50_graph_create_context(struct nouveau_channel *chan)
- 	nv_wo32(dev, ramin, (hdr + 0x0c)/4, 0);
- 	nv_wo32(dev, ramin, (hdr + 0x10)/4, 0);
- 	nv_wo32(dev, ramin, (hdr + 0x14)/4, 0x00010000);
+-	nv_wo32(dev, ramin, (hdr + 0x00)/4, 0x00190002);
+-	nv_wo32(dev, ramin, (hdr + 0x04)/4, chan->ramin_grctx->instance +
+-					   pgraph->grctx_size - 1);
+-	nv_wo32(dev, ramin, (hdr + 0x08)/4, chan->ramin_grctx->instance);
+-	nv_wo32(dev, ramin, (hdr + 0x0c)/4, 0);
+-	nv_wo32(dev, ramin, (hdr + 0x10)/4, 0);
+-	nv_wo32(dev, ramin, (hdr + 0x14)/4, 0x00010000);
 -	dev_priv->engine.instmem.finish_access(dev);
 -
 -	dev_priv->engine.instmem.prepare_access(dev, true);
@@ -9477,14 +11995,24 @@ index b203d06..1413028 100644
 -	}
 -	nv_wo32(dev, ctx, 0x00000/4, chan->ramin->instance >> 12);
 -	dev_priv->engine.instmem.finish_access(dev);
- 
++	obj = chan->ramin_grctx->gpuobj;
++
++	hdr = (dev_priv->chipset == 0x50) ? 0x200 : 0x20;
++	nv_wo32(ramin, hdr + 0x00, 0x00190002);
++	nv_wo32(ramin, hdr + 0x04, chan->ramin_grctx->instance +
++				   pgraph->grctx_size - 1);
++	nv_wo32(ramin, hdr + 0x08, chan->ramin_grctx->instance);
++	nv_wo32(ramin, hdr + 0x0c, 0);
++	nv_wo32(ramin, hdr + 0x10, 0);
++	nv_wo32(ramin, hdr + 0x14, 0x00010000);
++
 +	ctx.dev = chan->dev;
 +	ctx.mode = NOUVEAU_GRCTX_VALS;
 +	ctx.data = obj;
 +	nv50_grctx_init(&ctx);
 +
-+	nv_wo32(dev, obj, 0x00000/4, chan->ramin->instance >> 12);
-+
++	nv_wo32(obj, 0x00000, chan->ramin->instance >> 12);
+ 
 +	dev_priv->engine.instmem.flush(dev);
  	return 0;
  }
@@ -9503,14 +12031,3813 @@ index b203d06..1413028 100644
  
 -	dev_priv->engine.instmem.prepare_access(dev, true);
  	for (i = hdr; i < hdr + 24; i += 4)
- 		nv_wo32(dev, chan->ramin->gpuobj, i/4, 0);
+-		nv_wo32(dev, chan->ramin->gpuobj, i/4, 0);
 -	dev_priv->engine.instmem.finish_access(dev);
++		nv_wo32(chan->ramin->gpuobj, i, 0);
 +	dev_priv->engine.instmem.flush(dev);
  
  	nouveau_gpuobj_ref_del(dev, &chan->ramin_grctx);
  }
+diff --git a/drivers/gpu/drm/nouveau/nv50_grctx.c b/drivers/gpu/drm/nouveau/nv50_grctx.c
+index 42a8fb2..336aab2 100644
+--- a/drivers/gpu/drm/nouveau/nv50_grctx.c
++++ b/drivers/gpu/drm/nouveau/nv50_grctx.c
+@@ -103,6 +103,9 @@
+ #include "nouveau_drv.h"
+ #include "nouveau_grctx.h"
+ 
++#define IS_NVA3F(x) (((x) > 0xa0 && (x) < 0xaa) || (x) == 0xaf)
++#define IS_NVAAF(x) ((x) >= 0xaa && (x) <= 0xac)
++
+ /*
+  * This code deals with PGRAPH contexts on NV50 family cards. Like NV40, it's
+  * the GPU itself that does context-switching, but it needs a special
+@@ -182,6 +185,7 @@ nv50_grctx_init(struct nouveau_grctx *ctx)
+ 	case 0xa8:
+ 	case 0xaa:
+ 	case 0xac:
++	case 0xaf:
+ 		break;
+ 	default:
+ 		NV_ERROR(ctx->dev, "I don't know how to make a ctxprog for "
+@@ -268,6 +272,9 @@ nv50_grctx_init(struct nouveau_grctx *ctx)
+  */
+ 
+ static void
++nv50_graph_construct_mmio_ddata(struct nouveau_grctx *ctx);
++
++static void
+ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
+ {
+ 	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+@@ -286,7 +293,7 @@ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
+ 		gr_def(ctx, 0x400840, 0xffe806a8);
+ 	}
+ 	gr_def(ctx, 0x400844, 0x00000002);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
++	if (IS_NVA3F(dev_priv->chipset))
+ 		gr_def(ctx, 0x400894, 0x00001000);
+ 	gr_def(ctx, 0x4008e8, 0x00000003);
+ 	gr_def(ctx, 0x4008ec, 0x00001000);
+@@ -299,13 +306,15 @@ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
+ 
+ 	if (dev_priv->chipset >= 0xa0)
+ 		cp_ctx(ctx, 0x400b00, 0x1);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
++	if (IS_NVA3F(dev_priv->chipset)) {
+ 		cp_ctx(ctx, 0x400b10, 0x1);
+ 		gr_def(ctx, 0x400b10, 0x0001629d);
+ 		cp_ctx(ctx, 0x400b20, 0x1);
+ 		gr_def(ctx, 0x400b20, 0x0001629d);
+ 	}
+ 
++	nv50_graph_construct_mmio_ddata(ctx);
++
+ 	/* 0C00: VFETCH */
+ 	cp_ctx(ctx, 0x400c08, 0x2);
+ 	gr_def(ctx, 0x400c08, 0x0000fe0c);
+@@ -314,7 +323,7 @@ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
+ 	if (dev_priv->chipset < 0xa0) {
+ 		cp_ctx(ctx, 0x401008, 0x4);
+ 		gr_def(ctx, 0x401014, 0x00001000);
+-	} else if (dev_priv->chipset == 0xa0 || dev_priv->chipset >= 0xaa) {
++	} else if (!IS_NVA3F(dev_priv->chipset)) {
+ 		cp_ctx(ctx, 0x401008, 0x5);
+ 		gr_def(ctx, 0x401018, 0x00001000);
+ 	} else {
+@@ -368,10 +377,13 @@ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
+ 	case 0xa3:
+ 	case 0xa5:
+ 	case 0xa8:
++	case 0xaf:
+ 		gr_def(ctx, 0x401c00, 0x142500df);
+ 		break;
+ 	}
+ 
++	/* 2000 */
++
+ 	/* 2400 */
+ 	cp_ctx(ctx, 0x402400, 0x1);
+ 	if (dev_priv->chipset == 0x50)
+@@ -380,12 +392,12 @@ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
+ 		cp_ctx(ctx, 0x402408, 0x2);
+ 	gr_def(ctx, 0x402408, 0x00000600);
+ 
+-	/* 2800 */
++	/* 2800: CSCHED */
+ 	cp_ctx(ctx, 0x402800, 0x1);
+ 	if (dev_priv->chipset == 0x50)
+ 		gr_def(ctx, 0x402800, 0x00000006);
+ 
+-	/* 2C00 */
++	/* 2C00: ZCULL */
+ 	cp_ctx(ctx, 0x402c08, 0x6);
+ 	if (dev_priv->chipset != 0x50)
+ 		gr_def(ctx, 0x402c14, 0x01000000);
+@@ -396,23 +408,23 @@ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
+ 		cp_ctx(ctx, 0x402ca0, 0x2);
+ 	if (dev_priv->chipset < 0xa0)
+ 		gr_def(ctx, 0x402ca0, 0x00000400);
+-	else if (dev_priv->chipset == 0xa0 || dev_priv->chipset >= 0xaa)
++	else if (!IS_NVA3F(dev_priv->chipset))
+ 		gr_def(ctx, 0x402ca0, 0x00000800);
+ 	else
+ 		gr_def(ctx, 0x402ca0, 0x00000400);
+ 	cp_ctx(ctx, 0x402cac, 0x4);
+ 
+-	/* 3000 */
++	/* 3000: ENG2D */
+ 	cp_ctx(ctx, 0x403004, 0x1);
+ 	gr_def(ctx, 0x403004, 0x00000001);
+ 
+-	/* 3404 */
++	/* 3400 */
+ 	if (dev_priv->chipset >= 0xa0) {
+ 		cp_ctx(ctx, 0x403404, 0x1);
+ 		gr_def(ctx, 0x403404, 0x00000001);
+ 	}
+ 
+-	/* 5000 */
++	/* 5000: CCACHE */
+ 	cp_ctx(ctx, 0x405000, 0x1);
+ 	switch (dev_priv->chipset) {
+ 	case 0x50:
+@@ -425,6 +437,7 @@ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
+ 	case 0xa8:
+ 	case 0xaa:
+ 	case 0xac:
++	case 0xaf:
+ 		gr_def(ctx, 0x405000, 0x000e0080);
+ 		break;
+ 	case 0x86:
+@@ -441,210 +454,6 @@ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
+ 	cp_ctx(ctx, 0x405024, 0x1);
+ 	cp_ctx(ctx, 0x40502c, 0x1);
+ 
+-	/* 5400 or maybe 4800 */
+-	if (dev_priv->chipset == 0x50) {
+-		offset = 0x405400;
+-		cp_ctx(ctx, 0x405400, 0xea);
+-	} else if (dev_priv->chipset < 0x94) {
+-		offset = 0x405400;
+-		cp_ctx(ctx, 0x405400, 0xcb);
+-	} else if (dev_priv->chipset < 0xa0) {
+-		offset = 0x405400;
+-		cp_ctx(ctx, 0x405400, 0xcc);
+-	} else if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+-		offset = 0x404800;
+-		cp_ctx(ctx, 0x404800, 0xda);
+-	} else {
+-		offset = 0x405400;
+-		cp_ctx(ctx, 0x405400, 0xd4);
+-	}
+-	gr_def(ctx, offset + 0x0c, 0x00000002);
+-	gr_def(ctx, offset + 0x10, 0x00000001);
+-	if (dev_priv->chipset >= 0x94)
+-		offset += 4;
+-	gr_def(ctx, offset + 0x1c, 0x00000001);
+-	gr_def(ctx, offset + 0x20, 0x00000100);
+-	gr_def(ctx, offset + 0x38, 0x00000002);
+-	gr_def(ctx, offset + 0x3c, 0x00000001);
+-	gr_def(ctx, offset + 0x40, 0x00000001);
+-	gr_def(ctx, offset + 0x50, 0x00000001);
+-	gr_def(ctx, offset + 0x54, 0x003fffff);
+-	gr_def(ctx, offset + 0x58, 0x00001fff);
+-	gr_def(ctx, offset + 0x60, 0x00000001);
+-	gr_def(ctx, offset + 0x64, 0x00000001);
+-	gr_def(ctx, offset + 0x6c, 0x00000001);
+-	gr_def(ctx, offset + 0x70, 0x00000001);
+-	gr_def(ctx, offset + 0x74, 0x00000001);
+-	gr_def(ctx, offset + 0x78, 0x00000004);
+-	gr_def(ctx, offset + 0x7c, 0x00000001);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		offset += 4;
+-	gr_def(ctx, offset + 0x80, 0x00000001);
+-	gr_def(ctx, offset + 0x84, 0x00000001);
+-	gr_def(ctx, offset + 0x88, 0x00000007);
+-	gr_def(ctx, offset + 0x8c, 0x00000001);
+-	gr_def(ctx, offset + 0x90, 0x00000007);
+-	gr_def(ctx, offset + 0x94, 0x00000001);
+-	gr_def(ctx, offset + 0x98, 0x00000001);
+-	gr_def(ctx, offset + 0x9c, 0x00000001);
+-	if (dev_priv->chipset == 0x50) {
+-		 gr_def(ctx, offset + 0xb0, 0x00000001);
+-		 gr_def(ctx, offset + 0xb4, 0x00000001);
+-		 gr_def(ctx, offset + 0xbc, 0x00000001);
+-		 gr_def(ctx, offset + 0xc0, 0x0000000a);
+-		 gr_def(ctx, offset + 0xd0, 0x00000040);
+-		 gr_def(ctx, offset + 0xd8, 0x00000002);
+-		 gr_def(ctx, offset + 0xdc, 0x00000100);
+-		 gr_def(ctx, offset + 0xe0, 0x00000001);
+-		 gr_def(ctx, offset + 0xe4, 0x00000100);
+-		 gr_def(ctx, offset + 0x100, 0x00000001);
+-		 gr_def(ctx, offset + 0x124, 0x00000004);
+-		 gr_def(ctx, offset + 0x13c, 0x00000001);
+-		 gr_def(ctx, offset + 0x140, 0x00000100);
+-		 gr_def(ctx, offset + 0x148, 0x00000001);
+-		 gr_def(ctx, offset + 0x154, 0x00000100);
+-		 gr_def(ctx, offset + 0x158, 0x00000001);
+-		 gr_def(ctx, offset + 0x15c, 0x00000100);
+-		 gr_def(ctx, offset + 0x164, 0x00000001);
+-		 gr_def(ctx, offset + 0x170, 0x00000100);
+-		 gr_def(ctx, offset + 0x174, 0x00000001);
+-		 gr_def(ctx, offset + 0x17c, 0x00000001);
+-		 gr_def(ctx, offset + 0x188, 0x00000002);
+-		 gr_def(ctx, offset + 0x190, 0x00000001);
+-		 gr_def(ctx, offset + 0x198, 0x00000001);
+-		 gr_def(ctx, offset + 0x1ac, 0x00000003);
+-		 offset += 0xd0;
+-	} else {
+-		gr_def(ctx, offset + 0xb0, 0x00000001);
+-		gr_def(ctx, offset + 0xb4, 0x00000100);
+-		gr_def(ctx, offset + 0xbc, 0x00000001);
+-		gr_def(ctx, offset + 0xc8, 0x00000100);
+-		gr_def(ctx, offset + 0xcc, 0x00000001);
+-		gr_def(ctx, offset + 0xd0, 0x00000100);
+-		gr_def(ctx, offset + 0xd8, 0x00000001);
+-		gr_def(ctx, offset + 0xe4, 0x00000100);
+-	}
+-	gr_def(ctx, offset + 0xf8, 0x00000004);
+-	gr_def(ctx, offset + 0xfc, 0x00000070);
+-	gr_def(ctx, offset + 0x100, 0x00000080);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		offset += 4;
+-	gr_def(ctx, offset + 0x114, 0x0000000c);
+-	if (dev_priv->chipset == 0x50)
+-		offset -= 4;
+-	gr_def(ctx, offset + 0x11c, 0x00000008);
+-	gr_def(ctx, offset + 0x120, 0x00000014);
+-	if (dev_priv->chipset == 0x50) {
+-		gr_def(ctx, offset + 0x124, 0x00000026);
+-		offset -= 0x18;
+-	} else {
+-		gr_def(ctx, offset + 0x128, 0x00000029);
+-		gr_def(ctx, offset + 0x12c, 0x00000027);
+-		gr_def(ctx, offset + 0x130, 0x00000026);
+-		gr_def(ctx, offset + 0x134, 0x00000008);
+-		gr_def(ctx, offset + 0x138, 0x00000004);
+-		gr_def(ctx, offset + 0x13c, 0x00000027);
+-	}
+-	gr_def(ctx, offset + 0x148, 0x00000001);
+-	gr_def(ctx, offset + 0x14c, 0x00000002);
+-	gr_def(ctx, offset + 0x150, 0x00000003);
+-	gr_def(ctx, offset + 0x154, 0x00000004);
+-	gr_def(ctx, offset + 0x158, 0x00000005);
+-	gr_def(ctx, offset + 0x15c, 0x00000006);
+-	gr_def(ctx, offset + 0x160, 0x00000007);
+-	gr_def(ctx, offset + 0x164, 0x00000001);
+-	gr_def(ctx, offset + 0x1a8, 0x000000cf);
+-	if (dev_priv->chipset == 0x50)
+-		offset -= 4;
+-	gr_def(ctx, offset + 0x1d8, 0x00000080);
+-	gr_def(ctx, offset + 0x1dc, 0x00000004);
+-	gr_def(ctx, offset + 0x1e0, 0x00000004);
+-	if (dev_priv->chipset == 0x50)
+-		offset -= 4;
+-	else
+-		gr_def(ctx, offset + 0x1e4, 0x00000003);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+-		gr_def(ctx, offset + 0x1ec, 0x00000003);
+-		offset += 8;
+-	}
+-	gr_def(ctx, offset + 0x1e8, 0x00000001);
+-	if (dev_priv->chipset == 0x50)
+-		offset -= 4;
+-	gr_def(ctx, offset + 0x1f4, 0x00000012);
+-	gr_def(ctx, offset + 0x1f8, 0x00000010);
+-	gr_def(ctx, offset + 0x1fc, 0x0000000c);
+-	gr_def(ctx, offset + 0x200, 0x00000001);
+-	gr_def(ctx, offset + 0x210, 0x00000004);
+-	gr_def(ctx, offset + 0x214, 0x00000002);
+-	gr_def(ctx, offset + 0x218, 0x00000004);
+-	if (dev_priv->chipset >= 0xa0)
+-		offset += 4;
+-	gr_def(ctx, offset + 0x224, 0x003fffff);
+-	gr_def(ctx, offset + 0x228, 0x00001fff);
+-	if (dev_priv->chipset == 0x50)
+-		offset -= 0x20;
+-	else if (dev_priv->chipset >= 0xa0) {
+-		gr_def(ctx, offset + 0x250, 0x00000001);
+-		gr_def(ctx, offset + 0x254, 0x00000001);
+-		gr_def(ctx, offset + 0x258, 0x00000002);
+-		offset += 0x10;
+-	}
+-	gr_def(ctx, offset + 0x250, 0x00000004);
+-	gr_def(ctx, offset + 0x254, 0x00000014);
+-	gr_def(ctx, offset + 0x258, 0x00000001);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		offset += 4;
+-	gr_def(ctx, offset + 0x264, 0x00000002);
+-	if (dev_priv->chipset >= 0xa0)
+-		offset += 8;
+-	gr_def(ctx, offset + 0x270, 0x00000001);
+-	gr_def(ctx, offset + 0x278, 0x00000002);
+-	gr_def(ctx, offset + 0x27c, 0x00001000);
+-	if (dev_priv->chipset == 0x50)
+-		offset -= 0xc;
+-	else {
+-		gr_def(ctx, offset + 0x280, 0x00000e00);
+-		gr_def(ctx, offset + 0x284, 0x00001000);
+-		gr_def(ctx, offset + 0x288, 0x00001e00);
+-	}
+-	gr_def(ctx, offset + 0x290, 0x00000001);
+-	gr_def(ctx, offset + 0x294, 0x00000001);
+-	gr_def(ctx, offset + 0x298, 0x00000001);
+-	gr_def(ctx, offset + 0x29c, 0x00000001);
+-	gr_def(ctx, offset + 0x2a0, 0x00000001);
+-	gr_def(ctx, offset + 0x2b0, 0x00000200);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+-		gr_def(ctx, offset + 0x2b4, 0x00000200);
+-		offset += 4;
+-	}
+-	if (dev_priv->chipset < 0xa0) {
+-		gr_def(ctx, offset + 0x2b8, 0x00000001);
+-		gr_def(ctx, offset + 0x2bc, 0x00000070);
+-		gr_def(ctx, offset + 0x2c0, 0x00000080);
+-		gr_def(ctx, offset + 0x2cc, 0x00000001);
+-		gr_def(ctx, offset + 0x2d0, 0x00000070);
+-		gr_def(ctx, offset + 0x2d4, 0x00000080);
+-	} else {
+-		gr_def(ctx, offset + 0x2b8, 0x00000001);
+-		gr_def(ctx, offset + 0x2bc, 0x000000f0);
+-		gr_def(ctx, offset + 0x2c0, 0x000000ff);
+-		gr_def(ctx, offset + 0x2cc, 0x00000001);
+-		gr_def(ctx, offset + 0x2d0, 0x000000f0);
+-		gr_def(ctx, offset + 0x2d4, 0x000000ff);
+-		gr_def(ctx, offset + 0x2dc, 0x00000009);
+-		offset += 4;
+-	}
+-	gr_def(ctx, offset + 0x2e4, 0x00000001);
+-	gr_def(ctx, offset + 0x2e8, 0x000000cf);
+-	gr_def(ctx, offset + 0x2f0, 0x00000001);
+-	gr_def(ctx, offset + 0x300, 0x000000cf);
+-	gr_def(ctx, offset + 0x308, 0x00000002);
+-	gr_def(ctx, offset + 0x310, 0x00000001);
+-	gr_def(ctx, offset + 0x318, 0x00000001);
+-	gr_def(ctx, offset + 0x320, 0x000000cf);
+-	gr_def(ctx, offset + 0x324, 0x000000cf);
+-	gr_def(ctx, offset + 0x328, 0x00000001);
+-
+ 	/* 6000? */
+ 	if (dev_priv->chipset == 0x50)
+ 		cp_ctx(ctx, 0x4063e0, 0x1);
+@@ -661,7 +470,7 @@ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
+ 			gr_def(ctx, 0x406818, 0x00000f80);
+ 		else
+ 			gr_def(ctx, 0x406818, 0x00001f80);
+-		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
++		if (IS_NVA3F(dev_priv->chipset))
+ 			gr_def(ctx, 0x40681c, 0x00000030);
+ 		cp_ctx(ctx, 0x406830, 0x3);
+ 	}
+@@ -706,7 +515,7 @@ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
+ 
+ 			if (dev_priv->chipset < 0xa0)
+ 				cp_ctx(ctx, 0x407094 + (i<<8), 1);
+-			else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa)
++			else if (!IS_NVA3F(dev_priv->chipset))
+ 				cp_ctx(ctx, 0x407094 + (i<<8), 3);
+ 			else {
+ 				cp_ctx(ctx, 0x407094 + (i<<8), 4);
+@@ -799,6 +608,7 @@ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
+ 				case 0xa8:
+ 				case 0xaa:
+ 				case 0xac:
++				case 0xaf:
+ 					gr_def(ctx, offset + 0x1c, 0x300c0000);
+ 					break;
+ 				}
+@@ -825,7 +635,7 @@ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
+ 				gr_def(ctx, base + 0x304, 0x00007070);
+ 			else if (dev_priv->chipset < 0xa0)
+ 				gr_def(ctx, base + 0x304, 0x00027070);
+-			else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa)
++			else if (!IS_NVA3F(dev_priv->chipset))
+ 				gr_def(ctx, base + 0x304, 0x01127070);
+ 			else
+ 				gr_def(ctx, base + 0x304, 0x05127070);
+@@ -849,7 +659,7 @@ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
+ 			if (dev_priv->chipset < 0xa0) {
+ 				cp_ctx(ctx, base + 0x340, 9);
+ 				offset = base + 0x340;
+-			} else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa) {
++			} else if (!IS_NVA3F(dev_priv->chipset)) {
+ 				cp_ctx(ctx, base + 0x33c, 0xb);
+ 				offset = base + 0x344;
+ 			} else {
+@@ -880,7 +690,7 @@ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
+ 			gr_def(ctx, offset + 0x0, 0x000001f0);
+ 			gr_def(ctx, offset + 0x4, 0x00000001);
+ 			gr_def(ctx, offset + 0x8, 0x00000003);
+-			if (dev_priv->chipset == 0x50 || dev_priv->chipset >= 0xaa)
++			if (dev_priv->chipset == 0x50 || IS_NVAAF(dev_priv->chipset))
+ 				gr_def(ctx, offset + 0xc, 0x00008000);
+ 			gr_def(ctx, offset + 0x14, 0x00039e00);
+ 			cp_ctx(ctx, offset + 0x1c, 2);
+@@ -892,7 +702,7 @@ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
+ 
+ 			if (dev_priv->chipset >= 0xa0) {
+ 				cp_ctx(ctx, base + 0x54c, 2);
+-				if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa)
++				if (!IS_NVA3F(dev_priv->chipset))
+ 					gr_def(ctx, base + 0x54c, 0x003fe006);
+ 				else
+ 					gr_def(ctx, base + 0x54c, 0x003fe007);
+@@ -948,6 +758,336 @@ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
+ 	}
+ }
+ 
++static void
++dd_emit(struct nouveau_grctx *ctx, int num, uint32_t val) {
++	int i;
++	if (val && ctx->mode == NOUVEAU_GRCTX_VALS)
++		for (i = 0; i < num; i++)
++			nv_wo32(ctx->data, 4 * (ctx->ctxvals_pos + i), val);
++	ctx->ctxvals_pos += num;
++}
++
++static void
++nv50_graph_construct_mmio_ddata(struct nouveau_grctx *ctx)
++{
++	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
++	int base, num;
++	base = ctx->ctxvals_pos;
++
++	/* tesla state */
++	dd_emit(ctx, 1, 0);	/* 00000001 UNK0F90 */
++	dd_emit(ctx, 1, 0);	/* 00000001 UNK135C */
++
++	/* SRC_TIC state */
++	dd_emit(ctx, 1, 0);	/* 00000007 SRC_TILE_MODE_Z */
++	dd_emit(ctx, 1, 2);	/* 00000007 SRC_TILE_MODE_Y */
++	dd_emit(ctx, 1, 1);	/* 00000001 SRC_LINEAR #1 */
++	dd_emit(ctx, 1, 0);	/* 000000ff SRC_ADDRESS_HIGH */
++	dd_emit(ctx, 1, 0);	/* 00000001 SRC_SRGB */
++	if (dev_priv->chipset >= 0x94)
++		dd_emit(ctx, 1, 0);	/* 00000003 eng2d UNK0258 */
++	dd_emit(ctx, 1, 1);	/* 00000fff SRC_DEPTH */
++	dd_emit(ctx, 1, 0x100);	/* 0000ffff SRC_HEIGHT */
++
++	/* turing state */
++	dd_emit(ctx, 1, 0);		/* 0000000f TEXTURES_LOG2 */
++	dd_emit(ctx, 1, 0);		/* 0000000f SAMPLERS_LOG2 */
++	dd_emit(ctx, 1, 0);		/* 000000ff CB_DEF_ADDRESS_HIGH */
++	dd_emit(ctx, 1, 0);		/* ffffffff CB_DEF_ADDRESS_LOW */
++	dd_emit(ctx, 1, 0);		/* ffffffff SHARED_SIZE */
++	dd_emit(ctx, 1, 2);		/* ffffffff REG_MODE */
++	dd_emit(ctx, 1, 1);		/* 0000ffff BLOCK_ALLOC_THREADS */
++	dd_emit(ctx, 1, 1);		/* 00000001 LANES32 */
++	dd_emit(ctx, 1, 0);		/* 000000ff UNK370 */
++	dd_emit(ctx, 1, 0);		/* 000000ff USER_PARAM_UNK */
++	dd_emit(ctx, 1, 0);		/* 000000ff USER_PARAM_COUNT */
++	dd_emit(ctx, 1, 1);		/* 000000ff UNK384 bits 8-15 */
++	dd_emit(ctx, 1, 0x3fffff);	/* 003fffff TIC_LIMIT */
++	dd_emit(ctx, 1, 0x1fff);	/* 000fffff TSC_LIMIT */
++	dd_emit(ctx, 1, 0);		/* 0000ffff CB_ADDR_INDEX */
++	dd_emit(ctx, 1, 1);		/* 000007ff BLOCKDIM_X */
++	dd_emit(ctx, 1, 1);		/* 000007ff BLOCKDIM_XMY */
++	dd_emit(ctx, 1, 0);		/* 00000001 BLOCKDIM_XMY_OVERFLOW */
++	dd_emit(ctx, 1, 1);		/* 0003ffff BLOCKDIM_XMYMZ */
++	dd_emit(ctx, 1, 1);		/* 000007ff BLOCKDIM_Y */
++	dd_emit(ctx, 1, 1);		/* 0000007f BLOCKDIM_Z */
++	dd_emit(ctx, 1, 4);		/* 000000ff CP_REG_ALLOC_TEMP */
++	dd_emit(ctx, 1, 1);		/* 00000001 BLOCKDIM_DIRTY */
++	if (IS_NVA3F(dev_priv->chipset))
++		dd_emit(ctx, 1, 0);	/* 00000003 UNK03E8 */
++	dd_emit(ctx, 1, 1);		/* 0000007f BLOCK_ALLOC_HALFWARPS */
++	dd_emit(ctx, 1, 1);		/* 00000007 LOCAL_WARPS_NO_CLAMP */
++	dd_emit(ctx, 1, 7);		/* 00000007 LOCAL_WARPS_LOG_ALLOC */
++	dd_emit(ctx, 1, 1);		/* 00000007 STACK_WARPS_NO_CLAMP */
++	dd_emit(ctx, 1, 7);		/* 00000007 STACK_WARPS_LOG_ALLOC */
++	dd_emit(ctx, 1, 1);		/* 00001fff BLOCK_ALLOC_REGSLOTS_PACKED */
++	dd_emit(ctx, 1, 1);		/* 00001fff BLOCK_ALLOC_REGSLOTS_STRIDED */
++	dd_emit(ctx, 1, 1);		/* 000007ff BLOCK_ALLOC_THREADS */
++
++	/* compat 2d state */
++	if (dev_priv->chipset == 0x50) {
++		dd_emit(ctx, 4, 0);		/* 0000ffff clip X, Y, W, H */
++
++		dd_emit(ctx, 1, 1);		/* ffffffff chroma COLOR_FORMAT */
++
++		dd_emit(ctx, 1, 1);		/* ffffffff pattern COLOR_FORMAT */
++		dd_emit(ctx, 1, 0);		/* ffffffff pattern SHAPE */
++		dd_emit(ctx, 1, 1);		/* ffffffff pattern PATTERN_SELECT */
++
++		dd_emit(ctx, 1, 0xa);		/* ffffffff surf2d SRC_FORMAT */
++		dd_emit(ctx, 1, 0);		/* ffffffff surf2d DMA_SRC */
++		dd_emit(ctx, 1, 0);		/* 000000ff surf2d SRC_ADDRESS_HIGH */
++		dd_emit(ctx, 1, 0);		/* ffffffff surf2d SRC_ADDRESS_LOW */
++		dd_emit(ctx, 1, 0x40);		/* 0000ffff surf2d SRC_PITCH */
++		dd_emit(ctx, 1, 0);		/* 0000000f surf2d SRC_TILE_MODE_Z */
++		dd_emit(ctx, 1, 2);		/* 0000000f surf2d SRC_TILE_MODE_Y */
++		dd_emit(ctx, 1, 0x100);		/* ffffffff surf2d SRC_HEIGHT */
++		dd_emit(ctx, 1, 1);		/* 00000001 surf2d SRC_LINEAR */
++		dd_emit(ctx, 1, 0x100);		/* ffffffff surf2d SRC_WIDTH */
++
++		dd_emit(ctx, 1, 0);		/* 0000ffff gdirect CLIP_B_X */
++		dd_emit(ctx, 1, 0);		/* 0000ffff gdirect CLIP_B_Y */
++		dd_emit(ctx, 1, 0);		/* 0000ffff gdirect CLIP_C_X */
++		dd_emit(ctx, 1, 0);		/* 0000ffff gdirect CLIP_C_Y */
++		dd_emit(ctx, 1, 0);		/* 0000ffff gdirect CLIP_D_X */
++		dd_emit(ctx, 1, 0);		/* 0000ffff gdirect CLIP_D_Y */
++		dd_emit(ctx, 1, 1);		/* ffffffff gdirect COLOR_FORMAT */
++		dd_emit(ctx, 1, 0);		/* ffffffff gdirect OPERATION */
++		dd_emit(ctx, 1, 0);		/* 0000ffff gdirect POINT_X */
++		dd_emit(ctx, 1, 0);		/* 0000ffff gdirect POINT_Y */
++
++		dd_emit(ctx, 1, 0);		/* 0000ffff blit SRC_Y */
++		dd_emit(ctx, 1, 0);		/* ffffffff blit OPERATION */
++
++		dd_emit(ctx, 1, 0);		/* ffffffff ifc OPERATION */
++
++		dd_emit(ctx, 1, 0);		/* ffffffff iifc INDEX_FORMAT */
++		dd_emit(ctx, 1, 0);		/* ffffffff iifc LUT_OFFSET */
++		dd_emit(ctx, 1, 4);		/* ffffffff iifc COLOR_FORMAT */
++		dd_emit(ctx, 1, 0);		/* ffffffff iifc OPERATION */
++	}
++
++	/* m2mf state */
++	dd_emit(ctx, 1, 0);		/* ffffffff m2mf LINE_COUNT */
++	dd_emit(ctx, 1, 0);		/* ffffffff m2mf LINE_LENGTH_IN */
++	dd_emit(ctx, 2, 0);		/* ffffffff m2mf OFFSET_IN, OFFSET_OUT */
++	dd_emit(ctx, 1, 1);		/* ffffffff m2mf TILING_DEPTH_OUT */
++	dd_emit(ctx, 1, 0x100);		/* ffffffff m2mf TILING_HEIGHT_OUT */
++	dd_emit(ctx, 1, 0);		/* ffffffff m2mf TILING_POSITION_OUT_Z */
++	dd_emit(ctx, 1, 1);		/* 00000001 m2mf LINEAR_OUT */
++	dd_emit(ctx, 2, 0);		/* 0000ffff m2mf TILING_POSITION_OUT_X, Y */
++	dd_emit(ctx, 1, 0x100);		/* ffffffff m2mf TILING_PITCH_OUT */
++	dd_emit(ctx, 1, 1);		/* ffffffff m2mf TILING_DEPTH_IN */
++	dd_emit(ctx, 1, 0x100);		/* ffffffff m2mf TILING_HEIGHT_IN */
++	dd_emit(ctx, 1, 0);		/* ffffffff m2mf TILING_POSITION_IN_Z */
++	dd_emit(ctx, 1, 1);		/* 00000001 m2mf LINEAR_IN */
++	dd_emit(ctx, 2, 0);		/* 0000ffff m2mf TILING_POSITION_IN_X, Y */
++	dd_emit(ctx, 1, 0x100);		/* ffffffff m2mf TILING_PITCH_IN */
++
++	/* more compat 2d state */
++	if (dev_priv->chipset == 0x50) {
++		dd_emit(ctx, 1, 1);		/* ffffffff line COLOR_FORMAT */
++		dd_emit(ctx, 1, 0);		/* ffffffff line OPERATION */
++
++		dd_emit(ctx, 1, 1);		/* ffffffff triangle COLOR_FORMAT */
++		dd_emit(ctx, 1, 0);		/* ffffffff triangle OPERATION */
++
++		dd_emit(ctx, 1, 0);		/* 0000000f sifm TILE_MODE_Z */
++		dd_emit(ctx, 1, 2);		/* 0000000f sifm TILE_MODE_Y */
++		dd_emit(ctx, 1, 0);		/* 000000ff sifm FORMAT_FILTER */
++		dd_emit(ctx, 1, 1);		/* 000000ff sifm FORMAT_ORIGIN */
++		dd_emit(ctx, 1, 0);		/* 0000ffff sifm SRC_PITCH */
++		dd_emit(ctx, 1, 1);		/* 00000001 sifm SRC_LINEAR */
++		dd_emit(ctx, 1, 0);		/* 000000ff sifm SRC_OFFSET_HIGH */
++		dd_emit(ctx, 1, 0);		/* ffffffff sifm SRC_OFFSET */
++		dd_emit(ctx, 1, 0);		/* 0000ffff sifm SRC_HEIGHT */
++		dd_emit(ctx, 1, 0);		/* 0000ffff sifm SRC_WIDTH */
++		dd_emit(ctx, 1, 3);		/* ffffffff sifm COLOR_FORMAT */
++		dd_emit(ctx, 1, 0);		/* ffffffff sifm OPERATION */
++
++		dd_emit(ctx, 1, 0);		/* ffffffff sifc OPERATION */
++	}
++
++	/* tesla state */
++	dd_emit(ctx, 1, 0);		/* 0000000f GP_TEXTURES_LOG2 */
++	dd_emit(ctx, 1, 0);		/* 0000000f GP_SAMPLERS_LOG2 */
++	dd_emit(ctx, 1, 0);		/* 000000ff */
++	dd_emit(ctx, 1, 0);		/* ffffffff */
++	dd_emit(ctx, 1, 4);		/* 000000ff UNK12B0_0 */
++	dd_emit(ctx, 1, 0x70);		/* 000000ff UNK12B0_1 */
++	dd_emit(ctx, 1, 0x80);		/* 000000ff UNK12B0_3 */
++	dd_emit(ctx, 1, 0);		/* 000000ff UNK12B0_2 */
++	dd_emit(ctx, 1, 0);		/* 0000000f FP_TEXTURES_LOG2 */
++	dd_emit(ctx, 1, 0);		/* 0000000f FP_SAMPLERS_LOG2 */
++	if (IS_NVA3F(dev_priv->chipset)) {
++		dd_emit(ctx, 1, 0);	/* ffffffff */
++		dd_emit(ctx, 1, 0);	/* 0000007f MULTISAMPLE_SAMPLES_LOG2 */
++	} else {
++		dd_emit(ctx, 1, 0);	/* 0000000f MULTISAMPLE_SAMPLES_LOG2 */
++	} 
++	dd_emit(ctx, 1, 0xc);		/* 000000ff SEMANTIC_COLOR.BFC0_ID */
++	if (dev_priv->chipset != 0x50)
++		dd_emit(ctx, 1, 0);	/* 00000001 SEMANTIC_COLOR.CLMP_EN */
++	dd_emit(ctx, 1, 8);		/* 000000ff SEMANTIC_COLOR.COLR_NR */
++	dd_emit(ctx, 1, 0x14);		/* 000000ff SEMANTIC_COLOR.FFC0_ID */
++	if (dev_priv->chipset == 0x50) {
++		dd_emit(ctx, 1, 0);	/* 000000ff SEMANTIC_LAYER */
++		dd_emit(ctx, 1, 0);	/* 00000001 */
++	} else {
++		dd_emit(ctx, 1, 0);	/* 00000001 SEMANTIC_PTSZ.ENABLE */
++		dd_emit(ctx, 1, 0x29);	/* 000000ff SEMANTIC_PTSZ.PTSZ_ID */
++		dd_emit(ctx, 1, 0x27);	/* 000000ff SEMANTIC_PRIM */
++		dd_emit(ctx, 1, 0x26);	/* 000000ff SEMANTIC_LAYER */
++		dd_emit(ctx, 1, 8);	/* 0000000f SEMANTIC_CLIP.CLIP_HIGH */
++		dd_emit(ctx, 1, 4);	/* 000000ff SEMANTIC_CLIP.CLIP_LO */
++		dd_emit(ctx, 1, 0x27);	/* 000000ff UNK0FD4 */
++		dd_emit(ctx, 1, 0);	/* 00000001 UNK1900 */
++	}
++	dd_emit(ctx, 1, 0);		/* 00000007 RT_CONTROL_MAP0 */
++	dd_emit(ctx, 1, 1);		/* 00000007 RT_CONTROL_MAP1 */
++	dd_emit(ctx, 1, 2);		/* 00000007 RT_CONTROL_MAP2 */
++	dd_emit(ctx, 1, 3);		/* 00000007 RT_CONTROL_MAP3 */
++	dd_emit(ctx, 1, 4);		/* 00000007 RT_CONTROL_MAP4 */
++	dd_emit(ctx, 1, 5);		/* 00000007 RT_CONTROL_MAP5 */
++	dd_emit(ctx, 1, 6);		/* 00000007 RT_CONTROL_MAP6 */
++	dd_emit(ctx, 1, 7);		/* 00000007 RT_CONTROL_MAP7 */
++	dd_emit(ctx, 1, 1);		/* 0000000f RT_CONTROL_COUNT */
++	dd_emit(ctx, 8, 0);		/* 00000001 RT_HORIZ_UNK */
++	dd_emit(ctx, 8, 0);		/* ffffffff RT_ADDRESS_LOW */
++	dd_emit(ctx, 1, 0xcf);		/* 000000ff RT_FORMAT */
++	dd_emit(ctx, 7, 0);		/* 000000ff RT_FORMAT */
++	if (dev_priv->chipset != 0x50)
++		dd_emit(ctx, 3, 0);	/* 1, 1, 1 */
++	else
++		dd_emit(ctx, 2, 0);	/* 1, 1 */
++	dd_emit(ctx, 1, 0);		/* ffffffff GP_ENABLE */
++	dd_emit(ctx, 1, 0x80);		/* 0000ffff GP_VERTEX_OUTPUT_COUNT*/
++	dd_emit(ctx, 1, 4);		/* 000000ff GP_REG_ALLOC_RESULT */
++	dd_emit(ctx, 1, 4);		/* 000000ff GP_RESULT_MAP_SIZE */
++	if (IS_NVA3F(dev_priv->chipset)) {
++		dd_emit(ctx, 1, 3);	/* 00000003 */
++		dd_emit(ctx, 1, 0);	/* 00000001 UNK1418. Alone. */
++	}
++	if (dev_priv->chipset != 0x50)
++		dd_emit(ctx, 1, 3);	/* 00000003 UNK15AC */
++	dd_emit(ctx, 1, 1);		/* ffffffff RASTERIZE_ENABLE */
++	dd_emit(ctx, 1, 0);		/* 00000001 FP_CONTROL.EXPORTS_Z */
++	if (dev_priv->chipset != 0x50)
++		dd_emit(ctx, 1, 0);	/* 00000001 FP_CONTROL.MULTIPLE_RESULTS */
++	dd_emit(ctx, 1, 0x12);		/* 000000ff FP_INTERPOLANT_CTRL.COUNT */
++	dd_emit(ctx, 1, 0x10);		/* 000000ff FP_INTERPOLANT_CTRL.COUNT_NONFLAT */
++	dd_emit(ctx, 1, 0xc);		/* 000000ff FP_INTERPOLANT_CTRL.OFFSET */
++	dd_emit(ctx, 1, 1);		/* 00000001 FP_INTERPOLANT_CTRL.UMASK.W */
++	dd_emit(ctx, 1, 0);		/* 00000001 FP_INTERPOLANT_CTRL.UMASK.X */
++	dd_emit(ctx, 1, 0);		/* 00000001 FP_INTERPOLANT_CTRL.UMASK.Y */
++	dd_emit(ctx, 1, 0);		/* 00000001 FP_INTERPOLANT_CTRL.UMASK.Z */
++	dd_emit(ctx, 1, 4);		/* 000000ff FP_RESULT_COUNT */
++	dd_emit(ctx, 1, 2);		/* ffffffff REG_MODE */
++	dd_emit(ctx, 1, 4);		/* 000000ff FP_REG_ALLOC_TEMP */
++	if (dev_priv->chipset >= 0xa0)
++		dd_emit(ctx, 1, 0);	/* ffffffff */
++	dd_emit(ctx, 1, 0);		/* 00000001 GP_BUILTIN_RESULT_EN.LAYER_IDX */
++	dd_emit(ctx, 1, 0);		/* ffffffff STRMOUT_ENABLE */
++	dd_emit(ctx, 1, 0x3fffff);	/* 003fffff TIC_LIMIT */
++	dd_emit(ctx, 1, 0x1fff);	/* 000fffff TSC_LIMIT */
++	dd_emit(ctx, 1, 0);		/* 00000001 VERTEX_TWO_SIDE_ENABLE*/
++	if (dev_priv->chipset != 0x50)
++		dd_emit(ctx, 8, 0);	/* 00000001 */
++	if (dev_priv->chipset >= 0xa0) {
++		dd_emit(ctx, 1, 1);	/* 00000007 VTX_ATTR_DEFINE.COMP */
++		dd_emit(ctx, 1, 1);	/* 00000007 VTX_ATTR_DEFINE.SIZE */
++		dd_emit(ctx, 1, 2);	/* 00000007 VTX_ATTR_DEFINE.TYPE */
++		dd_emit(ctx, 1, 0);	/* 000000ff VTX_ATTR_DEFINE.ATTR */
++	}
++	dd_emit(ctx, 1, 4);		/* 0000007f VP_RESULT_MAP_SIZE */
++	dd_emit(ctx, 1, 0x14);		/* 0000001f ZETA_FORMAT */
++	dd_emit(ctx, 1, 1);		/* 00000001 ZETA_ENABLE */
++	dd_emit(ctx, 1, 0);		/* 0000000f VP_TEXTURES_LOG2 */
++	dd_emit(ctx, 1, 0);		/* 0000000f VP_SAMPLERS_LOG2 */
++	if (IS_NVA3F(dev_priv->chipset))
++		dd_emit(ctx, 1, 0);	/* 00000001 */
++	dd_emit(ctx, 1, 2);		/* 00000003 POLYGON_MODE_BACK */
++	if (dev_priv->chipset >= 0xa0)
++		dd_emit(ctx, 1, 0);	/* 00000003 VTX_ATTR_DEFINE.SIZE - 1 */
++	dd_emit(ctx, 1, 0);		/* 0000ffff CB_ADDR_INDEX */
++	if (dev_priv->chipset >= 0xa0)
++		dd_emit(ctx, 1, 0);	/* 00000003 */
++	dd_emit(ctx, 1, 0);		/* 00000001 CULL_FACE_ENABLE */
++	dd_emit(ctx, 1, 1);		/* 00000003 CULL_FACE */
++	dd_emit(ctx, 1, 0);		/* 00000001 FRONT_FACE */
++	dd_emit(ctx, 1, 2);		/* 00000003 POLYGON_MODE_FRONT */
++	dd_emit(ctx, 1, 0x1000);	/* 00007fff UNK141C */
++	if (dev_priv->chipset != 0x50) {
++		dd_emit(ctx, 1, 0xe00);		/* 7fff */
++		dd_emit(ctx, 1, 0x1000);	/* 7fff */
++		dd_emit(ctx, 1, 0x1e00);	/* 7fff */
++	}
++	dd_emit(ctx, 1, 0);		/* 00000001 BEGIN_END_ACTIVE */
++	dd_emit(ctx, 1, 1);		/* 00000001 POLYGON_MODE_??? */
++	dd_emit(ctx, 1, 1);		/* 000000ff GP_REG_ALLOC_TEMP / 4 rounded up */
++	dd_emit(ctx, 1, 1);		/* 000000ff FP_REG_ALLOC_TEMP... without /4? */
++	dd_emit(ctx, 1, 1);		/* 000000ff VP_REG_ALLOC_TEMP / 4 rounded up */
++	dd_emit(ctx, 1, 1);		/* 00000001 */
++	dd_emit(ctx, 1, 0);		/* 00000001 */
++	dd_emit(ctx, 1, 0);		/* 00000001 VTX_ATTR_MASK_UNK0 nonempty */
++	dd_emit(ctx, 1, 0);		/* 00000001 VTX_ATTR_MASK_UNK1 nonempty */
++	dd_emit(ctx, 1, 0x200);		/* 0003ffff GP_VERTEX_OUTPUT_COUNT*GP_REG_ALLOC_RESULT */
++	if (IS_NVA3F(dev_priv->chipset))
++		dd_emit(ctx, 1, 0x200);
++	dd_emit(ctx, 1, 0);		/* 00000001 */
++	if (dev_priv->chipset < 0xa0) {
++		dd_emit(ctx, 1, 1);	/* 00000001 */
++		dd_emit(ctx, 1, 0x70);	/* 000000ff */
++		dd_emit(ctx, 1, 0x80);	/* 000000ff */
++		dd_emit(ctx, 1, 0);	/* 000000ff */
++		dd_emit(ctx, 1, 0);	/* 00000001 */
++		dd_emit(ctx, 1, 1);	/* 00000001 */
++		dd_emit(ctx, 1, 0x70);	/* 000000ff */
++		dd_emit(ctx, 1, 0x80);	/* 000000ff */
++		dd_emit(ctx, 1, 0);	/* 000000ff */
++	} else {
++		dd_emit(ctx, 1, 1);	/* 00000001 */
++		dd_emit(ctx, 1, 0xf0);	/* 000000ff */
++		dd_emit(ctx, 1, 0xff);	/* 000000ff */
++		dd_emit(ctx, 1, 0);	/* 000000ff */
++		dd_emit(ctx, 1, 0);	/* 00000001 */
++		dd_emit(ctx, 1, 1);	/* 00000001 */
++		dd_emit(ctx, 1, 0xf0);	/* 000000ff */
++		dd_emit(ctx, 1, 0xff);	/* 000000ff */
++		dd_emit(ctx, 1, 0);	/* 000000ff */
++		dd_emit(ctx, 1, 9);	/* 0000003f UNK114C.COMP,SIZE */
++	}
++
++	/* eng2d state */
++	dd_emit(ctx, 1, 0);		/* 00000001 eng2d COLOR_KEY_ENABLE */
++	dd_emit(ctx, 1, 0);		/* 00000007 eng2d COLOR_KEY_FORMAT */
++	dd_emit(ctx, 1, 1);		/* ffffffff eng2d DST_DEPTH */
++	dd_emit(ctx, 1, 0xcf);		/* 000000ff eng2d DST_FORMAT */
++	dd_emit(ctx, 1, 0);		/* ffffffff eng2d DST_LAYER */
++	dd_emit(ctx, 1, 1);		/* 00000001 eng2d DST_LINEAR */
++	dd_emit(ctx, 1, 0);		/* 00000007 eng2d PATTERN_COLOR_FORMAT */
++	dd_emit(ctx, 1, 0);		/* 00000007 eng2d OPERATION */
++	dd_emit(ctx, 1, 0);		/* 00000003 eng2d PATTERN_SELECT */
++	dd_emit(ctx, 1, 0xcf);		/* 000000ff eng2d SIFC_FORMAT */
++	dd_emit(ctx, 1, 0);		/* 00000001 eng2d SIFC_BITMAP_ENABLE */
++	dd_emit(ctx, 1, 2);		/* 00000003 eng2d SIFC_BITMAP_UNK808 */
++	dd_emit(ctx, 1, 0);		/* ffffffff eng2d BLIT_DU_DX_FRACT */
++	dd_emit(ctx, 1, 1);		/* ffffffff eng2d BLIT_DU_DX_INT */
++	dd_emit(ctx, 1, 0);		/* ffffffff eng2d BLIT_DV_DY_FRACT */
++	dd_emit(ctx, 1, 1);		/* ffffffff eng2d BLIT_DV_DY_INT */
++	dd_emit(ctx, 1, 0);		/* 00000001 eng2d BLIT_CONTROL_FILTER */
++	dd_emit(ctx, 1, 0xcf);		/* 000000ff eng2d DRAW_COLOR_FORMAT */
++	dd_emit(ctx, 1, 0xcf);		/* 000000ff eng2d SRC_FORMAT */
++	dd_emit(ctx, 1, 1);		/* 00000001 eng2d SRC_LINEAR #2 */
++
++	num = ctx->ctxvals_pos - base;
++	ctx->ctxvals_pos = base;
++	if (IS_NVA3F(dev_priv->chipset))
++		cp_ctx(ctx, 0x404800, num);
++	else
++		cp_ctx(ctx, 0x405400, num);
++}
++
+ /*
+  * xfer areas. These are a pain.
+  *
+@@ -990,28 +1130,33 @@ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
+  * without the help of ctxprog.
+  */
+ 
+-static inline void
++static void
+ xf_emit(struct nouveau_grctx *ctx, int num, uint32_t val) {
+ 	int i;
+ 	if (val && ctx->mode == NOUVEAU_GRCTX_VALS)
+ 		for (i = 0; i < num; i++)
+-			nv_wo32(ctx->dev, ctx->data, ctx->ctxvals_pos + (i << 3), val);
++			nv_wo32(ctx->data, 4 * (ctx->ctxvals_pos + (i << 3)), val);
+ 	ctx->ctxvals_pos += num << 3;
+ }
+ 
+ /* Gene declarations... */
+ 
++static void nv50_graph_construct_gene_dispatch(struct nouveau_grctx *ctx);
+ static void nv50_graph_construct_gene_m2mf(struct nouveau_grctx *ctx);
+-static void nv50_graph_construct_gene_unk1(struct nouveau_grctx *ctx);
+-static void nv50_graph_construct_gene_unk2(struct nouveau_grctx *ctx);
+-static void nv50_graph_construct_gene_unk3(struct nouveau_grctx *ctx);
+-static void nv50_graph_construct_gene_unk4(struct nouveau_grctx *ctx);
+-static void nv50_graph_construct_gene_unk5(struct nouveau_grctx *ctx);
+-static void nv50_graph_construct_gene_unk6(struct nouveau_grctx *ctx);
+-static void nv50_graph_construct_gene_unk7(struct nouveau_grctx *ctx);
+-static void nv50_graph_construct_gene_unk8(struct nouveau_grctx *ctx);
+-static void nv50_graph_construct_gene_unk9(struct nouveau_grctx *ctx);
+-static void nv50_graph_construct_gene_unk10(struct nouveau_grctx *ctx);
++static void nv50_graph_construct_gene_ccache(struct nouveau_grctx *ctx);
++static void nv50_graph_construct_gene_unk10xx(struct nouveau_grctx *ctx);
++static void nv50_graph_construct_gene_unk14xx(struct nouveau_grctx *ctx);
++static void nv50_graph_construct_gene_zcull(struct nouveau_grctx *ctx);
++static void nv50_graph_construct_gene_clipid(struct nouveau_grctx *ctx);
++static void nv50_graph_construct_gene_unk24xx(struct nouveau_grctx *ctx);
++static void nv50_graph_construct_gene_vfetch(struct nouveau_grctx *ctx);
++static void nv50_graph_construct_gene_eng2d(struct nouveau_grctx *ctx);
++static void nv50_graph_construct_gene_csched(struct nouveau_grctx *ctx);
++static void nv50_graph_construct_gene_unk1cxx(struct nouveau_grctx *ctx);
++static void nv50_graph_construct_gene_strmout(struct nouveau_grctx *ctx);
++static void nv50_graph_construct_gene_unk34xx(struct nouveau_grctx *ctx);
++static void nv50_graph_construct_gene_ropm1(struct nouveau_grctx *ctx);
++static void nv50_graph_construct_gene_ropm2(struct nouveau_grctx *ctx);
+ static void nv50_graph_construct_gene_ropc(struct nouveau_grctx *ctx);
+ static void nv50_graph_construct_xfer_tp(struct nouveau_grctx *ctx);
+ 
+@@ -1030,102 +1175,32 @@ nv50_graph_construct_xfer1(struct nouveau_grctx *ctx)
+ 	if (dev_priv->chipset < 0xa0) {
+ 		/* Strand 0 */
+ 		ctx->ctxvals_pos = offset;
+-		switch (dev_priv->chipset) {
+-		case 0x50:
+-			xf_emit(ctx, 0x99, 0);
+-			break;
+-		case 0x84:
+-		case 0x86:
+-			xf_emit(ctx, 0x384, 0);
+-			break;
+-		case 0x92:
+-		case 0x94:
+-		case 0x96:
+-		case 0x98:
+-			xf_emit(ctx, 0x380, 0);
+-			break;
+-		}
+-		nv50_graph_construct_gene_m2mf (ctx);
+-		switch (dev_priv->chipset) {
+-		case 0x50:
+-		case 0x84:
+-		case 0x86:
+-		case 0x98:
+-			xf_emit(ctx, 0x4c4, 0);
+-			break;
+-		case 0x92:
+-		case 0x94:
+-		case 0x96:
+-			xf_emit(ctx, 0x984, 0);
+-			break;
+-		}
+-		nv50_graph_construct_gene_unk5(ctx);
+-		if (dev_priv->chipset == 0x50)
+-			xf_emit(ctx, 0xa, 0);
+-		else
+-			xf_emit(ctx, 0xb, 0);
+-		nv50_graph_construct_gene_unk4(ctx);
+-		nv50_graph_construct_gene_unk3(ctx);
++		nv50_graph_construct_gene_dispatch(ctx);
++		nv50_graph_construct_gene_m2mf(ctx);
++		nv50_graph_construct_gene_unk24xx(ctx);
++		nv50_graph_construct_gene_clipid(ctx);
++		nv50_graph_construct_gene_zcull(ctx);
+ 		if ((ctx->ctxvals_pos-offset)/8 > size)
+ 			size = (ctx->ctxvals_pos-offset)/8;
+ 
+ 		/* Strand 1 */
+ 		ctx->ctxvals_pos = offset + 0x1;
+-		nv50_graph_construct_gene_unk6(ctx);
+-		nv50_graph_construct_gene_unk7(ctx);
+-		nv50_graph_construct_gene_unk8(ctx);
+-		switch (dev_priv->chipset) {
+-		case 0x50:
+-		case 0x92:
+-			xf_emit(ctx, 0xfb, 0);
+-			break;
+-		case 0x84:
+-			xf_emit(ctx, 0xd3, 0);
+-			break;
+-		case 0x94:
+-		case 0x96:
+-			xf_emit(ctx, 0xab, 0);
+-			break;
+-		case 0x86:
+-		case 0x98:
+-			xf_emit(ctx, 0x6b, 0);
+-			break;
+-		}
+-		xf_emit(ctx, 2, 0x4e3bfdf);
+-		xf_emit(ctx, 4, 0);
+-		xf_emit(ctx, 1, 0x0fac6881);
+-		xf_emit(ctx, 0xb, 0);
+-		xf_emit(ctx, 2, 0x4e3bfdf);
++		nv50_graph_construct_gene_vfetch(ctx);
++		nv50_graph_construct_gene_eng2d(ctx);
++		nv50_graph_construct_gene_csched(ctx);
++		nv50_graph_construct_gene_ropm1(ctx);
++		nv50_graph_construct_gene_ropm2(ctx);
+ 		if ((ctx->ctxvals_pos-offset)/8 > size)
+ 			size = (ctx->ctxvals_pos-offset)/8;
+ 
+ 		/* Strand 2 */
+ 		ctx->ctxvals_pos = offset + 0x2;
+-		switch (dev_priv->chipset) {
+-		case 0x50:
+-		case 0x92:
+-			xf_emit(ctx, 0xa80, 0);
+-			break;
+-		case 0x84:
+-			xf_emit(ctx, 0xa7e, 0);
+-			break;
+-		case 0x94:
+-		case 0x96:
+-			xf_emit(ctx, 0xa7c, 0);
+-			break;
+-		case 0x86:
+-		case 0x98:
+-			xf_emit(ctx, 0xa7a, 0);
+-			break;
+-		}
+-		xf_emit(ctx, 1, 0x3fffff);
+-		xf_emit(ctx, 2, 0);
+-		xf_emit(ctx, 1, 0x1fff);
+-		xf_emit(ctx, 0xe, 0);
+-		nv50_graph_construct_gene_unk9(ctx);
+-		nv50_graph_construct_gene_unk2(ctx);
+-		nv50_graph_construct_gene_unk1(ctx);
+-		nv50_graph_construct_gene_unk10(ctx);
++		nv50_graph_construct_gene_ccache(ctx);
++		nv50_graph_construct_gene_unk1cxx(ctx);
++		nv50_graph_construct_gene_strmout(ctx);
++		nv50_graph_construct_gene_unk14xx(ctx);
++		nv50_graph_construct_gene_unk10xx(ctx);
++		nv50_graph_construct_gene_unk34xx(ctx);
+ 		if ((ctx->ctxvals_pos-offset)/8 > size)
+ 			size = (ctx->ctxvals_pos-offset)/8;
+ 
+@@ -1150,86 +1225,46 @@ nv50_graph_construct_xfer1(struct nouveau_grctx *ctx)
+ 	} else {
+ 		/* Strand 0 */
+ 		ctx->ctxvals_pos = offset;
+-		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-			xf_emit(ctx, 0x385, 0);
+-		else
+-			xf_emit(ctx, 0x384, 0);
++		nv50_graph_construct_gene_dispatch(ctx);
+ 		nv50_graph_construct_gene_m2mf(ctx);
+-		xf_emit(ctx, 0x950, 0);
+-		nv50_graph_construct_gene_unk10(ctx);
+-		xf_emit(ctx, 1, 0x0fac6881);
+-		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+-			xf_emit(ctx, 1, 1);
+-			xf_emit(ctx, 3, 0);
+-		}
+-		nv50_graph_construct_gene_unk8(ctx);
+-		if (dev_priv->chipset == 0xa0)
+-			xf_emit(ctx, 0x189, 0);
+-		else if (dev_priv->chipset == 0xa3)
+-			xf_emit(ctx, 0xd5, 0);
+-		else if (dev_priv->chipset == 0xa5)
+-			xf_emit(ctx, 0x99, 0);
+-		else if (dev_priv->chipset == 0xaa)
+-			xf_emit(ctx, 0x65, 0);
+-		else
+-			xf_emit(ctx, 0x6d, 0);
+-		nv50_graph_construct_gene_unk9(ctx);
++		nv50_graph_construct_gene_unk34xx(ctx);
++		nv50_graph_construct_gene_csched(ctx);
++		nv50_graph_construct_gene_unk1cxx(ctx);
++		nv50_graph_construct_gene_strmout(ctx);
+ 		if ((ctx->ctxvals_pos-offset)/8 > size)
+ 			size = (ctx->ctxvals_pos-offset)/8;
+ 
+ 		/* Strand 1 */
+ 		ctx->ctxvals_pos = offset + 1;
+-		nv50_graph_construct_gene_unk1(ctx);
++		nv50_graph_construct_gene_unk10xx(ctx);
+ 		if ((ctx->ctxvals_pos-offset)/8 > size)
+ 			size = (ctx->ctxvals_pos-offset)/8;
+ 
+ 		/* Strand 2 */
+ 		ctx->ctxvals_pos = offset + 2;
+-		if (dev_priv->chipset == 0xa0) {
+-			nv50_graph_construct_gene_unk2(ctx);
+-		}
+-		xf_emit(ctx, 0x36, 0);
+-		nv50_graph_construct_gene_unk5(ctx);
++		if (dev_priv->chipset == 0xa0)
++			nv50_graph_construct_gene_unk14xx(ctx);
++		nv50_graph_construct_gene_unk24xx(ctx);
+ 		if ((ctx->ctxvals_pos-offset)/8 > size)
+ 			size = (ctx->ctxvals_pos-offset)/8;
+ 
+ 		/* Strand 3 */
+ 		ctx->ctxvals_pos = offset + 3;
+-		xf_emit(ctx, 1, 0);
+-		xf_emit(ctx, 1, 1);
+-		nv50_graph_construct_gene_unk6(ctx);
++		nv50_graph_construct_gene_vfetch(ctx);
+ 		if ((ctx->ctxvals_pos-offset)/8 > size)
+ 			size = (ctx->ctxvals_pos-offset)/8;
+ 
+ 		/* Strand 4 */
+ 		ctx->ctxvals_pos = offset + 4;
+-		if (dev_priv->chipset == 0xa0)
+-			xf_emit(ctx, 0xa80, 0);
+-		else if (dev_priv->chipset == 0xa3)
+-			xf_emit(ctx, 0xa7c, 0);
+-		else
+-			xf_emit(ctx, 0xa7a, 0);
+-		xf_emit(ctx, 1, 0x3fffff);
+-		xf_emit(ctx, 2, 0);
+-		xf_emit(ctx, 1, 0x1fff);
++		nv50_graph_construct_gene_ccache(ctx);
+ 		if ((ctx->ctxvals_pos-offset)/8 > size)
+ 			size = (ctx->ctxvals_pos-offset)/8;
+ 
+ 		/* Strand 5 */
+ 		ctx->ctxvals_pos = offset + 5;
+-		xf_emit(ctx, 1, 0);
+-		xf_emit(ctx, 1, 0x0fac6881);
+-		xf_emit(ctx, 0xb, 0);
+-		xf_emit(ctx, 2, 0x4e3bfdf);
+-		xf_emit(ctx, 3, 0);
+-		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-			xf_emit(ctx, 1, 0x11);
+-		xf_emit(ctx, 1, 0);
+-		xf_emit(ctx, 2, 0x4e3bfdf);
+-		xf_emit(ctx, 2, 0);
+-		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-			xf_emit(ctx, 1, 0x11);
+-		xf_emit(ctx, 1, 0);
++		nv50_graph_construct_gene_ropm2(ctx);
++		nv50_graph_construct_gene_ropm1(ctx);
++		/* per-ROP context */
+ 		for (i = 0; i < 8; i++)
+ 			if (units & (1<<(i+16)))
+ 				nv50_graph_construct_gene_ropc(ctx);
+@@ -1238,10 +1273,9 @@ nv50_graph_construct_xfer1(struct nouveau_grctx *ctx)
+ 
+ 		/* Strand 6 */
+ 		ctx->ctxvals_pos = offset + 6;
+-		nv50_graph_construct_gene_unk3(ctx);
+-		xf_emit(ctx, 0xb, 0);
+-		nv50_graph_construct_gene_unk4(ctx);
+-		nv50_graph_construct_gene_unk7(ctx);
++		nv50_graph_construct_gene_zcull(ctx);
++		nv50_graph_construct_gene_clipid(ctx);
++		nv50_graph_construct_gene_eng2d(ctx);
+ 		if (units & (1 << 0))
+ 			nv50_graph_construct_xfer_tp(ctx);
+ 		if (units & (1 << 1))
+@@ -1269,7 +1303,7 @@ nv50_graph_construct_xfer1(struct nouveau_grctx *ctx)
+ 			if (units & (1 << 9))
+ 				nv50_graph_construct_xfer_tp(ctx);
+ 		} else {
+-			nv50_graph_construct_gene_unk2(ctx);
++			nv50_graph_construct_gene_unk14xx(ctx);
+ 		}
+ 		if ((ctx->ctxvals_pos-offset)/8 > size)
+ 			size = (ctx->ctxvals_pos-offset)/8;
+@@ -1290,9 +1324,70 @@ nv50_graph_construct_xfer1(struct nouveau_grctx *ctx)
+  */
+ 
+ static void
++nv50_graph_construct_gene_dispatch(struct nouveau_grctx *ctx)
++{
++	/* start of strand 0; all-zero, so xf_emit only advances ctxvals_pos */
++	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
++	/* SEEK */
++	if (dev_priv->chipset == 0x50)
++		xf_emit(ctx, 5, 0);
++	else if (!IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 6, 0);
++	else
++		xf_emit(ctx, 4, 0);
++	/* SEEK */
++	/* PGRAPH's internal FIFO: 3-word slots, 8 on NV50, 0x100 otherwise */
++	if (dev_priv->chipset == 0x50)
++		xf_emit(ctx, 8*3, 0);
++	else
++		xf_emit(ctx, 0x100*3, 0);
++	/* and another bonus slot?!? */
++	xf_emit(ctx, 3, 0);
++	/* and YET ANOTHER bonus slot? */
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 3, 0);
++	/* SEEK */
++	/* CTX_SWITCH: caches of gr objects bound to subchannels. 8 values, last used index */
++	xf_emit(ctx, 9, 0);
++	/* SEEK */
++	xf_emit(ctx, 9, 0);
++	/* SEEK */
++	xf_emit(ctx, 9, 0);
++	/* SEEK */
++	xf_emit(ctx, 9, 0);
++	/* SEEK */
++	if (dev_priv->chipset < 0x90)
++		xf_emit(ctx, 4, 0);
++	/* SEEK */
++	xf_emit(ctx, 2, 0);
++	/* SEEK */
++	xf_emit(ctx, 6*2, 0);
++	xf_emit(ctx, 2, 0);
++	/* SEEK */
++	xf_emit(ctx, 2, 0);
++	/* SEEK */
++	xf_emit(ctx, 6*2, 0);
++	xf_emit(ctx, 2, 0);
++	/* SEEK */
++	if (dev_priv->chipset == 0x50)
++		xf_emit(ctx, 0x1c, 0);
++	else if (dev_priv->chipset < 0xa0)
++		xf_emit(ctx, 0x1e, 0);
++	else
++		xf_emit(ctx, 0x22, 0);
++	/* SEEK */
++	xf_emit(ctx, 0x15, 0);
++}
++
++static void
+ nv50_graph_construct_gene_m2mf(struct nouveau_grctx *ctx)
+ {
+-	/* m2mf state */
++	/* Strand 0, right after dispatch */
++	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
++	int smallm2mf = 0;
++	if (dev_priv->chipset < 0x92 || dev_priv->chipset == 0x98)
++		smallm2mf = 1;
++	/* SEEK */
+ 	xf_emit (ctx, 1, 0);		/* DMA_NOTIFY instance >> 4 */
+ 	xf_emit (ctx, 1, 0);		/* DMA_BUFFER_IN instance >> 4 */
+ 	xf_emit (ctx, 1, 0);		/* DMA_BUFFER_OUT instance >> 4 */
+@@ -1319,427 +1414,975 @@ nv50_graph_construct_gene_m2mf(struct nouveau_grctx *ctx)
+ 	xf_emit (ctx, 1, 0);		/* TILING_POSITION_OUT */
+ 	xf_emit (ctx, 1, 0);		/* OFFSET_IN_HIGH */
+ 	xf_emit (ctx, 1, 0);		/* OFFSET_OUT_HIGH */
++	/* SEEK */
++	if (smallm2mf)
++		xf_emit(ctx, 0x40, 0);	/* 20 * ffffffff, 3ffff */
++	else
++		xf_emit(ctx, 0x100, 0);	/* 80 * ffffffff, 3ffff */
++	xf_emit(ctx, 4, 0);		/* 1f/7f, 0, 1f/7f, 0 [1f for smallm2mf, 7f otherwise] */
++	/* SEEK */
++	if (smallm2mf)
++		xf_emit(ctx, 0x400, 0);	/* ffffffff */
++	else
++		xf_emit(ctx, 0x800, 0);	/* ffffffff */
++	xf_emit(ctx, 4, 0);		/* ff/1ff, 0, 0, 0 [ff for smallm2mf, 1ff otherwise] */
++	/* SEEK */
++	xf_emit(ctx, 0x40, 0);		/* 20 * bits ffffffff, 3ffff */
++	xf_emit(ctx, 0x6, 0);		/* 1f, 0, 1f, 0, 1f, 0 */
+ }
+ 
+ static void
+-nv50_graph_construct_gene_unk1(struct nouveau_grctx *ctx)
++nv50_graph_construct_gene_ccache(struct nouveau_grctx *ctx)
+ {
+ 	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+-	/* end of area 2 on pre-NVA0, area 1 on NVAx */
+-	xf_emit(ctx, 2, 4);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x80);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 1, 0x80c14);
+-	xf_emit(ctx, 1, 0);
+-	if (dev_priv->chipset == 0x50)
+-		xf_emit(ctx, 1, 0x3ff);
+-	else
+-		xf_emit(ctx, 1, 0x7ff);
++	xf_emit(ctx, 2, 0);		/* RO */
++	xf_emit(ctx, 0x800, 0);		/* ffffffff */
+ 	switch (dev_priv->chipset) {
+ 	case 0x50:
+-	case 0x86:
+-	case 0x98:
+-	case 0xaa:
+-	case 0xac:
+-		xf_emit(ctx, 0x542, 0);
++	case 0x92:
++	case 0xa0:
++		xf_emit(ctx, 0x2b, 0);
+ 		break;
+ 	case 0x84:
+-	case 0x92:
++		xf_emit(ctx, 0x29, 0);
++		break;
+ 	case 0x94:
+ 	case 0x96:
+-		xf_emit(ctx, 0x942, 0);
+-		break;
+-	case 0xa0:
+ 	case 0xa3:
+-		xf_emit(ctx, 0x2042, 0);
++		xf_emit(ctx, 0x27, 0);
+ 		break;
++	case 0x86:
++	case 0x98:
+ 	case 0xa5:
+ 	case 0xa8:
+-		xf_emit(ctx, 0x842, 0);
++	case 0xaa:
++	case 0xac:
++	case 0xaf:
++		xf_emit(ctx, 0x25, 0);
+ 		break;
+ 	}
+-	xf_emit(ctx, 2, 4);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x80);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x27);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x26);
+-	xf_emit(ctx, 3, 0);
++	/* CB bindings, 0x80 of them, two words each (hence 0x100 below): first
++	 * word is address >> 8, second is size >> 4 | valid << 24 */
++	xf_emit(ctx, 0x100, 0);		/* ffffffff CB_DEF */
++	xf_emit(ctx, 1, 0);		/* 0000007f CB_ADDR_BUFFER */
++	xf_emit(ctx, 1, 0);		/* 0 */
++	xf_emit(ctx, 0x30, 0);		/* ff SET_PROGRAM_CB */
++	xf_emit(ctx, 1, 0);		/* 3f last SET_PROGRAM_CB */
++	xf_emit(ctx, 4, 0);		/* RO */
++	xf_emit(ctx, 0x100, 0);		/* ffffffff */
++	xf_emit(ctx, 8, 0);		/* 1f, 0, 0, ... */
++	xf_emit(ctx, 8, 0);		/* ffffffff */
++	xf_emit(ctx, 4, 0);		/* ffffffff */
++	xf_emit(ctx, 1, 0);		/* 3 */
++	xf_emit(ctx, 1, 0);		/* ffffffff */
++	xf_emit(ctx, 1, 0);		/* 0000ffff DMA_CODE_CB */
++	xf_emit(ctx, 1, 0);		/* 0000ffff DMA_TIC */
++	xf_emit(ctx, 1, 0);		/* 0000ffff DMA_TSC */
++	xf_emit(ctx, 1, 0);		/* 00000001 LINKED_TSC */
++	xf_emit(ctx, 1, 0);		/* 000000ff TIC_ADDRESS_HIGH */
++	xf_emit(ctx, 1, 0);		/* ffffffff TIC_ADDRESS_LOW */
++	xf_emit(ctx, 1, 0x3fffff);	/* 003fffff TIC_LIMIT */
++	xf_emit(ctx, 1, 0);		/* 000000ff TSC_ADDRESS_HIGH */
++	xf_emit(ctx, 1, 0);		/* ffffffff TSC_ADDRESS_LOW */
++	xf_emit(ctx, 1, 0x1fff);	/* 000fffff TSC_LIMIT */
++	xf_emit(ctx, 1, 0);		/* 000000ff VP_ADDRESS_HIGH */
++	xf_emit(ctx, 1, 0);		/* ffffffff VP_ADDRESS_LOW */
++	xf_emit(ctx, 1, 0);		/* 00ffffff VP_START_ID */
++	xf_emit(ctx, 1, 0);		/* 000000ff CB_DEF_ADDRESS_HIGH */
++	xf_emit(ctx, 1, 0);		/* ffffffff CB_DEF_ADDRESS_LOW */
++	xf_emit(ctx, 1, 0);		/* 00000001 GP_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 000000ff GP_ADDRESS_HIGH */
++	xf_emit(ctx, 1, 0);		/* ffffffff GP_ADDRESS_LOW */
++	xf_emit(ctx, 1, 0);		/* 00ffffff GP_START_ID */
++	xf_emit(ctx, 1, 0);		/* 000000ff FP_ADDRESS_HIGH */
++	xf_emit(ctx, 1, 0);		/* ffffffff FP_ADDRESS_LOW */
++	xf_emit(ctx, 1, 0);		/* 00ffffff FP_START_ID */
+ }
+ 
+ static void
+-nv50_graph_construct_gene_unk10(struct nouveau_grctx *ctx)
++nv50_graph_construct_gene_unk10xx(struct nouveau_grctx *ctx)
+ {
++	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
++	int i;
+ 	/* end of area 2 on pre-NVA0, area 1 on NVAx */
+-	xf_emit(ctx, 0x10, 0x04000000);
+-	xf_emit(ctx, 0x24, 0);
+-	xf_emit(ctx, 2, 0x04e3bfdf);
+-	xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 0x1fe21);
++	xf_emit(ctx, 1, 4);		/* 000000ff GP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 4);		/* 0000007f VP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 0);		/* 00000001 GP_ENABLE */
++	xf_emit(ctx, 1, 0x80);		/* 0000ffff GP_VERTEX_OUTPUT_COUNT */
++	xf_emit(ctx, 1, 4);		/* 000000ff GP_REG_ALLOC_RESULT */
++	xf_emit(ctx, 1, 0x80c14);	/* 01ffffff SEMANTIC_COLOR */
++	xf_emit(ctx, 1, 0);		/* 00000001 VERTEX_TWO_SIDE_ENABLE */
++	if (dev_priv->chipset == 0x50)
++		xf_emit(ctx, 1, 0x3ff);
++	else
++		xf_emit(ctx, 1, 0x7ff);	/* 000007ff */
++	xf_emit(ctx, 1, 0);		/* 111/113 */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A30 */
++	for (i = 0; i < 8; i++) {
++		switch (dev_priv->chipset) {	/* no default: unlisted chipsets skip this array */
++		case 0x50:
++		case 0x86:
++		case 0x98:
++		case 0xaa:
++		case 0xac:
++			xf_emit(ctx, 0xa0, 0);	/* ffffffff */
++			break;
++		case 0x84:
++		case 0x92:
++		case 0x94:
++		case 0x96:
++			xf_emit(ctx, 0x120, 0);
++			break;
++		case 0xa5:
++		case 0xa8:
++			xf_emit(ctx, 0x100, 0);	/* ffffffff */
++			break;
++		case 0xa0:
++		case 0xa3:
++		case 0xaf:
++			xf_emit(ctx, 0x400, 0);	/* ffffffff */
++			break;
++		}
++		xf_emit(ctx, 4, 0);	/* 3f, 0, 0, 0 */
++		xf_emit(ctx, 4, 0);	/* ffffffff */
++	}
++	xf_emit(ctx, 1, 4);		/* 000000ff GP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 4);		/* 0000007f VP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 0);		/* 00000001 GP_ENABLE */
++	xf_emit(ctx, 1, 0x80);		/* 0000ffff GP_VERTEX_OUTPUT_COUNT */
++	xf_emit(ctx, 1, 4);		/* 000000ff GP_REG_ALLOC_TEMP */
++	xf_emit(ctx, 1, 1);		/* 00000001 RASTERIZE_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 tesla UNK1900 */
++	xf_emit(ctx, 1, 0x27);		/* 000000ff UNK0FD4 */
++	xf_emit(ctx, 1, 0);		/* 0001ffff GP_BUILTIN_RESULT_EN */
++	xf_emit(ctx, 1, 0x26);		/* 000000ff SEMANTIC_LAYER */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A30 */
++}
++
++static void
++nv50_graph_construct_gene_unk34xx(struct nouveau_grctx *ctx)
++{
++	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
++	/* last gene of strand 2 on pre-NVA0; in strand 0, after m2mf, otherwise */
++	xf_emit(ctx, 1, 0);		/* 00000001 VIEWPORT_CLIP_RECTS_EN */
++	xf_emit(ctx, 1, 0);		/* 00000003 VIEWPORT_CLIP_MODE */
++	xf_emit(ctx, 0x10, 0x04000000);	/* 07ffffff VIEWPORT_CLIP_HORIZ*8, VIEWPORT_CLIP_VERT*8 */
++	xf_emit(ctx, 1, 0);		/* 00000001 POLYGON_STIPPLE_ENABLE */
++	xf_emit(ctx, 0x20, 0);		/* ffffffff POLYGON_STIPPLE */
++	xf_emit(ctx, 2, 0);		/* 00007fff WINDOW_OFFSET_XY */
++	xf_emit(ctx, 1, 0);		/* ffff0ff3 */
++	xf_emit(ctx, 1, 0x04e3bfdf);	/* ffffffff UNK0D64 */
++	xf_emit(ctx, 1, 0x04e3bfdf);	/* ffffffff UNK0DF4 */
++	xf_emit(ctx, 1, 0);		/* 00000003 WINDOW_ORIGIN */
++	xf_emit(ctx, 1, 0);		/* 00000007 */
++	xf_emit(ctx, 1, 0x1fe21);	/* 0001ffff tesla UNK0FAC */
++	if (dev_priv->chipset >= 0xa0)
++		xf_emit(ctx, 1, 0x0fac6881);
++	if (IS_NVA3F(dev_priv->chipset)) {
++		xf_emit(ctx, 1, 1);
++		xf_emit(ctx, 3, 0);
++	}
+ }
+ 
+ static void
+-nv50_graph_construct_gene_unk2(struct nouveau_grctx *ctx)
++nv50_graph_construct_gene_unk14xx(struct nouveau_grctx *ctx)
+ {
+ 	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+ 	/* middle of area 2 on pre-NVA0, beginning of area 2 on NVA0, area 7 on >NVA0 */
+ 	if (dev_priv->chipset != 0x50) {
+-		xf_emit(ctx, 5, 0);
+-		xf_emit(ctx, 1, 0x80c14);
+-		xf_emit(ctx, 2, 0);
+-		xf_emit(ctx, 1, 0x804);
+-		xf_emit(ctx, 1, 0);
+-		xf_emit(ctx, 2, 4);
+-		xf_emit(ctx, 1, 0x8100c12);
++		xf_emit(ctx, 5, 0);		/* ffffffff */
++		xf_emit(ctx, 1, 0x80c14);	/* 01ffffff SEMANTIC_COLOR */
++		xf_emit(ctx, 1, 0);		/* 00000001 */
++		xf_emit(ctx, 1, 0);		/* 000003ff */
++		xf_emit(ctx, 1, 0x804);		/* 00000fff SEMANTIC_CLIP */
++		xf_emit(ctx, 1, 0);		/* 00000001 */
++		xf_emit(ctx, 2, 4);		/* 7f, ff */
++		xf_emit(ctx, 1, 0x8100c12);	/* 1fffffff FP_INTERPOLANT_CTRL */
+ 	}
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 2, 4);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x10);
+-	if (dev_priv->chipset == 0x50)
+-		xf_emit(ctx, 3, 0);
+-	else
+-		xf_emit(ctx, 4, 0);
+-	xf_emit(ctx, 1, 0x804);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0x1a);
++	xf_emit(ctx, 1, 0);			/* ffffffff tesla UNK1A30 */
++	xf_emit(ctx, 1, 4);			/* 0000007f VP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 4);			/* 000000ff GP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 0);			/* 00000001 GP_ENABLE */
++	xf_emit(ctx, 1, 0x10);			/* 7f/ff VIEW_VOLUME_CLIP_CTRL */
++	xf_emit(ctx, 1, 0);			/* 000000ff VP_CLIP_DISTANCE_ENABLE */
+ 	if (dev_priv->chipset != 0x50)
+-		xf_emit(ctx, 1, 0x7f);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0x80c14);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x8100c12);
+-	xf_emit(ctx, 2, 4);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x10);
+-	xf_emit(ctx, 3, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0x8100c12);
+-	xf_emit(ctx, 6, 0);
+-	if (dev_priv->chipset == 0x50)
+-		xf_emit(ctx, 1, 0x3ff);
+-	else
+-		xf_emit(ctx, 1, 0x7ff);
+-	xf_emit(ctx, 1, 0x80c14);
+-	xf_emit(ctx, 0x38, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 0x10);
+-	xf_emit(ctx, 0x38, 0);
+-	xf_emit(ctx, 2, 0x88);
+-	xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 0x16, 0);
+-	xf_emit(ctx, 1, 0x26);
+-	xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 0x3f800000);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 4, 0);
+-	else
+-		xf_emit(ctx, 3, 0);
+-	xf_emit(ctx, 1, 0x1a);
+-	xf_emit(ctx, 1, 0x10);
++		xf_emit(ctx, 1, 0);		/* 3ff */
++	xf_emit(ctx, 1, 0);			/* 000000ff tesla UNK1940 */
++	xf_emit(ctx, 1, 0);			/* 00000001 tesla UNK0D7C */
++	xf_emit(ctx, 1, 0x804);			/* 00000fff SEMANTIC_CLIP */
++	xf_emit(ctx, 1, 1);			/* 00000001 VIEWPORT_TRANSFORM_EN */
++	xf_emit(ctx, 1, 0x1a);			/* 0000001f POLYGON_MODE */
+ 	if (dev_priv->chipset != 0x50)
+-		xf_emit(ctx, 0x28, 0);
++		xf_emit(ctx, 1, 0x7f);		/* 000000ff tesla UNK0FFC */
++	xf_emit(ctx, 1, 0);			/* ffffffff tesla UNK1A30 */
++	xf_emit(ctx, 1, 1);			/* 00000001 SHADE_MODEL */
++	xf_emit(ctx, 1, 0x80c14);		/* 01ffffff SEMANTIC_COLOR */
++	xf_emit(ctx, 1, 0);			/* 00000001 tesla UNK1900 */
++	xf_emit(ctx, 1, 0x8100c12);		/* 1fffffff FP_INTERPOLANT_CTRL */
++	xf_emit(ctx, 1, 4);			/* 0000007f VP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 4);			/* 000000ff GP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 0);			/* 00000001 GP_ENABLE */
++	xf_emit(ctx, 1, 0x10);			/* 7f/ff VIEW_VOLUME_CLIP_CTRL */
++	xf_emit(ctx, 1, 0);			/* 00000001 tesla UNK0D7C */
++	xf_emit(ctx, 1, 0);			/* 00000001 tesla UNK0F8C */
++	xf_emit(ctx, 1, 0);			/* ffffffff tesla UNK1A30 */
++	xf_emit(ctx, 1, 1);			/* 00000001 VIEWPORT_TRANSFORM_EN */
++	xf_emit(ctx, 1, 0x8100c12);		/* 1fffffff FP_INTERPOLANT_CTRL */
++	xf_emit(ctx, 4, 0);			/* ffffffff NOPERSPECTIVE_BITMAP */
++	xf_emit(ctx, 1, 0);			/* 00000001 tesla UNK1900 */
++	xf_emit(ctx, 1, 0);			/* 0000000f */
++	if (dev_priv->chipset == 0x50)
++		xf_emit(ctx, 1, 0x3ff);		/* 000003ff tesla UNK0D68 */
+ 	else
+-		xf_emit(ctx, 0x25, 0);
+-	xf_emit(ctx, 1, 0x52);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x26);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 2, 4);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x1a);
+-	xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 0x00ffff00);
+-	xf_emit(ctx, 1, 0);
++		xf_emit(ctx, 1, 0x7ff);		/* 000007ff tesla UNK0D68 */
++	xf_emit(ctx, 1, 0x80c14);		/* 01ffffff SEMANTIC_COLOR */
++	xf_emit(ctx, 1, 0);			/* 00000001 VERTEX_TWO_SIDE_ENABLE */
++	xf_emit(ctx, 0x30, 0);			/* ffffffff VIEWPORT_SCALE: X0, Y0, Z0, X1, Y1, ... */
++	xf_emit(ctx, 3, 0);			/* f, 0, 0 */
++	xf_emit(ctx, 3, 0);			/* ffffffff last VIEWPORT_SCALE? */
++	xf_emit(ctx, 1, 0);			/* ffffffff tesla UNK1A30 */
++	xf_emit(ctx, 1, 1);			/* 00000001 VIEWPORT_TRANSFORM_EN */
++	xf_emit(ctx, 1, 0);			/* 00000001 tesla UNK1900 */
++	xf_emit(ctx, 1, 0);			/* 00000001 tesla UNK1924 */
++	xf_emit(ctx, 1, 0x10);			/* 000000ff VIEW_VOLUME_CLIP_CTRL */
++	xf_emit(ctx, 1, 0);			/* 00000001 */
++	xf_emit(ctx, 0x30, 0);			/* ffffffff VIEWPORT_TRANSLATE */
++	xf_emit(ctx, 3, 0);			/* f, 0, 0 */
++	xf_emit(ctx, 3, 0);			/* ffffffff */
++	xf_emit(ctx, 1, 0);			/* ffffffff tesla UNK1A30 */
++	xf_emit(ctx, 2, 0x88);			/* 000001ff tesla UNK19D8 */
++	xf_emit(ctx, 1, 0);			/* 00000001 tesla UNK1924 */
++	xf_emit(ctx, 1, 0);			/* ffffffff tesla UNK1A30 */
++	xf_emit(ctx, 1, 4);			/* 0000000f CULL_MODE */
++	xf_emit(ctx, 2, 0);			/* 07ffffff SCREEN_SCISSOR */
++	xf_emit(ctx, 2, 0);			/* 00007fff WINDOW_OFFSET_XY */
++	xf_emit(ctx, 1, 0);			/* 00000003 WINDOW_ORIGIN */
++	xf_emit(ctx, 0x10, 0);			/* 00000001 SCISSOR_ENABLE */
++	xf_emit(ctx, 1, 0);			/* 0001ffff GP_BUILTIN_RESULT_EN */
++	xf_emit(ctx, 1, 0x26);			/* 000000ff SEMANTIC_LAYER */
++	xf_emit(ctx, 1, 0);			/* 00000001 tesla UNK1900 */
++	xf_emit(ctx, 1, 0);			/* 0000000f */
++	xf_emit(ctx, 1, 0x3f800000);		/* ffffffff LINE_WIDTH */
++	xf_emit(ctx, 1, 0);			/* 00000001 LINE_STIPPLE_ENABLE */
++	xf_emit(ctx, 1, 0);			/* 00000001 LINE_SMOOTH_ENABLE */
++	xf_emit(ctx, 1, 0);			/* 00000007 MULTISAMPLE_SAMPLES_LOG2 */
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 1, 0);		/* 00000001 */
++	xf_emit(ctx, 1, 0x1a);			/* 0000001f POLYGON_MODE */
++	xf_emit(ctx, 1, 0x10);			/* 000000ff VIEW_VOLUME_CLIP_CTRL */
++	if (dev_priv->chipset != 0x50) {
++		xf_emit(ctx, 1, 0);		/* ffffffff */
++		xf_emit(ctx, 1, 0);		/* 00000001 */
++		xf_emit(ctx, 1, 0);		/* 000003ff */
++	}
++	xf_emit(ctx, 0x20, 0);			/* 10xbits ffffffff, 3fffff. SCISSOR_* */
++	xf_emit(ctx, 1, 0);			/* f */
++	xf_emit(ctx, 1, 0);			/* 0? */
++	xf_emit(ctx, 1, 0);			/* ffffffff */
++	xf_emit(ctx, 1, 0);			/* 003fffff */
++	xf_emit(ctx, 1, 0);			/* ffffffff tesla UNK1A30 */
++	xf_emit(ctx, 1, 0x52);			/* 000001ff SEMANTIC_PTSZ */
++	xf_emit(ctx, 1, 0);			/* 0001ffff GP_BUILTIN_RESULT_EN */
++	xf_emit(ctx, 1, 0x26);			/* 000000ff SEMANTIC_LAYER */
++	xf_emit(ctx, 1, 0);			/* 00000001 tesla UNK1900 */
++	xf_emit(ctx, 1, 4);			/* 0000007f VP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 4);			/* 000000ff GP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 0);			/* 00000001 GP_ENABLE */
++	xf_emit(ctx, 1, 0x1a);			/* 0000001f POLYGON_MODE */
++	xf_emit(ctx, 1, 0);			/* 00000001 LINE_SMOOTH_ENABLE */
++	xf_emit(ctx, 1, 0);			/* 00000001 LINE_STIPPLE_ENABLE */
++	xf_emit(ctx, 1, 0x00ffff00);		/* 00ffffff LINE_STIPPLE_PATTERN */
++	xf_emit(ctx, 1, 0);			/* 0000000f */
+ }
+ 
+ static void
+-nv50_graph_construct_gene_unk3(struct nouveau_grctx *ctx)
++nv50_graph_construct_gene_zcull(struct nouveau_grctx *ctx)
+ {
+ 	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+-	/* end of area 0 on pre-NVA0, beginning of area 6 on NVAx */
+-	xf_emit(ctx, 1, 0x3f);
+-	xf_emit(ctx, 0xa, 0);
+-	xf_emit(ctx, 1, 2);
+-	xf_emit(ctx, 2, 0x04000000);
+-	xf_emit(ctx, 8, 0);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 3, 0);
+-	xf_emit(ctx, 1, 4);
+-	if (dev_priv->chipset == 0x50)
+-		xf_emit(ctx, 0x10, 0);
+-	else
+-		xf_emit(ctx, 0x11, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0x1001);
+-	xf_emit(ctx, 4, 0xffff);
+-	xf_emit(ctx, 0x20, 0);
+-	xf_emit(ctx, 0x10, 0x3f800000);
+-	xf_emit(ctx, 1, 0x10);
+-	if (dev_priv->chipset == 0x50)
+-		xf_emit(ctx, 1, 0);
+-	else
+-		xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 3);
+-	xf_emit(ctx, 2, 0);
++	/* end of strand 0 on pre-NVA0, beginning of strand 6 on NVAx */
++	/* SEEK */
++	xf_emit(ctx, 1, 0x3f);		/* 0000003f UNK1590 */
++	xf_emit(ctx, 1, 0);		/* 00000001 ALPHA_TEST_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000007 MULTISAMPLE_SAMPLES_LOG2 */
++	xf_emit(ctx, 1, 0);		/* 00000001 tesla UNK1534 */
++	xf_emit(ctx, 1, 0);		/* 00000007 STENCIL_BACK_FUNC_FUNC */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_BACK_FUNC_MASK */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_BACK_FUNC_REF */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_BACK_MASK */
++	xf_emit(ctx, 3, 0);		/* 00000007 STENCIL_BACK_OP_FAIL, ZFAIL, ZPASS */
++	xf_emit(ctx, 1, 2);		/* 00000003 tesla UNK143C */
++	xf_emit(ctx, 2, 0x04000000);	/* 07ffffff tesla UNK0D6C */
++	xf_emit(ctx, 1, 0);		/* ffff0ff3 */
++	xf_emit(ctx, 1, 0);		/* 00000001 CLIPID_ENABLE */
++	xf_emit(ctx, 2, 0);		/* ffffffff DEPTH_BOUNDS */
++	xf_emit(ctx, 1, 0);		/* 00000001 */
++	xf_emit(ctx, 1, 0);		/* 00000007 DEPTH_TEST_FUNC */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_TEST_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_WRITE_ENABLE */
++	xf_emit(ctx, 1, 4);		/* 0000000f CULL_MODE */
++	xf_emit(ctx, 1, 0);		/* 0000ffff */
++	xf_emit(ctx, 1, 0);		/* 00000001 UNK0FB0 */
++	xf_emit(ctx, 1, 0);		/* 00000001 POLYGON_STIPPLE_ENABLE */
++	xf_emit(ctx, 1, 4);		/* 00000007 FP_CONTROL */
++	xf_emit(ctx, 1, 0);		/* ffffffff */
++	xf_emit(ctx, 1, 0);		/* 0001ffff GP_BUILTIN_RESULT_EN */
++	xf_emit(ctx, 1, 0);		/* 000000ff CLEAR_STENCIL */
++	xf_emit(ctx, 1, 0);		/* 00000007 STENCIL_FRONT_FUNC_FUNC */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_FRONT_FUNC_MASK */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_FRONT_FUNC_REF */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_FRONT_MASK */
++	xf_emit(ctx, 3, 0);		/* 00000007 STENCIL_FRONT_OP_FAIL, ZFAIL, ZPASS */
++	xf_emit(ctx, 1, 0);		/* 00000001 STENCIL_FRONT_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 STENCIL_BACK_ENABLE */
++	xf_emit(ctx, 1, 0);		/* ffffffff CLEAR_DEPTH */
++	xf_emit(ctx, 1, 0);		/* 00000007 */
++	if (dev_priv->chipset != 0x50)
++		xf_emit(ctx, 1, 0);	/* 00000003 tesla UNK1108 */
++	xf_emit(ctx, 1, 0);		/* 00000001 SAMPLECNT_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 0000000f ZETA_FORMAT */
++	xf_emit(ctx, 1, 1);		/* 00000001 ZETA_ENABLE */
++	xf_emit(ctx, 1, 0x1001);	/* 00001fff ZETA_ARRAY_MODE */
++	/* SEEK */
++	xf_emit(ctx, 4, 0xffff);	/* 0000ffff MSAA_MASK */
++	xf_emit(ctx, 0x10, 0);		/* 00000001 SCISSOR_ENABLE */
++	xf_emit(ctx, 0x10, 0);		/* ffffffff DEPTH_RANGE_NEAR */
++	xf_emit(ctx, 0x10, 0x3f800000);	/* ffffffff DEPTH_RANGE_FAR */
++	xf_emit(ctx, 1, 0x10);		/* 7f/ff/3ff VIEW_VOLUME_CLIP_CTRL */
++	xf_emit(ctx, 1, 0);		/* 00000001 VIEWPORT_CLIP_RECTS_EN */
++	xf_emit(ctx, 1, 3);		/* 00000003 FP_CTRL_UNK196C */
++	xf_emit(ctx, 1, 0);		/* 00000003 tesla UNK1968 */
++	if (dev_priv->chipset != 0x50)
++		xf_emit(ctx, 1, 0);	/* 0fffffff tesla UNK1104 */
++	xf_emit(ctx, 1, 0);		/* 00000001 tesla UNK151C */
+ }
+ 
+ static void
+-nv50_graph_construct_gene_unk4(struct nouveau_grctx *ctx)
++nv50_graph_construct_gene_clipid(struct nouveau_grctx *ctx)
+ {
+-	/* middle of area 0 on pre-NVA0, middle of area 6 on NVAx */
+-	xf_emit(ctx, 2, 0x04000000);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x80);
+-	xf_emit(ctx, 3, 0);
+-	xf_emit(ctx, 1, 0x80);
+-	xf_emit(ctx, 1, 0);
++	/* middle of strand 0 on pre-NVA0 [after 24xx], middle of strand 6 on NVAx */
++	/* SEEK */
++	xf_emit(ctx, 1, 0);		/* 00000007 UNK0FB4 */
++	/* SEEK */
++	xf_emit(ctx, 4, 0);		/* 07ffffff CLIPID_REGION_HORIZ */
++	xf_emit(ctx, 4, 0);		/* 07ffffff CLIPID_REGION_VERT */
++	xf_emit(ctx, 2, 0);		/* 07ffffff SCREEN_SCISSOR */
++	xf_emit(ctx, 2, 0x04000000);	/* 07ffffff UNK1508 */
++	xf_emit(ctx, 1, 0);		/* 00000001 CLIPID_ENABLE */
++	xf_emit(ctx, 1, 0x80);		/* 00003fff CLIPID_WIDTH */
++	xf_emit(ctx, 1, 0);		/* 000000ff CLIPID_ID */
++	xf_emit(ctx, 1, 0);		/* 000000ff CLIPID_ADDRESS_HIGH */
++	xf_emit(ctx, 1, 0);		/* ffffffff CLIPID_ADDRESS_LOW */
++	xf_emit(ctx, 1, 0x80);		/* 00003fff CLIPID_HEIGHT */
++	xf_emit(ctx, 1, 0);		/* 0000ffff DMA_CLIPID */
+ }
+ 
+ static void
+-nv50_graph_construct_gene_unk5(struct nouveau_grctx *ctx)
++nv50_graph_construct_gene_unk24xx(struct nouveau_grctx *ctx)
+ {
+ 	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+-	/* middle of area 0 on pre-NVA0 [after m2mf], end of area 2 on NVAx */
+-	xf_emit(ctx, 2, 4);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 0x1c4d, 0);
++	int i;
++	/* middle of strand 0 on pre-NVA0 [after m2mf], end of strand 2 on NVAx */
++	/* SEEK */
++	xf_emit(ctx, 0x33, 0);
++	/* SEEK */
++	xf_emit(ctx, 2, 0);
++	/* SEEK */
++	xf_emit(ctx, 1, 0);		/* 00000001 GP_ENABLE */
++	xf_emit(ctx, 1, 4);		/* 0000007f VP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 4);		/* 000000ff GP_RESULT_MAP_SIZE */
++	/* SEEK */
++	if (IS_NVA3F(dev_priv->chipset)) {
++		xf_emit(ctx, 4, 0);	/* RO */
++		xf_emit(ctx, 0xe10, 0); /* 190 * 9: 8*ffffffff, 7ff */
++		xf_emit(ctx, 1, 0);	/* 1ff */
++		xf_emit(ctx, 8, 0);	/* 0? */
++		xf_emit(ctx, 9, 0);	/* ffffffff, 7ff */
++
++		xf_emit(ctx, 4, 0);	/* RO */
++		xf_emit(ctx, 0xe10, 0); /* 190 * 9: 8*ffffffff, 7ff */
++		xf_emit(ctx, 1, 0);	/* 1ff */
++		xf_emit(ctx, 8, 0);	/* 0? */
++		xf_emit(ctx, 9, 0);	/* ffffffff, 7ff */
++	}
+ 	else
+-		xf_emit(ctx, 0x1c4b, 0);
+-	xf_emit(ctx, 2, 4);
+-	xf_emit(ctx, 1, 0x8100c12);
++	{
++		xf_emit(ctx, 0xc, 0);	/* RO */
++		/* SEEK */
++		xf_emit(ctx, 0xe10, 0); /* 190 * 9: 8*ffffffff, 7ff */
++		xf_emit(ctx, 1, 0);	/* 1ff */
++		xf_emit(ctx, 8, 0);	/* 0? */
++
++		/* SEEK */
++		xf_emit(ctx, 0xc, 0);	/* RO */
++		/* SEEK */
++		xf_emit(ctx, 0xe10, 0); /* 190 * 9: 8*ffffffff, 7ff */
++		xf_emit(ctx, 1, 0);	/* 1ff */
++		xf_emit(ctx, 8, 0);	/* 0? */
++	}
++	/* SEEK */
++	xf_emit(ctx, 1, 0);		/* 00000001 GP_ENABLE */
++	xf_emit(ctx, 1, 4);		/* 000000ff GP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 4);		/* 0000007f VP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 0x8100c12);	/* 1fffffff FP_INTERPOLANT_CTRL */
+ 	if (dev_priv->chipset != 0x50)
+-		xf_emit(ctx, 1, 3);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x8100c12);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x80c14);
+-	xf_emit(ctx, 1, 1);
++		xf_emit(ctx, 1, 3);	/* 00000003 tesla UNK1100 */
++	/* SEEK */
++	xf_emit(ctx, 1, 0);		/* 00000001 GP_ENABLE */
++	xf_emit(ctx, 1, 0x8100c12);	/* 1fffffff FP_INTERPOLANT_CTRL */
++	xf_emit(ctx, 1, 0);		/* 0000000f VP_GP_BUILTIN_ATTR_EN */
++	xf_emit(ctx, 1, 0x80c14);	/* 01ffffff SEMANTIC_COLOR */
++	xf_emit(ctx, 1, 1);		/* 00000001 */
++	/* SEEK */
+ 	if (dev_priv->chipset >= 0xa0)
+-		xf_emit(ctx, 2, 4);
+-	xf_emit(ctx, 1, 0x80c14);
+-	xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 0x8100c12);
+-	xf_emit(ctx, 1, 0x27);
+-	xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 0x3c1, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 0x16, 0);
+-	xf_emit(ctx, 1, 0x8100c12);
+-	xf_emit(ctx, 1, 0);
++		xf_emit(ctx, 2, 4);	/* 000000ff */
++	xf_emit(ctx, 1, 0x80c14);	/* 01ffffff SEMANTIC_COLOR */
++	xf_emit(ctx, 1, 0);		/* 00000001 VERTEX_TWO_SIDE_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 POINT_SPRITE_ENABLE */
++	xf_emit(ctx, 1, 0x8100c12);	/* 1fffffff FP_INTERPOLANT_CTRL */
++	xf_emit(ctx, 1, 0x27);		/* 000000ff SEMANTIC_PRIM_ID */
++	xf_emit(ctx, 1, 0);		/* 00000001 GP_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 0000000f */
++	xf_emit(ctx, 1, 1);		/* 00000001 */
++	for (i = 0; i < 10; i++) {
++		/* SEEK */
++		xf_emit(ctx, 0x40, 0);		/* ffffffff */
++		xf_emit(ctx, 0x10, 0);		/* 3, 0, 0.... */
++		xf_emit(ctx, 0x10, 0);		/* ffffffff */
++	}
++	/* SEEK */
++	xf_emit(ctx, 1, 0);		/* 00000001 POINT_SPRITE_CTRL */
++	xf_emit(ctx, 1, 1);		/* 00000001 */
++	xf_emit(ctx, 1, 0);		/* ffffffff */
++	xf_emit(ctx, 4, 0);		/* ffffffff NOPERSPECTIVE_BITMAP */
++	xf_emit(ctx, 0x10, 0);		/* 00ffffff POINT_COORD_REPLACE_MAP */
++	xf_emit(ctx, 1, 0);		/* 00000003 WINDOW_ORIGIN */
++	xf_emit(ctx, 1, 0x8100c12);	/* 1fffffff FP_INTERPOLANT_CTRL */
++	if (dev_priv->chipset != 0x50)
++		xf_emit(ctx, 1, 0);	/* 000003ff */
+ }
+ 
+ static void
+-nv50_graph_construct_gene_unk6(struct nouveau_grctx *ctx)
++nv50_graph_construct_gene_vfetch(struct nouveau_grctx *ctx)
+ {
+ 	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+-	/* beginning of area 1 on pre-NVA0 [after m2mf], area 3 on NVAx */
+-	xf_emit(ctx, 4, 0);
+-	xf_emit(ctx, 1, 0xf);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 8, 0);
+-	else
+-		xf_emit(ctx, 4, 0);
+-	xf_emit(ctx, 1, 0x20);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 0x11, 0);
++	int acnt = 0x10, rep, i;
++	/* beginning of strand 1 on pre-NVA0, strand 3 on NVAx */
++	if (IS_NVA3F(dev_priv->chipset))
++		acnt = 0x20;
++	/* SEEK */
++	if (dev_priv->chipset >= 0xa0) {
++		xf_emit(ctx, 1, 0);	/* ffffffff tesla UNK13A4 */
++		xf_emit(ctx, 1, 1);	/* 00000fff tesla UNK1318 */
++	}
++	xf_emit(ctx, 1, 0);		/* ffffffff VERTEX_BUFFER_FIRST */
++	xf_emit(ctx, 1, 0);		/* 00000001 PRIMITIVE_RESTART_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 UNK0DE8 */
++	xf_emit(ctx, 1, 0);		/* ffffffff PRIMITIVE_RESTART_INDEX */
++	xf_emit(ctx, 1, 0xf);		/* ffffffff VP_ATTR_EN */
++	xf_emit(ctx, (acnt/8)-1, 0);	/* ffffffff VP_ATTR_EN */
++	xf_emit(ctx, acnt/8, 0);	/* ffffffff VTX_ATTR_MASK_UNK0DD0 */
++	xf_emit(ctx, 1, 0);		/* 0000000f VP_GP_BUILTIN_ATTR_EN */
++	xf_emit(ctx, 1, 0x20);		/* 0000ffff tesla UNK129C */
++	xf_emit(ctx, 1, 0);		/* 000000ff turing UNK370??? */
++	xf_emit(ctx, 1, 0);		/* 0000ffff turing USER_PARAM_COUNT */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A30 */
++	/* SEEK */
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 0xb, 0);	/* RO */
+ 	else if (dev_priv->chipset >= 0xa0)
+-		xf_emit(ctx, 0xf, 0);
++		xf_emit(ctx, 0x9, 0);	/* RO */
+ 	else
+-		xf_emit(ctx, 0xe, 0);
+-	xf_emit(ctx, 1, 0x1a);
+-	xf_emit(ctx, 0xd, 0);
+-	xf_emit(ctx, 2, 4);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 1, 8);
+-	xf_emit(ctx, 1, 0);
++		xf_emit(ctx, 0x8, 0);	/* RO */
++	/* SEEK */
++	xf_emit(ctx, 1, 0);		/* 00000001 EDGE_FLAG */
++	xf_emit(ctx, 1, 0);		/* 00000001 PROVOKING_VERTEX_LAST */
++	xf_emit(ctx, 1, 0);		/* 00000001 GP_ENABLE */
++	xf_emit(ctx, 1, 0x1a);		/* 0000001f POLYGON_MODE */
++	/* SEEK */
++	xf_emit(ctx, 0xc, 0);		/* RO */
++	/* SEEK */
++	xf_emit(ctx, 1, 0);		/* 7f/ff */
++	xf_emit(ctx, 1, 4);		/* 7f/ff VP_REG_ALLOC_RESULT */
++	xf_emit(ctx, 1, 4);		/* 7f/ff VP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 0);		/* 0000000f VP_GP_BUILTIN_ATTR_EN */
++	xf_emit(ctx, 1, 4);		/* 000001ff UNK1A28 */
++	xf_emit(ctx, 1, 8);		/* 000001ff UNK0DF0 */
++	xf_emit(ctx, 1, 0);		/* 00000001 GP_ENABLE */
+ 	if (dev_priv->chipset == 0x50)
+-		xf_emit(ctx, 1, 0x3ff);
++		xf_emit(ctx, 1, 0x3ff);	/* 3ff tesla UNK0D68 */
+ 	else
+-		xf_emit(ctx, 1, 0x7ff);
++		xf_emit(ctx, 1, 0x7ff);	/* 7ff tesla UNK0D68 */
+ 	if (dev_priv->chipset == 0xa8)
+-		xf_emit(ctx, 1, 0x1e00);
+-	xf_emit(ctx, 0xc, 0);
+-	xf_emit(ctx, 1, 0xf);
+-	if (dev_priv->chipset == 0x50)
+-		xf_emit(ctx, 0x125, 0);
+-	else if (dev_priv->chipset < 0xa0)
+-		xf_emit(ctx, 0x126, 0);
+-	else if (dev_priv->chipset == 0xa0 || dev_priv->chipset >= 0xaa)
+-		xf_emit(ctx, 0x124, 0);
++		xf_emit(ctx, 1, 0x1e00);	/* 7fff */
++	/* SEEK */
++	xf_emit(ctx, 0xc, 0);		/* RO or close */
++	/* SEEK */
++	xf_emit(ctx, 1, 0xf);		/* ffffffff VP_ATTR_EN */
++	xf_emit(ctx, (acnt/8)-1, 0);	/* ffffffff VP_ATTR_EN */
++	xf_emit(ctx, 1, 0);		/* 0000000f VP_GP_BUILTIN_ATTR_EN */
++	if (dev_priv->chipset > 0x50 && dev_priv->chipset < 0xa0)
++		xf_emit(ctx, 2, 0);	/* ffffffff */
+ 	else
+-		xf_emit(ctx, 0x1f7, 0);
+-	xf_emit(ctx, 1, 0xf);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 3, 0);
++		xf_emit(ctx, 1, 0);	/* ffffffff */
++	xf_emit(ctx, 1, 0);		/* 00000003 tesla UNK0FD8 */
++	/* SEEK */
++	if (IS_NVA3F(dev_priv->chipset)) {
++		xf_emit(ctx, 0x10, 0);	/* 0? */
++		xf_emit(ctx, 2, 0);	/* weird... */
++		xf_emit(ctx, 2, 0);	/* RO */
++	} else {
++		xf_emit(ctx, 8, 0);	/* 0? */
++		xf_emit(ctx, 1, 0);	/* weird... */
++		xf_emit(ctx, 2, 0);	/* RO */
++	}
++	/* SEEK */
++	xf_emit(ctx, 1, 0);		/* ffffffff VB_ELEMENT_BASE */
++	xf_emit(ctx, 1, 0);		/* ffffffff UNK1438 */
++	xf_emit(ctx, acnt, 0);		/* 1 tesla UNK1000 */
++	if (dev_priv->chipset >= 0xa0)
++		xf_emit(ctx, 1, 0);	/* ffffffff tesla UNK1118? */
++	/* SEEK */
++	xf_emit(ctx, acnt, 0);		/* ffffffff VERTEX_ARRAY_UNK90C */
++	xf_emit(ctx, 1, 0);		/* f/1f */
++	/* SEEK */
++	xf_emit(ctx, acnt, 0);		/* ffffffff VERTEX_ARRAY_UNK90C */
++	xf_emit(ctx, 1, 0);		/* f/1f */
++	/* SEEK */
++	xf_emit(ctx, acnt, 0);		/* RO */
++	xf_emit(ctx, 2, 0);		/* RO */
++	/* SEEK */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK111C? */
++	xf_emit(ctx, 1, 0);		/* RO */
++	/* SEEK */
++	xf_emit(ctx, 1, 0);		/* 000000ff UNK15F4_ADDRESS_HIGH */
++	xf_emit(ctx, 1, 0);		/* ffffffff UNK15F4_ADDRESS_LOW */
++	xf_emit(ctx, 1, 0);		/* 000000ff UNK0F84_ADDRESS_HIGH */
++	xf_emit(ctx, 1, 0);		/* ffffffff UNK0F84_ADDRESS_LOW */
++	/* SEEK */
++	xf_emit(ctx, acnt, 0);		/* 00003fff VERTEX_ARRAY_ATTRIB_OFFSET */
++	xf_emit(ctx, 3, 0);		/* f/1f */
++	/* SEEK */
++	xf_emit(ctx, acnt, 0);		/* 00000fff VERTEX_ARRAY_STRIDE */
++	xf_emit(ctx, 3, 0);		/* f/1f */
++	/* SEEK */
++	xf_emit(ctx, acnt, 0);		/* ffffffff VERTEX_ARRAY_LOW */
++	xf_emit(ctx, 3, 0);		/* f/1f */
++	/* SEEK */
++	xf_emit(ctx, acnt, 0);		/* 000000ff VERTEX_ARRAY_HIGH */
++	xf_emit(ctx, 3, 0);		/* f/1f */
++	/* SEEK */
++	xf_emit(ctx, acnt, 0);		/* ffffffff VERTEX_LIMIT_LOW */
++	xf_emit(ctx, 3, 0);		/* f/1f */
++	/* SEEK */
++	xf_emit(ctx, acnt, 0);		/* 000000ff VERTEX_LIMIT_HIGH */
++	xf_emit(ctx, 3, 0);		/* f/1f */
++	/* SEEK */
++	if (IS_NVA3F(dev_priv->chipset)) {
++		xf_emit(ctx, acnt, 0);		/* f */
++		xf_emit(ctx, 3, 0);		/* f/1f */
++	}
++	/* SEEK */
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 2, 0);	/* RO */
++	else
++		xf_emit(ctx, 5, 0);	/* RO */
++	/* SEEK */
++	xf_emit(ctx, 1, 0);		/* ffff DMA_VTXBUF */
++	/* SEEK */
++	if (dev_priv->chipset < 0xa0) {
++		xf_emit(ctx, 0x41, 0);	/* RO */
++		/* SEEK */
++		xf_emit(ctx, 0x11, 0);	/* RO */
++	} else if (!IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 0x50, 0);	/* RO */
+ 	else
+-		xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 1);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 0xa1, 0);
++		xf_emit(ctx, 0x58, 0);	/* RO */
++	/* SEEK */
++	xf_emit(ctx, 1, 0xf);		/* ffffffff VP_ATTR_EN */
++	xf_emit(ctx, (acnt/8)-1, 0);	/* ffffffff VP_ATTR_EN */
++	xf_emit(ctx, 1, 1);		/* 1 UNK0DEC */
++	/* SEEK */
++	xf_emit(ctx, acnt*4, 0);	/* ffffffff VTX_ATTR */
++	xf_emit(ctx, 4, 0);		/* f/1f, 0, 0, 0 */
++	/* SEEK */
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 0x1d, 0);	/* RO */
+ 	else
+-		xf_emit(ctx, 0x5a, 0);
+-	xf_emit(ctx, 1, 0xf);
++		xf_emit(ctx, 0x16, 0);	/* RO */
++	/* SEEK */
++	xf_emit(ctx, 1, 0xf);		/* ffffffff VP_ATTR_EN */
++	xf_emit(ctx, (acnt/8)-1, 0);	/* ffffffff VP_ATTR_EN */
++	/* SEEK */
+ 	if (dev_priv->chipset < 0xa0)
+-		xf_emit(ctx, 0x834, 0);
+-	else if (dev_priv->chipset == 0xa0)
+-		xf_emit(ctx, 0x1873, 0);
+-	else if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 0x8ba, 0);
++		xf_emit(ctx, 8, 0);	/* RO */
++	else if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 0xc, 0);	/* RO */
++	else
++		xf_emit(ctx, 7, 0);	/* RO */
++	/* SEEK */
++	xf_emit(ctx, 0xa, 0);		/* RO */
++	if (dev_priv->chipset == 0xa0)
++		rep = 0xc;
++	else
++		rep = 4;
++	for (i = 0; i < rep; i++) {
++		/* SEEK */
++		if (IS_NVA3F(dev_priv->chipset))
++			xf_emit(ctx, 0x20, 0);	/* ffffffff */
++		xf_emit(ctx, 0x200, 0);	/* ffffffff */
++		xf_emit(ctx, 4, 0);	/* 7f/ff, 0, 0, 0 */
++		xf_emit(ctx, 4, 0);	/* ffffffff */
++	}
++	/* SEEK */
++	xf_emit(ctx, 1, 0);		/* 113/111 */
++	xf_emit(ctx, 1, 0xf);		/* ffffffff VP_ATTR_EN */
++	xf_emit(ctx, (acnt/8)-1, 0);	/* ffffffff VP_ATTR_EN */
++	xf_emit(ctx, acnt/8, 0);	/* ffffffff VTX_ATTR_MASK_UNK0DD0 */
++	xf_emit(ctx, 1, 0);		/* 0000000f VP_GP_BUILTIN_ATTR_EN */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A30 */
++	/* SEEK */
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 7, 0);	/* weird... */
+ 	else
+-		xf_emit(ctx, 0x833, 0);
+-	xf_emit(ctx, 1, 0xf);
+-	xf_emit(ctx, 0xf, 0);
++		xf_emit(ctx, 5, 0);	/* weird... */
+ }
+ 
+ static void
+-nv50_graph_construct_gene_unk7(struct nouveau_grctx *ctx)
++nv50_graph_construct_gene_eng2d(struct nouveau_grctx *ctx)
+ {
+ 	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+-	/* middle of area 1 on pre-NVA0 [after m2mf], middle of area 6 on NVAx */
+-	xf_emit(ctx, 2, 0);
+-	if (dev_priv->chipset == 0x50)
+-		xf_emit(ctx, 2, 1);
+-	else
+-		xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 2, 0x100);
+-	xf_emit(ctx, 1, 0x11);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 8);
+-	xf_emit(ctx, 5, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 3, 1);
+-	xf_emit(ctx, 1, 0xcf);
+-	xf_emit(ctx, 1, 2);
+-	xf_emit(ctx, 6, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 3, 1);
+-	xf_emit(ctx, 4, 0);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0x15);
+-	xf_emit(ctx, 3, 0);
+-	xf_emit(ctx, 1, 0x4444480);
+-	xf_emit(ctx, 0x37, 0);
++	/* middle of strand 1 on pre-NVA0 [after vfetch], middle of strand 6 on NVAx */
++	/* SEEK */
++	xf_emit(ctx, 2, 0);		/* 0001ffff CLIP_X, CLIP_Y */
++	xf_emit(ctx, 2, 0);		/* 0000ffff CLIP_W, CLIP_H */
++	xf_emit(ctx, 1, 0);		/* 00000001 CLIP_ENABLE */
++	if (dev_priv->chipset < 0xa0) {
++		/* this is useless on everything but the original NV50,
++		 * guess they forgot to nuke it. Or just didn't bother. */
++		xf_emit(ctx, 2, 0);	/* 0000ffff IFC_CLIP_X, Y */
++		xf_emit(ctx, 2, 1);	/* 0000ffff IFC_CLIP_W, H */
++		xf_emit(ctx, 1, 0);	/* 00000001 IFC_CLIP_ENABLE */
++	}
++	xf_emit(ctx, 1, 1);		/* 00000001 DST_LINEAR */
++	xf_emit(ctx, 1, 0x100);		/* 0001ffff DST_WIDTH */
++	xf_emit(ctx, 1, 0x100);		/* 0001ffff DST_HEIGHT */
++	xf_emit(ctx, 1, 0x11);		/* 3f[NV50]/7f[NV84+] DST_FORMAT */
++	xf_emit(ctx, 1, 0);		/* 0001ffff DRAW_POINT_X */
++	xf_emit(ctx, 1, 8);		/* 0000000f DRAW_UNK58C */
++	xf_emit(ctx, 1, 0);		/* 000fffff SIFC_DST_X_FRACT */
++	xf_emit(ctx, 1, 0);		/* 0001ffff SIFC_DST_X_INT */
++	xf_emit(ctx, 1, 0);		/* 000fffff SIFC_DST_Y_FRACT */
++	xf_emit(ctx, 1, 0);		/* 0001ffff SIFC_DST_Y_INT */
++	xf_emit(ctx, 1, 0);		/* 000fffff SIFC_DX_DU_FRACT */
++	xf_emit(ctx, 1, 1);		/* 0001ffff SIFC_DX_DU_INT */
++	xf_emit(ctx, 1, 0);		/* 000fffff SIFC_DY_DV_FRACT */
++	xf_emit(ctx, 1, 1);		/* 0001ffff SIFC_DY_DV_INT */
++	xf_emit(ctx, 1, 1);		/* 0000ffff SIFC_WIDTH */
++	xf_emit(ctx, 1, 1);		/* 0000ffff SIFC_HEIGHT */
++	xf_emit(ctx, 1, 0xcf);		/* 000000ff SIFC_FORMAT */
++	xf_emit(ctx, 1, 2);		/* 00000003 SIFC_BITMAP_UNK808 */
++	xf_emit(ctx, 1, 0);		/* 00000003 SIFC_BITMAP_LINE_PACK_MODE */
++	xf_emit(ctx, 1, 0);		/* 00000001 SIFC_BITMAP_LSB_FIRST */
++	xf_emit(ctx, 1, 0);		/* 00000001 SIFC_BITMAP_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 0000ffff BLIT_DST_X */
++	xf_emit(ctx, 1, 0);		/* 0000ffff BLIT_DST_Y */
++	xf_emit(ctx, 1, 0);		/* 000fffff BLIT_DU_DX_FRACT */
++	xf_emit(ctx, 1, 1);		/* 0001ffff BLIT_DU_DX_INT */
++	xf_emit(ctx, 1, 0);		/* 000fffff BLIT_DV_DY_FRACT */
++	xf_emit(ctx, 1, 1);		/* 0001ffff BLIT_DV_DY_INT */
++	xf_emit(ctx, 1, 1);		/* 0000ffff BLIT_DST_W */
++	xf_emit(ctx, 1, 1);		/* 0000ffff BLIT_DST_H */
++	xf_emit(ctx, 1, 0);		/* 000fffff BLIT_SRC_X_FRACT */
++	xf_emit(ctx, 1, 0);		/* 0001ffff BLIT_SRC_X_INT */
++	xf_emit(ctx, 1, 0);		/* 000fffff BLIT_SRC_Y_FRACT */
++	xf_emit(ctx, 1, 0);		/* 00000001 UNK888 */
++	xf_emit(ctx, 1, 4);		/* 0000003f UNK884 */
++	xf_emit(ctx, 1, 0);		/* 00000007 UNK880 */
++	xf_emit(ctx, 1, 1);		/* 0000001f tesla UNK0FB8 */
++	xf_emit(ctx, 1, 0x15);		/* 000000ff tesla UNK128C */
++	xf_emit(ctx, 2, 0);		/* 00000007, ffff0ff3 */
++	xf_emit(ctx, 1, 0);		/* 00000001 UNK260 */
++	xf_emit(ctx, 1, 0x4444480);	/* 1fffffff UNK870 */
++	/* SEEK */
++	xf_emit(ctx, 0x10, 0);
++	/* SEEK */
++	xf_emit(ctx, 0x27, 0);
+ }
+ 
+ static void
+-nv50_graph_construct_gene_unk8(struct nouveau_grctx *ctx)
++nv50_graph_construct_gene_csched(struct nouveau_grctx *ctx)
+ {
+-	/* middle of area 1 on pre-NVA0 [after m2mf], middle of area 0 on NVAx */
+-	xf_emit(ctx, 4, 0);
+-	xf_emit(ctx, 1, 0x8100c12);
+-	xf_emit(ctx, 4, 0);
+-	xf_emit(ctx, 1, 0x100);
+-	xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 0x10001);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x10001);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0x10001);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 1, 2);
++	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
++	/* middle of strand 1 on pre-NVA0 [after eng2d], middle of strand 0 on NVAx */
++	/* SEEK */
++	xf_emit(ctx, 2, 0);		/* 00007fff WINDOW_OFFSET_XY... what is it doing here??? */
++	xf_emit(ctx, 1, 0);		/* 00000001 tesla UNK1924 */
++	xf_emit(ctx, 1, 0);		/* 00000003 WINDOW_ORIGIN */
++	xf_emit(ctx, 1, 0x8100c12);	/* 1fffffff FP_INTERPOLANT_CTRL */
++	xf_emit(ctx, 1, 0);		/* 000003ff */
++	/* SEEK */
++	xf_emit(ctx, 1, 0);		/* ffffffff turing UNK364 */
++	xf_emit(ctx, 1, 0);		/* 0000000f turing UNK36C */
++	xf_emit(ctx, 1, 0);		/* 0000ffff USER_PARAM_COUNT */
++	xf_emit(ctx, 1, 0x100);		/* 00ffffff turing UNK384 */
++	xf_emit(ctx, 1, 0);		/* 0000000f turing UNK2A0 */
++	xf_emit(ctx, 1, 0);		/* 0000ffff GRIDID */
++	xf_emit(ctx, 1, 0x10001);	/* ffffffff GRIDDIM_XY */
++	xf_emit(ctx, 1, 0);		/* ffffffff */
++	xf_emit(ctx, 1, 0x10001);	/* ffffffff BLOCKDIM_XY */
++	xf_emit(ctx, 1, 1);		/* 0000ffff BLOCKDIM_Z */
++	xf_emit(ctx, 1, 0x10001);	/* 00ffffff BLOCK_ALLOC */
++	xf_emit(ctx, 1, 1);		/* 00000001 LANES32 */
++	xf_emit(ctx, 1, 4);		/* 000000ff FP_REG_ALLOC_TEMP */
++	xf_emit(ctx, 1, 2);		/* 00000003 REG_MODE */
++	/* SEEK */
++	xf_emit(ctx, 0x40, 0);		/* ffffffff USER_PARAM */
++	switch (dev_priv->chipset) {
++	case 0x50:
++	case 0x92:
++		xf_emit(ctx, 8, 0);	/* 7, 0, 0, 0, ... */
++		xf_emit(ctx, 0x80, 0);	/* fff */
++		xf_emit(ctx, 2, 0);	/* ff, fff */
++		xf_emit(ctx, 0x10*2, 0);	/* ffffffff, 1f */
++		break;
++	case 0x84:
++		xf_emit(ctx, 8, 0);	/* 7, 0, 0, 0, ... */
++		xf_emit(ctx, 0x60, 0);	/* fff */
++		xf_emit(ctx, 2, 0);	/* ff, fff */
++		xf_emit(ctx, 0xc*2, 0);	/* ffffffff, 1f */
++		break;
++	case 0x94:
++	case 0x96:
++		xf_emit(ctx, 8, 0);	/* 7, 0, 0, 0, ... */
++		xf_emit(ctx, 0x40, 0);	/* fff */
++		xf_emit(ctx, 2, 0);	/* ff, fff */
++		xf_emit(ctx, 8*2, 0);	/* ffffffff, 1f */
++		break;
++	case 0x86:
++	case 0x98:
++		xf_emit(ctx, 4, 0);	/* f, 0, 0, 0 */
++		xf_emit(ctx, 0x10, 0);	/* fff */
++		xf_emit(ctx, 2, 0);	/* ff, fff */
++		xf_emit(ctx, 2*2, 0);	/* ffffffff, 1f */
++		break;
++	case 0xa0:
++		xf_emit(ctx, 8, 0);	/* 7, 0, 0, 0, ... */
++		xf_emit(ctx, 0xf0, 0);	/* fff */
++		xf_emit(ctx, 2, 0);	/* ff, fff */
++		xf_emit(ctx, 0x1e*2, 0);	/* ffffffff, 1f */
++		break;
++	case 0xa3:
++		xf_emit(ctx, 8, 0);	/* 7, 0, 0, 0, ... */
++		xf_emit(ctx, 0x60, 0);	/* fff */
++		xf_emit(ctx, 2, 0);	/* ff, fff */
++		xf_emit(ctx, 0xc*2, 0);	/* ffffffff, 1f */
++		break;
++	case 0xa5:
++	case 0xaf:
++		xf_emit(ctx, 8, 0);	/* 7, 0, 0, 0, ... */
++		xf_emit(ctx, 0x30, 0);	/* fff */
++		xf_emit(ctx, 2, 0);	/* ff, fff */
++		xf_emit(ctx, 6*2, 0);	/* ffffffff, 1f */
++		break;
++	case 0xaa:
++		xf_emit(ctx, 0x12, 0);
++		break;
++	case 0xa8:
++	case 0xac:
++		xf_emit(ctx, 4, 0);	/* f, 0, 0, 0 */
++		xf_emit(ctx, 0x10, 0);	/* fff */
++		xf_emit(ctx, 2, 0);	/* ff, fff */
++		xf_emit(ctx, 2*2, 0);	/* ffffffff, 1f */
++		break;
++	}
++	xf_emit(ctx, 1, 0);		/* 0000000f */
++	xf_emit(ctx, 1, 0);		/* 00000000 */
++	xf_emit(ctx, 1, 0);		/* ffffffff */
++	xf_emit(ctx, 1, 0);		/* 0000001f */
++	xf_emit(ctx, 4, 0);		/* ffffffff */
++	xf_emit(ctx, 1, 0);		/* 00000003 turing UNK35C */
++	xf_emit(ctx, 1, 0);		/* ffffffff */
++	xf_emit(ctx, 4, 0);		/* ffffffff */
++	xf_emit(ctx, 1, 0);		/* 00000003 turing UNK35C */
++	xf_emit(ctx, 1, 0);		/* ffffffff */
++	xf_emit(ctx, 1, 0);		/* 000000ff */
+ }
+ 
+ static void
+-nv50_graph_construct_gene_unk9(struct nouveau_grctx *ctx)
++nv50_graph_construct_gene_unk1cxx(struct nouveau_grctx *ctx)
+ {
+ 	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+-	/* middle of area 2 on pre-NVA0 [after m2mf], end of area 0 on NVAx */
+-	xf_emit(ctx, 1, 0x3f800000);
+-	xf_emit(ctx, 6, 0);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 1, 0x1a);
+-	xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 0x12, 0);
+-	xf_emit(ctx, 1, 0x00ffff00);
+-	xf_emit(ctx, 6, 0);
+-	xf_emit(ctx, 1, 0xf);
+-	xf_emit(ctx, 7, 0);
+-	xf_emit(ctx, 1, 0x0fac6881);
+-	xf_emit(ctx, 1, 0x11);
+-	xf_emit(ctx, 0xf, 0);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 2, 0);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 1, 3);
++	xf_emit(ctx, 2, 0);		/* 00007fff WINDOW_OFFSET_XY */
++	xf_emit(ctx, 1, 0x3f800000);	/* ffffffff LINE_WIDTH */
++	xf_emit(ctx, 1, 0);		/* 00000001 LINE_SMOOTH_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 tesla UNK1658 */
++	xf_emit(ctx, 1, 0);		/* 00000001 POLYGON_SMOOTH_ENABLE */
++	xf_emit(ctx, 3, 0);		/* 00000001 POLYGON_OFFSET_*_ENABLE */
++	xf_emit(ctx, 1, 4);		/* 0000000f CULL_MODE */
++	xf_emit(ctx, 1, 0x1a);		/* 0000001f POLYGON_MODE */
++	xf_emit(ctx, 1, 0);		/* 0000000f ZETA_FORMAT */
++	xf_emit(ctx, 1, 0);		/* 00000001 POINT_SPRITE_ENABLE */
++	xf_emit(ctx, 1, 1);		/* 00000001 tesla UNK165C */
++	xf_emit(ctx, 0x10, 0);		/* 00000001 SCISSOR_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 tesla UNK1534 */
++	xf_emit(ctx, 1, 0);		/* 00000001 LINE_STIPPLE_ENABLE */
++	xf_emit(ctx, 1, 0x00ffff00);	/* 00ffffff LINE_STIPPLE_PATTERN */
++	xf_emit(ctx, 1, 0);		/* ffffffff POLYGON_OFFSET_UNITS */
++	xf_emit(ctx, 1, 0);		/* ffffffff POLYGON_OFFSET_FACTOR */
++	xf_emit(ctx, 1, 0);		/* 00000003 tesla UNK1668 */
++	xf_emit(ctx, 2, 0);		/* 07ffffff SCREEN_SCISSOR */
++	xf_emit(ctx, 1, 0);		/* 00000001 tesla UNK1900 */
++	xf_emit(ctx, 1, 0xf);		/* 0000000f COLOR_MASK */
++	xf_emit(ctx, 7, 0);		/* 0000000f COLOR_MASK */
++	xf_emit(ctx, 1, 0x0fac6881);	/* 0fffffff RT_CONTROL */
++	xf_emit(ctx, 1, 0x11);		/* 0000007f RT_FORMAT */
++	xf_emit(ctx, 7, 0);		/* 0000007f RT_FORMAT */
++	xf_emit(ctx, 8, 0);		/* 00000001 RT_HORIZ_LINEAR */
++	xf_emit(ctx, 1, 4);		/* 00000007 FP_CONTROL */
++	xf_emit(ctx, 1, 0);		/* 00000001 ALPHA_TEST_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000007 ALPHA_TEST_FUNC */
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 1, 3);	/* 00000003 UNK16B4 */
+ 	else if (dev_priv->chipset >= 0xa0)
+-		xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 2);
+-	xf_emit(ctx, 2, 0x04000000);
+-	xf_emit(ctx, 3, 0);
+-	xf_emit(ctx, 1, 5);
+-	xf_emit(ctx, 1, 0x52);
+-	if (dev_priv->chipset == 0x50) {
+-		xf_emit(ctx, 0x13, 0);
+-	} else {
+-		xf_emit(ctx, 4, 0);
+-		xf_emit(ctx, 1, 1);
+-		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-			xf_emit(ctx, 0x11, 0);
+-		else
+-			xf_emit(ctx, 0x10, 0);
++		xf_emit(ctx, 1, 1);	/* 00000001 UNK16B4 */
++	xf_emit(ctx, 1, 0);		/* 00000003 MULTISAMPLE_CTRL */
++	xf_emit(ctx, 1, 0);		/* 00000003 tesla UNK0F90 */
++	xf_emit(ctx, 1, 2);		/* 00000003 tesla UNK143C */
++	xf_emit(ctx, 2, 0x04000000);	/* 07ffffff tesla UNK0D6C */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_FRONT_MASK */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_WRITE_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 SAMPLECNT_ENABLE */
++	xf_emit(ctx, 1, 5);		/* 0000000f UNK1408 */
++	xf_emit(ctx, 1, 0x52);		/* 000001ff SEMANTIC_PTSZ */
++	xf_emit(ctx, 1, 0);		/* ffffffff POINT_SIZE */
++	xf_emit(ctx, 1, 0);		/* 00000001 */
++	xf_emit(ctx, 1, 0);		/* 00000007 tesla UNK0FB4 */
++	if (dev_priv->chipset != 0x50) {
++		xf_emit(ctx, 1, 0);	/* 3ff */
++		xf_emit(ctx, 1, 1);	/* 00000001 tesla UNK1110 */
+ 	}
+-	xf_emit(ctx, 0x10, 0x3f800000);
+-	xf_emit(ctx, 1, 0x10);
+-	xf_emit(ctx, 0x26, 0);
+-	xf_emit(ctx, 1, 0x8100c12);
+-	xf_emit(ctx, 1, 5);
+-	xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 4, 0xffff);
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 1, 0);	/* 00000003 tesla UNK1928 */
++	xf_emit(ctx, 0x10, 0);		/* ffffffff DEPTH_RANGE_NEAR */
++	xf_emit(ctx, 0x10, 0x3f800000);	/* ffffffff DEPTH_RANGE_FAR */
++	xf_emit(ctx, 1, 0x10);		/* 000000ff VIEW_VOLUME_CLIP_CTRL */
++	xf_emit(ctx, 0x20, 0);		/* 07ffffff VIEWPORT_HORIZ, then VIEWPORT_VERT. (W&0x3fff)<<13 | (X&0x1fff). */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK187C */
++	xf_emit(ctx, 1, 0);		/* 00000003 WINDOW_ORIGIN */
++	xf_emit(ctx, 1, 0);		/* 00000001 STENCIL_FRONT_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_TEST_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 STENCIL_BACK_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_BACK_MASK */
++	xf_emit(ctx, 1, 0x8100c12);	/* 1fffffff FP_INTERPOLANT_CTRL */
++	xf_emit(ctx, 1, 5);		/* 0000000f tesla UNK1220 */
++	xf_emit(ctx, 1, 0);		/* 00000007 MULTISAMPLE_SAMPLES_LOG2 */
++	xf_emit(ctx, 1, 0);		/* 000000ff tesla UNK1A20 */
++	xf_emit(ctx, 1, 1);		/* 00000001 ZETA_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 VERTEX_TWO_SIDE_ENABLE */
++	xf_emit(ctx, 4, 0xffff);	/* 0000ffff MSAA_MASK */
+ 	if (dev_priv->chipset != 0x50)
+-		xf_emit(ctx, 1, 3);
++		xf_emit(ctx, 1, 3);	/* 00000003 tesla UNK1100 */
+ 	if (dev_priv->chipset < 0xa0)
+-		xf_emit(ctx, 0x1f, 0);
+-	else if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 0xc, 0);
+-	else
+-		xf_emit(ctx, 3, 0);
+-	xf_emit(ctx, 1, 0x00ffff00);
+-	xf_emit(ctx, 1, 0x1a);
++		xf_emit(ctx, 0x1c, 0);	/* RO */
++	else if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 0x9, 0);
++	xf_emit(ctx, 1, 0);		/* 00000001 UNK1534 */
++	xf_emit(ctx, 1, 0);		/* 00000001 LINE_SMOOTH_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 LINE_STIPPLE_ENABLE */
++	xf_emit(ctx, 1, 0x00ffff00);	/* 00ffffff LINE_STIPPLE_PATTERN */
++	xf_emit(ctx, 1, 0x1a);		/* 0000001f POLYGON_MODE */
++	xf_emit(ctx, 1, 0);		/* 00000003 WINDOW_ORIGIN */
+ 	if (dev_priv->chipset != 0x50) {
+-		xf_emit(ctx, 1, 0);
+-		xf_emit(ctx, 1, 3);
++		xf_emit(ctx, 1, 3);	/* 00000003 tesla UNK1100 */
++		xf_emit(ctx, 1, 0);	/* 3ff */
+ 	}
++	/* XXX: the following block could belong either to unk1cxx, or
++	 * to STRMOUT. Rather hard to tell. */
+ 	if (dev_priv->chipset < 0xa0)
+-		xf_emit(ctx, 0x26, 0);
++		xf_emit(ctx, 0x25, 0);
+ 	else
+-		xf_emit(ctx, 0x3c, 0);
+-	xf_emit(ctx, 1, 0x102);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 4, 4);
+-	if (dev_priv->chipset >= 0xa0)
+-		xf_emit(ctx, 8, 0);
+-	xf_emit(ctx, 2, 4);
+-	xf_emit(ctx, 1, 0);
++		xf_emit(ctx, 0x3b, 0);
++}
++
++static void
++nv50_graph_construct_gene_strmout(struct nouveau_grctx *ctx)
++{
++	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
++	xf_emit(ctx, 1, 0x102);		/* 0000ffff STRMOUT_BUFFER_CTRL */
++	xf_emit(ctx, 1, 0);		/* ffffffff STRMOUT_PRIMITIVE_COUNT */
++	xf_emit(ctx, 4, 4);		/* 000000ff STRMOUT_NUM_ATTRIBS */
++	if (dev_priv->chipset >= 0xa0) {
++		xf_emit(ctx, 4, 0);	/* ffffffff UNK1A8C */
++		xf_emit(ctx, 4, 0);	/* ffffffff UNK1780 */
++	}
++	xf_emit(ctx, 1, 4);		/* 000000ff GP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 4);		/* 0000007f VP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 0);		/* 00000001 GP_ENABLE */
+ 	if (dev_priv->chipset == 0x50)
+-		xf_emit(ctx, 1, 0x3ff);
++		xf_emit(ctx, 1, 0x3ff);	/* 000003ff tesla UNK0D68 */
+ 	else
+-		xf_emit(ctx, 1, 0x7ff);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x102);
+-	xf_emit(ctx, 9, 0);
+-	xf_emit(ctx, 4, 4);
+-	xf_emit(ctx, 0x2c, 0);
++		xf_emit(ctx, 1, 0x7ff);	/* 000007ff tesla UNK0D68 */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A30 */
++	/* SEEK */
++	xf_emit(ctx, 1, 0x102);		/* 0000ffff STRMOUT_BUFFER_CTRL */
++	xf_emit(ctx, 1, 0);		/* ffffffff STRMOUT_PRIMITIVE_COUNT */
++	xf_emit(ctx, 4, 0);		/* 000000ff STRMOUT_ADDRESS_HIGH */
++	xf_emit(ctx, 4, 0);		/* ffffffff STRMOUT_ADDRESS_LOW */
++	xf_emit(ctx, 4, 4);		/* 000000ff STRMOUT_NUM_ATTRIBS */
++	if (dev_priv->chipset >= 0xa0) {
++		xf_emit(ctx, 4, 0);	/* ffffffff UNK1A8C */
++		xf_emit(ctx, 4, 0);	/* ffffffff UNK1780 */
++	}
++	xf_emit(ctx, 1, 0);		/* 0000ffff DMA_STRMOUT */
++	xf_emit(ctx, 1, 0);		/* 0000ffff DMA_QUERY */
++	xf_emit(ctx, 1, 0);		/* 000000ff QUERY_ADDRESS_HIGH */
++	xf_emit(ctx, 2, 0);		/* ffffffff QUERY_ADDRESS_LOW QUERY_COUNTER */
++	xf_emit(ctx, 2, 0);		/* ffffffff */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A30 */
++	/* SEEK */
++	xf_emit(ctx, 0x20, 0);		/* ffffffff STRMOUT_MAP */
++	xf_emit(ctx, 1, 0);		/* 0000000f */
++	xf_emit(ctx, 1, 0);		/* 00000000? */
++	xf_emit(ctx, 2, 0);		/* ffffffff */
++}
++
++static void
++nv50_graph_construct_gene_ropm1(struct nouveau_grctx *ctx)
++{
++	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
++	xf_emit(ctx, 1, 0x4e3bfdf);	/* ffffffff UNK0D64 */
++	xf_emit(ctx, 1, 0x4e3bfdf);	/* ffffffff UNK0DF4 */
++	xf_emit(ctx, 1, 0);		/* 00000007 */
++	xf_emit(ctx, 1, 0);		/* 000003ff */
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 1, 0x11);	/* 000000ff tesla UNK1968 */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A3C */
++}
++
++static void
++nv50_graph_construct_gene_ropm2(struct nouveau_grctx *ctx)
++{
++	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
++	/* SEEK */
++	xf_emit(ctx, 1, 0);		/* 0000ffff DMA_QUERY */
++	xf_emit(ctx, 1, 0x0fac6881);	/* 0fffffff RT_CONTROL */
++	xf_emit(ctx, 2, 0);		/* ffffffff */
++	xf_emit(ctx, 1, 0);		/* 000000ff QUERY_ADDRESS_HIGH */
++	xf_emit(ctx, 2, 0);		/* ffffffff QUERY_ADDRESS_LOW, COUNTER */
++	xf_emit(ctx, 1, 0);		/* 00000001 SAMPLECNT_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 7 */
++	/* SEEK */
++	xf_emit(ctx, 1, 0);		/* 0000ffff DMA_QUERY */
++	xf_emit(ctx, 1, 0);		/* 000000ff QUERY_ADDRESS_HIGH */
++	xf_emit(ctx, 2, 0);		/* ffffffff QUERY_ADDRESS_LOW, COUNTER */
++	xf_emit(ctx, 1, 0x4e3bfdf);	/* ffffffff UNK0D64 */
++	xf_emit(ctx, 1, 0x4e3bfdf);	/* ffffffff UNK0DF4 */
++	xf_emit(ctx, 1, 0);		/* 00000001 eng2d UNK260 */
++	xf_emit(ctx, 1, 0);		/* ff/3ff */
++	xf_emit(ctx, 1, 0);		/* 00000007 */
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 1, 0x11);	/* 000000ff tesla UNK1968 */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A3C */
+ }
+ 
+ static void
+@@ -1749,443 +2392,709 @@ nv50_graph_construct_gene_ropc(struct nouveau_grctx *ctx)
+ 	int magic2;
+ 	if (dev_priv->chipset == 0x50) {
+ 		magic2 = 0x00003e60;
+-	} else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa) {
++	} else if (!IS_NVA3F(dev_priv->chipset)) {
+ 		magic2 = 0x001ffe67;
+ 	} else {
+ 		magic2 = 0x00087e67;
+ 	}
+-	xf_emit(ctx, 8, 0);
+-	xf_emit(ctx, 1, 2);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, magic2);
+-	xf_emit(ctx, 4, 0);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 7, 0);
+-	if (dev_priv->chipset >= 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 1, 0x15);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0x10);
+-	xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 4, 0);
++		xf_emit(ctx, 1, 0);		/* f/7 MULTISAMPLE_SAMPLES_LOG2 */
++	xf_emit(ctx, 1, 0);		/* 00000001 tesla UNK1534 */
++	xf_emit(ctx, 1, 0);		/* 00000007 STENCIL_BACK_FUNC_FUNC */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_BACK_FUNC_MASK */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_BACK_MASK */
++	xf_emit(ctx, 3, 0);		/* 00000007 STENCIL_BACK_OP_FAIL, ZFAIL, ZPASS */
++	xf_emit(ctx, 1, 2);		/* 00000003 tesla UNK143C */
++	xf_emit(ctx, 1, 0);		/* ffff0ff3 */
++	xf_emit(ctx, 1, magic2);	/* 001fffff tesla UNK0F78 */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_BOUNDS_EN */
++	xf_emit(ctx, 1, 0);		/* 00000007 DEPTH_TEST_FUNC */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_TEST_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_WRITE_ENABLE */
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 1, 1);	/* 0000001f tesla UNK169C */
++	xf_emit(ctx, 1, 0);		/* 00000007 STENCIL_FRONT_FUNC_FUNC */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_FRONT_FUNC_MASK */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_FRONT_MASK */
++	xf_emit(ctx, 3, 0);		/* 00000007 STENCIL_FRONT_OP_FAIL, ZFAIL, ZPASS */
++	xf_emit(ctx, 1, 0);		/* 00000001 STENCIL_FRONT_ENABLE */
++	if (dev_priv->chipset >= 0xa0 && !IS_NVAAF(dev_priv->chipset))
++		xf_emit(ctx, 1, 0x15);	/* 000000ff */
++	xf_emit(ctx, 1, 0);		/* 00000001 STENCIL_BACK_ENABLE */
++	xf_emit(ctx, 1, 1);		/* 00000001 tesla UNK15B4 */
++	xf_emit(ctx, 1, 0x10);		/* 3ff/ff VIEW_VOLUME_CLIP_CTRL */
++	xf_emit(ctx, 1, 0);		/* ffffffff CLEAR_DEPTH */
++	xf_emit(ctx, 1, 0);		/* 0000000f ZETA_FORMAT */
++	xf_emit(ctx, 1, 1);		/* 00000001 ZETA_ENABLE */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A3C */
+ 	if (dev_priv->chipset == 0x86 || dev_priv->chipset == 0x92 || dev_priv->chipset == 0x98 || dev_priv->chipset >= 0xa0) {
+-		xf_emit(ctx, 1, 4);
+-		xf_emit(ctx, 1, 0x400);
+-		xf_emit(ctx, 1, 0x300);
+-		xf_emit(ctx, 1, 0x1001);
++		xf_emit(ctx, 3, 0);	/* ff, ffffffff, ffffffff */
++		xf_emit(ctx, 1, 4);	/* 7 */
++		xf_emit(ctx, 1, 0x400);	/* fffffff */
++		xf_emit(ctx, 1, 0x300);	/* ffff */
++		xf_emit(ctx, 1, 0x1001);	/* 1fff */
+ 		if (dev_priv->chipset != 0xa0) {
+-			if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-				xf_emit(ctx, 1, 0);
++			if (IS_NVA3F(dev_priv->chipset))
++				xf_emit(ctx, 1, 0);	/* 0000000f UNK15C8 */
+ 			else
+-				xf_emit(ctx, 1, 0x15);
++				xf_emit(ctx, 1, 0x15);	/* ff */
+ 		}
+-		xf_emit(ctx, 3, 0);
+ 	}
+-	xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 2);
+-	xf_emit(ctx, 8, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0x10);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 0x13, 0);
+-	xf_emit(ctx, 1, 0x10);
+-	xf_emit(ctx, 0x10, 0);
+-	xf_emit(ctx, 0x10, 0x3f800000);
+-	xf_emit(ctx, 0x19, 0);
+-	xf_emit(ctx, 1, 0x10);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x3f);
+-	xf_emit(ctx, 6, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 1);
++	xf_emit(ctx, 1, 0);		/* 00000007 MULTISAMPLE_SAMPLES_LOG2 */
++	xf_emit(ctx, 1, 0);		/* 00000001 tesla UNK1534 */
++	xf_emit(ctx, 1, 0);		/* 00000007 STENCIL_BACK_FUNC_FUNC */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_BACK_FUNC_MASK */
++	xf_emit(ctx, 1, 0);		/* ffff0ff3 */
++	xf_emit(ctx, 1, 2);		/* 00000003 tesla UNK143C */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_BOUNDS_EN */
++	xf_emit(ctx, 1, 0);		/* 00000007 DEPTH_TEST_FUNC */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_TEST_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_WRITE_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000007 STENCIL_FRONT_FUNC_FUNC */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_FRONT_FUNC_MASK */
++	xf_emit(ctx, 1, 0);		/* 00000001 STENCIL_FRONT_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 STENCIL_BACK_ENABLE */
++	xf_emit(ctx, 1, 1);		/* 00000001 tesla UNK15B4 */
++	xf_emit(ctx, 1, 0x10);		/* 7f/ff VIEW_VOLUME_CLIP_CTRL */
++	xf_emit(ctx, 1, 0);		/* 0000000f ZETA_FORMAT */
++	xf_emit(ctx, 1, 1);		/* 00000001 ZETA_ENABLE */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A3C */
++	xf_emit(ctx, 1, 0);		/* 00000001 tesla UNK1534 */
++	xf_emit(ctx, 1, 0);		/* 00000001 tesla UNK1900 */
++	xf_emit(ctx, 1, 0);		/* 00000007 STENCIL_BACK_FUNC_FUNC */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_BACK_FUNC_MASK */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_BACK_FUNC_REF */
++	xf_emit(ctx, 2, 0);		/* ffffffff DEPTH_BOUNDS */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_BOUNDS_EN */
++	xf_emit(ctx, 1, 0);		/* 00000007 DEPTH_TEST_FUNC */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_TEST_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_WRITE_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 0000000f */
++	xf_emit(ctx, 1, 0);		/* 00000001 tesla UNK0FB0 */
++	xf_emit(ctx, 1, 0);		/* 00000007 STENCIL_FRONT_FUNC_FUNC */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_FRONT_FUNC_MASK */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_FRONT_FUNC_REF */
++	xf_emit(ctx, 1, 0);		/* 00000001 STENCIL_FRONT_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 STENCIL_BACK_ENABLE */
++	xf_emit(ctx, 1, 0x10);		/* 7f/ff VIEW_VOLUME_CLIP_CTRL */
++	xf_emit(ctx, 0x10, 0);		/* ffffffff DEPTH_RANGE_NEAR */
++	xf_emit(ctx, 0x10, 0x3f800000);	/* ffffffff DEPTH_RANGE_FAR */
++	xf_emit(ctx, 1, 0);		/* 0000000f ZETA_FORMAT */
++	xf_emit(ctx, 1, 0);		/* 00000007 MULTISAMPLE_SAMPLES_LOG2 */
++	xf_emit(ctx, 1, 0);		/* 00000007 STENCIL_BACK_FUNC_FUNC */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_BACK_FUNC_MASK */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_BACK_FUNC_REF */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_BACK_MASK */
++	xf_emit(ctx, 3, 0);		/* 00000007 STENCIL_BACK_OP_FAIL, ZFAIL, ZPASS */
++	xf_emit(ctx, 2, 0);		/* ffffffff DEPTH_BOUNDS */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_BOUNDS_EN */
++	xf_emit(ctx, 1, 0);		/* 00000007 DEPTH_TEST_FUNC */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_TEST_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_WRITE_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 000000ff CLEAR_STENCIL */
++	xf_emit(ctx, 1, 0);		/* 00000007 STENCIL_FRONT_FUNC_FUNC */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_FRONT_FUNC_MASK */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_FRONT_FUNC_REF */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_FRONT_MASK */
++	xf_emit(ctx, 3, 0);		/* 00000007 STENCIL_FRONT_OP_FAIL, ZFAIL, ZPASS */
++	xf_emit(ctx, 1, 0);		/* 00000001 STENCIL_FRONT_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 STENCIL_BACK_ENABLE */
++	xf_emit(ctx, 1, 0x10);		/* 7f/ff VIEW_VOLUME_CLIP_CTRL */
++	xf_emit(ctx, 1, 0);		/* 0000000f ZETA_FORMAT */
++	xf_emit(ctx, 1, 0x3f);		/* 0000003f UNK1590 */
++	xf_emit(ctx, 1, 0);		/* 00000007 MULTISAMPLE_SAMPLES_LOG2 */
++	xf_emit(ctx, 1, 0);		/* 00000001 tesla UNK1534 */
++	xf_emit(ctx, 2, 0);		/* ffff0ff3, ffff */
++	xf_emit(ctx, 1, 0);		/* 00000001 tesla UNK0FB0 */
++	xf_emit(ctx, 1, 0);		/* 0001ffff GP_BUILTIN_RESULT_EN */
++	xf_emit(ctx, 1, 1);		/* 00000001 tesla UNK15B4 */
++	xf_emit(ctx, 1, 0);		/* 0000000f ZETA_FORMAT */
++	xf_emit(ctx, 1, 1);		/* 00000001 ZETA_ENABLE */
++	xf_emit(ctx, 1, 0);		/* ffffffff CLEAR_DEPTH */
++	xf_emit(ctx, 1, 1);		/* 00000001 tesla UNK19CC */
+ 	if (dev_priv->chipset >= 0xa0) {
+ 		xf_emit(ctx, 2, 0);
+ 		xf_emit(ctx, 1, 0x1001);
+ 		xf_emit(ctx, 0xb, 0);
+ 	} else {
+-		xf_emit(ctx, 0xc, 0);
++		xf_emit(ctx, 1, 0);	/* 00000007 */
++		xf_emit(ctx, 1, 0);	/* 00000001 tesla UNK1534 */
++		xf_emit(ctx, 1, 0);	/* 00000007 MULTISAMPLE_SAMPLES_LOG2 */
++		xf_emit(ctx, 8, 0);	/* 00000001 BLEND_ENABLE */
++		xf_emit(ctx, 1, 0);	/* ffff0ff3 */
+ 	}
+-	xf_emit(ctx, 1, 0x11);
+-	xf_emit(ctx, 7, 0);
+-	xf_emit(ctx, 1, 0xf);
+-	xf_emit(ctx, 7, 0);
+-	xf_emit(ctx, 1, 0x11);
+-	if (dev_priv->chipset == 0x50)
+-		xf_emit(ctx, 4, 0);
+-	else
+-		xf_emit(ctx, 6, 0);
+-	xf_emit(ctx, 3, 1);
+-	xf_emit(ctx, 1, 2);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 2);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, magic2);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x0fac6881);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+-		xf_emit(ctx, 1, 0);
+-		xf_emit(ctx, 0x18, 1);
+-		xf_emit(ctx, 8, 2);
+-		xf_emit(ctx, 8, 1);
+-		xf_emit(ctx, 8, 2);
+-		xf_emit(ctx, 8, 1);
+-		xf_emit(ctx, 3, 0);
+-		xf_emit(ctx, 1, 1);
+-		xf_emit(ctx, 5, 0);
+-		xf_emit(ctx, 1, 1);
+-		xf_emit(ctx, 0x16, 0);
++	xf_emit(ctx, 1, 0x11);		/* 3f/7f RT_FORMAT */
++	xf_emit(ctx, 7, 0);		/* 3f/7f RT_FORMAT */
++	xf_emit(ctx, 1, 0xf);		/* 0000000f COLOR_MASK */
++	xf_emit(ctx, 7, 0);		/* 0000000f COLOR_MASK */
++	xf_emit(ctx, 1, 0x11);		/* 3f/7f */
++	xf_emit(ctx, 1, 0);		/* 00000001 LOGIC_OP_ENABLE */
++	if (dev_priv->chipset != 0x50) {
++		xf_emit(ctx, 1, 0);	/* 0000000f LOGIC_OP */
++		xf_emit(ctx, 1, 0);	/* 000000ff */
++	}
++	xf_emit(ctx, 1, 0);		/* 00000007 OPERATION */
++	xf_emit(ctx, 1, 0);		/* ff/3ff */
++	xf_emit(ctx, 1, 0);		/* 00000003 UNK0F90 */
++	xf_emit(ctx, 2, 1);		/* 00000007 BLEND_EQUATION_RGB, ALPHA */
++	xf_emit(ctx, 1, 1);		/* 00000001 UNK133C */
++	xf_emit(ctx, 1, 2);		/* 0000001f BLEND_FUNC_SRC_RGB */
++	xf_emit(ctx, 1, 1);		/* 0000001f BLEND_FUNC_DST_RGB */
++	xf_emit(ctx, 1, 2);		/* 0000001f BLEND_FUNC_SRC_ALPHA */
++	xf_emit(ctx, 1, 1);		/* 0000001f BLEND_FUNC_DST_ALPHA */
++	xf_emit(ctx, 1, 0);		/* 00000001 */
++	xf_emit(ctx, 1, magic2);	/* 001fffff tesla UNK0F78 */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A3C */
++	xf_emit(ctx, 1, 0x0fac6881);	/* 0fffffff RT_CONTROL */
++	if (IS_NVA3F(dev_priv->chipset)) {
++		xf_emit(ctx, 1, 0);	/* 00000001 tesla UNK12E4 */
++		xf_emit(ctx, 8, 1);	/* 00000007 IBLEND_EQUATION_RGB */
++		xf_emit(ctx, 8, 1);	/* 00000007 IBLEND_EQUATION_ALPHA */
++		xf_emit(ctx, 8, 1);	/* 00000001 IBLEND_UNK00 */
++		xf_emit(ctx, 8, 2);	/* 0000001f IBLEND_FUNC_SRC_RGB */
++		xf_emit(ctx, 8, 1);	/* 0000001f IBLEND_FUNC_DST_RGB */
++		xf_emit(ctx, 8, 2);	/* 0000001f IBLEND_FUNC_SRC_ALPHA */
++		xf_emit(ctx, 8, 1);	/* 0000001f IBLEND_FUNC_DST_ALPHA */
++		xf_emit(ctx, 1, 0);	/* 00000001 tesla UNK1140 */
++		xf_emit(ctx, 2, 0);	/* 00000001 */
++		xf_emit(ctx, 1, 1);	/* 0000001f tesla UNK169C */
++		xf_emit(ctx, 1, 0);	/* 0000000f */
++		xf_emit(ctx, 1, 0);	/* 00000003 */
++		xf_emit(ctx, 1, 0);	/* ffffffff */
++		xf_emit(ctx, 2, 0);	/* 00000001 */
++		xf_emit(ctx, 1, 1);	/* 0000001f tesla UNK169C */
++		xf_emit(ctx, 1, 0);	/* 00000001 */
++		xf_emit(ctx, 1, 0);	/* 000003ff */
++	} else if (dev_priv->chipset >= 0xa0) {
++		xf_emit(ctx, 2, 0);	/* 00000001 */
++		xf_emit(ctx, 1, 0);	/* 00000007 */
++		xf_emit(ctx, 1, 0);	/* 00000003 */
++		xf_emit(ctx, 1, 0);	/* ffffffff */
++		xf_emit(ctx, 2, 0);	/* 00000001 */
+ 	} else {
+-		if (dev_priv->chipset >= 0xa0)
+-			xf_emit(ctx, 0x1b, 0);
+-		else
+-			xf_emit(ctx, 0x15, 0);
++		xf_emit(ctx, 1, 0);	/* 00000007 MULTISAMPLE_SAMPLES_LOG2 */
++		xf_emit(ctx, 1, 0);	/* 00000003 tesla UNK1430 */
++		xf_emit(ctx, 1, 0);	/* ffffffff tesla UNK1A3C */
+ 	}
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 2);
+-	xf_emit(ctx, 2, 1);
+-	xf_emit(ctx, 1, 2);
+-	xf_emit(ctx, 2, 1);
++	xf_emit(ctx, 4, 0);		/* ffffffff CLEAR_COLOR */
++	xf_emit(ctx, 4, 0);		/* ffffffff BLEND_COLOR A R G B */
++	xf_emit(ctx, 1, 0);		/* 00000fff eng2d UNK2B0 */
+ 	if (dev_priv->chipset >= 0xa0)
+-		xf_emit(ctx, 4, 0);
+-	else
+-		xf_emit(ctx, 3, 0);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+-		xf_emit(ctx, 0x10, 1);
+-		xf_emit(ctx, 8, 2);
+-		xf_emit(ctx, 0x10, 1);
+-		xf_emit(ctx, 8, 2);
+-		xf_emit(ctx, 8, 1);
+-		xf_emit(ctx, 3, 0);
++		xf_emit(ctx, 2, 0);	/* 00000001 */
++	xf_emit(ctx, 1, 0);		/* 000003ff */
++	xf_emit(ctx, 8, 0);		/* 00000001 BLEND_ENABLE */
++	xf_emit(ctx, 1, 1);		/* 00000001 UNK133C */
++	xf_emit(ctx, 1, 2);		/* 0000001f BLEND_FUNC_SRC_RGB */
++	xf_emit(ctx, 1, 1);		/* 0000001f BLEND_FUNC_DST_RGB */
++	xf_emit(ctx, 1, 1);		/* 00000007 BLEND_EQUATION_RGB */
++	xf_emit(ctx, 1, 2);		/* 0000001f BLEND_FUNC_SRC_ALPHA */
++	xf_emit(ctx, 1, 1);		/* 0000001f BLEND_FUNC_DST_ALPHA */
++	xf_emit(ctx, 1, 1);		/* 00000007 BLEND_EQUATION_ALPHA */
++	xf_emit(ctx, 1, 0);		/* 00000001 UNK19C0 */
++	xf_emit(ctx, 1, 0);		/* 00000001 LOGIC_OP_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 0000000f LOGIC_OP */
++	if (dev_priv->chipset >= 0xa0)
++		xf_emit(ctx, 1, 0);	/* 00000001 UNK12E4? NVA3+ only? */
++	if (IS_NVA3F(dev_priv->chipset)) {
++		xf_emit(ctx, 8, 1);	/* 00000001 IBLEND_UNK00 */
++		xf_emit(ctx, 8, 1);	/* 00000007 IBLEND_EQUATION_RGB */
++		xf_emit(ctx, 8, 2);	/* 0000001f IBLEND_FUNC_SRC_RGB */
++		xf_emit(ctx, 8, 1);	/* 0000001f IBLEND_FUNC_DST_RGB */
++		xf_emit(ctx, 8, 1);	/* 00000007 IBLEND_EQUATION_ALPHA */
++		xf_emit(ctx, 8, 2);	/* 0000001f IBLEND_FUNC_SRC_ALPHA */
++		xf_emit(ctx, 8, 1);	/* 0000001f IBLEND_FUNC_DST_ALPHA */
++		xf_emit(ctx, 1, 0);	/* 00000001 tesla UNK15C4 */
++		xf_emit(ctx, 1, 0);	/* 00000001 */
++		xf_emit(ctx, 1, 0);	/* 00000001 tesla UNK1140 */
+ 	}
+-	xf_emit(ctx, 1, 0x11);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 0x5b, 0);
++	xf_emit(ctx, 1, 0x11);		/* 3f/7f DST_FORMAT */
++	xf_emit(ctx, 1, 1);		/* 00000001 DST_LINEAR */
++	xf_emit(ctx, 1, 0);		/* 00000007 PATTERN_COLOR_FORMAT */
++	xf_emit(ctx, 2, 0);		/* ffffffff PATTERN_MONO_COLOR */
++	xf_emit(ctx, 1, 0);		/* 00000001 PATTERN_MONO_FORMAT */
++	xf_emit(ctx, 2, 0);		/* ffffffff PATTERN_MONO_BITMAP */
++	xf_emit(ctx, 1, 0);		/* 00000003 PATTERN_SELECT */
++	xf_emit(ctx, 1, 0);		/* 000000ff ROP */
++	xf_emit(ctx, 1, 0);		/* ffffffff BETA1 */
++	xf_emit(ctx, 1, 0);		/* ffffffff BETA4 */
++	xf_emit(ctx, 1, 0);		/* 00000007 OPERATION */
++	xf_emit(ctx, 0x50, 0);		/* 10x ffffff, ffffff, ffffff, ffffff, 3 PATTERN */
+ }
+ 
+ static void
+-nv50_graph_construct_xfer_tp_x1(struct nouveau_grctx *ctx)
++nv50_graph_construct_xfer_unk84xx(struct nouveau_grctx *ctx)
+ {
+ 	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+ 	int magic3;
+-	if (dev_priv->chipset == 0x50)
++	switch (dev_priv->chipset) {
++	case 0x50:
+ 		magic3 = 0x1000;
+-	else if (dev_priv->chipset == 0x86 || dev_priv->chipset == 0x98 || dev_priv->chipset >= 0xa8)
++		break;
++	case 0x86:
++	case 0x98:
++	case 0xa8:
++	case 0xaa:
++	case 0xac:
++	case 0xaf:
+ 		magic3 = 0x1e00;
+-	else
++		break;
++	default:
+ 		magic3 = 0;
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 4);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 0x24, 0);
++	}
++	xf_emit(ctx, 1, 0);		/* 00000001 GP_ENABLE */
++	xf_emit(ctx, 1, 4);		/* 7f/ff[NVA0+] VP_REG_ALLOC_RESULT */
++	xf_emit(ctx, 1, 0);		/* 00000001 GP_ENABLE */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A30 */
++	xf_emit(ctx, 1, 0);		/* 111/113[NVA0+] */
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 0x1f, 0);	/* ffffffff */
+ 	else if (dev_priv->chipset >= 0xa0)
+-		xf_emit(ctx, 0x14, 0);
++		xf_emit(ctx, 0x0f, 0);	/* ffffffff */
+ 	else
+-		xf_emit(ctx, 0x15, 0);
+-	xf_emit(ctx, 2, 4);
++		xf_emit(ctx, 0x10, 0);	/* fffffff VP_RESULT_MAP_1 up */
++	xf_emit(ctx, 2, 0);		/* f/1f[NVA3], fffffff/ffffffff[NVA0+] */
++	xf_emit(ctx, 1, 4);		/* 7f/ff VP_REG_ALLOC_RESULT */
++	xf_emit(ctx, 1, 4);		/* 7f/ff VP_RESULT_MAP_SIZE */
+ 	if (dev_priv->chipset >= 0xa0)
+-		xf_emit(ctx, 1, 0x03020100);
++		xf_emit(ctx, 1, 0x03020100);	/* ffffffff */
+ 	else
+-		xf_emit(ctx, 1, 0x00608080);
+-	xf_emit(ctx, 4, 0);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 2, 4);
+-	xf_emit(ctx, 1, 0x80);
++		xf_emit(ctx, 1, 0x00608080);	/* fffffff VP_RESULT_MAP_0 */
++	xf_emit(ctx, 1, 0);		/* 00000001 GP_ENABLE */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A30 */
++	xf_emit(ctx, 2, 0);		/* 111/113, 7f/ff */
++	xf_emit(ctx, 1, 4);		/* 7f/ff VP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A30 */
++	xf_emit(ctx, 1, 0);		/* 00000001 GP_ENABLE */
++	xf_emit(ctx, 1, 4);		/* 000000ff GP_REG_ALLOC_RESULT */
++	xf_emit(ctx, 1, 4);		/* 000000ff GP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 0x80);		/* 0000ffff GP_VERTEX_OUTPUT_COUNT */
+ 	if (magic3)
+-		xf_emit(ctx, 1, magic3);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 0x24, 0);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 1, 0x80);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 1, 0x03020100);
+-	xf_emit(ctx, 1, 3);
++		xf_emit(ctx, 1, magic3);	/* 00007fff tesla UNK141C */
++	xf_emit(ctx, 1, 4);		/* 7f/ff VP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A30 */
++	xf_emit(ctx, 1, 0);		/* 111/113 */
++	xf_emit(ctx, 0x1f, 0);		/* ffffffff GP_RESULT_MAP_1 up */
++	xf_emit(ctx, 1, 0);		/* 0000001f */
++	xf_emit(ctx, 1, 0);		/* ffffffff */
++	xf_emit(ctx, 1, 0);		/* 00000001 GP_ENABLE */
++	xf_emit(ctx, 1, 4);		/* 000000ff GP_REG_ALLOC_RESULT */
++	xf_emit(ctx, 1, 0x80);		/* 0000ffff GP_VERTEX_OUTPUT_COUNT */
++	xf_emit(ctx, 1, 4);		/* 000000ff GP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 0x03020100);	/* ffffffff GP_RESULT_MAP_0 */
++	xf_emit(ctx, 1, 3);		/* 00000003 GP_OUTPUT_PRIMITIVE_TYPE */
+ 	if (magic3)
+-		xf_emit(ctx, 1, magic3);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 4, 0);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 1, 3);
+-	xf_emit(ctx, 3, 0);
+-	xf_emit(ctx, 1, 4);
++		xf_emit(ctx, 1, magic3);	/* 7fff tesla UNK141C */
++	xf_emit(ctx, 1, 4);		/* 7f/ff VP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 0);		/* 00000001 PROVOKING_VERTEX_LAST */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A30 */
++	xf_emit(ctx, 1, 0);		/* 111/113 */
++	xf_emit(ctx, 1, 0);		/* 00000001 GP_ENABLE */
++	xf_emit(ctx, 1, 4);		/* 000000ff GP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 3);		/* 00000003 GP_OUTPUT_PRIMITIVE_TYPE */
++	xf_emit(ctx, 1, 0);		/* 00000001 PROVOKING_VERTEX_LAST */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A30 */
++	xf_emit(ctx, 1, 0);		/* 00000003 tesla UNK13A0 */
++	xf_emit(ctx, 1, 4);		/* 7f/ff VP_REG_ALLOC_RESULT */
++	xf_emit(ctx, 1, 0);		/* 00000001 GP_ENABLE */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A30 */
++	xf_emit(ctx, 1, 0);		/* 111/113 */
+ 	if (dev_priv->chipset == 0x94 || dev_priv->chipset == 0x96)
+-		xf_emit(ctx, 0x1024, 0);
++		xf_emit(ctx, 0x1020, 0);	/* 4 x (0x400 x 0xffffffff, ff, 0, 0, 0, 4 x ffffffff) */
+ 	else if (dev_priv->chipset < 0xa0)
+-		xf_emit(ctx, 0xa24, 0);
+-	else if (dev_priv->chipset == 0xa0 || dev_priv->chipset >= 0xaa)
+-		xf_emit(ctx, 0x214, 0);
++		xf_emit(ctx, 0xa20, 0);	/* 4 x (0x280 x 0xffffffff, ff, 0, 0, 0, 4 x ffffffff) */
++	else if (!IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 0x210, 0);	/* ffffffff */
+ 	else
+-		xf_emit(ctx, 0x414, 0);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 1, 3);
+-	xf_emit(ctx, 2, 0);
++		xf_emit(ctx, 0x410, 0);	/* ffffffff */
++	xf_emit(ctx, 1, 0);		/* 00000001 GP_ENABLE */
++	xf_emit(ctx, 1, 4);		/* 000000ff GP_RESULT_MAP_SIZE */
++	xf_emit(ctx, 1, 3);		/* 00000003 GP_OUTPUT_PRIMITIVE_TYPE */
++	xf_emit(ctx, 1, 0);		/* 00000001 PROVOKING_VERTEX_LAST */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A30 */
+ }
+ 
+ static void
+-nv50_graph_construct_xfer_tp_x2(struct nouveau_grctx *ctx)
++nv50_graph_construct_xfer_tprop(struct nouveau_grctx *ctx)
+ {
+ 	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+ 	int magic1, magic2;
+ 	if (dev_priv->chipset == 0x50) {
+ 		magic1 = 0x3ff;
+ 		magic2 = 0x00003e60;
+-	} else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa) {
++	} else if (!IS_NVA3F(dev_priv->chipset)) {
+ 		magic1 = 0x7ff;
+ 		magic2 = 0x001ffe67;
+ 	} else {
+ 		magic1 = 0x7ff;
+ 		magic2 = 0x00087e67;
+ 	}
+-	xf_emit(ctx, 3, 0);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 0xc, 0);
+-	xf_emit(ctx, 1, 0xf);
+-	xf_emit(ctx, 0xb, 0);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 4, 0xffff);
+-	xf_emit(ctx, 8, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 3, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 5, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 2, 0);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+-		xf_emit(ctx, 1, 3);
+-		xf_emit(ctx, 1, 0);
+-	} else if (dev_priv->chipset >= 0xa0)
+-		xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 0xa, 0);
+-	xf_emit(ctx, 2, 1);
+-	xf_emit(ctx, 1, 2);
+-	xf_emit(ctx, 2, 1);
+-	xf_emit(ctx, 1, 2);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+-		xf_emit(ctx, 1, 0);
+-		xf_emit(ctx, 0x18, 1);
+-		xf_emit(ctx, 8, 2);
+-		xf_emit(ctx, 8, 1);
+-		xf_emit(ctx, 8, 2);
+-		xf_emit(ctx, 8, 1);
+-		xf_emit(ctx, 1, 0);
++	xf_emit(ctx, 1, 0);		/* 00000007 ALPHA_TEST_FUNC */
++	xf_emit(ctx, 1, 0);		/* ffffffff ALPHA_TEST_REF */
++	xf_emit(ctx, 1, 0);		/* 00000001 ALPHA_TEST_ENABLE */
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 1, 1);	/* 0000000f UNK16A0 */
++	xf_emit(ctx, 1, 0);		/* 7/f MULTISAMPLE_SAMPLES_LOG2 */
++	xf_emit(ctx, 1, 0);		/* 00000001 tesla UNK1534 */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_BACK_MASK */
++	xf_emit(ctx, 3, 0);		/* 00000007 STENCIL_BACK_OP_FAIL, ZFAIL, ZPASS */
++	xf_emit(ctx, 4, 0);		/* ffffffff BLEND_COLOR */
++	xf_emit(ctx, 1, 0);		/* 00000001 UNK19C0 */
++	xf_emit(ctx, 1, 0);		/* 00000001 UNK0FDC */
++	xf_emit(ctx, 1, 0xf);		/* 0000000f COLOR_MASK */
++	xf_emit(ctx, 7, 0);		/* 0000000f COLOR_MASK */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_TEST_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_WRITE_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 LOGIC_OP_ENABLE */
++	xf_emit(ctx, 1, 0);		/* ff[NV50]/3ff[NV84+] */
++	xf_emit(ctx, 1, 4);		/* 00000007 FP_CONTROL */
++	xf_emit(ctx, 4, 0xffff);	/* 0000ffff MSAA_MASK */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_FRONT_MASK */
++	xf_emit(ctx, 3, 0);		/* 00000007 STENCIL_FRONT_OP_FAIL, ZFAIL, ZPASS */
++	xf_emit(ctx, 1, 0);		/* 00000001 STENCIL_FRONT_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 STENCIL_BACK_ENABLE */
++	xf_emit(ctx, 2, 0);		/* 00007fff WINDOW_OFFSET_XY */
++	xf_emit(ctx, 1, 1);		/* 00000001 tesla UNK19CC */
++	xf_emit(ctx, 1, 0);		/* 7 */
++	xf_emit(ctx, 1, 0);		/* 00000001 SAMPLECNT_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 0000000f ZETA_FORMAT */
++	xf_emit(ctx, 1, 1);		/* 00000001 ZETA_ENABLE */
++	xf_emit(ctx, 1, 0);		/* ffffffff COLOR_KEY */
++	xf_emit(ctx, 1, 0);		/* 00000001 COLOR_KEY_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000007 COLOR_KEY_FORMAT */
++	xf_emit(ctx, 2, 0);		/* ffffffff SIFC_BITMAP_COLOR */
++	xf_emit(ctx, 1, 1);		/* 00000001 SIFC_BITMAP_WRITE_BIT0_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000007 ALPHA_TEST_FUNC */
++	xf_emit(ctx, 1, 0);		/* 00000001 ALPHA_TEST_ENABLE */
++	if (IS_NVA3F(dev_priv->chipset)) {
++		xf_emit(ctx, 1, 3);	/* 00000003 tesla UNK16B4 */
++		xf_emit(ctx, 1, 0);	/* 00000003 */
++		xf_emit(ctx, 1, 0);	/* 00000003 tesla UNK1298 */
++	} else if (dev_priv->chipset >= 0xa0) {
++		xf_emit(ctx, 1, 1);	/* 00000001 tesla UNK16B4 */
++		xf_emit(ctx, 1, 0);	/* 00000003 */
++	} else {
++		xf_emit(ctx, 1, 0);	/* 00000003 MULTISAMPLE_CTRL */
+ 	}
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x11);
+-	xf_emit(ctx, 7, 0);
+-	xf_emit(ctx, 1, 0x0fac6881);
+-	xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 3, 0);
+-	xf_emit(ctx, 1, 0x11);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 3, 0xcf);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 0xa, 0);
+-	xf_emit(ctx, 2, 1);
+-	xf_emit(ctx, 1, 2);
+-	xf_emit(ctx, 2, 1);
+-	xf_emit(ctx, 1, 2);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 8, 1);
+-	xf_emit(ctx, 1, 0x11);
+-	xf_emit(ctx, 7, 0);
+-	xf_emit(ctx, 1, 0x0fac6881);
+-	xf_emit(ctx, 1, 0xf);
+-	xf_emit(ctx, 7, 0);
+-	xf_emit(ctx, 1, magic2);
+-	xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 0x11);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 2, 1);
+-	else
+-		xf_emit(ctx, 1, 1);
++	xf_emit(ctx, 1, 0);		/* 00000001 tesla UNK1534 */
++	xf_emit(ctx, 8, 0);		/* 00000001 BLEND_ENABLE */
++	xf_emit(ctx, 1, 1);		/* 0000001f BLEND_FUNC_DST_ALPHA */
++	xf_emit(ctx, 1, 1);		/* 00000007 BLEND_EQUATION_ALPHA */
++	xf_emit(ctx, 1, 2);		/* 0000001f BLEND_FUNC_SRC_ALPHA */
++	xf_emit(ctx, 1, 1);		/* 0000001f BLEND_FUNC_DST_RGB */
++	xf_emit(ctx, 1, 1);		/* 00000007 BLEND_EQUATION_RGB */
++	xf_emit(ctx, 1, 2);		/* 0000001f BLEND_FUNC_SRC_RGB */
++	if (IS_NVA3F(dev_priv->chipset)) {
++		xf_emit(ctx, 1, 0);	/* 00000001 UNK12E4 */
++		xf_emit(ctx, 8, 1);	/* 00000007 IBLEND_EQUATION_RGB */
++		xf_emit(ctx, 8, 1);	/* 00000007 IBLEND_EQUATION_ALPHA */
++		xf_emit(ctx, 8, 1);	/* 00000001 IBLEND_UNK00 */
++		xf_emit(ctx, 8, 2);	/* 0000001f IBLEND_SRC_RGB */
++		xf_emit(ctx, 8, 1);	/* 0000001f IBLEND_DST_RGB */
++		xf_emit(ctx, 8, 2);	/* 0000001f IBLEND_SRC_ALPHA */
++		xf_emit(ctx, 8, 1);	/* 0000001f IBLEND_DST_ALPHA */
++		xf_emit(ctx, 1, 0);	/* 00000001 UNK1140 */
++	}
++	xf_emit(ctx, 1, 1);		/* 00000001 UNK133C */
++	xf_emit(ctx, 1, 0);		/* ffff0ff3 */
++	xf_emit(ctx, 1, 0x11);		/* 3f/7f RT_FORMAT */
++	xf_emit(ctx, 7, 0);		/* 3f/7f RT_FORMAT */
++	xf_emit(ctx, 1, 0x0fac6881);	/* 0fffffff RT_CONTROL */
++	xf_emit(ctx, 1, 0);		/* 00000001 LOGIC_OP_ENABLE */
++	xf_emit(ctx, 1, 0);		/* ff/3ff */
++	xf_emit(ctx, 1, 4);		/* 00000007 FP_CONTROL */
++	xf_emit(ctx, 1, 0);		/* 00000003 UNK0F90 */
++	xf_emit(ctx, 1, 0);		/* 00000001 FRAMEBUFFER_SRGB */
++	xf_emit(ctx, 1, 0);		/* 7 */
++	xf_emit(ctx, 1, 0x11);		/* 3f/7f DST_FORMAT */
++	xf_emit(ctx, 1, 1);		/* 00000001 DST_LINEAR */
++	xf_emit(ctx, 1, 0);		/* 00000007 OPERATION */
++	xf_emit(ctx, 1, 0xcf);		/* 000000ff SIFC_FORMAT */
++	xf_emit(ctx, 1, 0xcf);		/* 000000ff DRAW_COLOR_FORMAT */
++	xf_emit(ctx, 1, 0xcf);		/* 000000ff SRC_FORMAT */
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 1, 1);	/* 0000001f tesla UNK169C */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A3C */
++	xf_emit(ctx, 1, 0);		/* 7/f[NVA3] MULTISAMPLE_SAMPLES_LOG2 */
++	xf_emit(ctx, 8, 0);		/* 00000001 BLEND_ENABLE */
++	xf_emit(ctx, 1, 1);		/* 0000001f BLEND_FUNC_DST_ALPHA */
++	xf_emit(ctx, 1, 1);		/* 00000007 BLEND_EQUATION_ALPHA */
++	xf_emit(ctx, 1, 2);		/* 0000001f BLEND_FUNC_SRC_ALPHA */
++	xf_emit(ctx, 1, 1);		/* 0000001f BLEND_FUNC_DST_RGB */
++	xf_emit(ctx, 1, 1);		/* 00000007 BLEND_EQUATION_RGB */
++	xf_emit(ctx, 1, 2);		/* 0000001f BLEND_FUNC_SRC_RGB */
++	xf_emit(ctx, 1, 1);		/* 00000001 UNK133C */
++	xf_emit(ctx, 1, 0);		/* ffff0ff3 */
++	xf_emit(ctx, 8, 1);		/* 00000001 UNK19E0 */
++	xf_emit(ctx, 1, 0x11);		/* 3f/7f RT_FORMAT */
++	xf_emit(ctx, 7, 0);		/* 3f/7f RT_FORMAT */
++	xf_emit(ctx, 1, 0x0fac6881);	/* 0fffffff RT_CONTROL */
++	xf_emit(ctx, 1, 0xf);		/* 0000000f COLOR_MASK */
++	xf_emit(ctx, 7, 0);		/* 0000000f COLOR_MASK */
++	xf_emit(ctx, 1, magic2);	/* 001fffff tesla UNK0F78 */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_BOUNDS_EN */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_TEST_ENABLE */
++	xf_emit(ctx, 1, 0x11);		/* 3f/7f DST_FORMAT */
++	xf_emit(ctx, 1, 1);		/* 00000001 DST_LINEAR */
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 1, 1);	/* 0000001f tesla UNK169C */
+ 	if(dev_priv->chipset == 0x50)
+-		xf_emit(ctx, 1, 0);
++		xf_emit(ctx, 1, 0);	/* ff */
+ 	else
+-		xf_emit(ctx, 3, 0);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 5, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 4, 0);
+-	xf_emit(ctx, 1, 0x11);
+-	xf_emit(ctx, 7, 0);
+-	xf_emit(ctx, 1, 0x0fac6881);
+-	xf_emit(ctx, 3, 0);
+-	xf_emit(ctx, 1, 0x11);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, magic1);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 2, 0);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 0x28, 0);
+-	xf_emit(ctx, 8, 8);
+-	xf_emit(ctx, 1, 0x11);
+-	xf_emit(ctx, 7, 0);
+-	xf_emit(ctx, 1, 0x0fac6881);
+-	xf_emit(ctx, 8, 0x400);
+-	xf_emit(ctx, 8, 0x300);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0xf);
+-	xf_emit(ctx, 7, 0);
+-	xf_emit(ctx, 1, 0x20);
+-	xf_emit(ctx, 1, 0x11);
+-	xf_emit(ctx, 1, 0x100);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 0x40);
+-	xf_emit(ctx, 1, 0x100);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 3);
+-	xf_emit(ctx, 4, 0);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, magic2);
+-	xf_emit(ctx, 3, 0);
+-	xf_emit(ctx, 1, 2);
+-	xf_emit(ctx, 1, 0x0fac6881);
+-	xf_emit(ctx, 9, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 4, 0);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0x400);
+-	xf_emit(ctx, 1, 0x300);
+-	xf_emit(ctx, 1, 0x1001);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 4, 0);
+-	else
+-		xf_emit(ctx, 3, 0);
+-	xf_emit(ctx, 1, 0x11);
+-	xf_emit(ctx, 7, 0);
+-	xf_emit(ctx, 1, 0x0fac6881);
+-	xf_emit(ctx, 1, 0xf);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+-		xf_emit(ctx, 0x15, 0);
+-		xf_emit(ctx, 1, 1);
+-		xf_emit(ctx, 3, 0);
+-	} else
+-		xf_emit(ctx, 0x17, 0);
++		xf_emit(ctx, 3, 0);	/* 1, 7, 3ff */
++	xf_emit(ctx, 1, 4);		/* 00000007 FP_CONTROL */
++	xf_emit(ctx, 1, 0);		/* 00000003 UNK0F90 */
++	xf_emit(ctx, 1, 0);		/* 00000001 STENCIL_FRONT_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000007 */
++	xf_emit(ctx, 1, 0);		/* 00000001 SAMPLECNT_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 0000000f ZETA_FORMAT */
++	xf_emit(ctx, 1, 1);		/* 00000001 ZETA_ENABLE */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A3C */
++	xf_emit(ctx, 1, 0);		/* 7/f MULTISAMPLE_SAMPLES_LOG2 */
++	xf_emit(ctx, 1, 0);		/* 00000001 tesla UNK1534 */
++	xf_emit(ctx, 1, 0);		/* ffff0ff3 */
++	xf_emit(ctx, 1, 0x11);		/* 3f/7f RT_FORMAT */
++	xf_emit(ctx, 7, 0);		/* 3f/7f RT_FORMAT */
++	xf_emit(ctx, 1, 0x0fac6881);	/* 0fffffff RT_CONTROL */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_BOUNDS_EN */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_TEST_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_WRITE_ENABLE */
++	xf_emit(ctx, 1, 0x11);		/* 3f/7f DST_FORMAT */
++	xf_emit(ctx, 1, 1);		/* 00000001 DST_LINEAR */
++	xf_emit(ctx, 1, 0);		/* 000fffff BLIT_DU_DX_FRACT */
++	xf_emit(ctx, 1, 1);		/* 0001ffff BLIT_DU_DX_INT */
++	xf_emit(ctx, 1, 0);		/* 000fffff BLIT_DV_DY_FRACT */
++	xf_emit(ctx, 1, 1);		/* 0001ffff BLIT_DV_DY_INT */
++	xf_emit(ctx, 1, 0);		/* ff/3ff */
++	xf_emit(ctx, 1, magic1);	/* 3ff/7ff tesla UNK0D68 */
++	xf_emit(ctx, 1, 0);		/* 00000001 STENCIL_FRONT_ENABLE */
++	xf_emit(ctx, 1, 1);		/* 00000001 tesla UNK15B4 */
++	xf_emit(ctx, 1, 0);		/* 0000000f ZETA_FORMAT */
++	xf_emit(ctx, 1, 1);		/* 00000001 ZETA_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000007 */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A3C */
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 1, 1);	/* 0000001f tesla UNK169C */
++	xf_emit(ctx, 8, 0);		/* 0000ffff DMA_COLOR */
++	xf_emit(ctx, 1, 0);		/* 0000ffff DMA_GLOBAL */
++	xf_emit(ctx, 1, 0);		/* 0000ffff DMA_LOCAL */
++	xf_emit(ctx, 1, 0);		/* 0000ffff DMA_STACK */
++	xf_emit(ctx, 1, 0);		/* ff/3ff */
++	xf_emit(ctx, 1, 0);		/* 0000ffff DMA_DST */
++	xf_emit(ctx, 1, 0);		/* 7 */
++	xf_emit(ctx, 1, 0);		/* 7/f MULTISAMPLE_SAMPLES_LOG2 */
++	xf_emit(ctx, 1, 0);		/* ffff0ff3 */
++	xf_emit(ctx, 8, 0);		/* 000000ff RT_ADDRESS_HIGH */
++	xf_emit(ctx, 8, 0);		/* ffffffff RT_LAYER_STRIDE */
++	xf_emit(ctx, 8, 0);		/* ffffffff RT_ADDRESS_LOW */
++	xf_emit(ctx, 8, 8);		/* 0000007f RT_TILE_MODE */
++	xf_emit(ctx, 1, 0x11);		/* 3f/7f RT_FORMAT */
++	xf_emit(ctx, 7, 0);		/* 3f/7f RT_FORMAT */
++	xf_emit(ctx, 1, 0x0fac6881);	/* 0fffffff RT_CONTROL */
++	xf_emit(ctx, 8, 0x400);		/* 0fffffff RT_HORIZ */
++	xf_emit(ctx, 8, 0x300);		/* 0000ffff RT_VERT */
++	xf_emit(ctx, 1, 1);		/* 00001fff RT_ARRAY_MODE */
++	xf_emit(ctx, 1, 0xf);		/* 0000000f COLOR_MASK */
++	xf_emit(ctx, 7, 0);		/* 0000000f COLOR_MASK */
++	xf_emit(ctx, 1, 0x20);		/* 00000fff DST_TILE_MODE */
++	xf_emit(ctx, 1, 0x11);		/* 3f/7f DST_FORMAT */
++	xf_emit(ctx, 1, 0x100);		/* 0001ffff DST_HEIGHT */
++	xf_emit(ctx, 1, 0);		/* 000007ff DST_LAYER */
++	xf_emit(ctx, 1, 1);		/* 00000001 DST_LINEAR */
++	xf_emit(ctx, 1, 0);		/* ffffffff DST_ADDRESS_LOW */
++	xf_emit(ctx, 1, 0);		/* 000000ff DST_ADDRESS_HIGH */
++	xf_emit(ctx, 1, 0x40);		/* 0007ffff DST_PITCH */
++	xf_emit(ctx, 1, 0x100);		/* 0001ffff DST_WIDTH */
++	xf_emit(ctx, 1, 0);		/* 0000ffff */
++	xf_emit(ctx, 1, 3);		/* 00000003 tesla UNK15AC */
++	xf_emit(ctx, 1, 0);		/* ff/3ff */
++	xf_emit(ctx, 1, 0);		/* 0001ffff GP_BUILTIN_RESULT_EN */
++	xf_emit(ctx, 1, 0);		/* 00000003 UNK0F90 */
++	xf_emit(ctx, 1, 0);		/* 00000007 */
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 1, 1);	/* 0000001f tesla UNK169C */
++	xf_emit(ctx, 1, magic2);	/* 001fffff tesla UNK0F78 */
++	xf_emit(ctx, 1, 0);		/* 7/f MULTISAMPLE_SAMPLES_LOG2 */
++	xf_emit(ctx, 1, 0);		/* 00000001 tesla UNK1534 */
++	xf_emit(ctx, 1, 0);		/* ffff0ff3 */
++	xf_emit(ctx, 1, 2);		/* 00000003 tesla UNK143C */
++	xf_emit(ctx, 1, 0x0fac6881);	/* 0fffffff RT_CONTROL */
++	xf_emit(ctx, 1, 0);		/* 0000ffff DMA_ZETA */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_BOUNDS_EN */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_TEST_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_WRITE_ENABLE */
++	xf_emit(ctx, 2, 0);		/* ffff, ff/3ff */
++	xf_emit(ctx, 1, 0);		/* 0001ffff GP_BUILTIN_RESULT_EN */
++	xf_emit(ctx, 1, 0);		/* 00000001 STENCIL_FRONT_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 000000ff STENCIL_FRONT_MASK */
++	xf_emit(ctx, 1, 1);		/* 00000001 tesla UNK15B4 */
++	xf_emit(ctx, 1, 0);		/* 00000007 */
++	xf_emit(ctx, 1, 0);		/* ffffffff ZETA_LAYER_STRIDE */
++	xf_emit(ctx, 1, 0);		/* 000000ff ZETA_ADDRESS_HIGH */
++	xf_emit(ctx, 1, 0);		/* ffffffff ZETA_ADDRESS_LOW */
++	xf_emit(ctx, 1, 4);		/* 00000007 ZETA_TILE_MODE */
++	xf_emit(ctx, 1, 0);		/* 0000000f ZETA_FORMAT */
++	xf_emit(ctx, 1, 1);		/* 00000001 ZETA_ENABLE */
++	xf_emit(ctx, 1, 0x400);		/* 0fffffff ZETA_HORIZ */
++	xf_emit(ctx, 1, 0x300);		/* 0000ffff ZETA_VERT */
++	xf_emit(ctx, 1, 0x1001);	/* 00001fff ZETA_ARRAY_MODE */
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A3C */
++	xf_emit(ctx, 1, 0);		/* 7/f MULTISAMPLE_SAMPLES_LOG2 */
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 1, 0);	/* 00000001 */
++	xf_emit(ctx, 1, 0);		/* ffff0ff3 */
++	xf_emit(ctx, 1, 0x11);		/* 3f/7f RT_FORMAT */
++	xf_emit(ctx, 7, 0);		/* 3f/7f RT_FORMAT */
++	xf_emit(ctx, 1, 0x0fac6881);	/* 0fffffff RT_CONTROL */
++	xf_emit(ctx, 1, 0xf);		/* 0000000f COLOR_MASK */
++	xf_emit(ctx, 7, 0);		/* 0000000f COLOR_MASK */
++	xf_emit(ctx, 1, 0);		/* ff/3ff */
++	xf_emit(ctx, 8, 0);		/* 00000001 BLEND_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000003 UNK0F90 */
++	xf_emit(ctx, 1, 0);		/* 00000001 FRAMEBUFFER_SRGB */
++	xf_emit(ctx, 1, 0);		/* 7 */
++	xf_emit(ctx, 1, 0);		/* 00000001 LOGIC_OP_ENABLE */
++	if (IS_NVA3F(dev_priv->chipset)) {
++		xf_emit(ctx, 1, 0);	/* 00000001 UNK1140 */
++		xf_emit(ctx, 1, 1);	/* 0000001f tesla UNK169C */
++	}
++	xf_emit(ctx, 1, 0);		/* 7/f MULTISAMPLE_SAMPLES_LOG2 */
++	xf_emit(ctx, 1, 0);		/* 00000001 UNK1534 */
++	xf_emit(ctx, 1, 0);		/* ffff0ff3 */
+ 	if (dev_priv->chipset >= 0xa0)
+-		xf_emit(ctx, 1, 0x0fac6881);
+-	xf_emit(ctx, 1, magic2);
+-	xf_emit(ctx, 3, 0);
+-	xf_emit(ctx, 1, 0x11);
+-	xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 2, 1);
+-	xf_emit(ctx, 3, 0);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 2, 1);
+-	else
+-		xf_emit(ctx, 1, 1);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 2, 0);
+-	else if (dev_priv->chipset != 0x50)
+-		xf_emit(ctx, 1, 0);
++		xf_emit(ctx, 1, 0x0fac6881);	/* fffffff */
++	xf_emit(ctx, 1, magic2);	/* 001fffff tesla UNK0F78 */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_BOUNDS_EN */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_TEST_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_WRITE_ENABLE */
++	xf_emit(ctx, 1, 0x11);		/* 3f/7f DST_FORMAT */
++	xf_emit(ctx, 1, 0);		/* 00000001 tesla UNK0FB0 */
++	xf_emit(ctx, 1, 0);		/* ff/3ff */
++	xf_emit(ctx, 1, 4);		/* 00000007 FP_CONTROL */
++	xf_emit(ctx, 1, 0);		/* 00000001 STENCIL_FRONT_ENABLE */
++	xf_emit(ctx, 1, 1);		/* 00000001 tesla UNK15B4 */
++	xf_emit(ctx, 1, 1);		/* 00000001 tesla UNK19CC */
++	xf_emit(ctx, 1, 0);		/* 00000007 */
++	xf_emit(ctx, 1, 0);		/* 00000001 SAMPLECNT_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 0000000f ZETA_FORMAT */
++	xf_emit(ctx, 1, 1);		/* 00000001 ZETA_ENABLE */
++	if (IS_NVA3F(dev_priv->chipset)) {
++		xf_emit(ctx, 1, 1);	/* 0000001f tesla UNK169C */
++		xf_emit(ctx, 1, 0);	/* 0000000f tesla UNK15C8 */
++	}
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A3C */
++	if (dev_priv->chipset >= 0xa0) {
++		xf_emit(ctx, 3, 0);		/* 7/f, 1, ffff0ff3 */
++		xf_emit(ctx, 1, 0xfac6881);	/* fffffff */
++		xf_emit(ctx, 4, 0);		/* 1, 1, 1, 3ff */
++		xf_emit(ctx, 1, 4);		/* 7 */
++		xf_emit(ctx, 1, 0);		/* 1 */
++		xf_emit(ctx, 2, 1);		/* 1 */
++		xf_emit(ctx, 2, 0);		/* 7, f */
++		xf_emit(ctx, 1, 1);		/* 1 */
++		xf_emit(ctx, 1, 0);		/* 7/f */
++		if (IS_NVA3F(dev_priv->chipset))
++			xf_emit(ctx, 0x9, 0);	/* 1 */
++		else
++			xf_emit(ctx, 0x8, 0);	/* 1 */
++		xf_emit(ctx, 1, 0);		/* ffff0ff3 */
++		xf_emit(ctx, 8, 1);		/* 1 */
++		xf_emit(ctx, 1, 0x11);		/* 7f */
++		xf_emit(ctx, 7, 0);		/* 7f */
++		xf_emit(ctx, 1, 0xfac6881);	/* fffffff */
++		xf_emit(ctx, 1, 0xf);		/* f */
++		xf_emit(ctx, 7, 0);		/* f */
++		xf_emit(ctx, 1, 0x11);		/* 7f */
++		xf_emit(ctx, 1, 1);		/* 1 */
++		xf_emit(ctx, 5, 0);		/* 1, 7, 3ff, 3, 7 */
++		if (IS_NVA3F(dev_priv->chipset)) {
++			xf_emit(ctx, 1, 0);	/* 00000001 UNK1140 */
++			xf_emit(ctx, 1, 1);	/* 0000001f tesla UNK169C */
++		}
++	}
+ }
+ 
+ static void
+-nv50_graph_construct_xfer_tp_x3(struct nouveau_grctx *ctx)
++nv50_graph_construct_xfer_tex(struct nouveau_grctx *ctx)
+ {
+ 	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+-	xf_emit(ctx, 3, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 1);
++	xf_emit(ctx, 2, 0);		/* 1 LINKED_TSC. yes, 2. */
++	if (dev_priv->chipset != 0x50)
++		xf_emit(ctx, 1, 0);	/* 3 */
++	xf_emit(ctx, 1, 1);		/* 1ffff BLIT_DU_DX_INT */
++	xf_emit(ctx, 1, 0);		/* fffff BLIT_DU_DX_FRACT */
++	xf_emit(ctx, 1, 1);		/* 1ffff BLIT_DV_DY_INT */
++	xf_emit(ctx, 1, 0);		/* fffff BLIT_DV_DY_FRACT */
+ 	if (dev_priv->chipset == 0x50)
+-		xf_emit(ctx, 2, 0);
++		xf_emit(ctx, 1, 0);	/* 3 BLIT_CONTROL */
+ 	else
+-		xf_emit(ctx, 3, 0);
+-	xf_emit(ctx, 1, 0x2a712488);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x4085c000);
+-	xf_emit(ctx, 1, 0x40);
+-	xf_emit(ctx, 1, 0x100);
+-	xf_emit(ctx, 1, 0x10100);
+-	xf_emit(ctx, 1, 0x02800000);
++		xf_emit(ctx, 2, 0);	/* 3ff, 1 */
++	xf_emit(ctx, 1, 0x2a712488);	/* ffffffff SRC_TIC_0 */
++	xf_emit(ctx, 1, 0);		/* ffffffff SRC_TIC_1 */
++	xf_emit(ctx, 1, 0x4085c000);	/* ffffffff SRC_TIC_2 */
++	xf_emit(ctx, 1, 0x40);		/* ffffffff SRC_TIC_3 */
++	xf_emit(ctx, 1, 0x100);		/* ffffffff SRC_TIC_4 */
++	xf_emit(ctx, 1, 0x10100);	/* ffffffff SRC_TIC_5 */
++	xf_emit(ctx, 1, 0x02800000);	/* ffffffff SRC_TIC_6 */
++	xf_emit(ctx, 1, 0);		/* ffffffff SRC_TIC_7 */
++	if (dev_priv->chipset == 0x50) {
++		xf_emit(ctx, 1, 0);	/* 00000001 turing UNK358 */
++		xf_emit(ctx, 1, 0);	/* ffffffff tesla UNK1A34? */
++		xf_emit(ctx, 1, 0);	/* 00000003 turing UNK37C tesla UNK1690 */
++		xf_emit(ctx, 1, 0);	/* 00000003 BLIT_CONTROL */
++		xf_emit(ctx, 1, 0);	/* 00000001 turing UNK32C tesla UNK0F94 */
++	} else if (!IS_NVAAF(dev_priv->chipset)) {
++		xf_emit(ctx, 1, 0);	/* ffffffff tesla UNK1A34? */
++		xf_emit(ctx, 1, 0);	/* 00000003 */
++		xf_emit(ctx, 1, 0);	/* 000003ff */
++		xf_emit(ctx, 1, 0);	/* 00000003 */
++		xf_emit(ctx, 1, 0);	/* 000003ff */
++		xf_emit(ctx, 1, 0);	/* 00000003 tesla UNK1664 / turing UNK03E8 */
++		xf_emit(ctx, 1, 0);	/* 00000003 */
++		xf_emit(ctx, 1, 0);	/* 000003ff */
++	} else {
++		xf_emit(ctx, 0x6, 0);
++	}
++	xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A34 */
++	xf_emit(ctx, 1, 0);		/* 0000ffff DMA_TEXTURE */
++	xf_emit(ctx, 1, 0);		/* 0000ffff DMA_SRC */
+ }
+ 
+ static void
+-nv50_graph_construct_xfer_tp_x4(struct nouveau_grctx *ctx)
++nv50_graph_construct_xfer_unk8cxx(struct nouveau_grctx *ctx)
+ {
+ 	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+-	xf_emit(ctx, 2, 0x04e3bfdf);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x00ffff00);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 2, 1);
+-	else
+-		xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 0x00ffff00);
+-	xf_emit(ctx, 8, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0x30201000);
+-	xf_emit(ctx, 1, 0x70605040);
+-	xf_emit(ctx, 1, 0xb8a89888);
+-	xf_emit(ctx, 1, 0xf8e8d8c8);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x1a);
+-}
+-
+-static void
+-nv50_graph_construct_xfer_tp_x5(struct nouveau_grctx *ctx)
+-{
+-	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+-	xf_emit(ctx, 3, 0);
+-	xf_emit(ctx, 1, 0xfac6881);
+-	xf_emit(ctx, 4, 0);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 2, 1);
+-	xf_emit(ctx, 2, 0);
+-	xf_emit(ctx, 1, 1);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 0xb, 0);
+-	else
+-		xf_emit(ctx, 0xa, 0);
+-	xf_emit(ctx, 8, 1);
+-	xf_emit(ctx, 1, 0x11);
+-	xf_emit(ctx, 7, 0);
+-	xf_emit(ctx, 1, 0xfac6881);
+-	xf_emit(ctx, 1, 0xf);
+-	xf_emit(ctx, 7, 0);
+-	xf_emit(ctx, 1, 0x11);
+-	xf_emit(ctx, 1, 1);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+-		xf_emit(ctx, 6, 0);
+-		xf_emit(ctx, 1, 1);
+-		xf_emit(ctx, 6, 0);
+-	} else {
+-		xf_emit(ctx, 0xb, 0);
+-	}
++	xf_emit(ctx, 1, 0);		/* 00000001 UNK1534 */
++	xf_emit(ctx, 1, 0);		/* 7/f MULTISAMPLE_SAMPLES_LOG2 */
++	xf_emit(ctx, 2, 0);		/* 7, ffff0ff3 */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_TEST_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_WRITE */
++	xf_emit(ctx, 1, 0x04e3bfdf);	/* ffffffff UNK0D64 */
++	xf_emit(ctx, 1, 0x04e3bfdf);	/* ffffffff UNK0DF4 */
++	xf_emit(ctx, 1, 1);		/* 00000001 UNK15B4 */
++	xf_emit(ctx, 1, 0);		/* 00000001 LINE_STIPPLE_ENABLE */
++	xf_emit(ctx, 1, 0x00ffff00);	/* 00ffffff LINE_STIPPLE_PATTERN */
++	xf_emit(ctx, 1, 1);		/* 00000001 tesla UNK0F98 */
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 1, 1);	/* 0000001f tesla UNK169C */
++	xf_emit(ctx, 1, 0);		/* 00000003 tesla UNK1668 */
++	xf_emit(ctx, 1, 0);		/* 00000001 LINE_STIPPLE_ENABLE */
++	xf_emit(ctx, 1, 0x00ffff00);	/* 00ffffff LINE_STIPPLE_PATTERN */
++	xf_emit(ctx, 1, 0);		/* 00000001 POLYGON_SMOOTH_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 UNK1534 */
++	xf_emit(ctx, 1, 0);		/* 7/f MULTISAMPLE_SAMPLES_LOG2 */
++	xf_emit(ctx, 1, 0);		/* 00000001 tesla UNK1658 */
++	xf_emit(ctx, 1, 0);		/* 00000001 LINE_SMOOTH_ENABLE */
++	xf_emit(ctx, 1, 0);		/* ffff0ff3 */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_TEST_ENABLE */
++	xf_emit(ctx, 1, 0);		/* 00000001 DEPTH_WRITE */
++	xf_emit(ctx, 1, 1);		/* 00000001 UNK15B4 */
++	xf_emit(ctx, 1, 0);		/* 00000001 POINT_SPRITE_ENABLE */
++	xf_emit(ctx, 1, 1);		/* 00000001 tesla UNK165C */
++	xf_emit(ctx, 1, 0x30201000);	/* ffffffff tesla UNK1670 */
++	xf_emit(ctx, 1, 0x70605040);	/* ffffffff tesla UNK1670 */
++	xf_emit(ctx, 1, 0xb8a89888);	/* ffffffff tesla UNK1670 */
++	xf_emit(ctx, 1, 0xf8e8d8c8);	/* ffffffff tesla UNK1670 */
++	xf_emit(ctx, 1, 0);		/* 00000001 VERTEX_TWO_SIDE_ENABLE */
++	xf_emit(ctx, 1, 0x1a);		/* 0000001f POLYGON_MODE */
+ }
+ 
+ static void
+@@ -2193,108 +3102,136 @@ nv50_graph_construct_xfer_tp(struct nouveau_grctx *ctx)
+ {
+ 	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+ 	if (dev_priv->chipset < 0xa0) {
+-		nv50_graph_construct_xfer_tp_x1(ctx);
+-		nv50_graph_construct_xfer_tp_x2(ctx);
+-		nv50_graph_construct_xfer_tp_x3(ctx);
+-		if (dev_priv->chipset == 0x50)
+-			xf_emit(ctx, 0xf, 0);
+-		else
+-			xf_emit(ctx, 0x12, 0);
+-		nv50_graph_construct_xfer_tp_x4(ctx);
++		nv50_graph_construct_xfer_unk84xx(ctx);
++		nv50_graph_construct_xfer_tprop(ctx);
++		nv50_graph_construct_xfer_tex(ctx);
++		nv50_graph_construct_xfer_unk8cxx(ctx);
+ 	} else {
+-		nv50_graph_construct_xfer_tp_x3(ctx);
+-		if (dev_priv->chipset < 0xaa)
+-			xf_emit(ctx, 0xc, 0);
+-		else
+-			xf_emit(ctx, 0xa, 0);
+-		nv50_graph_construct_xfer_tp_x2(ctx);
+-		nv50_graph_construct_xfer_tp_x5(ctx);
+-		nv50_graph_construct_xfer_tp_x4(ctx);
+-		nv50_graph_construct_xfer_tp_x1(ctx);
++		nv50_graph_construct_xfer_tex(ctx);
++		nv50_graph_construct_xfer_tprop(ctx);
++		nv50_graph_construct_xfer_unk8cxx(ctx);
++		nv50_graph_construct_xfer_unk84xx(ctx);
+ 	}
+ }
+ 
+ static void
+-nv50_graph_construct_xfer_tp2(struct nouveau_grctx *ctx)
++nv50_graph_construct_xfer_mpc(struct nouveau_grctx *ctx)
+ {
+ 	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+-	int i, mpcnt;
+-	if (dev_priv->chipset == 0x98 || dev_priv->chipset == 0xaa)
+-		mpcnt = 1;
+-	else if (dev_priv->chipset < 0xa0 || dev_priv->chipset >= 0xa8)
+-		mpcnt = 2;
+-	else
+-		mpcnt = 3;
++	int i, mpcnt = 2;
++	switch (dev_priv->chipset) {
++		case 0x98:
++		case 0xaa:
++			mpcnt = 1;
++			break;
++		case 0x50:
++		case 0x84:
++		case 0x86:
++		case 0x92:
++		case 0x94:
++		case 0x96:
++		case 0xa8:
++		case 0xac:
++			mpcnt = 2;
++			break;
++		case 0xa0:
++		case 0xa3:
++		case 0xa5:
++		case 0xaf:
++			mpcnt = 3;
++			break;
++	}
+ 	for (i = 0; i < mpcnt; i++) {
+-		xf_emit(ctx, 1, 0);
+-		xf_emit(ctx, 1, 0x80);
+-		xf_emit(ctx, 1, 0x80007004);
+-		xf_emit(ctx, 1, 0x04000400);
++		xf_emit(ctx, 1, 0);		/* ff */
++		xf_emit(ctx, 1, 0x80);		/* ffffffff tesla UNK1404 */
++		xf_emit(ctx, 1, 0x80007004);	/* ffffffff tesla UNK12B0 */
++		xf_emit(ctx, 1, 0x04000400);	/* ffffffff */
+ 		if (dev_priv->chipset >= 0xa0)
+-			xf_emit(ctx, 1, 0xc0);
+-		xf_emit(ctx, 1, 0x1000);
+-		xf_emit(ctx, 2, 0);
+-		if (dev_priv->chipset == 0x86 || dev_priv->chipset == 0x98 || dev_priv->chipset >= 0xa8) {
+-			xf_emit(ctx, 1, 0xe00);
+-			xf_emit(ctx, 1, 0x1e00);
++			xf_emit(ctx, 1, 0xc0);	/* 00007fff tesla UNK152C */
++		xf_emit(ctx, 1, 0x1000);	/* 0000ffff tesla UNK0D60 */
++		xf_emit(ctx, 1, 0);		/* ff/3ff */
++		xf_emit(ctx, 1, 0);		/* ffffffff tesla UNK1A30 */
++		if (dev_priv->chipset == 0x86 || dev_priv->chipset == 0x98 || dev_priv->chipset == 0xa8 || IS_NVAAF(dev_priv->chipset)) {
++			xf_emit(ctx, 1, 0xe00);		/* 7fff */
++			xf_emit(ctx, 1, 0x1e00);	/* 7fff */
+ 		}
+-		xf_emit(ctx, 1, 1);
+-		xf_emit(ctx, 2, 0);
++		xf_emit(ctx, 1, 1);		/* 000000ff VP_REG_ALLOC_TEMP */
++		xf_emit(ctx, 1, 0);		/* 00000001 LINKED_TSC */
++		xf_emit(ctx, 1, 0);		/* 00000001 GP_ENABLE */
+ 		if (dev_priv->chipset == 0x50)
+-			xf_emit(ctx, 2, 0x1000);
+-		xf_emit(ctx, 1, 1);
+-		xf_emit(ctx, 1, 0);
+-		xf_emit(ctx, 1, 4);
+-		xf_emit(ctx, 1, 2);
+-		if (dev_priv->chipset >= 0xaa)
+-			xf_emit(ctx, 0xb, 0);
++			xf_emit(ctx, 2, 0x1000);	/* 7fff tesla UNK141C */
++		xf_emit(ctx, 1, 1);		/* 000000ff GP_REG_ALLOC_TEMP */
++		xf_emit(ctx, 1, 0);		/* 00000001 GP_ENABLE */
++		xf_emit(ctx, 1, 4);		/* 000000ff FP_REG_ALLOC_TEMP */
++		xf_emit(ctx, 1, 2);		/* 00000003 REG_MODE */
++		if (IS_NVAAF(dev_priv->chipset))
++			xf_emit(ctx, 0xb, 0);	/* RO */
+ 		else if (dev_priv->chipset >= 0xa0)
+-			xf_emit(ctx, 0xc, 0);
++			xf_emit(ctx, 0xc, 0);	/* RO */
+ 		else
+-			xf_emit(ctx, 0xa, 0);
++			xf_emit(ctx, 0xa, 0);	/* RO */
+ 	}
+-	xf_emit(ctx, 1, 0x08100c12);
+-	xf_emit(ctx, 1, 0);
++	xf_emit(ctx, 1, 0x08100c12);		/* 1fffffff FP_INTERPOLANT_CTRL */
++	xf_emit(ctx, 1, 0);			/* ff/3ff */
+ 	if (dev_priv->chipset >= 0xa0) {
+-		xf_emit(ctx, 1, 0x1fe21);
++		xf_emit(ctx, 1, 0x1fe21);	/* 0003ffff tesla UNK0FAC */
+ 	}
+-	xf_emit(ctx, 5, 0);
+-	xf_emit(ctx, 4, 0xffff);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 2, 0x10001);
+-	xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 0x1fe21);
+-	xf_emit(ctx, 1, 0);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 1, 1);
+-	xf_emit(ctx, 4, 0);
+-	xf_emit(ctx, 1, 0x08100c12);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 2);
+-	xf_emit(ctx, 1, 0x11);
+-	xf_emit(ctx, 8, 0);
+-	xf_emit(ctx, 1, 0xfac6881);
+-	xf_emit(ctx, 1, 0);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+-		xf_emit(ctx, 1, 3);
+-	xf_emit(ctx, 3, 0);
+-	xf_emit(ctx, 1, 4);
+-	xf_emit(ctx, 9, 0);
+-	xf_emit(ctx, 1, 2);
+-	xf_emit(ctx, 2, 1);
+-	xf_emit(ctx, 1, 2);
+-	xf_emit(ctx, 3, 1);
+-	xf_emit(ctx, 1, 0);
+-	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+-		xf_emit(ctx, 8, 2);
+-		xf_emit(ctx, 0x10, 1);
+-		xf_emit(ctx, 8, 2);
+-		xf_emit(ctx, 0x18, 1);
+-		xf_emit(ctx, 3, 0);
++	xf_emit(ctx, 3, 0);			/* 7fff, 0, 0 */
++	xf_emit(ctx, 1, 0);			/* 00000001 tesla UNK1534 */
++	xf_emit(ctx, 1, 0);			/* 7/f MULTISAMPLE_SAMPLES_LOG2 */
++	xf_emit(ctx, 4, 0xffff);		/* 0000ffff MSAA_MASK */
++	xf_emit(ctx, 1, 1);			/* 00000001 LANES32 */
++	xf_emit(ctx, 1, 0x10001);		/* 00ffffff BLOCK_ALLOC */
++	xf_emit(ctx, 1, 0x10001);		/* ffffffff BLOCKDIM_XY */
++	xf_emit(ctx, 1, 1);			/* 0000ffff BLOCKDIM_Z */
++	xf_emit(ctx, 1, 0);			/* ffffffff SHARED_SIZE */
++	xf_emit(ctx, 1, 0x1fe21);		/* 1ffff/3ffff[NVA0+] tesla UNK0FAC */
++	xf_emit(ctx, 1, 0);			/* ffffffff tesla UNK1A34 */
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 1, 1);		/* 0000001f tesla UNK169C */
++	xf_emit(ctx, 1, 0);			/* ff/3ff */
++	xf_emit(ctx, 1, 0);			/* 1 LINKED_TSC */
++	xf_emit(ctx, 1, 0);			/* ff FP_ADDRESS_HIGH */
++	xf_emit(ctx, 1, 0);			/* ffffffff FP_ADDRESS_LOW */
++	xf_emit(ctx, 1, 0x08100c12);		/* 1fffffff FP_INTERPOLANT_CTRL */
++	xf_emit(ctx, 1, 4);			/* 00000007 FP_CONTROL */
++	xf_emit(ctx, 1, 0);			/* 000000ff FRAG_COLOR_CLAMP_EN */
++	xf_emit(ctx, 1, 2);			/* 00000003 REG_MODE */
++	xf_emit(ctx, 1, 0x11);			/* 0000007f RT_FORMAT */
++	xf_emit(ctx, 7, 0);			/* 0000007f RT_FORMAT */
++	xf_emit(ctx, 1, 0);			/* 00000007 */
++	xf_emit(ctx, 1, 0xfac6881);		/* 0fffffff RT_CONTROL */
++	xf_emit(ctx, 1, 0);			/* 00000003 MULTISAMPLE_CTRL */
++	if (IS_NVA3F(dev_priv->chipset))
++		xf_emit(ctx, 1, 3);		/* 00000003 tesla UNK16B4 */
++	xf_emit(ctx, 1, 0);			/* 00000001 ALPHA_TEST_ENABLE */
++	xf_emit(ctx, 1, 0);			/* 00000007 ALPHA_TEST_FUNC */
++	xf_emit(ctx, 1, 0);			/* 00000001 FRAMEBUFFER_SRGB */
++	xf_emit(ctx, 1, 4);			/* ffffffff tesla UNK1400 */
++	xf_emit(ctx, 8, 0);			/* 00000001 BLEND_ENABLE */
++	xf_emit(ctx, 1, 0);			/* 00000001 LOGIC_OP_ENABLE */
++	xf_emit(ctx, 1, 2);			/* 0000001f BLEND_FUNC_SRC_RGB */
++	xf_emit(ctx, 1, 1);			/* 0000001f BLEND_FUNC_DST_RGB */
++	xf_emit(ctx, 1, 1);			/* 00000007 BLEND_EQUATION_RGB */
++	xf_emit(ctx, 1, 2);			/* 0000001f BLEND_FUNC_SRC_ALPHA */
++	xf_emit(ctx, 1, 1);			/* 0000001f BLEND_FUNC_DST_ALPHA */
++	xf_emit(ctx, 1, 1);			/* 00000007 BLEND_EQUATION_ALPHA */
++	xf_emit(ctx, 1, 1);			/* 00000001 UNK133C */
++	if (IS_NVA3F(dev_priv->chipset)) {
++		xf_emit(ctx, 1, 0);		/* 00000001 UNK12E4 */
++		xf_emit(ctx, 8, 2);		/* 0000001f IBLEND_FUNC_SRC_RGB */
++		xf_emit(ctx, 8, 1);		/* 0000001f IBLEND_FUNC_DST_RGB */
++		xf_emit(ctx, 8, 1);		/* 00000007 IBLEND_EQUATION_RGB */
++		xf_emit(ctx, 8, 2);		/* 0000001f IBLEND_FUNC_SRC_ALPHA */
++		xf_emit(ctx, 8, 1);		/* 0000001f IBLEND_FUNC_DST_ALPHA */
++		xf_emit(ctx, 8, 1);		/* 00000007 IBLEND_EQUATION_ALPHA */
++		xf_emit(ctx, 8, 1);		/* 00000001 IBLEND_UNK00 */
++		xf_emit(ctx, 1, 0);		/* 00000003 tesla UNK1928 */
++		xf_emit(ctx, 1, 0);		/* 00000001 UNK1140 */
+ 	}
+-	xf_emit(ctx, 1, 4);
++	xf_emit(ctx, 1, 0);			/* 00000003 tesla UNK0F90 */
++	xf_emit(ctx, 1, 4);			/* 000000ff FP_RESULT_COUNT */
++	/* XXX: demagic this part some day */
+ 	if (dev_priv->chipset == 0x50)
+ 		xf_emit(ctx, 0x3a0, 0);
+ 	else if (dev_priv->chipset < 0x94)
+@@ -2303,9 +3240,9 @@ nv50_graph_construct_xfer_tp2(struct nouveau_grctx *ctx)
+ 		xf_emit(ctx, 0x39f, 0);
+ 	else
+ 		xf_emit(ctx, 0x3a3, 0);
+-	xf_emit(ctx, 1, 0x11);
+-	xf_emit(ctx, 1, 0);
+-	xf_emit(ctx, 1, 1);
++	xf_emit(ctx, 1, 0x11);			/* 3f/7f DST_FORMAT */
++	xf_emit(ctx, 1, 0);			/* 7 OPERATION */
++	xf_emit(ctx, 1, 1);			/* 1 DST_LINEAR */
+ 	xf_emit(ctx, 0x2d, 0);
+ }
+ 
+@@ -2323,52 +3260,56 @@ nv50_graph_construct_xfer2(struct nouveau_grctx *ctx)
+ 	if (dev_priv->chipset < 0xa0) {
+ 		for (i = 0; i < 8; i++) {
+ 			ctx->ctxvals_pos = offset + i;
++			/* that little bugger belongs to csched. No idea
++			 * what it's doing here. */
+ 			if (i == 0)
+-				xf_emit(ctx, 1, 0x08100c12);
++				xf_emit(ctx, 1, 0x08100c12); /* FP_INTERPOLANT_CTRL */
+ 			if (units & (1 << i))
+-				nv50_graph_construct_xfer_tp2(ctx);
++				nv50_graph_construct_xfer_mpc(ctx);
+ 			if ((ctx->ctxvals_pos-offset)/8 > size)
+ 				size = (ctx->ctxvals_pos-offset)/8;
+ 		}
+ 	} else {
+ 		/* Strand 0: TPs 0, 1 */
+ 		ctx->ctxvals_pos = offset;
+-		xf_emit(ctx, 1, 0x08100c12);
++		/* that little bugger belongs to csched. No idea
++		 * what it's doing here. */
++		xf_emit(ctx, 1, 0x08100c12); /* FP_INTERPOLANT_CTRL */
+ 		if (units & (1 << 0))
+-			nv50_graph_construct_xfer_tp2(ctx);
++			nv50_graph_construct_xfer_mpc(ctx);
+ 		if (units & (1 << 1))
+-			nv50_graph_construct_xfer_tp2(ctx);
++			nv50_graph_construct_xfer_mpc(ctx);
+ 		if ((ctx->ctxvals_pos-offset)/8 > size)
+ 			size = (ctx->ctxvals_pos-offset)/8;
+ 
+-		/* Strand 0: TPs 2, 3 */
++		/* Strand 1: TPs 2, 3 */
+ 		ctx->ctxvals_pos = offset + 1;
+ 		if (units & (1 << 2))
+-			nv50_graph_construct_xfer_tp2(ctx);
++			nv50_graph_construct_xfer_mpc(ctx);
+ 		if (units & (1 << 3))
+-			nv50_graph_construct_xfer_tp2(ctx);
++			nv50_graph_construct_xfer_mpc(ctx);
+ 		if ((ctx->ctxvals_pos-offset)/8 > size)
+ 			size = (ctx->ctxvals_pos-offset)/8;
+ 
+-		/* Strand 0: TPs 4, 5, 6 */
++		/* Strand 2: TPs 4, 5, 6 */
+ 		ctx->ctxvals_pos = offset + 2;
+ 		if (units & (1 << 4))
+-			nv50_graph_construct_xfer_tp2(ctx);
++			nv50_graph_construct_xfer_mpc(ctx);
+ 		if (units & (1 << 5))
+-			nv50_graph_construct_xfer_tp2(ctx);
++			nv50_graph_construct_xfer_mpc(ctx);
+ 		if (units & (1 << 6))
+-			nv50_graph_construct_xfer_tp2(ctx);
++			nv50_graph_construct_xfer_mpc(ctx);
+ 		if ((ctx->ctxvals_pos-offset)/8 > size)
+ 			size = (ctx->ctxvals_pos-offset)/8;
+ 
+-		/* Strand 0: TPs 7, 8, 9 */
++		/* Strand 3: TPs 7, 8, 9 */
+ 		ctx->ctxvals_pos = offset + 3;
+ 		if (units & (1 << 7))
+-			nv50_graph_construct_xfer_tp2(ctx);
++			nv50_graph_construct_xfer_mpc(ctx);
+ 		if (units & (1 << 8))
+-			nv50_graph_construct_xfer_tp2(ctx);
++			nv50_graph_construct_xfer_mpc(ctx);
+ 		if (units & (1 << 9))
+-			nv50_graph_construct_xfer_tp2(ctx);
++			nv50_graph_construct_xfer_mpc(ctx);
+ 		if ((ctx->ctxvals_pos-offset)/8 > size)
+ 			size = (ctx->ctxvals_pos-offset)/8;
+ 	}
 diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c
-index 5f21df3..1fd5207 100644
+index 5f21df3..092057b 100644
 --- a/drivers/gpu/drm/nouveau/nv50_instmem.c
 +++ b/drivers/gpu/drm/nouveau/nv50_instmem.c
 @@ -35,8 +35,6 @@ struct nv50_instmem_priv {
@@ -9522,7 +15849,15 @@ index 5f21df3..1fd5207 100644
  };
  
  #define NV50_INSTMEM_PAGE_SHIFT 12
-@@ -147,7 +145,7 @@ nv50_instmem_init(struct drm_device *dev)
+@@ -141,13 +139,15 @@ nv50_instmem_init(struct drm_device *dev)
+ 	chan->file_priv = (struct drm_file *)-2;
+ 	dev_priv->fifos[0] = dev_priv->fifos[127] = chan;
+ 
++	INIT_LIST_HEAD(&chan->ramht_refs);
++
+ 	/* Channel's PRAMIN object + heap */
+ 	ret = nouveau_gpuobj_new_fake(dev, 0, c_offset, c_size, 0,
+ 							NULL, &chan->ramin);
  	if (ret)
  		return ret;
  
@@ -9531,7 +15866,7 @@ index 5f21df3..1fd5207 100644
  		return -ENOMEM;
  
  	/* RAMFC + zero channel's PRAMIN up to start of VM pagedir */
-@@ -262,23 +260,18 @@ nv50_instmem_init(struct drm_device *dev)
+@@ -262,30 +262,25 @@ nv50_instmem_init(struct drm_device *dev)
  
  	/* Assume that praying isn't enough, check that we can re-read the
  	 * entire fake channel back from the PRAMIN BAR */
@@ -9556,7 +15891,15 @@ index 5f21df3..1fd5207 100644
  		NV_ERROR(dev, "Failed to init RAMIN heap\n");
  	}
  
-@@ -321,7 +314,7 @@ nv50_instmem_takedown(struct drm_device *dev)
+ 	/*XXX: incorrect, but needed to make hash func "work" */
+ 	dev_priv->ramht_offset = 0x10000;
+ 	dev_priv->ramht_bits   = 9;
+-	dev_priv->ramht_size   = (1 << dev_priv->ramht_bits);
++	dev_priv->ramht_size   = (1 << dev_priv->ramht_bits) * 8;
+ 	return 0;
+ }
+ 
+@@ -321,7 +316,7 @@ nv50_instmem_takedown(struct drm_device *dev)
  		nouveau_gpuobj_del(dev, &chan->vm_pd);
  		nouveau_gpuobj_ref_del(dev, &chan->ramfc);
  		nouveau_gpuobj_ref_del(dev, &chan->ramin);
@@ -9565,7 +15908,7 @@ index 5f21df3..1fd5207 100644
  
  		dev_priv->fifos[0] = dev_priv->fifos[127] = NULL;
  		kfree(chan);
-@@ -436,14 +429,14 @@ nv50_instmem_bind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
+@@ -436,14 +431,14 @@ nv50_instmem_bind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
  	if (!gpuobj->im_backing || !gpuobj->im_pramin || gpuobj->im_bound)
  		return -EINVAL;
  
@@ -9582,15 +15925,18 @@ index 5f21df3..1fd5207 100644
  		 gpuobj->im_pramin->start, pte, pte_end);
  	NV_DEBUG(dev, "first vram page: 0x%08x\n", gpuobj->im_backing_start);
  
-@@ -453,27 +446,15 @@ nv50_instmem_bind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
+@@ -453,27 +448,16 @@ nv50_instmem_bind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
  		vram |= 0x30;
  	}
  
 -	dev_priv->engine.instmem.prepare_access(dev, true);
  	while (pte < pte_end) {
- 		nv_wo32(dev, pramin_pt, pte++, lower_32_bits(vram));
- 		nv_wo32(dev, pramin_pt, pte++, upper_32_bits(vram));
+-		nv_wo32(dev, pramin_pt, pte++, lower_32_bits(vram));
+-		nv_wo32(dev, pramin_pt, pte++, upper_32_bits(vram));
++		nv_wo32(pramin_pt, (pte * 4) + 0, lower_32_bits(vram));
++		nv_wo32(pramin_pt, (pte * 4) + 4, upper_32_bits(vram));
  		vram += NV50_INSTMEM_PAGE_SIZE;
++		pte += 2;
  	}
 -	dev_priv->engine.instmem.finish_access(dev);
 -
@@ -9613,14 +15959,17 @@ index 5f21df3..1fd5207 100644
  
  	gpuobj->im_bound = 1;
  	return 0;
-@@ -492,36 +473,36 @@ nv50_instmem_unbind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
+@@ -492,36 +476,37 @@ nv50_instmem_unbind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
  	pte     = (gpuobj->im_pramin->start >> 12) << 1;
  	pte_end = ((gpuobj->im_pramin->size >> 12) << 1) + pte;
  
 -	dev_priv->engine.instmem.prepare_access(dev, true);
  	while (pte < pte_end) {
- 		nv_wo32(dev, priv->pramin_pt->gpuobj, pte++, 0x00000000);
- 		nv_wo32(dev, priv->pramin_pt->gpuobj, pte++, 0x00000000);
+-		nv_wo32(dev, priv->pramin_pt->gpuobj, pte++, 0x00000000);
+-		nv_wo32(dev, priv->pramin_pt->gpuobj, pte++, 0x00000000);
++		nv_wo32(priv->pramin_pt->gpuobj, (pte * 4) + 0, 0x00000000);
++		nv_wo32(priv->pramin_pt->gpuobj, (pte * 4) + 4, 0x00000000);
++		pte += 2;
  	}
 -	dev_priv->engine.instmem.finish_access(dev);
 +	dev_priv->engine.instmem.flush(dev);
@@ -10096,10 +16445,10 @@ index 0000000..edf2b21
 +}
 diff --git a/drivers/gpu/drm/nouveau/nvc0_instmem.c b/drivers/gpu/drm/nouveau/nvc0_instmem.c
 new file mode 100644
-index 0000000..b66c913
+index 0000000..9238c73
 --- /dev/null
 +++ b/drivers/gpu/drm/nouveau/nvc0_instmem.c
-@@ -0,0 +1,231 @@
+@@ -0,0 +1,234 @@
 +/*
 + * Copyright 2010 Red Hat Inc.
 + *
@@ -10244,14 +16593,16 @@ index 0000000..b66c913
 +nvc0_instmem_suspend(struct drm_device *dev)
 +{
 +	struct drm_nouveau_private *dev_priv = dev->dev_private;
++	u32 *buf;
 +	int i;
 +
 +	dev_priv->susres.ramin_copy = vmalloc(65536);
 +	if (!dev_priv->susres.ramin_copy)
 +		return -ENOMEM;
++	buf = dev_priv->susres.ramin_copy;
 +
-+	for (i = 0x700000; i < 0x710000; i += 4)
-+		dev_priv->susres.ramin_copy[i/4] = nv_rd32(dev, i);
++	for (i = 0; i < 65536; i += 4)
++		buf[i/4] = nv_rd32(dev, NV04_PRAMIN + i);
 +	return 0;
 +}
 +
@@ -10259,14 +16610,15 @@ index 0000000..b66c913
 +nvc0_instmem_resume(struct drm_device *dev)
 +{
 +	struct drm_nouveau_private *dev_priv = dev->dev_private;
++	u32 *buf = dev_priv->susres.ramin_copy;
 +	u64 chan;
 +	int i;
 +
 +	chan = dev_priv->vram_size - dev_priv->ramin_rsvd_vram;
 +	nv_wr32(dev, 0x001700, chan >> 16);
 +
-+	for (i = 0x700000; i < 0x710000; i += 4)
-+		nv_wr32(dev, i, dev_priv->susres.ramin_copy[i/4]);
++	for (i = 0; i < 65536; i += 4)
++		nv_wr32(dev, NV04_PRAMIN + i, buf[i/4]);
 +	vfree(dev_priv->susres.ramin_copy);
 +	dev_priv->susres.ramin_copy = NULL;
 +
@@ -10323,7 +16675,7 @@ index 0000000..b66c913
 +	/*XXX: incorrect, but needed to make hash func "work" */
 +	dev_priv->ramht_offset = 0x10000;
 +	dev_priv->ramht_bits   = 9;
-+	dev_priv->ramht_size   = (1 << dev_priv->ramht_bits);
++	dev_priv->ramht_size   = (1 << dev_priv->ramht_bits) * 8;
 +	return 0;
 +}
 +
@@ -10365,5 +16717,5 @@ index 5998c35..ad64673 100644
  #	define NV_PCRTC_INTR_0_VBLANK				(1 << 0)
  #define NV_PCRTC_INTR_EN_0				0x00600140
 -- 
-1.7.2
+1.7.2.2
 
diff --git a/kernel.spec b/kernel.spec
index 0b9539b..5685f60 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -48,7 +48,7 @@ Summary: The Linux kernel
 # reset this by hand to 1 (or to 0 and then use rpmdev-bumpspec).
 # scripts/rebase.sh should be made to do that for you, actually.
 #
-%global baserelease 18
+%global baserelease 19
 %global fedora_build %{baserelease}
 
 # base_sublevel is the kernel version we're starting with and patching
@@ -1900,6 +1900,9 @@ fi
 # and build.
 
 %changelog
+* Mon Sep 06 2010 Ben Skeggs <bskeggs at redhat.com> 2.6.35.4-19
+- nouveau: misc fixes from upstream + NVAF support
+
 * Fri Sep 03 2010 Kyle McMartin <kyle at redhat.com>
 - Restore appleir driver that got lost in the 2.6.35 rebase.
 


More information about the scm-commits mailing list