[kernel/f14/master] nouveau: more updates

Wed Sep 8 06:24:57 UTC 2010

commit 215a71b03df53886372ecce4bb2012335845dbfc
Author: Ben Skeggs <bskeggs at redhat.com>
Date:   Wed Sep 8 16:23:30 2010 +1000

    nouveau: more updates

 drm-nouveau-updates.patch | 4581 ++++++++++++++++++++++++++++++++++++++-------
 kernel.spec               |    5 +-
 2 files changed, 3925 insertions(+), 661 deletions(-)
---

diff --git a/drm-nouveau-updates.patch b/drm-nouveau-updates.patch
index 82ded47..9749a29 100644
--- a/drm-nouveau-updates.patch
+++ b/drm-nouveau-updates.patch
@@ -1,7 +1,7 @@
-From c156fa3c71b6581b34526a9b2b649c3f4d57dd3e Mon Sep 17 00:00:00 2001
+From 71c6844b5918cd5b1f8b61735e52be12fb5f80e5 Mon Sep 17 00:00:00 2001
 From: Ben Skeggs <bskeggs at redhat.com>
 Date: Tue, 1 Jun 2010 15:32:24 +1000
-Subject: [PATCH 1/2] drm-nouveau-updates
+Subject: [PATCH] drm-nouveau-updates
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit
@@ -843,6 +843,120 @@ Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
 drm/nv50: report BAR access faults
 
 Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nouveau: rebase per-channel pramin heap offsets to 0
+
+Reviewed-by: Francisco Jerez <currojerez at riseup.net>
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nouveau: remove nouveau_gpuobj_ref completely, replace with sanity
+
+Reviewed-by: Francisco Jerez <currojerez at riseup.net>
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nouveau: simplify fake gpu objects
+
+Reviewed-by: Francisco Jerez <currojerez at riseup.net>
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nv50: allow gpuobjs that aren't mapped into aperture
+
+Reviewed-by: Francisco Jerez <currojerez at riseup.net>
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nouveau: rework init ordering so nv50_instmem.c can be less bad
+
+Reviewed-by: Francisco Jerez <currojerez at riseup.net>
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nouveau: tidy ram{ht,fc,ro} a bit
+
+Reviewed-by: Francisco Jerez <currojerez at riseup.net>
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nouveau: add spinlock around ramht modifications
+
+Reviewed-by: Francisco Jerez <currojerez at riseup.net>
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nouveau: fix gpuobj refcount to use atomics
+
+Reviewed-by: Francisco Jerez <currojerez at riseup.net>
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nouveau: protect gpuobj list + global instmem heap with spinlock
+
+Reviewed-by: Francisco Jerez <currojerez at riseup.net>
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nouveau: remove nouveau_gpuobj_late_takedown
+
+Reviewed-by: Francisco Jerez <currojerez at riseup.net>
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nouveau: protect ramht_find() from oopsing if on channel without ramht
+
+This doesn't actually happen now, but there's a test case for an earlier
+kernel where a GPU error is signalled on one of nv50's fake channels, and
+the ramht lookup by the IRQ handler triggered an oops.
+
+This adds a check for RAMHT's existence on a channel before looking up
+an object handle.
+
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nv50: fix SOR count for early chipsets
+
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+
+drm/nouveau: Break some long lines in the TV-out code.
+
+Signed-off-by: Francisco Jerez <currojerez at riseup.net>
+
+drm/nouveau: Don't remove ramht entries from the neighboring channels.
+
+Signed-off-by: Francisco Jerez <currojerez at riseup.net>
+
+drm/nouveau: Don't enable AGP FW on nv18.
+
+FW seems to be broken on nv18; it causes random lockups and breaks
+suspend/resume even with the blob.
+
+Signed-off-by: Francisco Jerez <currojerez at riseup.net>
+
+drm/nouveau: Add module parameter to override the default AGP rate.
+
+Signed-off-by: Francisco Jerez <currojerez at riseup.net>
+
+drm/nouveau: PRAMIN is available from the start on pre-nv50.
+
+This makes sure that RAMHT is cleared correctly on start up.
+
+Signed-off-by: Francisco Jerez <currojerez at riseup.net>
+
+drm/nouveau: Remove implicit argument from nv_wait().
+
+Signed-off-by: Francisco Jerez <currojerez at riseup.net>
+
+drm/nouveau: Simplify tile region handling.
+
+Instead of emptying the caches to avoid a race with the PFIFO puller,
+go straight ahead and try to recover from it when it happens. Also,
+kill pfifo->cache_flush and tile->lock; we don't need them anymore.
+
+Signed-off-by: Francisco Jerez <currojerez at riseup.net>
+
+drm/nouveau: handle fifo pusher errors better
+
+The most important part of this change is that we now instruct PFIFO to
+drop all pending fetches, rather than attempting to skip a single dword
+and hoping that things would magically sort themselves out - they usually
+don't, and we end up with PFIFO being completely hung.
+
+This commit also adds somewhat more useful logging when these exceptions
+occur.
+
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
 ---
  drivers/gpu/drm/drm_crtc_helper.c           |   22 +-
  drivers/gpu/drm/i2c/ch7006_drv.c            |   22 +-
@@ -851,15 +965,15 @@ Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
  drivers/gpu/drm/nouveau/nouveau_acpi.c      |   38 +-
  drivers/gpu/drm/nouveau/nouveau_bios.c      |  910 ++++++--
  drivers/gpu/drm/nouveau/nouveau_bios.h      |    6 +-
- drivers/gpu/drm/nouveau/nouveau_bo.c        |  223 ++-
+ drivers/gpu/drm/nouveau/nouveau_bo.c        |  228 ++-
  drivers/gpu/drm/nouveau/nouveau_calc.c      |   10 +-
- drivers/gpu/drm/nouveau/nouveau_channel.c   |    6 +-
+ drivers/gpu/drm/nouveau/nouveau_channel.c   |   18 +-
  drivers/gpu/drm/nouveau/nouveau_connector.c |  417 ++--
  drivers/gpu/drm/nouveau/nouveau_connector.h |    7 +-
- drivers/gpu/drm/nouveau/nouveau_dma.c       |    7 -
- drivers/gpu/drm/nouveau/nouveau_dp.c        |  128 +-
- drivers/gpu/drm/nouveau/nouveau_drv.c       |   39 +-
- drivers/gpu/drm/nouveau/nouveau_drv.h       |  203 +-
+ drivers/gpu/drm/nouveau/nouveau_dma.c       |   21 +-
+ drivers/gpu/drm/nouveau/nouveau_dp.c        |  131 +-
+ drivers/gpu/drm/nouveau/nouveau_drv.c       |   45 +-
+ drivers/gpu/drm/nouveau/nouveau_drv.h       |  309 ++--
  drivers/gpu/drm/nouveau/nouveau_encoder.h   |   16 +-
  drivers/gpu/drm/nouveau/nouveau_fbcon.c     |    4 +-
  drivers/gpu/drm/nouveau/nouveau_fence.c     |   35 +-
@@ -869,52 +983,57 @@ Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
  drivers/gpu/drm/nouveau/nouveau_hw.c        |   13 +-
  drivers/gpu/drm/nouveau/nouveau_i2c.c       |   83 +-
  drivers/gpu/drm/nouveau/nouveau_i2c.h       |   11 +-
- drivers/gpu/drm/nouveau/nouveau_irq.c       |   70 +-
- drivers/gpu/drm/nouveau/nouveau_mem.c       |  404 ++---
- drivers/gpu/drm/nouveau/nouveau_notifier.c  |   30 +-
- drivers/gpu/drm/nouveau/nouveau_object.c    |  325 +--
- drivers/gpu/drm/nouveau/nouveau_ramht.c     |  160 ++
- drivers/gpu/drm/nouveau/nouveau_ramht.h     |   31 +
- drivers/gpu/drm/nouveau/nouveau_reg.h       |  109 +-
- drivers/gpu/drm/nouveau/nouveau_sgdma.c     |  108 +-
- drivers/gpu/drm/nouveau/nouveau_state.c     |  340 ++-
+ drivers/gpu/drm/nouveau/nouveau_irq.c       |  129 +-
+ drivers/gpu/drm/nouveau/nouveau_mem.c       |  529 ++---
+ drivers/gpu/drm/nouveau/nouveau_notifier.c  |   37 +-
+ drivers/gpu/drm/nouveau/nouveau_object.c    |  853 +++-----
+ drivers/gpu/drm/nouveau/nouveau_ramht.c     |  289 +++
+ drivers/gpu/drm/nouveau/nouveau_ramht.h     |   55 +
+ drivers/gpu/drm/nouveau/nouveau_reg.h       |  118 +-
+ drivers/gpu/drm/nouveau/nouveau_sgdma.c     |  117 +-
+ drivers/gpu/drm/nouveau/nouveau_state.c     |  398 ++--
  drivers/gpu/drm/nouveau/nv04_crtc.c         |   11 +-
  drivers/gpu/drm/nouveau/nv04_dac.c          |   60 +-
  drivers/gpu/drm/nouveau/nv04_dfp.c          |  145 +-
  drivers/gpu/drm/nouveau/nv04_display.c      |   90 +-
- drivers/gpu/drm/nouveau/nv04_fifo.c         |   28 +-
+ drivers/gpu/drm/nouveau/nv04_fbcon.c        |    9 +-
+ drivers/gpu/drm/nouveau/nv04_fifo.c         |   88 +-
  drivers/gpu/drm/nouveau/nv04_graph.c        |    5 +-
- drivers/gpu/drm/nouveau/nv04_instmem.c      |   21 +-
+ drivers/gpu/drm/nouveau/nv04_instmem.c      |  167 +-
  drivers/gpu/drm/nouveau/nv04_mc.c           |    4 +
  drivers/gpu/drm/nouveau/nv04_tv.c           |  133 +-
- drivers/gpu/drm/nouveau/nv10_fifo.c         |   10 -
+ drivers/gpu/drm/nouveau/nv10_fifo.c         |   29 +-
  drivers/gpu/drm/nouveau/nv10_gpio.c         |   92 +
  drivers/gpu/drm/nouveau/nv10_graph.c        |  175 +-
  drivers/gpu/drm/nouveau/nv17_gpio.c         |   92 -
- drivers/gpu/drm/nouveau/nv17_tv.c           |   82 +-
- drivers/gpu/drm/nouveau/nv20_graph.c        |  564 +++---
+ drivers/gpu/drm/nouveau/nv17_tv.c           |  181 +-
+ drivers/gpu/drm/nouveau/nv17_tv.h           |   15 +-
+ drivers/gpu/drm/nouveau/nv17_tv_modes.c     |   48 +-
+ drivers/gpu/drm/nouveau/nv20_graph.c        |  576 +++---
  drivers/gpu/drm/nouveau/nv30_fb.c           |   95 +
- drivers/gpu/drm/nouveau/nv40_fifo.c         |    8 -
- drivers/gpu/drm/nouveau/nv40_graph.c        |   62 +-
+ drivers/gpu/drm/nouveau/nv40_fifo.c         |   28 +-
+ drivers/gpu/drm/nouveau/nv40_graph.c        |   72 +-
  drivers/gpu/drm/nouveau/nv40_grctx.c        |    6 +-
  drivers/gpu/drm/nouveau/nv40_mc.c           |    2 +-
  drivers/gpu/drm/nouveau/nv50_crtc.c         |   67 +-
- drivers/gpu/drm/nouveau/nv50_dac.c          |   43 +-
- drivers/gpu/drm/nouveau/nv50_display.c      |  435 +++--
+ drivers/gpu/drm/nouveau/nv50_cursor.c       |    2 +-
+ drivers/gpu/drm/nouveau/nv50_dac.c          |   47 +-
+ drivers/gpu/drm/nouveau/nv50_display.c      |  496 +++--
  drivers/gpu/drm/nouveau/nv50_display.h      |    6 +-
  drivers/gpu/drm/nouveau/nv50_fb.c           |   39 +
- drivers/gpu/drm/nouveau/nv50_fifo.c         |  335 ++--
+ drivers/gpu/drm/nouveau/nv50_fbcon.c        |    4 +-
+ drivers/gpu/drm/nouveau/nv50_fifo.c         |  396 ++--
  drivers/gpu/drm/nouveau/nv50_gpio.c         |   35 +
- drivers/gpu/drm/nouveau/nv50_graph.c        |  104 +-
+ drivers/gpu/drm/nouveau/nv50_graph.c        |  131 +-
  drivers/gpu/drm/nouveau/nv50_grctx.c        | 3305 +++++++++++++++++----------
- drivers/gpu/drm/nouveau/nv50_instmem.c      |   81 +-
- drivers/gpu/drm/nouveau/nv50_sor.c          |  105 +-
+ drivers/gpu/drm/nouveau/nv50_instmem.c      |  473 ++---
+ drivers/gpu/drm/nouveau/nv50_sor.c          |  109 +-
  drivers/gpu/drm/nouveau/nvc0_fb.c           |   38 +
- drivers/gpu/drm/nouveau/nvc0_fifo.c         |   95 +
+ drivers/gpu/drm/nouveau/nvc0_fifo.c         |   89 +
  drivers/gpu/drm/nouveau/nvc0_graph.c        |   74 +
- drivers/gpu/drm/nouveau/nvc0_instmem.c      |  234 ++
+ drivers/gpu/drm/nouveau/nvc0_instmem.c      |  229 ++
  drivers/gpu/drm/nouveau/nvreg.h             |   22 -
- 70 files changed, 6456 insertions(+), 4215 deletions(-)
+ 75 files changed, 7469 insertions(+), 5278 deletions(-)
  delete mode 100644 drivers/gpu/drm/nouveau/nouveau_grctx.c
  create mode 100644 drivers/gpu/drm/nouveau/nouveau_ramht.c
  create mode 100644 drivers/gpu/drm/nouveau/nouveau_ramht.h
@@ -2713,7 +2832,7 @@ index adf4ec2..c1de2f3 100644
  
  	struct {
 diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
-index 6f3c195..553a01d 100644
+index 6f3c195..22a2038 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
 @@ -43,17 +43,12 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
@@ -2754,7 +2873,19 @@ index 6f3c195..553a01d 100644
  }
  
  u16
-@@ -461,18 +454,20 @@ nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
+@@ -395,7 +388,10 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
+ 		man->available_caching = TTM_PL_FLAG_UNCACHED |
+ 					 TTM_PL_FLAG_WC;
+ 		man->default_caching = TTM_PL_FLAG_WC;
+-		man->gpu_offset = dev_priv->vm_vram_base;
++		if (dev_priv->card_type == NV_50)
++			man->gpu_offset = 0x40000000;
++		else
++			man->gpu_offset = 0;
+ 		break;
+ 	case TTM_PL_TT:
+ 		switch (dev_priv->gart_info.type) {
+@@ -461,18 +457,20 @@ nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
  		return ret;
  
  	ret = ttm_bo_move_accel_cleanup(&nvbo->bo, fence, NULL,
@@ -2781,7 +2912,7 @@ index 6f3c195..553a01d 100644
  		if (mem->mem_type == TTM_PL_TT)
  			return NvDmaGART;
  		return NvDmaVRAM;
-@@ -484,86 +479,181 @@ nouveau_bo_mem_ctxdma(struct nouveau_bo *nvbo, struct nouveau_channel *chan,
+@@ -484,86 +482,181 @@ nouveau_bo_mem_ctxdma(struct nouveau_bo *nvbo, struct nouveau_channel *chan,
  }
  
  static int
@@ -3008,7 +3139,7 @@ index 6f3c195..553a01d 100644
  	return nouveau_bo_move_accel_cleanup(chan, nvbo, evict, no_wait_reserve, no_wait_gpu, new_mem);
  }
  
-@@ -710,13 +800,6 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
+@@ -710,13 +803,6 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
  	if (ret)
  		return ret;
  
@@ -3022,7 +3153,7 @@ index 6f3c195..553a01d 100644
  	/* Fake bo copy. */
  	if (old_mem->mem_type == TTM_PL_SYSTEM && !bo->ttm) {
  		BUG_ON(bo->mem.mm_node != NULL);
-@@ -725,6 +808,12 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
+@@ -725,6 +811,12 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
  		goto out;
  	}
  
@@ -3084,10 +3215,27 @@ index 88f9bc0..23d9896 100644
  
  static int
 diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c
-index 1fc57ef..9a31023 100644
+index 1fc57ef..53c2a6f 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_channel.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_channel.c
-@@ -257,9 +257,7 @@ nouveau_channel_free(struct nouveau_channel *chan)
+@@ -69,14 +69,8 @@ nouveau_channel_pushbuf_ctxdma_init(struct nouveau_channel *chan)
+ 		chan->pushbuf_base = pb->bo.mem.mm_node->start << PAGE_SHIFT;
+ 	}
+ 
+-	ret = nouveau_gpuobj_ref_add(dev, chan, 0, pushbuf, &chan->pushbuf);
+-	if (ret) {
+-		NV_ERROR(dev, "Error referencing pushbuf ctxdma: %d\n", ret);
+-		if (pushbuf != dev_priv->gart_info.sg_ctxdma)
+-			nouveau_gpuobj_del(dev, &pushbuf);
+-		return ret;
+-	}
+-
++	nouveau_gpuobj_ref(pushbuf, &chan->pushbuf);
++	nouveau_gpuobj_ref(NULL, &pushbuf);
+ 	return 0;
+ }
+ 
+@@ -257,9 +251,7 @@ nouveau_channel_free(struct nouveau_channel *chan)
  	nouveau_debugfs_channel_fini(chan);
  
  	/* Give outstanding push buffers a chance to complete */
@@ -3097,15 +3245,18 @@ index 1fc57ef..9a31023 100644
  	if (chan->fence.sequence != chan->fence.sequence_ack) {
  		struct nouveau_fence *fence = NULL;
  
-@@ -311,6 +309,7 @@ nouveau_channel_free(struct nouveau_channel *chan)
+@@ -309,8 +301,9 @@ nouveau_channel_free(struct nouveau_channel *chan)
+ 	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+ 
  	/* Release the channel's resources */
- 	nouveau_gpuobj_ref_del(dev, &chan->pushbuf);
+-	nouveau_gpuobj_ref_del(dev, &chan->pushbuf);
++	nouveau_gpuobj_ref(NULL, &chan->pushbuf);
  	if (chan->pushbuf_bo) {
 +		nouveau_bo_unmap(chan->pushbuf_bo);
  		nouveau_bo_unpin(chan->pushbuf_bo);
  		nouveau_bo_ref(NULL, &chan->pushbuf_bo);
  	}
-@@ -368,8 +367,6 @@ nouveau_ioctl_fifo_alloc(struct drm_device *dev, void *data,
+@@ -368,8 +361,6 @@ nouveau_ioctl_fifo_alloc(struct drm_device *dev, void *data,
  	struct nouveau_channel *chan;
  	int ret;
  
@@ -3114,7 +3265,7 @@ index 1fc57ef..9a31023 100644
  	if (dev_priv->engine.graph.accel_blocked)
  		return -ENODEV;
  
-@@ -418,7 +415,6 @@ nouveau_ioctl_fifo_free(struct drm_device *dev, void *data,
+@@ -418,7 +409,6 @@ nouveau_ioctl_fifo_free(struct drm_device *dev, void *data,
  	struct drm_nouveau_channel_free *cfree = data;
  	struct nouveau_channel *chan;
  
@@ -3762,10 +3913,52 @@ index 4ef38ab..0d2e668 100644
  
  #endif /* __NOUVEAU_CONNECTOR_H__ */
 diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c
-index 65c441a..2d00699 100644
+index 65c441a..9d27acd 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_dma.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
-@@ -91,13 +91,6 @@ nouveau_dma_init(struct nouveau_channel *chan)
+@@ -28,6 +28,7 @@
+ #include "drm.h"
+ #include "nouveau_drv.h"
+ #include "nouveau_dma.h"
++#include "nouveau_ramht.h"
+ 
+ void
+ nouveau_dma_pre_init(struct nouveau_channel *chan)
+@@ -58,26 +59,27 @@ nouveau_dma_init(struct nouveau_channel *chan)
+ {
+ 	struct drm_device *dev = chan->dev;
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	struct nouveau_gpuobj *m2mf = NULL;
+-	struct nouveau_gpuobj *nvsw = NULL;
++	struct nouveau_gpuobj *obj = NULL;
+ 	int ret, i;
+ 
+ 	/* Create NV_MEMORY_TO_MEMORY_FORMAT for buffer moves */
+ 	ret = nouveau_gpuobj_gr_new(chan, dev_priv->card_type < NV_50 ?
+-				    0x0039 : 0x5039, &m2mf);
++				    0x0039 : 0x5039, &obj);
+ 	if (ret)
+ 		return ret;
+ 
+-	ret = nouveau_gpuobj_ref_add(dev, chan, NvM2MF, m2mf, NULL);
++	ret = nouveau_ramht_insert(chan, NvM2MF, obj);
++	nouveau_gpuobj_ref(NULL, &obj);
+ 	if (ret)
+ 		return ret;
+ 
+ 	/* Create an NV_SW object for various sync purposes */
+-	ret = nouveau_gpuobj_sw_new(chan, NV_SW, &nvsw);
++	ret = nouveau_gpuobj_sw_new(chan, NV_SW, &obj);
+ 	if (ret)
+ 		return ret;
+ 
+-	ret = nouveau_gpuobj_ref_add(dev, chan, NvSw, nvsw, NULL);
++	ret = nouveau_ramht_insert(chan, NvSw, obj);
++	nouveau_gpuobj_ref(NULL, &obj);
+ 	if (ret)
+ 		return ret;
+ 
+@@ -91,13 +93,6 @@ nouveau_dma_init(struct nouveau_channel *chan)
  	if (ret)
  		return ret;
  
@@ -3780,7 +3973,7 @@ index 65c441a..2d00699 100644
  	ret = RING_SPACE(chan, NOUVEAU_DMA_SKIPS);
  	if (ret)
 diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c
-index deeb21c..8a1b188 100644
+index deeb21c..89ca1f6 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_dp.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_dp.c
 @@ -23,8 +23,10 @@
@@ -3852,7 +4045,17 @@ index deeb21c..8a1b188 100644
  	return eq_done;
  }
  
-@@ -535,47 +572,64 @@ out:
+@@ -487,7 +524,8 @@ nouveau_dp_auxch(struct nouveau_i2c_chan *auxch, int cmd, int addr,
+ 		nv_wr32(dev, NV50_AUXCH_CTRL(index), ctrl | 0x80000000);
+ 		nv_wr32(dev, NV50_AUXCH_CTRL(index), ctrl);
+ 		nv_wr32(dev, NV50_AUXCH_CTRL(index), ctrl | 0x00010000);
+-		if (!nv_wait(NV50_AUXCH_CTRL(index), 0x00010000, 0x00000000)) {
++		if (!nv_wait(dev, NV50_AUXCH_CTRL(index),
++			     0x00010000, 0x00000000)) {
+ 			NV_ERROR(dev, "expected bit 16 == 0, got 0x%08x\n",
+ 				 nv_rd32(dev, NV50_AUXCH_CTRL(index)));
+ 			ret = -EBUSY;
+@@ -535,47 +573,64 @@ out:
  	return ret ? ret : (stat & NV50_AUXCH_STAT_REPLY);
  }
  
@@ -3953,10 +4156,10 @@ index deeb21c..8a1b188 100644
 +	.functionality = nouveau_dp_i2c_func
 +};
 diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c
-index 2737704..a8d3d17 100644
+index 2737704..946748a 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_drv.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_drv.c
-@@ -35,10 +35,6 @@
+@@ -35,13 +35,9 @@
  
  #include "drm_pciids.h"
  
@@ -3964,9 +4167,15 @@ index 2737704..a8d3d17 100644
 -int nouveau_ctxfw = 0;
 -module_param_named(ctxfw, nouveau_ctxfw, int, 0400);
 -
- MODULE_PARM_DESC(noagp, "Disable AGP");
- int nouveau_noagp;
- module_param_named(noagp, nouveau_noagp, int, 0400);
+-MODULE_PARM_DESC(noagp, "Disable AGP");
+-int nouveau_noagp;
+-module_param_named(noagp, nouveau_noagp, int, 0400);
++MODULE_PARM_DESC(agpmode, "AGP mode (0 to disable AGP)");
++int nouveau_agpmode = -1;
++module_param_named(agpmode, nouveau_agpmode, int, 0400);
+ 
+ MODULE_PARM_DESC(modeset, "Enable kernel modesetting");
+ static int nouveau_modeset = -1; /* kms */
 @@ -56,7 +52,7 @@ int nouveau_vram_pushbuf;
  module_param_named(vram_pushbuf, nouveau_vram_pushbuf, int, 0400);
  
@@ -4065,7 +4274,7 @@ index 2737704..a8d3d17 100644
  	nouveau_unregister_dsm_handler();
  }
 diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
-index c697191..2eb622b 100644
+index c697191..228c8cd 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h
 +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
 @@ -123,14 +123,6 @@ nvbo_kmap_obj_iovirtual(struct nouveau_bo *nvbo)
@@ -4083,20 +4292,55 @@ index c697191..2eb622b 100644
  enum nouveau_flags {
  	NV_NFORCE   = 0x10000000,
  	NV_NFORCE2  = 0x20000000
-@@ -146,10 +138,11 @@ enum nouveau_flags {
+@@ -141,22 +133,24 @@ enum nouveau_flags {
+ #define NVOBJ_ENGINE_DISPLAY	2
+ #define NVOBJ_ENGINE_INT	0xdeadbeef
+ 
+-#define NVOBJ_FLAG_ALLOW_NO_REFS	(1 << 0)
+ #define NVOBJ_FLAG_ZERO_ALLOC		(1 << 1)
  #define NVOBJ_FLAG_ZERO_FREE		(1 << 2)
- #define NVOBJ_FLAG_FAKE			(1 << 3)
+-#define NVOBJ_FLAG_FAKE			(1 << 3)
  struct nouveau_gpuobj {
 +	struct drm_device *dev;
++	struct kref refcount;
  	struct list_head list;
  
- 	struct nouveau_channel *im_channel;
+-	struct nouveau_channel *im_channel;
 -	struct mem_block *im_pramin;
 +	struct drm_mm_node *im_pramin;
  	struct nouveau_bo *im_backing;
- 	uint32_t im_backing_start;
+-	uint32_t im_backing_start;
  	uint32_t *im_backing_suspend;
-@@ -196,7 +189,7 @@ struct nouveau_channel {
+ 	int im_bound;
+ 
+ 	uint32_t flags;
+-	int refcount;
++
++	u32 size;
++	u32 pinst;
++	u32 cinst;
++	u64 vinst;
+ 
+ 	uint32_t engine;
+ 	uint32_t class;
+@@ -165,16 +159,6 @@ struct nouveau_gpuobj {
+ 	void *priv;
+ };
+ 
+-struct nouveau_gpuobj_ref {
+-	struct list_head list;
+-
+-	struct nouveau_gpuobj *gpuobj;
+-	uint32_t instance;
+-
+-	struct nouveau_channel *channel;
+-	int handle;
+-};
+-
+ struct nouveau_channel {
+ 	struct drm_device *dev;
+ 	int id;
+@@ -196,37 +180,36 @@ struct nouveau_channel {
  		struct list_head pending;
  		uint32_t sequence;
  		uint32_t sequence_ack;
@@ -4105,7 +4349,12 @@ index c697191..2eb622b 100644
  	} fence;
  
  	/* DMA push buffer */
-@@ -206,7 +199,7 @@ struct nouveau_channel {
+-	struct nouveau_gpuobj_ref *pushbuf;
+-	struct nouveau_bo         *pushbuf_bo;
+-	uint32_t                   pushbuf_base;
++	struct nouveau_gpuobj *pushbuf;
++	struct nouveau_bo     *pushbuf_bo;
++	uint32_t               pushbuf_base;
  
  	/* Notifier memory */
  	struct nouveau_bo *notifier_bo;
@@ -4113,17 +4362,37 @@ index c697191..2eb622b 100644
 +	struct drm_mm notifier_heap;
  
  	/* PFIFO context */
- 	struct nouveau_gpuobj_ref *ramfc;
-@@ -224,7 +217,7 @@ struct nouveau_channel {
+-	struct nouveau_gpuobj_ref *ramfc;
+-	struct nouveau_gpuobj_ref *cache;
++	struct nouveau_gpuobj *ramfc;
++	struct nouveau_gpuobj *cache;
+ 
+ 	/* PGRAPH context */
+ 	/* XXX may be merge 2 pointers as private data ??? */
+-	struct nouveau_gpuobj_ref *ramin_grctx;
++	struct nouveau_gpuobj *ramin_grctx;
+ 	void *pgraph_ctx;
+ 
+ 	/* NV50 VM */
+-	struct nouveau_gpuobj     *vm_pd;
+-	struct nouveau_gpuobj_ref *vm_gart_pt;
+-	struct nouveau_gpuobj_ref *vm_vram_pt[NV50_VM_VRAM_NR];
++	struct nouveau_gpuobj *vm_pd;
++	struct nouveau_gpuobj *vm_gart_pt;
++	struct nouveau_gpuobj *vm_vram_pt[NV50_VM_VRAM_NR];
  
  	/* Objects */
- 	struct nouveau_gpuobj_ref *ramin; /* Private instmem */
+-	struct nouveau_gpuobj_ref *ramin; /* Private instmem */
 -	struct mem_block          *ramin_heap; /* Private PRAMIN heap */
-+	struct drm_mm              ramin_heap; /* Private PRAMIN heap */
- 	struct nouveau_gpuobj_ref *ramht; /* Hash table */
- 	struct list_head           ramht_refs; /* Objects referenced by RAMHT */
- 
-@@ -277,8 +270,7 @@ struct nouveau_instmem_engine {
+-	struct nouveau_gpuobj_ref *ramht; /* Hash table */
+-	struct list_head           ramht_refs; /* Objects referenced by RAMHT */
++	struct nouveau_gpuobj *ramin; /* Private instmem */
++	struct drm_mm          ramin_heap; /* Private PRAMIN heap */
++	struct nouveau_ramht  *ramht; /* Hash table */
+ 
+ 	/* GPU object info for stuff used in-kernel (mm_enabled) */
+ 	uint32_t m2mf_ntfy;
+@@ -277,8 +260,7 @@ struct nouveau_instmem_engine {
  	void	(*clear)(struct drm_device *, struct nouveau_gpuobj *);
  	int	(*bind)(struct drm_device *, struct nouveau_gpuobj *);
  	int	(*unbind)(struct drm_device *, struct nouveau_gpuobj *);
@@ -4133,7 +4402,7 @@ index c697191..2eb622b 100644
  };
  
  struct nouveau_mc_engine {
-@@ -303,10 +295,11 @@ struct nouveau_fb_engine {
+@@ -303,17 +285,17 @@ struct nouveau_fb_engine {
  };
  
  struct nouveau_fifo_engine {
@@ -4141,13 +4410,20 @@ index c697191..2eb622b 100644
 -
  	int  channels;
  
-+	struct nouveau_gpuobj_ref *playlist[2];
++	struct nouveau_gpuobj *playlist[2];
 +	int cur_playlist;
 +
  	int  (*init)(struct drm_device *);
  	void (*takedown)(struct drm_device *);
  
-@@ -339,10 +332,11 @@ struct nouveau_pgraph_object_class {
+ 	void (*disable)(struct drm_device *);
+ 	void (*enable)(struct drm_device *);
+ 	bool (*reassign)(struct drm_device *, bool enable);
+-	bool (*cache_flush)(struct drm_device *dev);
+ 	bool (*cache_pull)(struct drm_device *dev, bool enable);
+ 
+ 	int  (*channel_id)(struct drm_device *);
+@@ -339,10 +321,11 @@ struct nouveau_pgraph_object_class {
  struct nouveau_pgraph_engine {
  	struct nouveau_pgraph_object_class *grclass;
  	bool accel_blocked;
@@ -4156,12 +4432,12 @@ index c697191..2eb622b 100644
  	int grctx_size;
  
 +	/* NV2x/NV3x context table (0x400780) */
-+	struct nouveau_gpuobj_ref *ctx_table;
++	struct nouveau_gpuobj *ctx_table;
 +
  	int  (*init)(struct drm_device *);
  	void (*takedown)(struct drm_device *);
  
-@@ -358,6 +352,24 @@ struct nouveau_pgraph_engine {
+@@ -358,6 +341,24 @@ struct nouveau_pgraph_engine {
  				  uint32_t size, uint32_t pitch);
  };
  
@@ -4186,7 +4462,7 @@ index c697191..2eb622b 100644
  struct nouveau_engine {
  	struct nouveau_instmem_engine instmem;
  	struct nouveau_mc_engine      mc;
-@@ -365,6 +377,8 @@ struct nouveau_engine {
+@@ -365,6 +366,8 @@ struct nouveau_engine {
  	struct nouveau_fb_engine      fb;
  	struct nouveau_pgraph_engine  graph;
  	struct nouveau_fifo_engine    fifo;
@@ -4195,7 +4471,7 @@ index c697191..2eb622b 100644
  };
  
  struct nouveau_pll_vals {
-@@ -397,7 +411,7 @@ enum nv04_fp_display_regs {
+@@ -397,7 +400,7 @@ enum nv04_fp_display_regs {
  
  struct nv04_crtc_reg {
  	unsigned char MiscOutReg;     /* */
@@ -4204,7 +4480,7 @@ index c697191..2eb622b 100644
  	uint8_t CR58[0x10];
  	uint8_t Sequencer[5];
  	uint8_t Graphics[9];
-@@ -496,15 +510,11 @@ enum nouveau_card_type {
+@@ -496,15 +499,11 @@ enum nouveau_card_type {
  	NV_30      = 0x30,
  	NV_40      = 0x40,
  	NV_50      = 0x50,
@@ -4221,7 +4497,23 @@ index c697191..2eb622b 100644
  
  	/* the card type, takes NV_* as values */
  	enum nouveau_card_type card_type;
-@@ -528,13 +538,9 @@ struct drm_nouveau_private {
+@@ -513,8 +512,14 @@ struct drm_nouveau_private {
+ 	int flags;
+ 
+ 	void __iomem *mmio;
++
++	spinlock_t ramin_lock;
+ 	void __iomem *ramin;
+-	uint32_t ramin_size;
++	u32 ramin_size;
++	u32 ramin_base;
++	bool ramin_available;
++	struct drm_mm ramin_heap;
++	struct list_head gpuobj_list;
+ 
+ 	struct nouveau_bo *vga_ram;
+ 
+@@ -528,13 +533,9 @@ struct drm_nouveau_private {
  		struct ttm_global_reference mem_global_ref;
  		struct ttm_bo_global_ref bo_global_ref;
  		struct ttm_bo_device bdev;
@@ -4235,7 +4527,36 @@ index c697191..2eb622b 100644
  	int fifo_alloc_count;
  	struct nouveau_channel *fifos[NOUVEAU_MAX_CHANNEL_NR];
  
-@@ -579,6 +585,7 @@ struct drm_nouveau_private {
+@@ -545,15 +546,11 @@ struct drm_nouveau_private {
+ 	spinlock_t context_switch_lock;
+ 
+ 	/* RAMIN configuration, RAMFC, RAMHT and RAMRO offsets */
+-	struct nouveau_gpuobj *ramht;
++	struct nouveau_ramht  *ramht;
++	struct nouveau_gpuobj *ramfc;
++	struct nouveau_gpuobj *ramro;
++
+ 	uint32_t ramin_rsvd_vram;
+-	uint32_t ramht_offset;
+-	uint32_t ramht_size;
+-	uint32_t ramht_bits;
+-	uint32_t ramfc_offset;
+-	uint32_t ramfc_size;
+-	uint32_t ramro_offset;
+-	uint32_t ramro_size;
+ 
+ 	struct {
+ 		enum {
+@@ -571,14 +568,12 @@ struct drm_nouveau_private {
+ 	} gart_info;
+ 
+ 	/* nv10-nv40 tiling regions */
+-	struct {
+-		struct nouveau_tile_reg reg[NOUVEAU_MAX_TILE_NR];
+-		spinlock_t lock;
+-	} tile;
++	struct nouveau_tile_reg tile[NOUVEAU_MAX_TILE_NR];
+ 
  	/* VRAM/fb configuration */
  	uint64_t vram_size;
  	uint64_t vram_sys_base;
@@ -4243,7 +4564,7 @@ index c697191..2eb622b 100644
  
  	uint64_t fb_phys;
  	uint64_t fb_available_size;
-@@ -595,11 +602,7 @@ struct drm_nouveau_private {
+@@ -595,14 +590,6 @@ struct drm_nouveau_private {
  	struct nouveau_gpuobj *vm_vram_pt[NV50_VM_VRAM_NR];
  	int vm_vram_pt_nr;
  
@@ -4252,11 +4573,13 @@ index c697191..2eb622b 100644
 -	/* context table pointed to be NV_PGRAPH_CHANNEL_CTX_TABLE (0x400780) */
 -	uint32_t ctx_table_size;
 -	struct nouveau_gpuobj_ref *ctx_table;
-+	struct drm_mm ramin_heap;
- 
- 	struct list_head gpuobj_list;
+-
+-	struct list_head gpuobj_list;
+-
+ 	struct nvbios vbios;
  
-@@ -618,6 +621,11 @@ struct drm_nouveau_private {
+ 	struct nv04_mode_state mode_reg;
+@@ -618,6 +605,11 @@ struct drm_nouveau_private {
  	struct backlight_device *backlight;
  
  	struct nouveau_channel *evo;
@@ -4268,7 +4591,7 @@ index c697191..2eb622b 100644
  
  	struct {
  		struct dentry *channel_root;
-@@ -652,14 +660,6 @@ nouveau_bo_ref(struct nouveau_bo *ref, struct nouveau_bo **pnvbo)
+@@ -652,14 +644,6 @@ nouveau_bo_ref(struct nouveau_bo *ref, struct nouveau_bo **pnvbo)
  	return 0;
  }
  
@@ -4283,7 +4606,16 @@ index c697191..2eb622b 100644
  #define NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(id, cl, ch) do {    \
  	struct drm_nouveau_private *nv = dev->dev_private;       \
  	if (!nouveau_channel_owner(dev, (cl), (id))) {           \
-@@ -682,7 +682,6 @@ extern int nouveau_tv_disable;
+@@ -671,7 +655,7 @@ nouveau_bo_ref(struct nouveau_bo *ref, struct nouveau_bo **pnvbo)
+ } while (0)
+ 
+ /* nouveau_drv.c */
+-extern int nouveau_noagp;
++extern int nouveau_agpmode;
+ extern int nouveau_duallink;
+ extern int nouveau_uscript_lvds;
+ extern int nouveau_uscript_tmds;
+@@ -682,7 +666,6 @@ extern int nouveau_tv_disable;
  extern char *nouveau_tv_norm;
  extern int nouveau_reg_debug;
  extern char *nouveau_vbios;
@@ -4291,7 +4623,7 @@ index c697191..2eb622b 100644
  extern int nouveau_ignorelid;
  extern int nouveau_nofbaccel;
  extern int nouveau_noaccel;
-@@ -707,17 +706,10 @@ extern bool nouveau_wait_for_idle(struct drm_device *);
+@@ -707,17 +690,12 @@ extern bool nouveau_wait_for_idle(struct drm_device *);
  extern int  nouveau_card_init(struct drm_device *);
  
  /* nouveau_mem.c */
@@ -4302,15 +4634,57 @@ index c697191..2eb622b 100644
 -						 struct drm_file *, int tail);
 -extern void nouveau_mem_takedown(struct mem_block **heap);
 -extern void nouveau_mem_free_block(struct mem_block *);
- extern int  nouveau_mem_detect(struct drm_device *dev);
+-extern int  nouveau_mem_detect(struct drm_device *dev);
 -extern void nouveau_mem_release(struct drm_file *, struct mem_block *heap);
- extern int  nouveau_mem_init(struct drm_device *);
+-extern int  nouveau_mem_init(struct drm_device *);
++extern int  nouveau_mem_vram_init(struct drm_device *);
++extern void nouveau_mem_vram_fini(struct drm_device *);
++extern int  nouveau_mem_gart_init(struct drm_device *);
++extern void nouveau_mem_gart_fini(struct drm_device *);
  extern int  nouveau_mem_init_agp(struct drm_device *);
 +extern int  nouveau_mem_reset_agp(struct drm_device *);
  extern void nouveau_mem_close(struct drm_device *);
  extern struct nouveau_tile_reg *nv10_mem_set_tiling(struct drm_device *dev,
  						    uint32_t addr,
-@@ -857,11 +849,13 @@ void nouveau_register_dsm_handler(void);
+@@ -759,7 +737,6 @@ extern void nouveau_channel_free(struct nouveau_channel *);
+ extern int  nouveau_gpuobj_early_init(struct drm_device *);
+ extern int  nouveau_gpuobj_init(struct drm_device *);
+ extern void nouveau_gpuobj_takedown(struct drm_device *);
+-extern void nouveau_gpuobj_late_takedown(struct drm_device *);
+ extern int  nouveau_gpuobj_suspend(struct drm_device *dev);
+ extern void nouveau_gpuobj_suspend_cleanup(struct drm_device *dev);
+ extern void nouveau_gpuobj_resume(struct drm_device *dev);
+@@ -769,24 +746,11 @@ extern void nouveau_gpuobj_channel_takedown(struct nouveau_channel *);
+ extern int nouveau_gpuobj_new(struct drm_device *, struct nouveau_channel *,
+ 			      uint32_t size, int align, uint32_t flags,
+ 			      struct nouveau_gpuobj **);
+-extern int nouveau_gpuobj_del(struct drm_device *, struct nouveau_gpuobj **);
+-extern int nouveau_gpuobj_ref_add(struct drm_device *, struct nouveau_channel *,
+-				  uint32_t handle, struct nouveau_gpuobj *,
+-				  struct nouveau_gpuobj_ref **);
+-extern int nouveau_gpuobj_ref_del(struct drm_device *,
+-				  struct nouveau_gpuobj_ref **);
+-extern int nouveau_gpuobj_ref_find(struct nouveau_channel *, uint32_t handle,
+-				   struct nouveau_gpuobj_ref **ref_ret);
+-extern int nouveau_gpuobj_new_ref(struct drm_device *,
+-				  struct nouveau_channel *alloc_chan,
+-				  struct nouveau_channel *ref_chan,
+-				  uint32_t handle, uint32_t size, int align,
+-				  uint32_t flags, struct nouveau_gpuobj_ref **);
+-extern int nouveau_gpuobj_new_fake(struct drm_device *,
+-				   uint32_t p_offset, uint32_t b_offset,
+-				   uint32_t size, uint32_t flags,
+-				   struct nouveau_gpuobj **,
+-				   struct nouveau_gpuobj_ref**);
++extern void nouveau_gpuobj_ref(struct nouveau_gpuobj *,
++			       struct nouveau_gpuobj **);
++extern int nouveau_gpuobj_new_fake(struct drm_device *, u32 pinst, u64 vinst,
++				   u32 size, u32 flags,
++				   struct nouveau_gpuobj **);
+ extern int nouveau_gpuobj_dma_new(struct nouveau_channel *, int class,
+ 				  uint64_t offset, uint64_t size, int access,
+ 				  int target, struct nouveau_gpuobj **);
+@@ -857,11 +821,13 @@ void nouveau_register_dsm_handler(void);
  void nouveau_unregister_dsm_handler(void);
  int nouveau_acpi_get_bios_chunk(uint8_t *bios, int offset, int len);
  bool nouveau_acpi_rom_supported(struct pci_dev *pdev);
@@ -4324,7 +4698,7 @@ index c697191..2eb622b 100644
  #endif
  
  /* nouveau_backlight.c */
-@@ -924,15 +918,23 @@ extern void nv10_fb_takedown(struct drm_device *);
+@@ -924,22 +890,29 @@ extern void nv10_fb_takedown(struct drm_device *);
  extern void nv10_fb_set_region_tiling(struct drm_device *, int, uint32_t,
  				      uint32_t, uint32_t);
  
@@ -4349,7 +4723,14 @@ index c697191..2eb622b 100644
  
  /* nv04_fifo.c */
  extern int  nv04_fifo_init(struct drm_device *);
-@@ -971,6 +973,20 @@ extern void nv50_fifo_destroy_context(struct nouveau_channel *);
+ extern void nv04_fifo_disable(struct drm_device *);
+ extern void nv04_fifo_enable(struct drm_device *);
+ extern bool nv04_fifo_reassign(struct drm_device *, bool);
+-extern bool nv04_fifo_cache_flush(struct drm_device *);
+ extern bool nv04_fifo_cache_pull(struct drm_device *, bool);
+ extern int  nv04_fifo_channel_id(struct drm_device *);
+ extern int  nv04_fifo_create_context(struct nouveau_channel *);
+@@ -971,6 +944,19 @@ extern void nv50_fifo_destroy_context(struct nouveau_channel *);
  extern int  nv50_fifo_load_context(struct nouveau_channel *);
  extern int  nv50_fifo_unload_context(struct drm_device *);
  
@@ -4359,7 +4740,6 @@ index c697191..2eb622b 100644
 +extern void nvc0_fifo_disable(struct drm_device *);
 +extern void nvc0_fifo_enable(struct drm_device *);
 +extern bool nvc0_fifo_reassign(struct drm_device *, bool);
-+extern bool nvc0_fifo_cache_flush(struct drm_device *);
 +extern bool nvc0_fifo_cache_pull(struct drm_device *, bool);
 +extern int  nvc0_fifo_channel_id(struct drm_device *);
 +extern int  nvc0_fifo_create_context(struct nouveau_channel *);
@@ -4370,7 +4750,7 @@ index c697191..2eb622b 100644
  /* nv04_graph.c */
  extern struct nouveau_pgraph_object_class nv04_graph_grclass[];
  extern int  nv04_graph_init(struct drm_device *);
-@@ -1035,11 +1051,15 @@ extern int  nv50_graph_unload_context(struct drm_device *);
+@@ -1035,11 +1021,15 @@ extern int  nv50_graph_unload_context(struct drm_device *);
  extern void nv50_graph_context_switch(struct drm_device *);
  extern int  nv50_grctx_init(struct nouveau_grctx *);
  
@@ -4391,7 +4771,7 @@ index c697191..2eb622b 100644
  
  /* nv04_instmem.c */
  extern int  nv04_instmem_init(struct drm_device *);
-@@ -1051,8 +1071,7 @@ extern int  nv04_instmem_populate(struct drm_device *, struct nouveau_gpuobj *,
+@@ -1051,8 +1041,7 @@ extern int  nv04_instmem_populate(struct drm_device *, struct nouveau_gpuobj *,
  extern void nv04_instmem_clear(struct drm_device *, struct nouveau_gpuobj *);
  extern int  nv04_instmem_bind(struct drm_device *, struct nouveau_gpuobj *);
  extern int  nv04_instmem_unbind(struct drm_device *, struct nouveau_gpuobj *);
@@ -4401,7 +4781,7 @@ index c697191..2eb622b 100644
  
  /* nv50_instmem.c */
  extern int  nv50_instmem_init(struct drm_device *);
-@@ -1064,8 +1083,21 @@ extern int  nv50_instmem_populate(struct drm_device *, struct nouveau_gpuobj *,
+@@ -1064,8 +1053,21 @@ extern int  nv50_instmem_populate(struct drm_device *, struct nouveau_gpuobj *,
  extern void nv50_instmem_clear(struct drm_device *, struct nouveau_gpuobj *);
  extern int  nv50_instmem_bind(struct drm_device *, struct nouveau_gpuobj *);
  extern int  nv50_instmem_unbind(struct drm_device *, struct nouveau_gpuobj *);
@@ -4425,7 +4805,7 @@ index c697191..2eb622b 100644
  
  /* nv04_mc.c */
  extern int  nv04_mc_init(struct drm_device *);
-@@ -1088,13 +1120,14 @@ extern long nouveau_compat_ioctl(struct file *file, unsigned int cmd,
+@@ -1088,13 +1090,14 @@ extern long nouveau_compat_ioctl(struct file *file, unsigned int cmd,
  				 unsigned long arg);
  
  /* nv04_dac.c */
@@ -4442,7 +4822,7 @@ index c697191..2eb622b 100644
  extern int nv04_dfp_get_bound_head(struct drm_device *dev, struct dcb_entry *dcbent);
  extern void nv04_dfp_bind_head(struct drm_device *dev, struct dcb_entry *dcbent,
  			       int head, bool dl);
-@@ -1103,15 +1136,17 @@ extern void nv04_dfp_update_fp_control(struct drm_encoder *encoder, int mode);
+@@ -1103,15 +1106,17 @@ extern void nv04_dfp_update_fp_control(struct drm_encoder *encoder, int mode);
  
  /* nv04_tv.c */
  extern int nv04_tv_identify(struct drm_device *dev, int i2c_index);
@@ -4463,7 +4843,7 @@ index c697191..2eb622b 100644
  
  /* nv04_crtc.c */
  extern int nv04_crtc_create(struct drm_device *, int index);
-@@ -1147,7 +1182,6 @@ extern int nouveau_fence_wait(void *obj, void *arg, bool lazy, bool intr);
+@@ -1147,7 +1152,6 @@ extern int nouveau_fence_wait(void *obj, void *arg, bool lazy, bool intr);
  extern int nouveau_fence_flush(void *obj, void *arg);
  extern void nouveau_fence_unref(void **obj);
  extern void *nouveau_fence_ref(void *obj);
@@ -4471,7 +4851,7 @@ index c697191..2eb622b 100644
  
  /* nouveau_gem.c */
  extern int nouveau_gem_new(struct drm_device *, struct nouveau_channel *,
-@@ -1167,13 +1201,15 @@ extern int nouveau_gem_ioctl_cpu_fini(struct drm_device *, void *,
+@@ -1167,13 +1171,15 @@ extern int nouveau_gem_ioctl_cpu_fini(struct drm_device *, void *,
  extern int nouveau_gem_ioctl_info(struct drm_device *, void *,
  				  struct drm_file *);
  
@@ -4490,7 +4870,7 @@ index c697191..2eb622b 100644
  
  /* nv50_calc. */
  int nv50_calc_pll(struct drm_device *, struct pll_lims *, int clk,
-@@ -1220,6 +1256,13 @@ static inline void nv_wr32(struct drm_device *dev, unsigned reg, u32 val)
+@@ -1220,6 +1226,13 @@ static inline void nv_wr32(struct drm_device *dev, unsigned reg, u32 val)
  	iowrite32_native(val, dev_priv->mmio + reg);
  }
  
@@ -4504,7 +4884,16 @@ index c697191..2eb622b 100644
  static inline u8 nv_rd08(struct drm_device *dev, unsigned reg)
  {
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
-@@ -1249,17 +1292,8 @@ static inline void nv_wi32(struct drm_device *dev, unsigned offset, u32 val)
+@@ -1232,7 +1245,7 @@ static inline void nv_wr08(struct drm_device *dev, unsigned reg, u8 val)
+ 	iowrite8(val, dev_priv->mmio + reg);
+ }
+ 
+-#define nv_wait(reg, mask, val) \
++#define nv_wait(dev, reg, mask, val) \
+ 	nouveau_wait_until(dev, 2000000000ULL, (reg), (mask), (val))
+ 
+ /* PRAMIN access */
+@@ -1249,17 +1262,8 @@ static inline void nv_wi32(struct drm_device *dev, unsigned offset, u32 val)
  }
  
  /* object access */
@@ -4524,7 +4913,7 @@ index c697191..2eb622b 100644
  
  /*
   * Logging
-@@ -1346,6 +1380,15 @@ nv_two_reg_pll(struct drm_device *dev)
+@@ -1346,6 +1350,15 @@ nv_two_reg_pll(struct drm_device *dev)
  	return false;
  }
  
@@ -5157,10 +5546,18 @@ index c8eaf7a..f71cb32 100644
  
  #endif /* __NOUVEAU_I2C_H__ */
 diff --git a/drivers/gpu/drm/nouveau/nouveau_irq.c b/drivers/gpu/drm/nouveau/nouveau_irq.c
-index 53360f1..b8658a0 100644
+index 53360f1..6fd51a5 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_irq.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_irq.c
-@@ -49,7 +49,7 @@ nouveau_irq_preinstall(struct drm_device *dev)
+@@ -35,6 +35,7 @@
+ #include "nouveau_drm.h"
+ #include "nouveau_drv.h"
+ #include "nouveau_reg.h"
++#include "nouveau_ramht.h"
+ #include <linux/ratelimit.h>
+ 
+ /* needed for hotplug irq */
+@@ -49,7 +50,7 @@ nouveau_irq_preinstall(struct drm_device *dev)
  	/* Master disable */
  	nv_wr32(dev, NV03_PMC_INTR_EN_0, 0);
  
@@ -5169,7 +5566,82 @@ index 53360f1..b8658a0 100644
  		INIT_WORK(&dev_priv->irq_work, nv50_display_irq_handler_bh);
  		INIT_WORK(&dev_priv->hpd_work, nv50_display_irq_hotplug_bh);
  		INIT_LIST_HEAD(&dev_priv->vbl_waiting);
-@@ -226,6 +226,14 @@ nouveau_fifo_irq_handler(struct drm_device *dev)
+@@ -106,15 +107,16 @@ nouveau_fifo_swmthd(struct nouveau_channel *chan, uint32_t addr, uint32_t data)
+ 	const int mthd = addr & 0x1ffc;
+ 
+ 	if (mthd == 0x0000) {
+-		struct nouveau_gpuobj_ref *ref = NULL;
++		struct nouveau_gpuobj *gpuobj;
+ 
+-		if (nouveau_gpuobj_ref_find(chan, data, &ref))
++		gpuobj = nouveau_ramht_find(chan, data);
++		if (!gpuobj)
+ 			return false;
+ 
+-		if (ref->gpuobj->engine != NVOBJ_ENGINE_SW)
++		if (gpuobj->engine != NVOBJ_ENGINE_SW)
+ 			return false;
+ 
+-		chan->sw_subchannel[subc] = ref->gpuobj->class;
++		chan->sw_subchannel[subc] = gpuobj->class;
+ 		nv_wr32(dev, NV04_PFIFO_CACHE1_ENGINE, nv_rd32(dev,
+ 			NV04_PFIFO_CACHE1_ENGINE) & ~(0xf << subc * 4));
+ 		return true;
+@@ -200,16 +202,45 @@ nouveau_fifo_irq_handler(struct drm_device *dev)
+ 		}
+ 
+ 		if (status & NV_PFIFO_INTR_DMA_PUSHER) {
+-			NV_INFO(dev, "PFIFO_DMA_PUSHER - Ch %d\n", chid);
++			u32 get = nv_rd32(dev, 0x003244);
++			u32 put = nv_rd32(dev, 0x003240);
++			u32 push = nv_rd32(dev, 0x003220);
++			u32 state = nv_rd32(dev, 0x003228);
++
++			if (dev_priv->card_type == NV_50) {
++				u32 ho_get = nv_rd32(dev, 0x003328);
++				u32 ho_put = nv_rd32(dev, 0x003320);
++				u32 ib_get = nv_rd32(dev, 0x003334);
++				u32 ib_put = nv_rd32(dev, 0x003330);
++
++				NV_INFO(dev, "PFIFO_DMA_PUSHER - Ch %d Get 0x%02x%08x "
++					     "Put 0x%02x%08x IbGet 0x%08x IbPut 0x%08x "
++					     "State 0x%08x Push 0x%08x\n",
++					chid, ho_get, get, ho_put, put, ib_get, ib_put,
++					state, push);
++
++				/* METHOD_COUNT, in DMA_STATE on earlier chipsets */
++				nv_wr32(dev, 0x003364, 0x00000000);
++				if (get != put || ho_get != ho_put) {
++					nv_wr32(dev, 0x003244, put);
++					nv_wr32(dev, 0x003328, ho_put);
++				} else
++				if (ib_get != ib_put) {
++					nv_wr32(dev, 0x003334, ib_put);
++				}
++			} else {
++				NV_INFO(dev, "PFIFO_DMA_PUSHER - Ch %d Get 0x%08x "
++					     "Put 0x%08x State 0x%08x Push 0x%08x\n",
++					chid, get, put, state, push);
+ 
+-			status &= ~NV_PFIFO_INTR_DMA_PUSHER;
+-			nv_wr32(dev, NV03_PFIFO_INTR_0,
+-						NV_PFIFO_INTR_DMA_PUSHER);
++				if (get != put)
++					nv_wr32(dev, 0x003244, put);
++			}
+ 
+-			nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_STATE, 0x00000000);
+-			if (nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_PUT) != get)
+-				nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_GET,
+-								get + 4);
++			nv_wr32(dev, 0x003228, 0x00000000);
++			nv_wr32(dev, 0x003220, 0x00000001);
++			nv_wr32(dev, 0x002100, NV_PFIFO_INTR_DMA_PUSHER);
++			status &= ~NV_PFIFO_INTR_DMA_PUSHER;
+ 		}
+ 
+ 		if (status & NV_PFIFO_INTR_SEMAPHORE) {
+@@ -226,6 +257,14 @@ nouveau_fifo_irq_handler(struct drm_device *dev)
  			nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, 1);
  		}
  
@@ -5184,7 +5656,25 @@ index 53360f1..b8658a0 100644
  		if (status) {
  			NV_INFO(dev, "PFIFO_INTR 0x%08x - Ch %d\n",
  				status, chid);
-@@ -586,11 +594,11 @@ nouveau_pgraph_irq_handler(struct drm_device *dev)
+@@ -357,7 +396,7 @@ nouveau_graph_chid_from_grctx(struct drm_device *dev)
+ 			if (!chan || !chan->ramin_grctx)
+ 				continue;
+ 
+-			if (inst == chan->ramin_grctx->instance)
++			if (inst == chan->ramin_grctx->pinst)
+ 				break;
+ 		}
+ 	} else {
+@@ -369,7 +408,7 @@ nouveau_graph_chid_from_grctx(struct drm_device *dev)
+ 			if (!chan || !chan->ramin)
+ 				continue;
+ 
+-			if (inst == chan->ramin->instance)
++			if (inst == chan->ramin->vinst)
+ 				break;
+ 		}
+ 	}
+@@ -586,11 +625,11 @@ nouveau_pgraph_irq_handler(struct drm_device *dev)
  		}
  
  		if (status & NV_PGRAPH_INTR_CONTEXT_SWITCH) {
@@ -5198,7 +5688,7 @@ index 53360f1..b8658a0 100644
  		}
  
  		if (status) {
-@@ -605,40 +613,6 @@ nouveau_pgraph_irq_handler(struct drm_device *dev)
+@@ -605,40 +644,6 @@ nouveau_pgraph_irq_handler(struct drm_device *dev)
  	nv_wr32(dev, NV03_PMC_INTR_0, NV_PMC_INTR_0_PGRAPH_PENDING);
  }
  
@@ -5239,7 +5729,7 @@ index 53360f1..b8658a0 100644
  static struct nouveau_enum_names nv50_mp_exec_error_names[] =
  {
  	{ 3, "STACK_UNDERFLOW" },
-@@ -711,7 +685,7 @@ nv50_pgraph_tp_trap(struct drm_device *dev, int type, uint32_t ustatus_old,
+@@ -711,7 +716,7 @@ nv50_pgraph_tp_trap(struct drm_device *dev, int type, uint32_t ustatus_old,
  		tps++;
  		switch (type) {
  		case 6: /* texture error... unknown for now */
@@ -5248,7 +5738,7 @@ index 53360f1..b8658a0 100644
  			if (display) {
  				NV_ERROR(dev, "magic set %d:\n", i);
  				for (r = ustatus_addr + 4; r <= ustatus_addr + 0x10; r += 4)
-@@ -734,7 +708,7 @@ nv50_pgraph_tp_trap(struct drm_device *dev, int type, uint32_t ustatus_old,
+@@ -734,7 +739,7 @@ nv50_pgraph_tp_trap(struct drm_device *dev, int type, uint32_t ustatus_old,
  			uint32_t e1c = nv_rd32(dev, ustatus_addr + 0x14);
  			uint32_t e20 = nv_rd32(dev, ustatus_addr + 0x18);
  			uint32_t e24 = nv_rd32(dev, ustatus_addr + 0x1c);
@@ -5257,7 +5747,7 @@ index 53360f1..b8658a0 100644
  			/* 2d engine destination */
  			if (ustatus & 0x00000010) {
  				if (display) {
-@@ -817,7 +791,7 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
+@@ -817,7 +822,7 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
  
  		/* Known to be triggered by screwed up NOTIFY and COND... */
  		if (ustatus & 0x00000001) {
@@ -5266,7 +5756,7 @@ index 53360f1..b8658a0 100644
  			nv_wr32(dev, 0x400500, 0);
  			if (nv_rd32(dev, 0x400808) & 0x80000000) {
  				if (display) {
-@@ -842,7 +816,7 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
+@@ -842,7 +847,7 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
  			ustatus &= ~0x00000001;
  		}
  		if (ustatus & 0x00000002) {
@@ -5275,7 +5765,7 @@ index 53360f1..b8658a0 100644
  			nv_wr32(dev, 0x400500, 0);
  			if (nv_rd32(dev, 0x40084c) & 0x80000000) {
  				if (display) {
-@@ -884,15 +858,15 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
+@@ -884,15 +889,15 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
  			NV_INFO(dev, "PGRAPH_TRAP_M2MF - no ustatus?\n");
  		}
  		if (ustatus & 0x00000001) {
@@ -5294,7 +5784,7 @@ index 53360f1..b8658a0 100644
  			ustatus &= ~0x00000004;
  		}
  		NV_INFO (dev, "PGRAPH_TRAP_M2MF - %08x %08x %08x %08x\n",
-@@ -917,7 +891,7 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
+@@ -917,7 +922,7 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
  			NV_INFO(dev, "PGRAPH_TRAP_VFETCH - no ustatus?\n");
  		}
  		if (ustatus & 0x00000001) {
@@ -5303,7 +5793,7 @@ index 53360f1..b8658a0 100644
  			NV_INFO (dev, "PGRAPH_TRAP_VFETCH_FAULT - %08x %08x %08x %08x\n",
  					nv_rd32(dev, 0x400c00),
  					nv_rd32(dev, 0x400c08),
-@@ -939,7 +913,7 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
+@@ -939,7 +944,7 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
  			NV_INFO(dev, "PGRAPH_TRAP_STRMOUT - no ustatus?\n");
  		}
  		if (ustatus & 0x00000001) {
@@ -5312,7 +5802,7 @@ index 53360f1..b8658a0 100644
  			NV_INFO (dev, "PGRAPH_TRAP_STRMOUT_FAULT - %08x %08x %08x %08x\n",
  					nv_rd32(dev, 0x401804),
  					nv_rd32(dev, 0x401808),
-@@ -964,7 +938,7 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
+@@ -964,7 +969,7 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
  			NV_INFO(dev, "PGRAPH_TRAP_CCACHE - no ustatus?\n");
  		}
  		if (ustatus & 0x00000001) {
@@ -5321,7 +5811,7 @@ index 53360f1..b8658a0 100644
  			NV_INFO (dev, "PGRAPH_TRAP_CCACHE_FAULT - %08x %08x %08x %08x %08x %08x %08x\n",
  					nv_rd32(dev, 0x405800),
  					nv_rd32(dev, 0x405804),
-@@ -986,7 +960,7 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
+@@ -986,7 +991,7 @@ nv50_pgraph_trap_handler(struct drm_device *dev)
  	 * remaining, so try to handle it anyway. Perhaps related to that
  	 * unknown DMA slot on tesla? */
  	if (status & 0x20) {
@@ -5331,7 +5821,7 @@ index 53360f1..b8658a0 100644
  		if (display)
  			NV_INFO(dev, "PGRAPH_TRAP_UNKC04 - Unhandled ustatus 0x%08x\n", ustatus);
 diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
-index c1fd42b..ee799c2 100644
+index c1fd42b..4f0ae39 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
 @@ -35,162 +35,6 @@
@@ -5497,7 +5987,74 @@ index c1fd42b..ee799c2 100644
  /*
   * NV10-NV40 tiling helpers
   */
-@@ -299,7 +143,6 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size,
+@@ -203,18 +47,14 @@ nv10_mem_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ 	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+ 	struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+ 	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
+-	struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i];
++	struct nouveau_tile_reg *tile = &dev_priv->tile[i];
+ 
+ 	tile->addr = addr;
+ 	tile->size = size;
+ 	tile->used = !!pitch;
+ 	nouveau_fence_unref((void **)&tile->fence);
+ 
+-	if (!pfifo->cache_flush(dev))
+-		return;
+-
+ 	pfifo->reassign(dev, false);
+-	pfifo->cache_flush(dev);
+ 	pfifo->cache_pull(dev, false);
+ 
+ 	nouveau_wait_for_idle(dev);
+@@ -232,34 +72,36 @@ nv10_mem_set_tiling(struct drm_device *dev, uint32_t addr, uint32_t size,
+ {
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+ 	struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+-	struct nouveau_tile_reg *tile = dev_priv->tile.reg, *found = NULL;
+-	int i;
++	struct nouveau_tile_reg *found = NULL;
++	unsigned long i, flags;
+ 
+-	spin_lock(&dev_priv->tile.lock);
++	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+ 
+ 	for (i = 0; i < pfb->num_tiles; i++) {
+-		if (tile[i].used)
++		struct nouveau_tile_reg *tile = &dev_priv->tile[i];
++
++		if (tile->used)
+ 			/* Tile region in use. */
+ 			continue;
+ 
+-		if (tile[i].fence &&
+-		    !nouveau_fence_signalled(tile[i].fence, NULL))
++		if (tile->fence &&
++		    !nouveau_fence_signalled(tile->fence, NULL))
+ 			/* Pending tile region. */
+ 			continue;
+ 
+-		if (max(tile[i].addr, addr) <
+-		    min(tile[i].addr + tile[i].size, addr + size))
++		if (max(tile->addr, addr) <
++		    min(tile->addr + tile->size, addr + size))
+ 			/* Kill an intersecting tile region. */
+ 			nv10_mem_set_region_tiling(dev, i, 0, 0, 0);
+ 
+ 		if (pitch && !found) {
+ 			/* Free tile region. */
+ 			nv10_mem_set_region_tiling(dev, i, addr, size, pitch);
+-			found = &tile[i];
++			found = tile;
+ 		}
+ 	}
+ 
+-	spin_unlock(&dev_priv->tile.lock);
++	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+ 
+ 	return found;
+ }
+@@ -299,7 +141,6 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size,
  		phys |= 0x30;
  	}
  
@@ -5505,7 +6062,7 @@ index c1fd42b..ee799c2 100644
  	while (size) {
  		unsigned offset_h = upper_32_bits(phys);
  		unsigned offset_l = lower_32_bits(phys);
-@@ -326,41 +169,18 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size,
+@@ -326,41 +167,18 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size,
  			virt  += (end - pte);
  
  			while (pte < end) {
@@ -5555,7 +6112,7 @@ index c1fd42b..ee799c2 100644
  	return 0;
  }
  
-@@ -374,7 +194,6 @@ nv50_mem_vm_unbind(struct drm_device *dev, uint64_t virt, uint32_t size)
+@@ -374,7 +192,6 @@ nv50_mem_vm_unbind(struct drm_device *dev, uint64_t virt, uint32_t size)
  	virt -= dev_priv->vm_vram_base;
  	pages = (size >> 16) << 1;
  
@@ -5563,7 +6120,7 @@ index c1fd42b..ee799c2 100644
  	while (pages) {
  		pgt = dev_priv->vm_vram_pt[virt >> 29];
  		pte = (virt & 0x1ffe0000ULL) >> 15;
-@@ -385,60 +204,24 @@ nv50_mem_vm_unbind(struct drm_device *dev, uint64_t virt, uint32_t size)
+@@ -385,60 +202,24 @@ nv50_mem_vm_unbind(struct drm_device *dev, uint64_t virt, uint32_t size)
  		pages -= (end - pte);
  		virt  += (end - pte) << 15;
  
@@ -5631,31 +6188,43 @@ index c1fd42b..ee799c2 100644
 -
 -void nouveau_mem_close(struct drm_device *dev)
 +void
-+nouveau_mem_close(struct drm_device *dev)
++nouveau_mem_vram_fini(struct drm_device *dev)
  {
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
  
-@@ -449,8 +232,7 @@ void nouveau_mem_close(struct drm_device *dev)
+@@ -449,8 +230,20 @@ void nouveau_mem_close(struct drm_device *dev)
  
  	nouveau_ttm_global_release(dev_priv);
  
 -	if (drm_core_has_AGP(dev) && dev->agp &&
 -	    drm_core_check_feature(dev, DRIVER_MODESET)) {
++	if (dev_priv->fb_mtrr >= 0) {
++		drm_mtrr_del(dev_priv->fb_mtrr,
++			     pci_resource_start(dev->pdev, 1),
++			     pci_resource_len(dev->pdev, 1), DRM_MTRR_WC);
++		dev_priv->fb_mtrr = -1;
++	}
++}
++
++void
++nouveau_mem_gart_fini(struct drm_device *dev)
++{
++	nouveau_sgdma_takedown(dev);
++
 +	if (drm_core_has_AGP(dev) && dev->agp) {
  		struct drm_agp_mem *entry, *tempe;
  
  		/* Remove AGP resources, but leave dev->agp
-@@ -470,29 +252,29 @@ void nouveau_mem_close(struct drm_device *dev)
+@@ -469,30 +262,24 @@ void nouveau_mem_close(struct drm_device *dev)
+ 		dev->agp->acquired = 0;
  		dev->agp->enabled = 0;
  	}
- 
+-
 -	if (dev_priv->fb_mtrr) {
-+	if (dev_priv->fb_mtrr >= 0) {
- 		drm_mtrr_del(dev_priv->fb_mtrr, drm_get_resource_start(dev, 1),
- 			     drm_get_resource_len(dev, 1), DRM_MTRR_WC);
+-		drm_mtrr_del(dev_priv->fb_mtrr, drm_get_resource_start(dev, 1),
+-			     drm_get_resource_len(dev, 1), DRM_MTRR_WC);
 -		dev_priv->fb_mtrr = 0;
-+		dev_priv->fb_mtrr = -1;
- 	}
+-	}
  }
  
  static uint32_t
@@ -5683,11 +6252,12 @@ index c1fd42b..ee799c2 100644
  		return 4 * 1024 * 1024;
  	}
  
-@@ -525,7 +307,61 @@ nouveau_mem_detect_nforce(struct drm_device *dev)
+@@ -525,8 +312,62 @@ nouveau_mem_detect_nforce(struct drm_device *dev)
  	return 0;
  }
  
 -/* returns the amount of FB ram in bytes */
+-int
 +static void
 +nv50_vram_preinit(struct drm_device *dev)
 +{
@@ -5743,10 +6313,11 @@ index c1fd42b..ee799c2 100644
 +	dev_priv->vram_rblock_size = 1;
 +}
 +
- int
++static int
  nouveau_mem_detect(struct drm_device *dev)
  {
-@@ -536,12 +372,31 @@ nouveau_mem_detect(struct drm_device *dev)
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+@@ -536,12 +377,31 @@ nouveau_mem_detect(struct drm_device *dev)
  	} else
  	if (dev_priv->flags & (NV_NFORCE | NV_NFORCE2)) {
  		dev_priv->vram_size = nouveau_mem_detect_nforce(dev);
@@ -5782,16 +6353,41 @@ index c1fd42b..ee799c2 100644
  	}
  
  	NV_INFO(dev, "Detected %dMiB VRAM\n", (int)(dev_priv->vram_size >> 20));
-@@ -555,18 +410,37 @@ nouveau_mem_detect(struct drm_device *dev)
- 	return -ENOMEM;
+@@ -556,17 +416,63 @@ nouveau_mem_detect(struct drm_device *dev)
  }
  
--#if __OS_HAS_AGP
+ #if __OS_HAS_AGP
 -static void nouveau_mem_reset_agp(struct drm_device *dev)
-+int
-+nouveau_mem_reset_agp(struct drm_device *dev)
++static unsigned long
++get_agp_mode(struct drm_device *dev, unsigned long mode)
  {
 -	uint32_t saved_pci_nv_1, saved_pci_nv_19, pmc_enable;
++	struct drm_nouveau_private *dev_priv = dev->dev_private;
++
++	/*
++	 * FW seems to be broken on nv18, it makes the card lock up
++	 * randomly.
++	 */
++	if (dev_priv->chipset == 0x18)
++		mode &= ~PCI_AGP_COMMAND_FW;
++
++	/*
++	 * AGP mode set in the command line.
++	 */
++	if (nouveau_agpmode > 0) {
++		bool agpv3 = mode & 0x8;
++		int rate = agpv3 ? nouveau_agpmode / 4 : nouveau_agpmode;
++
++		mode = (mode & ~0x7) | (rate & 0x7);
++	}
++
++	return mode;
++}
++#endif
++
++int
++nouveau_mem_reset_agp(struct drm_device *dev)
++{
 +#if __OS_HAS_AGP
 +	uint32_t saved_pci_nv_1, pmc_enable;
 +	int ret;
@@ -5808,7 +6404,7 @@ index c1fd42b..ee799c2 100644
 +		if (ret)
 +			return ret;
 +
-+		mode.mode = info.mode & ~PCI_AGP_COMMAND_FW;
++		mode.mode = get_agp_mode(dev, info.mode) & ~PCI_AGP_COMMAND_FW;
 +		ret = drm_agp_enable(dev, mode);
 +		if (ret)
 +			return ret;
@@ -5826,7 +6422,7 @@ index c1fd42b..ee799c2 100644
  
  	/* power cycle pgraph, if enabled */
  	pmc_enable = nv_rd32(dev, NV03_PMC_ENABLE);
-@@ -578,11 +452,12 @@ static void nouveau_mem_reset_agp(struct drm_device *dev)
+@@ -578,11 +484,12 @@ static void nouveau_mem_reset_agp(struct drm_device *dev)
  	}
  
  	/* and restore (gives effect of resetting AGP) */
@@ -5841,7 +6437,7 @@ index c1fd42b..ee799c2 100644
  int
  nouveau_mem_init_agp(struct drm_device *dev)
  {
-@@ -592,11 +467,6 @@ nouveau_mem_init_agp(struct drm_device *dev)
+@@ -592,11 +499,6 @@ nouveau_mem_init_agp(struct drm_device *dev)
  	struct drm_agp_mode mode;
  	int ret;
  
@@ -5853,7 +6449,7 @@ index c1fd42b..ee799c2 100644
  	if (!dev->agp->acquired) {
  		ret = drm_agp_acquire(dev);
  		if (ret) {
-@@ -605,6 +475,8 @@ nouveau_mem_init_agp(struct drm_device *dev)
+@@ -605,6 +507,8 @@ nouveau_mem_init_agp(struct drm_device *dev)
  		}
  	}
  
@@ -5862,38 +6458,139 @@ index c1fd42b..ee799c2 100644
  	ret = drm_agp_info(dev, &info);
  	if (ret) {
  		NV_ERROR(dev, "Unable to get AGP info: %d\n", ret);
-@@ -659,8 +531,6 @@ nouveau_mem_init(struct drm_device *dev)
- 		return ret;
+@@ -612,7 +516,7 @@ nouveau_mem_init_agp(struct drm_device *dev)
  	}
  
--	INIT_LIST_HEAD(&dev_priv->ttm.bo_list);
--	spin_lock_init(&dev_priv->ttm.bo_list_lock);
- 	spin_lock_init(&dev_priv->tile.lock);
+ 	/* see agp.h for the AGPSTAT_* modes available */
+-	mode.mode = info.mode;
++	mode.mode = get_agp_mode(dev, info.mode);
+ 	ret = drm_agp_enable(dev, mode);
+ 	if (ret) {
+ 		NV_ERROR(dev, "Unable to enable AGP: %d\n", ret);
+@@ -627,24 +531,27 @@ nouveau_mem_init_agp(struct drm_device *dev)
+ }
  
- 	dev_priv->fb_available_size = dev_priv->vram_size;
-@@ -692,7 +562,7 @@ nouveau_mem_init(struct drm_device *dev)
+ int
+-nouveau_mem_init(struct drm_device *dev)
++nouveau_mem_vram_init(struct drm_device *dev)
+ {
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+ 	struct ttm_bo_device *bdev = &dev_priv->ttm.bdev;
+-	int ret, dma_bits = 32;
+-
+-	dev_priv->fb_phys = drm_get_resource_start(dev, 1);
+-	dev_priv->gart_info.type = NOUVEAU_GART_NONE;
++	int ret, dma_bits;
  
- 	/* GART */
- #if !defined(__powerpc__) && !defined(__ia64__)
--	if (drm_device_is_agp(dev) && dev->agp) {
-+	if (drm_device_is_agp(dev) && dev->agp && !nouveau_noagp) {
- 		ret = nouveau_mem_init_agp(dev);
- 		if (ret)
- 			NV_ERROR(dev, "Error initialising AGP: %d\n", ret);
-diff --git a/drivers/gpu/drm/nouveau/nouveau_notifier.c b/drivers/gpu/drm/nouveau/nouveau_notifier.c
-index 9537f3e..3ec181f 100644
---- a/drivers/gpu/drm/nouveau/nouveau_notifier.c
-+++ b/drivers/gpu/drm/nouveau/nouveau_notifier.c
-@@ -55,7 +55,7 @@ nouveau_notifier_init_channel(struct nouveau_channel *chan)
- 	if (ret)
- 		goto out_err;
+ 	if (dev_priv->card_type >= NV_50 &&
+ 	    pci_dma_supported(dev->pdev, DMA_BIT_MASK(40)))
+ 		dma_bits = 40;
++	else
++		dma_bits = 32;
  
--	ret = nouveau_mem_init_heap(&chan->notifier_heap, 0, ntfy->bo.mem.size);
-+	ret = drm_mm_init(&chan->notifier_heap, 0, ntfy->bo.mem.size);
+ 	ret = pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(dma_bits));
+-	if (ret) {
+-		NV_ERROR(dev, "Error setting DMA mask: %d\n", ret);
++	if (ret)
+ 		return ret;
+-	}
++
++	ret = nouveau_mem_detect(dev);
++	if (ret)
++		return ret;
++
++	dev_priv->fb_phys = pci_resource_start(dev->pdev, 1);
+ 
+ 	ret = nouveau_ttm_global_init(dev_priv);
+ 	if (ret)
+@@ -659,17 +566,22 @@ nouveau_mem_init(struct drm_device *dev)
+ 		return ret;
+ 	}
+ 
+-	INIT_LIST_HEAD(&dev_priv->ttm.bo_list);
+-	spin_lock_init(&dev_priv->ttm.bo_list_lock);
+-	spin_lock_init(&dev_priv->tile.lock);
+-
+ 	dev_priv->fb_available_size = dev_priv->vram_size;
+ 	dev_priv->fb_mappable_pages = dev_priv->fb_available_size;
+ 	if (dev_priv->fb_mappable_pages > drm_get_resource_len(dev, 1))
+ 		dev_priv->fb_mappable_pages = drm_get_resource_len(dev, 1);
+ 	dev_priv->fb_mappable_pages >>= PAGE_SHIFT;
+ 
+-	/* remove reserved space at end of vram from available amount */
++	/* reserve space at end of VRAM for PRAMIN */
++	if (dev_priv->chipset == 0x40 || dev_priv->chipset == 0x47 ||
++	    dev_priv->chipset == 0x49 || dev_priv->chipset == 0x4b)
++		dev_priv->ramin_rsvd_vram = (2 * 1024 * 1024);
++	else
++	if (dev_priv->card_type >= NV_40)
++		dev_priv->ramin_rsvd_vram = (1 * 1024 * 1024);
++	else
++		dev_priv->ramin_rsvd_vram = (512 * 1024);
++
+ 	dev_priv->fb_available_size -= dev_priv->ramin_rsvd_vram;
+ 	dev_priv->fb_aper_free = dev_priv->fb_available_size;
+ 
+@@ -690,9 +602,23 @@ nouveau_mem_init(struct drm_device *dev)
+ 		nouveau_bo_ref(NULL, &dev_priv->vga_ram);
+ 	}
+ 
+-	/* GART */
++	dev_priv->fb_mtrr = drm_mtrr_add(pci_resource_start(dev->pdev, 1),
++					 pci_resource_len(dev->pdev, 1),
++					 DRM_MTRR_WC);
++	return 0;
++}
++
++int
++nouveau_mem_gart_init(struct drm_device *dev)
++{
++	struct drm_nouveau_private *dev_priv = dev->dev_private;
++	struct ttm_bo_device *bdev = &dev_priv->ttm.bdev;
++	int ret;
++
++	dev_priv->gart_info.type = NOUVEAU_GART_NONE;
++
+ #if !defined(__powerpc__) && !defined(__ia64__)
+-	if (drm_device_is_agp(dev) && dev->agp) {
++	if (drm_device_is_agp(dev) && dev->agp && nouveau_agpmode) {
+ 		ret = nouveau_mem_init_agp(dev);
+ 		if (ret)
+ 			NV_ERROR(dev, "Error initialising AGP: %d\n", ret);
+@@ -718,11 +644,6 @@ nouveau_mem_init(struct drm_device *dev)
+ 		return ret;
+ 	}
+ 
+-	dev_priv->fb_mtrr = drm_mtrr_add(drm_get_resource_start(dev, 1),
+-					 drm_get_resource_len(dev, 1),
+-					 DRM_MTRR_WC);
+-
+ 	return 0;
+ }
+ 
+-
+diff --git a/drivers/gpu/drm/nouveau/nouveau_notifier.c b/drivers/gpu/drm/nouveau/nouveau_notifier.c
+index 9537f3e..22b8618 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_notifier.c
++++ b/drivers/gpu/drm/nouveau/nouveau_notifier.c
+@@ -28,6 +28,7 @@
+ #include "drmP.h"
+ #include "drm.h"
+ #include "nouveau_drv.h"
++#include "nouveau_ramht.h"
+ 
+ int
+ nouveau_notifier_init_channel(struct nouveau_channel *chan)
+@@ -55,7 +56,7 @@ nouveau_notifier_init_channel(struct nouveau_channel *chan)
  	if (ret)
  		goto out_err;
  
-@@ -80,7 +80,7 @@ nouveau_notifier_takedown_channel(struct nouveau_channel *chan)
+-	ret = nouveau_mem_init_heap(&chan->notifier_heap, 0, ntfy->bo.mem.size);
++	ret = drm_mm_init(&chan->notifier_heap, 0, ntfy->bo.mem.size);
+ 	if (ret)
+ 		goto out_err;
+ 
+@@ -80,7 +81,7 @@ nouveau_notifier_takedown_channel(struct nouveau_channel *chan)
  	nouveau_bo_unpin(chan->notifier_bo);
  	mutex_unlock(&dev->struct_mutex);
  	drm_gem_object_unreference_unlocked(chan->notifier_bo->gem);
@@ -5902,7 +6599,7 @@ index 9537f3e..3ec181f 100644
  }
  
  static void
-@@ -90,7 +90,7 @@ nouveau_notifier_gpuobj_dtor(struct drm_device *dev,
+@@ -90,7 +91,7 @@ nouveau_notifier_gpuobj_dtor(struct drm_device *dev,
  	NV_DEBUG(dev, "\n");
  
  	if (gpuobj->priv)
@@ -5911,7 +6608,7 @@ index 9537f3e..3ec181f 100644
  }
  
  int
-@@ -100,18 +100,13 @@ nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle,
+@@ -100,18 +101,13 @@ nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle,
  	struct drm_device *dev = chan->dev;
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
  	struct nouveau_gpuobj *nobj = NULL;
@@ -5934,7 +6631,7 @@ index 9537f3e..3ec181f 100644
  	if (!mem) {
  		NV_ERROR(dev, "Channel %d notifier block full\n", chan->id);
  		return -ENOMEM;
-@@ -144,17 +139,17 @@ nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle,
+@@ -144,18 +140,18 @@ nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle,
  				     mem->size, NV_DMA_ACCESS_RW, target,
  				     &nobj);
  	if (ret) {
@@ -5948,15 +6645,19 @@ index 9537f3e..3ec181f 100644
 +	nobj->dtor = nouveau_notifier_gpuobj_dtor;
 +	nobj->priv = mem;
  
- 	ret = nouveau_gpuobj_ref_add(dev, chan, handle, nobj, NULL);
+-	ret = nouveau_gpuobj_ref_add(dev, chan, handle, nobj, NULL);
++	ret = nouveau_ramht_insert(chan, handle, nobj);
++	nouveau_gpuobj_ref(NULL, &nobj);
  	if (ret) {
- 		nouveau_gpuobj_del(dev, &nobj);
+-		nouveau_gpuobj_del(dev, &nobj);
 -		nouveau_mem_free_block(mem);
+-		NV_ERROR(dev, "Error referencing notifier ctxdma: %d\n", ret);
 +		drm_mm_put_block(mem);
- 		NV_ERROR(dev, "Error referencing notifier ctxdma: %d\n", ret);
++		NV_ERROR(dev, "Error adding notifier to ramht: %d\n", ret);
  		return ret;
  	}
-@@ -170,7 +165,7 @@ nouveau_notifier_offset(struct nouveau_gpuobj *nobj, uint32_t *poffset)
+ 
+@@ -170,7 +166,7 @@ nouveau_notifier_offset(struct nouveau_gpuobj *nobj, uint32_t *poffset)
  		return -EINVAL;
  
  	if (poffset) {
@@ -5965,7 +6666,7 @@ index 9537f3e..3ec181f 100644
  
  		if (*poffset >= mem->size)
  			return false;
-@@ -189,7 +184,6 @@ nouveau_ioctl_notifier_alloc(struct drm_device *dev, void *data,
+@@ -189,7 +185,6 @@ nouveau_ioctl_notifier_alloc(struct drm_device *dev, void *data,
  	struct nouveau_channel *chan;
  	int ret;
  
@@ -5974,7 +6675,7 @@ index 9537f3e..3ec181f 100644
  
  	ret = nouveau_notifier_alloc(chan, na->handle, na->size, &na->offset);
 diff --git a/drivers/gpu/drm/nouveau/nouveau_object.c b/drivers/gpu/drm/nouveau/nouveau_object.c
-index e7c100b..6aedc3b 100644
+index e7c100b..115904d 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_object.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_object.c
 @@ -34,6 +34,7 @@
@@ -6132,21 +6833,29 @@ index e7c100b..6aedc3b 100644
  	struct nouveau_engine *engine = &dev_priv->engine;
  	struct nouveau_gpuobj *gpuobj;
 -	struct mem_block *pramin = NULL;
-+	struct drm_mm *pramin = NULL;
++	struct drm_mm_node *ramin = NULL;
  	int ret;
  
  	NV_DEBUG(dev, "ch%d size=%u align=%d flags=0x%08x\n",
-@@ -222,6 +88,7 @@ nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan,
+@@ -222,82 +88,102 @@ nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan,
  	if (!gpuobj)
  		return -ENOMEM;
  	NV_DEBUG(dev, "gpuobj %p\n", gpuobj);
 +	gpuobj->dev = dev;
  	gpuobj->flags = flags;
- 	gpuobj->im_channel = chan;
+-	gpuobj->im_channel = chan;
++	kref_init(&gpuobj->refcount);
++	gpuobj->size = size;
  
-@@ -233,25 +100,12 @@ nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan,
- 	 * available.
- 	 */
++	spin_lock(&dev_priv->ramin_lock);
+ 	list_add_tail(&gpuobj->list, &dev_priv->gpuobj_list);
++	spin_unlock(&dev_priv->ramin_lock);
+ 
+-	/* Choose between global instmem heap, and per-channel private
+-	 * instmem heap.  On <NV50 allow requests for private instmem
+-	 * to be satisfied from global heap if no per-channel area
+-	 * available.
+-	 */
  	if (chan) {
 -		if (chan->ramin_heap) {
 -			NV_DEBUG(dev, "private heap\n");
@@ -6155,14 +6864,20 @@ index e7c100b..6aedc3b 100644
 -		if (dev_priv->card_type < NV_50) {
 -			NV_DEBUG(dev, "global heap fallback\n");
 -			pramin = dev_priv->ramin_heap;
--		}
 +		NV_DEBUG(dev, "channel heap\n");
-+		pramin = &chan->ramin_heap;
++
++		ramin = drm_mm_search_free(&chan->ramin_heap, size, align, 0);
++		if (ramin)
++			ramin = drm_mm_get_block(ramin, size, align);
++
++		if (!ramin) {
++			nouveau_gpuobj_ref(NULL, &gpuobj);
++			return -ENOMEM;
+ 		}
  	} else {
  		NV_DEBUG(dev, "global heap\n");
 -		pramin = dev_priv->ramin_heap;
 -	}
-+		pramin = &dev_priv->ramin_heap;
  
 -	if (!pramin) {
 -		NV_ERROR(dev, "No PRAMIN heap!\n");
@@ -6170,88 +6885,472 @@ index e7c100b..6aedc3b 100644
 -	}
 -
 -	if (!chan) {
++		/* allocate backing pages, sets vinst */
  		ret = engine->instmem.populate(dev, gpuobj, &size);
  		if (ret) {
- 			nouveau_gpuobj_del(dev, &gpuobj);
-@@ -260,9 +114,10 @@ nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan,
- 	}
+-			nouveau_gpuobj_del(dev, &gpuobj);
++			nouveau_gpuobj_ref(NULL, &gpuobj);
+ 			return ret;
+ 		}
+-	}
  
- 	/* Allocate a chunk of the PRAMIN aperture */
+-	/* Allocate a chunk of the PRAMIN aperture */
 -	gpuobj->im_pramin = nouveau_mem_alloc_block(pramin, size,
 -						    drm_order(align),
 -						    (struct drm_file *)-2, 0);
-+	gpuobj->im_pramin = drm_mm_search_free(pramin, size, align, 0);
-+	if (gpuobj->im_pramin)
-+		gpuobj->im_pramin = drm_mm_get_block(gpuobj->im_pramin, size, align);
+-	if (!gpuobj->im_pramin) {
+-		nouveau_gpuobj_del(dev, &gpuobj);
+-		return -ENOMEM;
++		/* try and get aperture space */
++		do {
++			if (drm_mm_pre_get(&dev_priv->ramin_heap))
++				return -ENOMEM;
++
++			spin_lock(&dev_priv->ramin_lock);
++			ramin = drm_mm_search_free(&dev_priv->ramin_heap, size,
++						   align, 0);
++			if (ramin == NULL) {
++				spin_unlock(&dev_priv->ramin_lock);
++				nouveau_gpuobj_ref(NULL, &gpuobj);
++				return ret;
++			}
 +
- 	if (!gpuobj->im_pramin) {
- 		nouveau_gpuobj_del(dev, &gpuobj);
- 		return -ENOMEM;
-@@ -279,10 +134,9 @@ nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan,
- 	if (gpuobj->flags & NVOBJ_FLAG_ZERO_ALLOC) {
- 		int i;
++			ramin = drm_mm_get_block_atomic(ramin, size, align);
++			spin_unlock(&dev_priv->ramin_lock);
++		} while (ramin == NULL);
++
++		/* on nv50 it's ok to fail, we have a fallback path */
++		if (!ramin && dev_priv->card_type < NV_50) {
++			nouveau_gpuobj_ref(NULL, &gpuobj);
++			return -ENOMEM;
++		}
+ 	}
+ 
+-	if (!chan) {
++	/* if we got a chunk of the aperture, map pages into it */
++	gpuobj->im_pramin = ramin;
++	if (!chan && gpuobj->im_pramin && dev_priv->ramin_available) {
+ 		ret = engine->instmem.bind(dev, gpuobj);
+ 		if (ret) {
+-			nouveau_gpuobj_del(dev, &gpuobj);
++			nouveau_gpuobj_ref(NULL, &gpuobj);
+ 			return ret;
+ 		}
+ 	}
+ 
+-	if (gpuobj->flags & NVOBJ_FLAG_ZERO_ALLOC) {
+-		int i;
++	/* calculate the various different addresses for the object */
++	if (chan) {
++		gpuobj->pinst = chan->ramin->pinst;
++		if (gpuobj->pinst != ~0)
++			gpuobj->pinst += gpuobj->im_pramin->start;
  
 -		engine->instmem.prepare_access(dev, true);
- 		for (i = 0; i < gpuobj->im_pramin->size; i += 4)
+-		for (i = 0; i < gpuobj->im_pramin->size; i += 4)
 -			nv_wo32(dev, gpuobj, i/4, 0);
 -		engine->instmem.finish_access(dev);
++		if (dev_priv->card_type < NV_50) {
++			gpuobj->cinst = gpuobj->pinst;
++		} else {
++			gpuobj->cinst = gpuobj->im_pramin->start;
++			gpuobj->vinst = gpuobj->im_pramin->start +
++					chan->ramin->vinst;
++		}
++	} else {
++		if (gpuobj->im_pramin)
++			gpuobj->pinst = gpuobj->im_pramin->start;
++		else
++			gpuobj->pinst = ~0;
++		gpuobj->cinst = 0xdeadbeef;
+ 	}
+ 
+-	*gpuobj_ret = gpuobj;
+-	return 0;
+-}
+-
+-int
+-nouveau_gpuobj_early_init(struct drm_device *dev)
+-{
+-	struct drm_nouveau_private *dev_priv = dev->dev_private;
++	if (gpuobj->flags & NVOBJ_FLAG_ZERO_ALLOC) {
++		int i;
+ 
+-	NV_DEBUG(dev, "\n");
++		for (i = 0; i < gpuobj->size; i += 4)
 +			nv_wo32(gpuobj, i, 0);
 +		engine->instmem.flush(dev);
- 	}
++	}
  
- 	*gpuobj_ret = gpuobj;
-@@ -370,10 +224,9 @@ nouveau_gpuobj_del(struct drm_device *dev, struct nouveau_gpuobj **pgpuobj)
- 	}
+-	INIT_LIST_HEAD(&dev_priv->gpuobj_list);
+ 
++	*gpuobj_ret = gpuobj;
+ 	return 0;
+ }
+ 
+@@ -305,18 +191,12 @@ int
+ nouveau_gpuobj_init(struct drm_device *dev)
+ {
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	int ret;
+ 
+ 	NV_DEBUG(dev, "\n");
+ 
+-	if (dev_priv->card_type < NV_50) {
+-		ret = nouveau_gpuobj_new_fake(dev,
+-			dev_priv->ramht_offset, ~0, dev_priv->ramht_size,
+-			NVOBJ_FLAG_ZERO_ALLOC | NVOBJ_FLAG_ALLOW_NO_REFS,
+-						&dev_priv->ramht, NULL);
+-		if (ret)
+-			return ret;
+-	}
++	INIT_LIST_HEAD(&dev_priv->gpuobj_list);
++	spin_lock_init(&dev_priv->ramin_lock);
++	dev_priv->ramin_base = ~0;
+ 
+ 	return 0;
+ }
+@@ -328,299 +208,89 @@ nouveau_gpuobj_takedown(struct drm_device *dev)
+ 
+ 	NV_DEBUG(dev, "\n");
+ 
+-	nouveau_gpuobj_del(dev, &dev_priv->ramht);
++	BUG_ON(!list_empty(&dev_priv->gpuobj_list));
+ }
+ 
+-void
+-nouveau_gpuobj_late_takedown(struct drm_device *dev)
+-{
+-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	struct nouveau_gpuobj *gpuobj = NULL;
+-	struct list_head *entry, *tmp;
+-
+-	NV_DEBUG(dev, "\n");
+-
+-	list_for_each_safe(entry, tmp, &dev_priv->gpuobj_list) {
+-		gpuobj = list_entry(entry, struct nouveau_gpuobj, list);
+ 
+-		NV_ERROR(dev, "gpuobj %p still exists at takedown, refs=%d\n",
+-			 gpuobj, gpuobj->refcount);
+-		gpuobj->refcount = 0;
+-		nouveau_gpuobj_del(dev, &gpuobj);
+-	}
+-}
+-
+-int
+-nouveau_gpuobj_del(struct drm_device *dev, struct nouveau_gpuobj **pgpuobj)
++static void
++nouveau_gpuobj_del(struct kref *ref)
+ {
++	struct nouveau_gpuobj *gpuobj =
++		container_of(ref, struct nouveau_gpuobj, refcount);
++	struct drm_device *dev = gpuobj->dev;
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+ 	struct nouveau_engine *engine = &dev_priv->engine;
+-	struct nouveau_gpuobj *gpuobj;
+ 	int i;
+ 
+-	NV_DEBUG(dev, "gpuobj %p\n", pgpuobj ? *pgpuobj : NULL);
+-
+-	if (!dev_priv || !pgpuobj || !(*pgpuobj))
+-		return -EINVAL;
+-	gpuobj = *pgpuobj;
+-
+-	if (gpuobj->refcount != 0) {
+-		NV_ERROR(dev, "gpuobj refcount is %d\n", gpuobj->refcount);
+-		return -EINVAL;
+-	}
++	NV_DEBUG(dev, "gpuobj %p\n", gpuobj);
  
  	if (gpuobj->im_pramin && (gpuobj->flags & NVOBJ_FLAG_ZERO_FREE)) {
 -		engine->instmem.prepare_access(dev, true);
- 		for (i = 0; i < gpuobj->im_pramin->size; i += 4)
+-		for (i = 0; i < gpuobj->im_pramin->size; i += 4)
 -			nv_wo32(dev, gpuobj, i/4, 0);
 -		engine->instmem.finish_access(dev);
++		for (i = 0; i < gpuobj->size; i += 4)
 +			nv_wo32(gpuobj, i, 0);
 +		engine->instmem.flush(dev);
  	}
  
  	if (gpuobj->dtor)
-@@ -386,7 +239,7 @@ nouveau_gpuobj_del(struct drm_device *dev, struct nouveau_gpuobj **pgpuobj)
- 		if (gpuobj->flags & NVOBJ_FLAG_FAKE)
- 			kfree(gpuobj->im_pramin);
- 		else
--			nouveau_mem_free_block(gpuobj->im_pramin);
-+			drm_mm_put_block(gpuobj->im_pramin);
- 	}
+ 		gpuobj->dtor(dev, gpuobj);
  
+-	if (gpuobj->im_backing && !(gpuobj->flags & NVOBJ_FLAG_FAKE))
++	if (gpuobj->im_backing)
+ 		engine->instmem.clear(dev, gpuobj);
+ 
+-	if (gpuobj->im_pramin) {
+-		if (gpuobj->flags & NVOBJ_FLAG_FAKE)
+-			kfree(gpuobj->im_pramin);
+-		else
+-			nouveau_mem_free_block(gpuobj->im_pramin);
+-	}
+-
++	spin_lock(&dev_priv->ramin_lock);
++	if (gpuobj->im_pramin)
++		drm_mm_put_block(gpuobj->im_pramin);
  	list_del(&gpuobj->list);
-@@ -583,13 +436,14 @@ nouveau_gpuobj_new_fake(struct drm_device *dev, uint32_t p_offset,
++	spin_unlock(&dev_priv->ramin_lock);
+ 
+-	*pgpuobj = NULL;
+ 	kfree(gpuobj);
+-	return 0;
+-}
+-
+-static int
+-nouveau_gpuobj_instance_get(struct drm_device *dev,
+-			    struct nouveau_channel *chan,
+-			    struct nouveau_gpuobj *gpuobj, uint32_t *inst)
+-{
+-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	struct nouveau_gpuobj *cpramin;
+-
+-	/* <NV50 use PRAMIN address everywhere */
+-	if (dev_priv->card_type < NV_50) {
+-		*inst = gpuobj->im_pramin->start;
+-		return 0;
+-	}
+-
+-	if (chan && gpuobj->im_channel != chan) {
+-		NV_ERROR(dev, "Channel mismatch: obj %d, ref %d\n",
+-			 gpuobj->im_channel->id, chan->id);
+-		return -EINVAL;
+-	}
+-
+-	/* NV50 channel-local instance */
+-	if (chan) {
+-		cpramin = chan->ramin->gpuobj;
+-		*inst = gpuobj->im_pramin->start - cpramin->im_pramin->start;
+-		return 0;
+-	}
+-
+-	/* NV50 global (VRAM) instance */
+-	if (!gpuobj->im_channel) {
+-		/* ...from global heap */
+-		if (!gpuobj->im_backing) {
+-			NV_ERROR(dev, "AII, no VRAM backing gpuobj\n");
+-			return -EINVAL;
+-		}
+-		*inst = gpuobj->im_backing_start;
+-		return 0;
+-	} else {
+-		/* ...from local heap */
+-		cpramin = gpuobj->im_channel->ramin->gpuobj;
+-		*inst = cpramin->im_backing_start +
+-			(gpuobj->im_pramin->start - cpramin->im_pramin->start);
+-		return 0;
+-	}
+-
+-	return -EINVAL;
+-}
+-
+-int
+-nouveau_gpuobj_ref_add(struct drm_device *dev, struct nouveau_channel *chan,
+-		       uint32_t handle, struct nouveau_gpuobj *gpuobj,
+-		       struct nouveau_gpuobj_ref **ref_ret)
+-{
+-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	struct nouveau_gpuobj_ref *ref;
+-	uint32_t instance;
+-	int ret;
+-
+-	NV_DEBUG(dev, "ch%d h=0x%08x gpuobj=%p\n",
+-		 chan ? chan->id : -1, handle, gpuobj);
+-
+-	if (!dev_priv || !gpuobj || (ref_ret && *ref_ret != NULL))
+-		return -EINVAL;
+-
+-	if (!chan && !ref_ret)
+-		return -EINVAL;
+-
+-	if (gpuobj->engine == NVOBJ_ENGINE_SW && !gpuobj->im_pramin) {
+-		/* sw object */
+-		instance = 0x40;
+-	} else {
+-		ret = nouveau_gpuobj_instance_get(dev, chan, gpuobj, &instance);
+-		if (ret)
+-			return ret;
+-	}
+-
+-	ref = kzalloc(sizeof(*ref), GFP_KERNEL);
+-	if (!ref)
+-		return -ENOMEM;
+-	INIT_LIST_HEAD(&ref->list);
+-	ref->gpuobj   = gpuobj;
+-	ref->channel  = chan;
+-	ref->instance = instance;
+-
+-	if (!ref_ret) {
+-		ref->handle = handle;
+-
+-		ret = nouveau_ramht_insert(dev, ref);
+-		if (ret) {
+-			kfree(ref);
+-			return ret;
+-		}
+-	} else {
+-		ref->handle = ~0;
+-		*ref_ret = ref;
+-	}
+-
+-	ref->gpuobj->refcount++;
+-	return 0;
+ }
+ 
+-int nouveau_gpuobj_ref_del(struct drm_device *dev, struct nouveau_gpuobj_ref **pref)
+-{
+-	struct nouveau_gpuobj_ref *ref;
+-
+-	NV_DEBUG(dev, "ref %p\n", pref ? *pref : NULL);
+-
+-	if (!dev || !pref || *pref == NULL)
+-		return -EINVAL;
+-	ref = *pref;
+-
+-	if (ref->handle != ~0)
+-		nouveau_ramht_remove(dev, ref);
+-
+-	if (ref->gpuobj) {
+-		ref->gpuobj->refcount--;
+-
+-		if (ref->gpuobj->refcount == 0) {
+-			if (!(ref->gpuobj->flags & NVOBJ_FLAG_ALLOW_NO_REFS))
+-				nouveau_gpuobj_del(dev, &ref->gpuobj);
+-		}
+-	}
+-
+-	*pref = NULL;
+-	kfree(ref);
+-	return 0;
+-}
+-
+-int
+-nouveau_gpuobj_new_ref(struct drm_device *dev,
+-		       struct nouveau_channel *oc, struct nouveau_channel *rc,
+-		       uint32_t handle, uint32_t size, int align,
+-		       uint32_t flags, struct nouveau_gpuobj_ref **ref)
+-{
+-	struct nouveau_gpuobj *gpuobj = NULL;
+-	int ret;
+-
+-	ret = nouveau_gpuobj_new(dev, oc, size, align, flags, &gpuobj);
+-	if (ret)
+-		return ret;
+-
+-	ret = nouveau_gpuobj_ref_add(dev, rc, handle, gpuobj, ref);
+-	if (ret) {
+-		nouveau_gpuobj_del(dev, &gpuobj);
+-		return ret;
+-	}
+-
+-	return 0;
+-}
+-
+-int
+-nouveau_gpuobj_ref_find(struct nouveau_channel *chan, uint32_t handle,
+-			struct nouveau_gpuobj_ref **ref_ret)
++void
++nouveau_gpuobj_ref(struct nouveau_gpuobj *ref, struct nouveau_gpuobj **ptr)
+ {
+-	struct nouveau_gpuobj_ref *ref;
+-	struct list_head *entry, *tmp;
++	if (ref)
++		kref_get(&ref->refcount);
+ 
+-	list_for_each_safe(entry, tmp, &chan->ramht_refs) {
+-		ref = list_entry(entry, struct nouveau_gpuobj_ref, list);
++	if (*ptr)
++		kref_put(&(*ptr)->refcount, nouveau_gpuobj_del);
+ 
+-		if (ref->handle == handle) {
+-			if (ref_ret)
+-				*ref_ret = ref;
+-			return 0;
+-		}
+-	}
+-
+-	return -EINVAL;
++	*ptr = ref;
+ }
+ 
+ int
+-nouveau_gpuobj_new_fake(struct drm_device *dev, uint32_t p_offset,
+-			uint32_t b_offset, uint32_t size,
+-			uint32_t flags, struct nouveau_gpuobj **pgpuobj,
+-			struct nouveau_gpuobj_ref **pref)
++nouveau_gpuobj_new_fake(struct drm_device *dev, u32 pinst, u64 vinst,
++			u32 size, u32 flags, struct nouveau_gpuobj **pgpuobj)
+ {
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+ 	struct nouveau_gpuobj *gpuobj = NULL;
+ 	int i;
+ 
+ 	NV_DEBUG(dev,
+-		 "p_offset=0x%08x b_offset=0x%08x size=0x%08x flags=0x%08x\n",
+-		 p_offset, b_offset, size, flags);
++		 "pinst=0x%08x vinst=0x%010llx size=0x%08x flags=0x%08x\n",
++		 pinst, vinst, size, flags);
+ 
+ 	gpuobj = kzalloc(sizeof(*gpuobj), GFP_KERNEL);
  	if (!gpuobj)
  		return -ENOMEM;
  	NV_DEBUG(dev, "gpuobj %p\n", gpuobj);
-+	gpuobj->dev = dev;
- 	gpuobj->im_channel = NULL;
- 	gpuobj->flags      = flags | NVOBJ_FLAG_FAKE;
- 
- 	list_add_tail(&gpuobj->list, &dev_priv->gpuobj_list);
- 
- 	if (p_offset != ~0) {
+-	gpuobj->im_channel = NULL;
+-	gpuobj->flags      = flags | NVOBJ_FLAG_FAKE;
+-
+-	list_add_tail(&gpuobj->list, &dev_priv->gpuobj_list);
+-
+-	if (p_offset != ~0) {
 -		gpuobj->im_pramin = kzalloc(sizeof(struct mem_block),
-+		gpuobj->im_pramin = kzalloc(sizeof(struct drm_mm_node),
- 					    GFP_KERNEL);
- 		if (!gpuobj->im_pramin) {
- 			nouveau_gpuobj_del(dev, &gpuobj);
-@@ -605,10 +459,9 @@ nouveau_gpuobj_new_fake(struct drm_device *dev, uint32_t p_offset,
- 	}
+-					    GFP_KERNEL);
+-		if (!gpuobj->im_pramin) {
+-			nouveau_gpuobj_del(dev, &gpuobj);
+-			return -ENOMEM;
+-		}
+-		gpuobj->im_pramin->start = p_offset;
+-		gpuobj->im_pramin->size  = size;
+-	}
+-
+-	if (b_offset != ~0) {
+-		gpuobj->im_backing = (struct nouveau_bo *)-1;
+-		gpuobj->im_backing_start = b_offset;
+-	}
++	gpuobj->dev = dev;
++	gpuobj->flags = flags;
++	kref_init(&gpuobj->refcount);
++	gpuobj->size  = size;
++	gpuobj->pinst = pinst;
++	gpuobj->cinst = 0xdeadbeef;
++	gpuobj->vinst = vinst;
  
  	if (gpuobj->flags & NVOBJ_FLAG_ZERO_ALLOC) {
 -		dev_priv->engine.instmem.prepare_access(dev, true);
- 		for (i = 0; i < gpuobj->im_pramin->size; i += 4)
+-		for (i = 0; i < gpuobj->im_pramin->size; i += 4)
 -			nv_wo32(dev, gpuobj, i/4, 0);
 -		dev_priv->engine.instmem.finish_access(dev);
++		for (i = 0; i < gpuobj->size; i += 4)
 +			nv_wo32(gpuobj, i, 0);
 +		dev_priv->engine.instmem.flush(dev);
  	}
  
- 	if (pref) {
-@@ -696,8 +549,6 @@ nouveau_gpuobj_dma_new(struct nouveau_channel *chan, int class,
+-	if (pref) {
+-		i = nouveau_gpuobj_ref_add(dev, NULL, 0, gpuobj, pref);
+-		if (i) {
+-			nouveau_gpuobj_del(dev, &gpuobj);
+-			return i;
+-		}
+-	}
+-
+-	if (pgpuobj)
+-		*pgpuobj = gpuobj;
++	spin_lock(&dev_priv->ramin_lock);
++	list_add_tail(&gpuobj->list, &dev_priv->gpuobj_list);
++	spin_unlock(&dev_priv->ramin_lock);
++	*pgpuobj = gpuobj;
+ 	return 0;
+ }
+ 
+@@ -696,8 +366,6 @@ nouveau_gpuobj_dma_new(struct nouveau_channel *chan, int class,
  		return ret;
  	}
  
@@ -6260,7 +7359,7 @@ index e7c100b..6aedc3b 100644
  	if (dev_priv->card_type < NV_50) {
  		uint32_t frame, adjust, pte_flags = 0;
  
-@@ -706,14 +557,12 @@ nouveau_gpuobj_dma_new(struct nouveau_channel *chan, int class,
+@@ -706,14 +374,12 @@ nouveau_gpuobj_dma_new(struct nouveau_channel *chan, int class,
  		adjust = offset &  0x00000fff;
  		frame  = offset & ~0x00000fff;
  
@@ -6281,7 +7380,7 @@ index e7c100b..6aedc3b 100644
  	} else {
  		uint64_t limit = offset + size - 1;
  		uint32_t flags0, flags5;
-@@ -726,15 +575,15 @@ nouveau_gpuobj_dma_new(struct nouveau_channel *chan, int class,
+@@ -726,15 +392,15 @@ nouveau_gpuobj_dma_new(struct nouveau_channel *chan, int class,
  			flags5 = 0x00080000;
  		}
  
@@ -6304,7 +7403,16 @@ index e7c100b..6aedc3b 100644
  
  	(*gpuobj)->engine = NVOBJ_ENGINE_SW;
  	(*gpuobj)->class  = class;
-@@ -849,32 +698,31 @@ nouveau_gpuobj_gr_new(struct nouveau_channel *chan, int class,
+@@ -762,7 +428,7 @@ nouveau_gpuobj_gart_dma_new(struct nouveau_channel *chan,
+ 			*o_ret = 0;
+ 	} else
+ 	if (dev_priv->gart_info.type == NOUVEAU_GART_SGDMA) {
+-		*gpuobj = dev_priv->gart_info.sg_ctxdma;
++		nouveau_gpuobj_ref(dev_priv->gart_info.sg_ctxdma, gpuobj);
+ 		if (offset & ~0xffffffffULL) {
+ 			NV_ERROR(dev, "obj offset exceeds 32-bits\n");
+ 			return -EINVAL;
+@@ -849,32 +515,31 @@ nouveau_gpuobj_gr_new(struct nouveau_channel *chan, int class,
  		return ret;
  	}
  
@@ -6346,7 +7454,31 @@ index e7c100b..6aedc3b 100644
  
  	(*gpuobj)->engine = NVOBJ_ENGINE_GR;
  	(*gpuobj)->class  = class;
-@@ -920,6 +768,7 @@ nouveau_gpuobj_channel_init_pramin(struct nouveau_channel *chan)
+@@ -895,10 +560,15 @@ nouveau_gpuobj_sw_new(struct nouveau_channel *chan, int class,
+ 	gpuobj = kzalloc(sizeof(*gpuobj), GFP_KERNEL);
+ 	if (!gpuobj)
+ 		return -ENOMEM;
++	gpuobj->dev = chan->dev;
+ 	gpuobj->engine = NVOBJ_ENGINE_SW;
+ 	gpuobj->class = class;
++	kref_init(&gpuobj->refcount);
++	gpuobj->cinst = 0x40;
+ 
++	spin_lock(&dev_priv->ramin_lock);
+ 	list_add_tail(&gpuobj->list, &dev_priv->gpuobj_list);
++	spin_unlock(&dev_priv->ramin_lock);
+ 	*gpuobj_ret = gpuobj;
+ 	return 0;
+ }
+@@ -908,7 +578,6 @@ nouveau_gpuobj_channel_init_pramin(struct nouveau_channel *chan)
+ {
+ 	struct drm_device *dev = chan->dev;
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	struct nouveau_gpuobj *pramin = NULL;
+ 	uint32_t size;
+ 	uint32_t base;
+ 	int ret;
+@@ -920,6 +589,7 @@ nouveau_gpuobj_channel_init_pramin(struct nouveau_channel *chan)
  	base = 0;
  
  	/* PGRAPH context */
@@ -6354,7 +7486,7 @@ index e7c100b..6aedc3b 100644
  
  	if (dev_priv->card_type == NV_50) {
  		/* Various fixed table thingos */
-@@ -930,12 +779,8 @@ nouveau_gpuobj_channel_init_pramin(struct nouveau_channel *chan)
+@@ -930,25 +600,18 @@ nouveau_gpuobj_channel_init_pramin(struct nouveau_channel *chan)
  		size += 0x8000;
  		/* RAMFC */
  		size += 0x1000;
@@ -6364,21 +7496,31 @@ index e7c100b..6aedc3b 100644
  
 -	NV_DEBUG(dev, "ch%d PRAMIN size: 0x%08x bytes, base alloc=0x%08x\n",
 -		 chan->id, size, base);
- 	ret = nouveau_gpuobj_new_ref(dev, NULL, NULL, 0, size, 0x1000, 0,
- 				     &chan->ramin);
+-	ret = nouveau_gpuobj_new_ref(dev, NULL, NULL, 0, size, 0x1000, 0,
+-				     &chan->ramin);
++	ret = nouveau_gpuobj_new(dev, NULL, size, 0x1000, 0, &chan->ramin);
  	if (ret) {
-@@ -944,8 +789,7 @@ nouveau_gpuobj_channel_init_pramin(struct nouveau_channel *chan)
+ 		NV_ERROR(dev, "Error allocating channel PRAMIN: %d\n", ret);
+ 		return ret;
  	}
- 	pramin = chan->ramin->gpuobj;
+-	pramin = chan->ramin->gpuobj;
  
 -	ret = nouveau_mem_init_heap(&chan->ramin_heap,
 -				    pramin->im_pramin->start + base, size);
-+	ret = drm_mm_init(&chan->ramin_heap, pramin->im_pramin->start + base, size);
++	ret = drm_mm_init(&chan->ramin_heap, base, size);
  	if (ret) {
  		NV_ERROR(dev, "Error creating PRAMIN heap: %d\n", ret);
- 		nouveau_gpuobj_ref_del(dev, &chan->ramin);
-@@ -969,15 +813,11 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
+-		nouveau_gpuobj_ref_del(dev, &chan->ramin);
++		nouveau_gpuobj_ref(NULL, &chan->ramin);
+ 		return ret;
+ 	}
  
+@@ -965,19 +628,13 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
+ 	struct nouveau_gpuobj *vram = NULL, *tt = NULL;
+ 	int ret, i;
+ 
+-	INIT_LIST_HEAD(&chan->ramht_refs);
+-
  	NV_DEBUG(dev, "ch%d vram=0x%08x tt=0x%08x\n", chan->id, vram_h, tt_h);
  
 -	/* Reserve a block of PRAMIN for the channel
@@ -6398,19 +7540,29 @@ index e7c100b..6aedc3b 100644
  	}
  
  	/* NV50 VM
-@@ -988,50 +828,42 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
+@@ -986,65 +643,56 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
+ 	 *    locations determined during init.
+ 	 */
  	if (dev_priv->card_type >= NV_50) {
- 		uint32_t vm_offset, pde;
- 
--		instmem->prepare_access(dev, true);
+-		uint32_t vm_offset, pde;
 -
- 		vm_offset = (dev_priv->chipset & 0xf0) == 0x50 ? 0x1400 : 0x200;
- 		vm_offset += chan->ramin->gpuobj->im_pramin->start;
- 
- 		ret = nouveau_gpuobj_new_fake(dev, vm_offset, ~0, 0x4000,
- 							0, &chan->vm_pd, NULL);
+-		instmem->prepare_access(dev, true);
++		u32 pgd_offs = (dev_priv->chipset == 0x50) ? 0x1400 : 0x0200;
++		u64 vm_vinst = chan->ramin->vinst + pgd_offs;
++		u32 vm_pinst = chan->ramin->pinst;
++		u32 pde;
+ 
+-		vm_offset = (dev_priv->chipset & 0xf0) == 0x50 ? 0x1400 : 0x200;
+-		vm_offset += chan->ramin->gpuobj->im_pramin->start;
++		if (vm_pinst != ~0)
++			vm_pinst += pgd_offs;
+ 
+-		ret = nouveau_gpuobj_new_fake(dev, vm_offset, ~0, 0x4000,
+-							0, &chan->vm_pd, NULL);
 -		if (ret) {
 -			instmem->finish_access(dev);
++		ret = nouveau_gpuobj_new_fake(dev, vm_pinst, vm_vinst, 0x4000,
++					      0, &chan->vm_pd);
 +		if (ret)
  			return ret;
 -		}
@@ -6422,38 +7574,40 @@ index e7c100b..6aedc3b 100644
  		}
  
 -		pde = (dev_priv->vm_gart_base / (512*1024*1024)) * 2;
-+		pde = (dev_priv->vm_gart_base / (512*1024*1024)) * 8;
- 		ret = nouveau_gpuobj_ref_add(dev, NULL, 0,
- 					     dev_priv->gart_info.sg_ctxdma,
- 					     &chan->vm_gart_pt);
+-		ret = nouveau_gpuobj_ref_add(dev, NULL, 0,
+-					     dev_priv->gart_info.sg_ctxdma,
+-					     &chan->vm_gart_pt);
 -		if (ret) {
 -			instmem->finish_access(dev);
-+		if (ret)
- 			return ret;
+-			return ret;
 -		}
 -		nv_wo32(dev, chan->vm_pd, pde++,
 -			    chan->vm_gart_pt->instance | 0x03);
 -		nv_wo32(dev, chan->vm_pd, pde++, 0x00000000);
-+		nv_wo32(chan->vm_pd, pde + 0, chan->vm_gart_pt->instance | 3);
++		nouveau_gpuobj_ref(dev_priv->gart_info.sg_ctxdma,
++				   &chan->vm_gart_pt);
++		pde = (dev_priv->vm_gart_base / (512*1024*1024)) * 8;
++		nv_wo32(chan->vm_pd, pde + 0, chan->vm_gart_pt->vinst | 3);
 +		nv_wo32(chan->vm_pd, pde + 4, 0x00000000);
  
 -		pde = (dev_priv->vm_vram_base / (512*1024*1024)) * 2;
 +		pde = (dev_priv->vm_vram_base / (512*1024*1024)) * 8;
  		for (i = 0; i < dev_priv->vm_vram_pt_nr; i++) {
- 			ret = nouveau_gpuobj_ref_add(dev, NULL, 0,
- 						     dev_priv->vm_vram_pt[i],
- 						     &chan->vm_vram_pt[i]);
+-			ret = nouveau_gpuobj_ref_add(dev, NULL, 0,
+-						     dev_priv->vm_vram_pt[i],
+-						     &chan->vm_vram_pt[i]);
 -			if (ret) {
 -				instmem->finish_access(dev);
-+			if (ret)
- 				return ret;
+-				return ret;
 -			}
++			nouveau_gpuobj_ref(dev_priv->vm_vram_pt[i],
++					   &chan->vm_vram_pt[i]);
  
 -			nv_wo32(dev, chan->vm_pd, pde++,
 -				    chan->vm_vram_pt[i]->instance | 0x61);
 -			nv_wo32(dev, chan->vm_pd, pde++, 0x00000000);
 +			nv_wo32(chan->vm_pd, pde + 0,
-+				chan->vm_vram_pt[i]->instance | 0x61);
++				chan->vm_vram_pt[i]->vinst | 0x61);
 +			nv_wo32(chan->vm_pd, pde + 4, 0x00000000);
 +			pde += 8;
  		}
@@ -6463,18 +7617,134 @@ index e7c100b..6aedc3b 100644
  	}
  
  	/* RAMHT */
-@@ -1130,8 +962,8 @@ nouveau_gpuobj_channel_takedown(struct nouveau_channel *chan)
- 	for (i = 0; i < dev_priv->vm_vram_pt_nr; i++)
- 		nouveau_gpuobj_ref_del(dev, &chan->vm_vram_pt[i]);
+ 	if (dev_priv->card_type < NV_50) {
+-		ret = nouveau_gpuobj_ref_add(dev, NULL, 0, dev_priv->ramht,
+-					     &chan->ramht);
++		nouveau_ramht_ref(dev_priv->ramht, &chan->ramht, NULL);
++	} else {
++		struct nouveau_gpuobj *ramht = NULL;
++
++		ret = nouveau_gpuobj_new(dev, chan, 0x8000, 16,
++					 NVOBJ_FLAG_ZERO_ALLOC, &ramht);
+ 		if (ret)
+ 			return ret;
+-	} else {
+-		ret = nouveau_gpuobj_new_ref(dev, chan, chan, 0,
+-					     0x8000, 16,
+-					     NVOBJ_FLAG_ZERO_ALLOC,
+-					     &chan->ramht);
++
++		ret = nouveau_ramht_new(dev, ramht, &chan->ramht);
++		nouveau_gpuobj_ref(NULL, &ramht);
+ 		if (ret)
+ 			return ret;
+ 	}
+@@ -1061,24 +709,32 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
+ 		}
+ 	} else {
+ 		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY,
+-						0, dev_priv->fb_available_size,
+-						NV_DMA_ACCESS_RW,
+-						NV_DMA_TARGET_VIDMEM, &vram);
++					     0, dev_priv->fb_available_size,
++					     NV_DMA_ACCESS_RW,
++					     NV_DMA_TARGET_VIDMEM, &vram);
+ 		if (ret) {
+ 			NV_ERROR(dev, "Error creating VRAM ctxdma: %d\n", ret);
+ 			return ret;
+ 		}
+ 	}
+ 
+-	ret = nouveau_gpuobj_ref_add(dev, chan, vram_h, vram, NULL);
++	ret = nouveau_ramht_insert(chan, vram_h, vram);
++	nouveau_gpuobj_ref(NULL, &vram);
+ 	if (ret) {
+-		NV_ERROR(dev, "Error referencing VRAM ctxdma: %d\n", ret);
++		NV_ERROR(dev, "Error adding VRAM ctxdma to RAMHT: %d\n", ret);
+ 		return ret;
+ 	}
+ 
+ 	/* TT memory ctxdma */
+ 	if (dev_priv->card_type >= NV_50) {
+-		tt = vram;
++		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY,
++					     0, dev_priv->vm_end,
++					     NV_DMA_ACCESS_RW,
++					     NV_DMA_TARGET_AGP, &tt);
++		if (ret) {
++			NV_ERROR(dev, "Error creating VRAM ctxdma: %d\n", ret);
++			return ret;
++		}
+ 	} else
+ 	if (dev_priv->gart_info.type != NOUVEAU_GART_NONE) {
+ 		ret = nouveau_gpuobj_gart_dma_new(chan, 0,
+@@ -1094,9 +750,10 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
+ 		return ret;
+ 	}
+ 
+-	ret = nouveau_gpuobj_ref_add(dev, chan, tt_h, tt, NULL);
++	ret = nouveau_ramht_insert(chan, tt_h, tt);
++	nouveau_gpuobj_ref(NULL, &tt);
+ 	if (ret) {
+-		NV_ERROR(dev, "Error referencing TT ctxdma: %d\n", ret);
++		NV_ERROR(dev, "Error adding TT ctxdma to RAMHT: %d\n", ret);
+ 		return ret;
+ 	}
+ 
+@@ -1108,33 +765,23 @@ nouveau_gpuobj_channel_takedown(struct nouveau_channel *chan)
+ {
+ 	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
+ 	struct drm_device *dev = chan->dev;
+-	struct list_head *entry, *tmp;
+-	struct nouveau_gpuobj_ref *ref;
+ 	int i;
  
+ 	NV_DEBUG(dev, "ch%d\n", chan->id);
+ 
+-	if (!chan->ramht_refs.next)
++	if (!chan->ramht)
+ 		return;
+ 
+-	list_for_each_safe(entry, tmp, &chan->ramht_refs) {
+-		ref = list_entry(entry, struct nouveau_gpuobj_ref, list);
++	nouveau_ramht_ref(NULL, &chan->ramht, chan);
+ 
+-		nouveau_gpuobj_ref_del(dev, &ref);
+-	}
+-
+-	nouveau_gpuobj_ref_del(dev, &chan->ramht);
+-
+-	nouveau_gpuobj_del(dev, &chan->vm_pd);
+-	nouveau_gpuobj_ref_del(dev, &chan->vm_gart_pt);
++	nouveau_gpuobj_ref(NULL, &chan->vm_pd);
++	nouveau_gpuobj_ref(NULL, &chan->vm_gart_pt);
+ 	for (i = 0; i < dev_priv->vm_vram_pt_nr; i++)
+-		nouveau_gpuobj_ref_del(dev, &chan->vm_vram_pt[i]);
+-
 -	if (chan->ramin_heap)
 -		nouveau_mem_takedown(&chan->ramin_heap);
+-	if (chan->ramin)
+-		nouveau_gpuobj_ref_del(dev, &chan->ramin);
++		nouveau_gpuobj_ref(NULL, &chan->vm_vram_pt[i]);
+ 
 +	if (chan->ramin_heap.fl_entry.next)
 +		drm_mm_takedown(&chan->ramin_heap);
- 	if (chan->ramin)
- 		nouveau_gpuobj_ref_del(dev, &chan->ramin);
++	nouveau_gpuobj_ref(NULL, &chan->ramin);
+ }
+ 
+ int
+@@ -1155,19 +802,17 @@ nouveau_gpuobj_suspend(struct drm_device *dev)
+ 	}
+ 
+ 	list_for_each_entry(gpuobj, &dev_priv->gpuobj_list, list) {
+-		if (!gpuobj->im_backing || (gpuobj->flags & NVOBJ_FLAG_FAKE))
++		if (!gpuobj->im_backing)
+ 			continue;
  
-@@ -1164,10 +996,8 @@ nouveau_gpuobj_suspend(struct drm_device *dev)
+-		gpuobj->im_backing_suspend = vmalloc(gpuobj->im_pramin->size);
++		gpuobj->im_backing_suspend = vmalloc(gpuobj->size);
+ 		if (!gpuobj->im_backing_suspend) {
+ 			nouveau_gpuobj_resume(dev);
  			return -ENOMEM;
  		}
  
@@ -6482,12 +7752,12 @@ index e7c100b..6aedc3b 100644
 -		for (i = 0; i < gpuobj->im_pramin->size / 4; i++)
 -			gpuobj->im_backing_suspend[i] = nv_ro32(dev, gpuobj, i);
 -		dev_priv->engine.instmem.finish_access(dev);
-+		for (i = 0; i < gpuobj->im_pramin->size; i += 4)
++		for (i = 0; i < gpuobj->size; i += 4)
 +			gpuobj->im_backing_suspend[i/4] = nv_ro32(gpuobj, i);
  	}
  
  	return 0;
-@@ -1212,10 +1042,9 @@ nouveau_gpuobj_resume(struct drm_device *dev)
+@@ -1212,10 +857,9 @@ nouveau_gpuobj_resume(struct drm_device *dev)
  		if (!gpuobj->im_backing_suspend)
  			continue;
  
@@ -6495,13 +7765,13 @@ index e7c100b..6aedc3b 100644
 -		for (i = 0; i < gpuobj->im_pramin->size / 4; i++)
 -			nv_wo32(dev, gpuobj, i, gpuobj->im_backing_suspend[i]);
 -		dev_priv->engine.instmem.finish_access(dev);
-+		for (i = 0; i < gpuobj->im_pramin->size; i += 4)
++		for (i = 0; i < gpuobj->size; i += 4)
 +			nv_wo32(gpuobj, i, gpuobj->im_backing_suspend[i/4]);
 +		dev_priv->engine.instmem.flush(dev);
  	}
  
  	nouveau_gpuobj_suspend_cleanup(dev);
-@@ -1232,7 +1061,6 @@ int nouveau_ioctl_grobj_alloc(struct drm_device *dev, void *data,
+@@ -1232,7 +876,6 @@ int nouveau_ioctl_grobj_alloc(struct drm_device *dev, void *data,
  	struct nouveau_channel *chan;
  	int ret;
  
@@ -6509,38 +7779,111 @@ index e7c100b..6aedc3b 100644
  	NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(init->channel, file_priv, chan);
  
  	if (init->handle == ~0)
-@@ -1283,7 +1111,6 @@ int nouveau_ioctl_gpuobj_free(struct drm_device *dev, void *data,
+@@ -1250,25 +893,24 @@ int nouveau_ioctl_grobj_alloc(struct drm_device *dev, void *data,
+ 		return -EPERM;
+ 	}
+ 
+-	if (nouveau_gpuobj_ref_find(chan, init->handle, NULL) == 0)
++	if (nouveau_ramht_find(chan, init->handle))
+ 		return -EEXIST;
+ 
+ 	if (!grc->software)
+ 		ret = nouveau_gpuobj_gr_new(chan, grc->id, &gr);
+ 	else
+ 		ret = nouveau_gpuobj_sw_new(chan, grc->id, &gr);
+-
+ 	if (ret) {
+ 		NV_ERROR(dev, "Error creating object: %d (%d/0x%08x)\n",
+ 			 ret, init->channel, init->handle);
+ 		return ret;
+ 	}
+ 
+-	ret = nouveau_gpuobj_ref_add(dev, chan, init->handle, gr, NULL);
++	ret = nouveau_ramht_insert(chan, init->handle, gr);
++	nouveau_gpuobj_ref(NULL, &gr);
+ 	if (ret) {
+ 		NV_ERROR(dev, "Error referencing object: %d (%d/0x%08x)\n",
+ 			 ret, init->channel, init->handle);
+-		nouveau_gpuobj_del(dev, &gr);
+ 		return ret;
+ 	}
+ 
+@@ -1279,17 +921,62 @@ int nouveau_ioctl_gpuobj_free(struct drm_device *dev, void *data,
+ 			      struct drm_file *file_priv)
+ {
+ 	struct drm_nouveau_gpuobj_free *objfree = data;
+-	struct nouveau_gpuobj_ref *ref;
++	struct nouveau_gpuobj *gpuobj;
  	struct nouveau_channel *chan;
- 	int ret;
+-	int ret;
  
 -	NOUVEAU_CHECK_INITIALISED_WITH_RETURN;
  	NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(objfree->channel, file_priv, chan);
  
- 	ret = nouveau_gpuobj_ref_find(chan, objfree->handle, &ref);
-@@ -1293,3 +1120,17 @@ int nouveau_ioctl_gpuobj_free(struct drm_device *dev, void *data,
+-	ret = nouveau_gpuobj_ref_find(chan, objfree->handle, &ref);
+-	if (ret)
+-		return ret;
+-	nouveau_gpuobj_ref_del(dev, &ref);
++	gpuobj = nouveau_ramht_find(chan, objfree->handle);
++	if (!gpuobj)
++		return -ENOENT;
  
++	nouveau_ramht_remove(chan, objfree->handle);
  	return 0;
  }
 +
 +u32
 +nv_ro32(struct nouveau_gpuobj *gpuobj, u32 offset)
 +{
++	struct drm_nouveau_private *dev_priv = gpuobj->dev->dev_private;
 +	struct drm_device *dev = gpuobj->dev;
-+	return nv_ri32(dev, gpuobj->im_pramin->start + offset);
++
++	if (gpuobj->pinst == ~0 || !dev_priv->ramin_available) {
++		u64  ptr = gpuobj->vinst + offset;
++		u32 base = ptr >> 16;
++		u32  val;
++
++		spin_lock(&dev_priv->ramin_lock);
++		if (dev_priv->ramin_base != base) {
++			dev_priv->ramin_base = base;
++			nv_wr32(dev, 0x001700, dev_priv->ramin_base);
++		}
++		val = nv_rd32(dev, 0x700000 + (ptr & 0xffff));
++		spin_unlock(&dev_priv->ramin_lock);
++		return val;
++	}
++
++	return nv_ri32(dev, gpuobj->pinst + offset);
 +}
 +
 +void
 +nv_wo32(struct nouveau_gpuobj *gpuobj, u32 offset, u32 val)
 +{
++	struct drm_nouveau_private *dev_priv = gpuobj->dev->dev_private;
 +	struct drm_device *dev = gpuobj->dev;
-+	nv_wi32(dev, gpuobj->im_pramin->start + offset, val);
++
++	if (gpuobj->pinst == ~0 || !dev_priv->ramin_available) {
++		u64  ptr = gpuobj->vinst + offset;
++		u32 base = ptr >> 16;
++
++		spin_lock(&dev_priv->ramin_lock);
++		if (dev_priv->ramin_base != base) {
++			dev_priv->ramin_base = base;
++			nv_wr32(dev, 0x001700, dev_priv->ramin_base);
++		}
++		nv_wr32(dev, 0x700000 + (ptr & 0xffff), val);
++		spin_unlock(&dev_priv->ramin_lock);
++		return;
++	}
++
++	nv_wi32(dev, gpuobj->pinst + offset, val);
 +}
 diff --git a/drivers/gpu/drm/nouveau/nouveau_ramht.c b/drivers/gpu/drm/nouveau/nouveau_ramht.c
 new file mode 100644
-index 0000000..e5cc93c
+index 0000000..7f16697
 --- /dev/null
 +++ b/drivers/gpu/drm/nouveau/nouveau_ramht.c
-@@ -0,0 +1,160 @@
+@@ -0,0 +1,289 @@
 +/*
 + * Copyright 2010 Red Hat Inc.
 + *
@@ -6570,22 +7913,24 @@ index 0000000..e5cc93c
 +#include "nouveau_drv.h"
 +#include "nouveau_ramht.h"
 +
-+static uint32_t
-+nouveau_ramht_hash_handle(struct drm_device *dev, int channel, uint32_t handle)
++static u32
++nouveau_ramht_hash_handle(struct nouveau_channel *chan, u32 handle)
 +{
++	struct drm_device *dev = chan->dev;
 +	struct drm_nouveau_private *dev_priv = dev->dev_private;
-+	uint32_t hash = 0;
++	struct nouveau_ramht *ramht = chan->ramht;
++	u32 hash = 0;
 +	int i;
 +
-+	NV_DEBUG(dev, "ch%d handle=0x%08x\n", channel, handle);
++	NV_DEBUG(dev, "ch%d handle=0x%08x\n", chan->id, handle);
 +
-+	for (i = 32; i > 0; i -= dev_priv->ramht_bits) {
-+		hash ^= (handle & ((1 << dev_priv->ramht_bits) - 1));
-+		handle >>= dev_priv->ramht_bits;
++	for (i = 32; i > 0; i -= ramht->bits) {
++		hash ^= (handle & ((1 << ramht->bits) - 1));
++		handle >>= ramht->bits;
 +	}
 +
 +	if (dev_priv->card_type < NV_50)
-+		hash ^= channel << (dev_priv->ramht_bits - 4);
++		hash ^= chan->id << (ramht->bits - 4);
 +	hash <<= 3;
 +
 +	NV_DEBUG(dev, "hash=0x%08x\n", hash);
@@ -6594,59 +7939,88 @@ index 0000000..e5cc93c
 +
 +static int
 +nouveau_ramht_entry_valid(struct drm_device *dev, struct nouveau_gpuobj *ramht,
-+			  uint32_t offset)
++			  u32 offset)
 +{
 +	struct drm_nouveau_private *dev_priv = dev->dev_private;
-+	uint32_t ctx = nv_ro32(ramht, offset + 4);
++	u32 ctx = nv_ro32(ramht, offset + 4);
 +
 +	if (dev_priv->card_type < NV_40)
 +		return ((ctx & NV_RAMHT_CONTEXT_VALID) != 0);
 +	return (ctx != 0);
 +}
 +
++static int
++nouveau_ramht_entry_same_channel(struct nouveau_channel *chan,
++				 struct nouveau_gpuobj *ramht, u32 offset)
++{
++	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
++	u32 ctx = nv_ro32(ramht, offset + 4);
++
++	if (dev_priv->card_type >= NV_50)
++		return true;
++	else if (dev_priv->card_type >= NV_40)
++		return chan->id ==
++			((ctx >> NV40_RAMHT_CONTEXT_CHANNEL_SHIFT) & 0x1f);
++	else
++		return chan->id ==
++			((ctx >> NV_RAMHT_CONTEXT_CHANNEL_SHIFT) & 0x1f);
++}
++
 +int
-+nouveau_ramht_insert(struct drm_device *dev, struct nouveau_gpuobj_ref *ref)
++nouveau_ramht_insert(struct nouveau_channel *chan, u32 handle,
++		     struct nouveau_gpuobj *gpuobj)
 +{
++	struct drm_device *dev = chan->dev;
 +	struct drm_nouveau_private *dev_priv = dev->dev_private;
 +	struct nouveau_instmem_engine *instmem = &dev_priv->engine.instmem;
-+	struct nouveau_channel *chan = ref->channel;
-+	struct nouveau_gpuobj *ramht = chan->ramht ? chan->ramht->gpuobj : NULL;
-+	uint32_t ctx, co, ho;
++	struct nouveau_ramht_entry *entry;
++	struct nouveau_gpuobj *ramht = chan->ramht->gpuobj;
++	unsigned long flags;
++	u32 ctx, co, ho;
 +
-+	if (!ramht) {
-+		NV_ERROR(dev, "No hash table!\n");
-+		return -EINVAL;
-+	}
++	if (nouveau_ramht_find(chan, handle))
++		return -EEXIST;
++
++	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
++	if (!entry)
++		return -ENOMEM;
++	entry->channel = chan;
++	entry->gpuobj = NULL;
++	entry->handle = handle;
++	nouveau_gpuobj_ref(gpuobj, &entry->gpuobj);
 +
 +	if (dev_priv->card_type < NV_40) {
-+		ctx = NV_RAMHT_CONTEXT_VALID | (ref->instance >> 4) |
++		ctx = NV_RAMHT_CONTEXT_VALID | (gpuobj->cinst >> 4) |
 +		      (chan->id << NV_RAMHT_CONTEXT_CHANNEL_SHIFT) |
-+		      (ref->gpuobj->engine << NV_RAMHT_CONTEXT_ENGINE_SHIFT);
++		      (gpuobj->engine << NV_RAMHT_CONTEXT_ENGINE_SHIFT);
 +	} else
 +	if (dev_priv->card_type < NV_50) {
-+		ctx = (ref->instance >> 4) |
++		ctx = (gpuobj->cinst >> 4) |
 +		      (chan->id << NV40_RAMHT_CONTEXT_CHANNEL_SHIFT) |
-+		      (ref->gpuobj->engine << NV40_RAMHT_CONTEXT_ENGINE_SHIFT);
++		      (gpuobj->engine << NV40_RAMHT_CONTEXT_ENGINE_SHIFT);
 +	} else {
-+		if (ref->gpuobj->engine == NVOBJ_ENGINE_DISPLAY) {
-+			ctx = (ref->instance << 10) | 2;
++		if (gpuobj->engine == NVOBJ_ENGINE_DISPLAY) {
++			ctx = (gpuobj->cinst << 10) | 2;
 +		} else {
-+			ctx = (ref->instance >> 4) |
-+			      ((ref->gpuobj->engine <<
++			ctx = (gpuobj->cinst >> 4) |
++			      ((gpuobj->engine <<
 +				NV40_RAMHT_CONTEXT_ENGINE_SHIFT));
 +		}
 +	}
 +
-+	co = ho = nouveau_ramht_hash_handle(dev, chan->id, ref->handle);
++	spin_lock_irqsave(&chan->ramht->lock, flags);
++	list_add(&entry->head, &chan->ramht->entries);
++
++	co = ho = nouveau_ramht_hash_handle(chan, handle);
 +	do {
 +		if (!nouveau_ramht_entry_valid(dev, ramht, co)) {
 +			NV_DEBUG(dev,
 +				 "insert ch%d 0x%08x: h=0x%08x, c=0x%08x\n",
-+				 chan->id, co, ref->handle, ctx);
-+			nv_wo32(ramht, co + 0, ref->handle);
++				 chan->id, co, handle, ctx);
++			nv_wo32(ramht, co + 0, handle);
 +			nv_wo32(ramht, co + 4, ctx);
 +
-+			list_add_tail(&ref->list, &chan->ramht_refs);
++			spin_unlock_irqrestore(&chan->ramht->lock, flags);
 +			instmem->flush(dev);
 +			return 0;
 +		}
@@ -6654,59 +8028,157 @@ index 0000000..e5cc93c
 +			 chan->id, co, nv_ro32(ramht, co));
 +
 +		co += 8;
-+		if (co >= dev_priv->ramht_size)
++		if (co >= ramht->size)
 +			co = 0;
 +	} while (co != ho);
 +
 +	NV_ERROR(dev, "RAMHT space exhausted. ch=%d\n", chan->id);
++	list_del(&entry->head);
++	spin_unlock_irqrestore(&chan->ramht->lock, flags);
++	kfree(entry);
 +	return -ENOMEM;
 +}
 +
-+void
-+nouveau_ramht_remove(struct drm_device *dev, struct nouveau_gpuobj_ref *ref)
++static void
++nouveau_ramht_remove_locked(struct nouveau_channel *chan, u32 handle)
 +{
++	struct drm_device *dev = chan->dev;
 +	struct drm_nouveau_private *dev_priv = dev->dev_private;
 +	struct nouveau_instmem_engine *instmem = &dev_priv->engine.instmem;
-+	struct nouveau_channel *chan = ref->channel;
-+	struct nouveau_gpuobj *ramht = chan->ramht ? chan->ramht->gpuobj : NULL;
-+	uint32_t co, ho;
++	struct nouveau_gpuobj *ramht = chan->ramht->gpuobj;
++	struct nouveau_ramht_entry *entry, *tmp;
++	u32 co, ho;
 +
-+	if (!ramht) {
-+		NV_ERROR(dev, "No hash table!\n");
-+		return;
++	list_for_each_entry_safe(entry, tmp, &chan->ramht->entries, head) {
++		if (entry->channel != chan || entry->handle != handle)
++			continue;
++
++		nouveau_gpuobj_ref(NULL, &entry->gpuobj);
++		list_del(&entry->head);
++		kfree(entry);
++		break;
 +	}
 +
-+	co = ho = nouveau_ramht_hash_handle(dev, chan->id, ref->handle);
++	co = ho = nouveau_ramht_hash_handle(chan, handle);
 +	do {
 +		if (nouveau_ramht_entry_valid(dev, ramht, co) &&
-+		    (ref->handle == nv_ro32(ramht, co))) {
++		    nouveau_ramht_entry_same_channel(chan, ramht, co) &&
++		    (handle == nv_ro32(ramht, co))) {
 +			NV_DEBUG(dev,
 +				 "remove ch%d 0x%08x: h=0x%08x, c=0x%08x\n",
-+				 chan->id, co, ref->handle,
-+				 nv_ro32(ramht, co + 4));
++				 chan->id, co, handle, nv_ro32(ramht, co + 4));
 +			nv_wo32(ramht, co + 0, 0x00000000);
 +			nv_wo32(ramht, co + 4, 0x00000000);
-+
-+			list_del(&ref->list);
 +			instmem->flush(dev);
 +			return;
 +		}
 +
 +		co += 8;
-+		if (co >= dev_priv->ramht_size)
++		if (co >= ramht->size)
 +			co = 0;
 +	} while (co != ho);
-+	list_del(&ref->list);
 +
 +	NV_ERROR(dev, "RAMHT entry not found. ch=%d, handle=0x%08x\n",
-+		 chan->id, ref->handle);
++		 chan->id, handle);
++}
++
++void
++nouveau_ramht_remove(struct nouveau_channel *chan, u32 handle)
++{
++	struct nouveau_ramht *ramht = chan->ramht;
++	unsigned long flags;
++
++	spin_lock_irqsave(&ramht->lock, flags);
++	nouveau_ramht_remove_locked(chan, handle);
++	spin_unlock_irqrestore(&ramht->lock, flags);
++}
++
++struct nouveau_gpuobj *
++nouveau_ramht_find(struct nouveau_channel *chan, u32 handle)
++{
++	struct nouveau_ramht *ramht = chan->ramht;
++	struct nouveau_ramht_entry *entry;
++	struct nouveau_gpuobj *gpuobj = NULL;
++	unsigned long flags;
++
++	if (unlikely(!chan->ramht))
++		return NULL;
++
++	spin_lock_irqsave(&ramht->lock, flags);
++	list_for_each_entry(entry, &chan->ramht->entries, head) {
++		if (entry->channel == chan && entry->handle == handle) {
++			gpuobj = entry->gpuobj;
++			break;
++		}
++	}
++	spin_unlock_irqrestore(&ramht->lock, flags);
++
++	return gpuobj;
++}
++
++int
++nouveau_ramht_new(struct drm_device *dev, struct nouveau_gpuobj *gpuobj,
++		  struct nouveau_ramht **pramht)
++{
++	struct nouveau_ramht *ramht;
++
++	ramht = kzalloc(sizeof(*ramht), GFP_KERNEL);
++	if (!ramht)
++		return -ENOMEM;
++
++	ramht->dev = dev;
++	kref_init(&ramht->refcount);
++	ramht->bits = drm_order(gpuobj->size / 8);
++	INIT_LIST_HEAD(&ramht->entries);
++	spin_lock_init(&ramht->lock);
++	nouveau_gpuobj_ref(gpuobj, &ramht->gpuobj);
++
++	*pramht = ramht;
++	return 0;
++}
++
++static void
++nouveau_ramht_del(struct kref *ref)
++{
++	struct nouveau_ramht *ramht =
++		container_of(ref, struct nouveau_ramht, refcount);
++
++	nouveau_gpuobj_ref(NULL, &ramht->gpuobj);
++	kfree(ramht);
++}
++
++void
++nouveau_ramht_ref(struct nouveau_ramht *ref, struct nouveau_ramht **ptr,
++		  struct nouveau_channel *chan)
++{
++	struct nouveau_ramht_entry *entry, *tmp;
++	struct nouveau_ramht *ramht;
++	unsigned long flags;
++
++	if (ref)
++		kref_get(&ref->refcount);
++
++	ramht = *ptr;
++	if (ramht) {
++		spin_lock_irqsave(&ramht->lock, flags);
++		list_for_each_entry_safe(entry, tmp, &ramht->entries, head) {
++			if (entry->channel != chan)
++				continue;
++
++			nouveau_ramht_remove_locked(chan, entry->handle);
++		}
++		spin_unlock_irqrestore(&ramht->lock, flags);
++
++		kref_put(&ramht->refcount, nouveau_ramht_del);
++	}
++	*ptr = ref;
 +}
 diff --git a/drivers/gpu/drm/nouveau/nouveau_ramht.h b/drivers/gpu/drm/nouveau/nouveau_ramht.h
 new file mode 100644
-index 0000000..e10455c
+index 0000000..b79cb5e
 --- /dev/null
 +++ b/drivers/gpu/drm/nouveau/nouveau_ramht.h
-@@ -0,0 +1,31 @@
+@@ -0,0 +1,55 @@
 +/*
 + * Copyright 2010 Red Hat Inc.
 + *
@@ -6734,12 +8206,36 @@ index 0000000..e10455c
 +#ifndef __NOUVEAU_RAMHT_H__
 +#define __NOUVEAU_RAMHT_H__
 +
-+extern int nouveau_ramht_insert(struct drm_device *, struct nouveau_gpuobj_ref *);
-+extern void nouveau_ramht_remove(struct drm_device *, struct nouveau_gpuobj_ref *);
++struct nouveau_ramht_entry {
++	struct list_head head;
++	struct nouveau_channel *channel;
++	struct nouveau_gpuobj *gpuobj;
++	u32 handle;
++};
++
++struct nouveau_ramht {
++	struct drm_device *dev;
++	struct kref refcount;
++	spinlock_t lock;
++	struct nouveau_gpuobj *gpuobj;
++	struct list_head entries;
++	int bits;
++};
++
++extern int  nouveau_ramht_new(struct drm_device *, struct nouveau_gpuobj *,
++			      struct nouveau_ramht **);
++extern void nouveau_ramht_ref(struct nouveau_ramht *, struct nouveau_ramht **,
++			      struct nouveau_channel *unref_channel);
++
++extern int  nouveau_ramht_insert(struct nouveau_channel *, u32 handle,
++				 struct nouveau_gpuobj *);
++extern void nouveau_ramht_remove(struct nouveau_channel *, u32 handle);
++extern struct nouveau_gpuobj *
++nouveau_ramht_find(struct nouveau_channel *chan, u32 handle);
 +
 +#endif
 diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h
-index 6ca80a3..21a6e45 100644
+index 6ca80a3..1b42541 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_reg.h
 +++ b/drivers/gpu/drm/nouveau/nouveau_reg.h
 @@ -1,19 +1,64 @@
@@ -6891,7 +8387,34 @@ index 6ca80a3..21a6e45 100644
  #define NV10_PGRAPH_DMA_PITCH                              0x00400770
  #define NV10_PGRAPH_DVD_COLORFMT                           0x00400774
  #define NV10_PGRAPH_SCALED_FORMAT                          0x00400778
-@@ -814,6 +838,7 @@
+@@ -527,6 +551,8 @@
+ #define NV10_PFIFO_CACHE1_DMA_SUBROUTINE                   0x0000324C
+ #define NV03_PFIFO_CACHE1_PULL0                            0x00003240
+ #define NV04_PFIFO_CACHE1_PULL0                            0x00003250
++#    define NV04_PFIFO_CACHE1_PULL0_HASH_FAILED            0x00000010
++#    define NV04_PFIFO_CACHE1_PULL0_HASH_BUSY              0x00001000
+ #define NV03_PFIFO_CACHE1_PULL1                            0x00003250
+ #define NV04_PFIFO_CACHE1_PULL1                            0x00003254
+ #define NV04_PFIFO_CACHE1_HASH                             0x00003258
+@@ -761,15 +787,12 @@
+ #define NV50_PDISPLAY_DAC_MODE_CTRL_C(i)                (0x00610b5c + (i) * 0x8)
+ #define NV50_PDISPLAY_SOR_MODE_CTRL_P(i)                (0x00610b70 + (i) * 0x8)
+ #define NV50_PDISPLAY_SOR_MODE_CTRL_C(i)                (0x00610b74 + (i) * 0x8)
++#define NV50_PDISPLAY_EXT_MODE_CTRL_P(i)                (0x00610b80 + (i) * 0x8)
++#define NV50_PDISPLAY_EXT_MODE_CTRL_C(i)                (0x00610b84 + (i) * 0x8)
+ #define NV50_PDISPLAY_DAC_MODE_CTRL2_P(i)               (0x00610bdc + (i) * 0x8)
+ #define NV50_PDISPLAY_DAC_MODE_CTRL2_C(i)               (0x00610be0 + (i) * 0x8)
+-
+ #define NV90_PDISPLAY_SOR_MODE_CTRL_P(i)                (0x00610794 + (i) * 0x8)
+ #define NV90_PDISPLAY_SOR_MODE_CTRL_C(i)                (0x00610798 + (i) * 0x8)
+-#define NV90_PDISPLAY_DAC_MODE_CTRL_P(i)                (0x00610b58 + (i) * 0x8)
+-#define NV90_PDISPLAY_DAC_MODE_CTRL_C(i)                (0x00610b5c + (i) * 0x8)
+-#define NV90_PDISPLAY_DAC_MODE_CTRL2_P(i)               (0x00610b80 + (i) * 0x8)
+-#define NV90_PDISPLAY_DAC_MODE_CTRL2_C(i)               (0x00610b84 + (i) * 0x8)
+ 
+ #define NV50_PDISPLAY_CRTC_CLK                                       0x00614000
+ #define NV50_PDISPLAY_CRTC_CLK_CTRL1(i)                 ((i) * 0x800 + 0x614100)
+@@ -814,6 +837,7 @@
  #define NV50_PDISPLAY_SOR_BACKLIGHT_ENABLE                           0x80000000
  #define NV50_PDISPLAY_SOR_BACKLIGHT_LEVEL                            0x00000fff
  #define NV50_SOR_DP_CTRL(i,l)            (0x0061c10c + (i) * 0x800 + (l) * 0x80)
@@ -6900,7 +8423,7 @@ index 6ca80a3..21a6e45 100644
  #define NV50_SOR_DP_CTRL_LANE_MASK                                   0x001f0000
  #define NV50_SOR_DP_CTRL_LANE_0_ENABLED                              0x00010000
 diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
-index 1d6ee8b..630988a 100644
+index 1d6ee8b..5a66a7a 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
 @@ -97,7 +97,6 @@ nouveau_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem)
@@ -7015,12 +8538,20 @@ index 1d6ee8b..630988a 100644
  	struct nouveau_gpuobj *gpuobj = NULL;
  	uint32_t aper_size, obj_size;
  	int i, ret;
-@@ -267,34 +244,42 @@ nouveau_sgdma_init(struct drm_device *dev)
+@@ -257,7 +234,6 @@ nouveau_sgdma_init(struct drm_device *dev)
+ 	}
+ 
+ 	ret = nouveau_gpuobj_new(dev, NULL, obj_size, 16,
+-				      NVOBJ_FLAG_ALLOW_NO_REFS |
+ 				      NVOBJ_FLAG_ZERO_ALLOC |
+ 				      NVOBJ_FLAG_ZERO_FREE, &gpuobj);
+ 	if (ret) {
+@@ -267,34 +243,48 @@ nouveau_sgdma_init(struct drm_device *dev)
  
  	dev_priv->gart_info.sg_dummy_page =
  		alloc_page(GFP_KERNEL|__GFP_DMA32);
 +	if (!dev_priv->gart_info.sg_dummy_page) {
-+		nouveau_gpuobj_del(dev, &gpuobj);
++		nouveau_gpuobj_ref(NULL, &gpuobj);
 +		return -ENOMEM;
 +	}
 +
@@ -7030,12 +8561,18 @@ index 1d6ee8b..630988a 100644
 +		pci_map_page(pdev, dev_priv->gart_info.sg_dummy_page, 0,
  			     PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 +	if (pci_dma_mapping_error(pdev, dev_priv->gart_info.sg_dummy_bus)) {
-+		nouveau_gpuobj_del(dev, &gpuobj);
++		nouveau_gpuobj_ref(NULL, &gpuobj);
 +		return -EFAULT;
 +	}
  
 -	dev_priv->engine.instmem.prepare_access(dev, true);
  	if (dev_priv->card_type < NV_50) {
++		/* special case, allocated from global instmem heap so
++		 * cinst is invalid, we use it on all channels though so
++		 * cinst needs to be valid, set it the same as pinst
++		 */
++		gpuobj->cinst = gpuobj->pinst;
++
  		/* Maybe use NV_DMA_TARGET_AGP for PCIE? NVIDIA do this, and
  		 * confirmed to work on c51.  Perhaps means NV_DMA_TARGET_PCIE
  		 * on those cards? */
@@ -7072,7 +8609,16 @@ index 1d6ee8b..630988a 100644
  
  	dev_priv->gart_info.type      = NOUVEAU_GART_SGDMA;
  	dev_priv->gart_info.aper_base = 0;
-@@ -325,14 +310,11 @@ nouveau_sgdma_get_page(struct drm_device *dev, uint32_t offset, uint32_t *page)
+@@ -317,7 +307,7 @@ nouveau_sgdma_takedown(struct drm_device *dev)
+ 		dev_priv->gart_info.sg_dummy_bus = 0;
+ 	}
+ 
+-	nouveau_gpuobj_del(dev, &dev_priv->gart_info.sg_ctxdma);
++	nouveau_gpuobj_ref(NULL, &dev_priv->gart_info.sg_ctxdma);
+ }
+ 
+ int
+@@ -325,14 +315,11 @@ nouveau_sgdma_get_page(struct drm_device *dev, uint32_t offset, uint32_t *page)
  {
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
  	struct nouveau_gpuobj *gpuobj = dev_priv->gart_info.sg_ctxdma;
@@ -7090,10 +8636,14 @@ index 1d6ee8b..630988a 100644
  	}
  
 diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
-index b02a231..989322b 100644
+index b02a231..be85960 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_state.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_state.c
-@@ -38,6 +38,7 @@
+@@ -35,9 +35,11 @@
+ #include "nouveau_drv.h"
+ #include "nouveau_drm.h"
+ #include "nouveau_fbcon.h"
++#include "nouveau_ramht.h"
  #include "nv50_display.h"
  
  static void nouveau_stub_takedown(struct drm_device *dev) {}
@@ -7101,7 +8651,7 @@ index b02a231..989322b 100644
  
  static int nouveau_init_engine_ptrs(struct drm_device *dev)
  {
-@@ -54,8 +55,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
+@@ -54,8 +56,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
  		engine->instmem.clear		= nv04_instmem_clear;
  		engine->instmem.bind		= nv04_instmem_bind;
  		engine->instmem.unbind		= nv04_instmem_unbind;
@@ -7111,7 +8661,14 @@ index b02a231..989322b 100644
  		engine->mc.init			= nv04_mc_init;
  		engine->mc.takedown		= nv04_mc_takedown;
  		engine->timer.init		= nv04_timer_init;
-@@ -85,6 +85,16 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
+@@ -78,13 +79,22 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
+ 		engine->fifo.disable		= nv04_fifo_disable;
+ 		engine->fifo.enable		= nv04_fifo_enable;
+ 		engine->fifo.reassign		= nv04_fifo_reassign;
+-		engine->fifo.cache_flush	= nv04_fifo_cache_flush;
+ 		engine->fifo.cache_pull		= nv04_fifo_cache_pull;
+ 		engine->fifo.channel_id		= nv04_fifo_channel_id;
+ 		engine->fifo.create_context	= nv04_fifo_create_context;
  		engine->fifo.destroy_context	= nv04_fifo_destroy_context;
  		engine->fifo.load_context	= nv04_fifo_load_context;
  		engine->fifo.unload_context	= nv04_fifo_unload_context;
@@ -7138,7 +8695,14 @@ index b02a231..989322b 100644
  		engine->mc.init			= nv04_mc_init;
  		engine->mc.takedown		= nv04_mc_takedown;
  		engine->timer.init		= nv04_timer_init;
-@@ -128,6 +137,16 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
+@@ -121,13 +130,22 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
+ 		engine->fifo.disable		= nv04_fifo_disable;
+ 		engine->fifo.enable		= nv04_fifo_enable;
+ 		engine->fifo.reassign		= nv04_fifo_reassign;
+-		engine->fifo.cache_flush	= nv04_fifo_cache_flush;
+ 		engine->fifo.cache_pull		= nv04_fifo_cache_pull;
+ 		engine->fifo.channel_id		= nv10_fifo_channel_id;
+ 		engine->fifo.create_context	= nv10_fifo_create_context;
  		engine->fifo.destroy_context	= nv10_fifo_destroy_context;
  		engine->fifo.load_context	= nv10_fifo_load_context;
  		engine->fifo.unload_context	= nv10_fifo_unload_context;
@@ -7155,7 +8719,7 @@ index b02a231..989322b 100644
  		break;
  	case 0x20:
  		engine->instmem.init		= nv04_instmem_init;
-@@ -138,8 +157,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
+@@ -138,8 +156,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
  		engine->instmem.clear		= nv04_instmem_clear;
  		engine->instmem.bind		= nv04_instmem_bind;
  		engine->instmem.unbind		= nv04_instmem_unbind;
@@ -7165,7 +8729,14 @@ index b02a231..989322b 100644
  		engine->mc.init			= nv04_mc_init;
  		engine->mc.takedown		= nv04_mc_takedown;
  		engine->timer.init		= nv04_timer_init;
-@@ -171,6 +189,16 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
+@@ -164,13 +181,22 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
+ 		engine->fifo.disable		= nv04_fifo_disable;
+ 		engine->fifo.enable		= nv04_fifo_enable;
+ 		engine->fifo.reassign		= nv04_fifo_reassign;
+-		engine->fifo.cache_flush	= nv04_fifo_cache_flush;
+ 		engine->fifo.cache_pull		= nv04_fifo_cache_pull;
+ 		engine->fifo.channel_id		= nv10_fifo_channel_id;
+ 		engine->fifo.create_context	= nv10_fifo_create_context;
  		engine->fifo.destroy_context	= nv10_fifo_destroy_context;
  		engine->fifo.load_context	= nv10_fifo_load_context;
  		engine->fifo.unload_context	= nv10_fifo_unload_context;
@@ -7182,7 +8753,7 @@ index b02a231..989322b 100644
  		break;
  	case 0x30:
  		engine->instmem.init		= nv04_instmem_init;
-@@ -181,15 +209,14 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
+@@ -181,15 +207,14 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
  		engine->instmem.clear		= nv04_instmem_clear;
  		engine->instmem.bind		= nv04_instmem_bind;
  		engine->instmem.unbind		= nv04_instmem_unbind;
@@ -7201,7 +8772,14 @@ index b02a231..989322b 100644
  		engine->fb.set_region_tiling	= nv10_fb_set_region_tiling;
  		engine->graph.grclass		= nv30_graph_grclass;
  		engine->graph.init		= nv30_graph_init;
-@@ -214,6 +241,16 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
+@@ -207,13 +232,22 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
+ 		engine->fifo.disable		= nv04_fifo_disable;
+ 		engine->fifo.enable		= nv04_fifo_enable;
+ 		engine->fifo.reassign		= nv04_fifo_reassign;
+-		engine->fifo.cache_flush	= nv04_fifo_cache_flush;
+ 		engine->fifo.cache_pull		= nv04_fifo_cache_pull;
+ 		engine->fifo.channel_id		= nv10_fifo_channel_id;
+ 		engine->fifo.create_context	= nv10_fifo_create_context;
  		engine->fifo.destroy_context	= nv10_fifo_destroy_context;
  		engine->fifo.load_context	= nv10_fifo_load_context;
  		engine->fifo.unload_context	= nv10_fifo_unload_context;
@@ -7218,7 +8796,7 @@ index b02a231..989322b 100644
  		break;
  	case 0x40:
  	case 0x60:
-@@ -225,8 +262,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
+@@ -225,8 +259,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
  		engine->instmem.clear		= nv04_instmem_clear;
  		engine->instmem.bind		= nv04_instmem_bind;
  		engine->instmem.unbind		= nv04_instmem_unbind;
@@ -7228,7 +8806,14 @@ index b02a231..989322b 100644
  		engine->mc.init			= nv40_mc_init;
  		engine->mc.takedown		= nv40_mc_takedown;
  		engine->timer.init		= nv04_timer_init;
-@@ -258,6 +294,16 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
+@@ -251,13 +284,22 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
+ 		engine->fifo.disable		= nv04_fifo_disable;
+ 		engine->fifo.enable		= nv04_fifo_enable;
+ 		engine->fifo.reassign		= nv04_fifo_reassign;
+-		engine->fifo.cache_flush	= nv04_fifo_cache_flush;
+ 		engine->fifo.cache_pull		= nv04_fifo_cache_pull;
+ 		engine->fifo.channel_id		= nv10_fifo_channel_id;
+ 		engine->fifo.create_context	= nv40_fifo_create_context;
  		engine->fifo.destroy_context	= nv40_fifo_destroy_context;
  		engine->fifo.load_context	= nv40_fifo_load_context;
  		engine->fifo.unload_context	= nv40_fifo_unload_context;
@@ -7245,7 +8830,7 @@ index b02a231..989322b 100644
  		break;
  	case 0x50:
  	case 0x80: /* gotta love NVIDIA's consistency.. */
-@@ -271,8 +317,10 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
+@@ -271,8 +313,10 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
  		engine->instmem.clear		= nv50_instmem_clear;
  		engine->instmem.bind		= nv50_instmem_bind;
  		engine->instmem.unbind		= nv50_instmem_unbind;
@@ -7258,7 +8843,7 @@ index b02a231..989322b 100644
  		engine->mc.init			= nv50_mc_init;
  		engine->mc.takedown		= nv50_mc_takedown;
  		engine->timer.init		= nv04_timer_init;
-@@ -300,6 +348,64 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
+@@ -300,6 +344,64 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
  		engine->fifo.destroy_context	= nv50_fifo_destroy_context;
  		engine->fifo.load_context	= nv50_fifo_load_context;
  		engine->fifo.unload_context	= nv50_fifo_unload_context;
@@ -7323,7 +8908,58 @@ index b02a231..989322b 100644
  		break;
  	default:
  		NV_ERROR(dev, "NV%02x unsupported\n", dev_priv->chipset);
-@@ -407,11 +513,6 @@ nouveau_card_init(struct drm_device *dev)
+@@ -331,16 +433,14 @@ static int
+ nouveau_card_init_channel(struct drm_device *dev)
+ {
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	struct nouveau_gpuobj *gpuobj;
++	struct nouveau_gpuobj *gpuobj = NULL;
+ 	int ret;
+ 
+ 	ret = nouveau_channel_alloc(dev, &dev_priv->channel,
+-				    (struct drm_file *)-2,
+-				    NvDmaFB, NvDmaTT);
++				    (struct drm_file *)-2, NvDmaFB, NvDmaTT);
+ 	if (ret)
+ 		return ret;
+ 
+-	gpuobj = NULL;
+ 	ret = nouveau_gpuobj_dma_new(dev_priv->channel, NV_CLASS_DMA_IN_MEMORY,
+ 				     0, dev_priv->vram_size,
+ 				     NV_DMA_ACCESS_RW, NV_DMA_TARGET_VIDMEM,
+@@ -348,26 +448,25 @@ nouveau_card_init_channel(struct drm_device *dev)
+ 	if (ret)
+ 		goto out_err;
+ 
+-	ret = nouveau_gpuobj_ref_add(dev, dev_priv->channel, NvDmaVRAM,
+-				     gpuobj, NULL);
++	ret = nouveau_ramht_insert(dev_priv->channel, NvDmaVRAM, gpuobj);
++	nouveau_gpuobj_ref(NULL, &gpuobj);
+ 	if (ret)
+ 		goto out_err;
+ 
+-	gpuobj = NULL;
+ 	ret = nouveau_gpuobj_gart_dma_new(dev_priv->channel, 0,
+ 					  dev_priv->gart_info.aper_size,
+ 					  NV_DMA_ACCESS_RW, &gpuobj, NULL);
+ 	if (ret)
+ 		goto out_err;
+ 
+-	ret = nouveau_gpuobj_ref_add(dev, dev_priv->channel, NvDmaGART,
+-				     gpuobj, NULL);
++	ret = nouveau_ramht_insert(dev_priv->channel, NvDmaGART, gpuobj);
++	nouveau_gpuobj_ref(NULL, &gpuobj);
+ 	if (ret)
+ 		goto out_err;
+ 
+ 	return 0;
++
+ out_err:
+-	nouveau_gpuobj_del(dev, &gpuobj);
+ 	nouveau_channel_free(dev_priv->channel);
+ 	dev_priv->channel = NULL;
+ 	return ret;
+@@ -407,11 +506,6 @@ nouveau_card_init(struct drm_device *dev)
  	struct nouveau_engine *engine;
  	int ret;
  
@@ -7335,7 +8971,7 @@ index b02a231..989322b 100644
  	vga_client_register(dev->pdev, dev, NULL, nouveau_vga_set_decode);
  	vga_switcheroo_register_client(dev->pdev, nouveau_switcheroo_set_state,
  				       nouveau_switcheroo_can_switch);
-@@ -421,15 +522,17 @@ nouveau_card_init(struct drm_device *dev)
+@@ -421,50 +515,48 @@ nouveau_card_init(struct drm_device *dev)
  	if (ret)
  		goto out;
  	engine = &dev_priv->engine;
@@ -7357,17 +8993,47 @@ index b02a231..989322b 100644
 +	if (ret)
 +		goto out_display_early;
  
- 	ret = nouveau_mem_detect(dev);
+-	ret = nouveau_mem_detect(dev);
++	ret = nouveau_mem_vram_init(dev);
+ 	if (ret)
+ 		goto out_bios;
+ 
+-	ret = nouveau_gpuobj_early_init(dev);
++	ret = nouveau_gpuobj_init(dev);
  	if (ret)
-@@ -461,10 +564,15 @@ nouveau_card_init(struct drm_device *dev)
+-		goto out_bios;
++		goto out_vram;
+ 
+-	/* Initialise instance memory, must happen before mem_init so we
+-	 * know exactly how much VRAM we're able to use for "normal"
+-	 * purposes.
+-	 */
+ 	ret = engine->instmem.init(dev);
+ 	if (ret)
+-		goto out_gpuobj_early;
++		goto out_gpuobj;
+ 
+-	/* Setup the memory manager */
+-	ret = nouveau_mem_init(dev);
++	ret = nouveau_mem_gart_init(dev);
  	if (ret)
- 		goto out_gpuobj;
+ 		goto out_instmem;
  
+-	ret = nouveau_gpuobj_init(dev);
+-	if (ret)
+-		goto out_mem;
+-
+ 	/* PMC */
+ 	ret = engine->mc.init(dev);
+ 	if (ret)
+-		goto out_gpuobj;
++		goto out_gart;
++
 +	/* PGPIO */
 +	ret = engine->gpio.init(dev);
 +	if (ret)
 +		goto out_mc;
-+
+ 
  	/* PTIMER */
  	ret = engine->timer.init(dev);
  	if (ret)
@@ -7376,7 +9042,7 @@ index b02a231..989322b 100644
  
  	/* PFB */
  	ret = engine->fb.init(dev);
-@@ -485,12 +593,16 @@ nouveau_card_init(struct drm_device *dev)
+@@ -485,12 +577,16 @@ nouveau_card_init(struct drm_device *dev)
  			goto out_graph;
  	}
  
@@ -7394,7 +9060,7 @@ index b02a231..989322b 100644
  
  	ret = drm_vblank_init(dev, 0);
  	if (ret)
-@@ -504,35 +616,18 @@ nouveau_card_init(struct drm_device *dev)
+@@ -504,35 +600,18 @@ nouveau_card_init(struct drm_device *dev)
  			goto out_irq;
  	}
  
@@ -7434,7 +9100,7 @@ index b02a231..989322b 100644
  out_fifo:
  	if (!nouveau_noaccel)
  		engine->fifo.takedown(dev);
-@@ -543,6 +638,8 @@ out_fb:
+@@ -543,19 +622,22 @@ out_fb:
  	engine->fb.takedown(dev);
  out_timer:
  	engine->timer.takedown(dev);
@@ -7442,9 +9108,21 @@ index b02a231..989322b 100644
 +	engine->gpio.takedown(dev);
  out_mc:
  	engine->mc.takedown(dev);
- out_gpuobj:
-@@ -556,6 +653,8 @@ out_gpuobj_early:
- 	nouveau_gpuobj_late_takedown(dev);
+-out_gpuobj:
+-	nouveau_gpuobj_takedown(dev);
+-out_mem:
+-	nouveau_sgdma_takedown(dev);
+-	nouveau_mem_close(dev);
++out_gart:
++	nouveau_mem_gart_fini(dev);
+ out_instmem:
+ 	engine->instmem.takedown(dev);
+-out_gpuobj_early:
+-	nouveau_gpuobj_late_takedown(dev);
++out_gpuobj:
++	nouveau_gpuobj_takedown(dev);
++out_vram:
++	nouveau_mem_vram_fini(dev);
  out_bios:
  	nouveau_bios_takedown(dev);
 +out_display_early:
@@ -7452,22 +9130,22 @@ index b02a231..989322b 100644
  out:
  	vga_client_register(dev->pdev, NULL, NULL, NULL);
  	return ret;
-@@ -566,45 +665,39 @@ static void nouveau_card_takedown(struct drm_device *dev)
+@@ -566,45 +648,38 @@ static void nouveau_card_takedown(struct drm_device *dev)
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
  	struct nouveau_engine *engine = &dev_priv->engine;
  
 -	NV_DEBUG(dev, "prev state = %d\n", dev_priv->init_state);
 -
 -	if (dev_priv->init_state != NOUVEAU_CARD_INIT_DOWN) {
--
++	nouveau_backlight_exit(dev);
+ 
 -		nouveau_backlight_exit(dev);
 -
 -		if (dev_priv->channel) {
 -			nouveau_channel_free(dev_priv->channel);
 -			dev_priv->channel = NULL;
 -		}
-+	nouveau_backlight_exit(dev);
- 
+-
 -		if (!nouveau_noaccel) {
 -			engine->fifo.takedown(dev);
 -			engine->graph.takedown(dev);
@@ -7502,20 +9180,19 @@ index b02a231..989322b 100644
 +	ttm_bo_clean_mm(&dev_priv->ttm.bdev, TTM_PL_VRAM);
 +	ttm_bo_clean_mm(&dev_priv->ttm.bdev, TTM_PL_TT);
 +	mutex_unlock(&dev->struct_mutex);
-+	nouveau_sgdma_takedown(dev);
++	nouveau_mem_gart_fini(dev);
  
 -		if (drm_core_check_feature(dev, DRIVER_MODESET))
 -			drm_irq_uninstall(dev);
-+	nouveau_gpuobj_takedown(dev);
-+	nouveau_mem_close(dev);
 +	engine->instmem.takedown(dev);
++	nouveau_gpuobj_takedown(dev);
++	nouveau_mem_vram_fini(dev);
  
 -		nouveau_gpuobj_late_takedown(dev);
 -		nouveau_bios_takedown(dev);
 +	drm_irq_uninstall(dev);
  
 -		vga_client_register(dev->pdev, NULL, NULL, NULL);
-+	nouveau_gpuobj_late_takedown(dev);
 +	nouveau_bios_takedown(dev);
  
 -		dev_priv->init_state = NOUVEAU_CARD_INIT_DOWN;
@@ -7524,7 +9201,7 @@ index b02a231..989322b 100644
  }
  
  /* here a client dies, release the stuff that was allocated for its
-@@ -691,22 +784,26 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
+@@ -691,22 +766,26 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
  	struct drm_nouveau_private *dev_priv;
  	uint32_t reg0;
  	resource_size_t mmio_start_offs;
@@ -7556,7 +9233,7 @@ index b02a231..989322b 100644
  
  	/* resource 0 is mmio regs */
  	/* resource 1 is linear FB */
-@@ -719,7 +816,8 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
+@@ -719,7 +798,8 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
  	if (!dev_priv->mmio) {
  		NV_ERROR(dev, "Unable to initialize the mmio mapping. "
  			 "Please report your setup to " DRIVER_EMAIL "\n");
@@ -7566,7 +9243,7 @@ index b02a231..989322b 100644
  	}
  	NV_DEBUG(dev, "regs mapped ok at 0x%llx\n",
  					(unsigned long long)mmio_start_offs);
-@@ -765,19 +863,21 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
+@@ -765,19 +845,21 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
  	case 0xa0:
  		dev_priv->card_type = NV_50;
  		break;
@@ -7594,7 +9271,7 @@ index b02a231..989322b 100644
  
  	/* Map PRAMIN BAR, or on older cards, the aperture withing BAR0 */
  	if (dev_priv->card_type >= NV_40) {
-@@ -791,7 +891,8 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
+@@ -791,7 +873,8 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
  				dev_priv->ramin_size);
  		if (!dev_priv->ramin) {
  			NV_ERROR(dev, "Failed to PRAMIN BAR");
@@ -7604,7 +9281,7 @@ index b02a231..989322b 100644
  		}
  	} else {
  		dev_priv->ramin_size = 1 * 1024 * 1024;
-@@ -799,7 +900,8 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
+@@ -799,7 +882,8 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
  					  dev_priv->ramin_size);
  		if (!dev_priv->ramin) {
  			NV_ERROR(dev, "Failed to map BAR0 PRAMIN.\n");
@@ -7614,7 +9291,7 @@ index b02a231..989322b 100644
  		}
  	}
  
-@@ -812,46 +914,38 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
+@@ -812,46 +896,38 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
  		dev_priv->flags |= NV_NFORCE2;
  
  	/* For kernel modesetting, init card now and bring up fbcon */
@@ -7680,7 +9357,7 @@ index b02a231..989322b 100644
  
  	iounmap(dev_priv->mmio);
  	iounmap(dev_priv->ramin);
-@@ -867,8 +961,6 @@ int nouveau_ioctl_getparam(struct drm_device *dev, void *data,
+@@ -867,8 +943,6 @@ int nouveau_ioctl_getparam(struct drm_device *dev, void *data,
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
  	struct drm_nouveau_getparam *getparam = data;
  
@@ -7689,7 +9366,7 @@ index b02a231..989322b 100644
  	switch (getparam->param) {
  	case NOUVEAU_GETPARAM_CHIPSET_ID:
  		getparam->value = dev_priv->chipset;
-@@ -937,8 +1029,6 @@ nouveau_ioctl_setparam(struct drm_device *dev, void *data,
+@@ -937,8 +1011,6 @@ nouveau_ioctl_setparam(struct drm_device *dev, void *data,
  {
  	struct drm_nouveau_setparam *setparam = data;
  
@@ -7698,6 +9375,15 @@ index b02a231..989322b 100644
  	switch (setparam->param) {
  	default:
  		NV_ERROR(dev, "unknown parameter %lld\n", setparam->param);
+@@ -967,7 +1039,7 @@ bool nouveau_wait_until(struct drm_device *dev, uint64_t timeout,
+ /* Waits for PGRAPH to go completely idle */
+ bool nouveau_wait_for_idle(struct drm_device *dev)
+ {
+-	if (!nv_wait(NV04_PGRAPH_STATUS, 0xffffffff, 0x00000000)) {
++	if (!nv_wait(dev, NV04_PGRAPH_STATUS, 0xffffffff, 0x00000000)) {
+ 		NV_ERROR(dev, "PGRAPH idle timed out with status 0x%08x\n",
+ 			 nv_rd32(dev, NV04_PGRAPH_STATUS));
+ 		return false;
 diff --git a/drivers/gpu/drm/nouveau/nv04_crtc.c b/drivers/gpu/drm/nouveau/nv04_crtc.c
 index eba687f..291a4cb 100644
 --- a/drivers/gpu/drm/nouveau/nv04_crtc.c
@@ -8324,11 +10010,49 @@ index c7898b4..9e28cf7 100644
 +	return 0;
  }
  
+diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c
+index 1eeac4f..33e4c93 100644
+--- a/drivers/gpu/drm/nouveau/nv04_fbcon.c
++++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c
+@@ -25,6 +25,7 @@
+ #include "drmP.h"
+ #include "nouveau_drv.h"
+ #include "nouveau_dma.h"
++#include "nouveau_ramht.h"
+ #include "nouveau_fbcon.h"
+ 
+ void
+@@ -169,11 +170,9 @@ nv04_fbcon_grobj_new(struct drm_device *dev, int class, uint32_t handle)
+ 	if (ret)
+ 		return ret;
+ 
+-	ret = nouveau_gpuobj_ref_add(dev, dev_priv->channel, handle, obj, NULL);
+-	if (ret)
+-		return ret;
+-
+-	return 0;
++	ret = nouveau_ramht_insert(dev_priv->channel, handle, obj);
++	nouveau_gpuobj_ref(NULL, &obj);
++	return ret;
+ }
+ 
+ int
 diff --git a/drivers/gpu/drm/nouveau/nv04_fifo.c b/drivers/gpu/drm/nouveau/nv04_fifo.c
-index 66fe559..bbb87ef 100644
+index 66fe559..708293b 100644
 --- a/drivers/gpu/drm/nouveau/nv04_fifo.c
 +++ b/drivers/gpu/drm/nouveau/nv04_fifo.c
-@@ -38,10 +38,10 @@
+@@ -27,8 +27,9 @@
+ #include "drmP.h"
+ #include "drm.h"
+ #include "nouveau_drv.h"
++#include "nouveau_ramht.h"
+ 
+-#define NV04_RAMFC(c) (dev_priv->ramfc_offset + ((c) * NV04_RAMFC__SIZE))
++#define NV04_RAMFC(c) (dev_priv->ramfc->pinst + ((c) * NV04_RAMFC__SIZE))
+ #define NV04_RAMFC__SIZE 32
+ #define NV04_RAMFC_DMA_PUT                                       0x00
+ #define NV04_RAMFC_DMA_GET                                       0x04
+@@ -38,10 +39,8 @@
  #define NV04_RAMFC_ENGINE                                        0x14
  #define NV04_RAMFC_PULL1_ENGINE                                  0x18
  
@@ -8336,14 +10060,70 @@ index 66fe559..bbb87ef 100644
 -					 NV04_RAMFC_##offset/4, (val))
 -#define RAMFC_RD(offset)      nv_ro32(dev, chan->ramfc->gpuobj, \
 -					 NV04_RAMFC_##offset/4)
-+#define RAMFC_WR(offset, val) nv_wo32(chan->ramfc->gpuobj, \
-+				      NV04_RAMFC_##offset, (val))
-+#define RAMFC_RD(offset)      nv_ro32(chan->ramfc->gpuobj, \
-+				      NV04_RAMFC_##offset)
++#define RAMFC_WR(offset, val) nv_wo32(chan->ramfc, NV04_RAMFC_##offset, (val))
++#define RAMFC_RD(offset)      nv_ro32(chan->ramfc, NV04_RAMFC_##offset)
  
  void
  nv04_fifo_disable(struct drm_device *dev)
-@@ -112,6 +112,12 @@ nv04_fifo_channel_id(struct drm_device *dev)
+@@ -72,37 +71,32 @@ nv04_fifo_reassign(struct drm_device *dev, bool enable)
+ }
+ 
+ bool
+-nv04_fifo_cache_flush(struct drm_device *dev)
+-{
+-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	struct nouveau_timer_engine *ptimer = &dev_priv->engine.timer;
+-	uint64_t start = ptimer->read(dev);
+-
+-	do {
+-		if (nv_rd32(dev, NV03_PFIFO_CACHE1_GET) ==
+-		    nv_rd32(dev, NV03_PFIFO_CACHE1_PUT))
+-			return true;
+-
+-	} while (ptimer->read(dev) - start < 100000000);
+-
+-	NV_ERROR(dev, "Timeout flushing the PFIFO cache.\n");
+-
+-	return false;
+-}
+-
+-bool
+ nv04_fifo_cache_pull(struct drm_device *dev, bool enable)
+ {
+-	uint32_t pull = nv_rd32(dev, NV04_PFIFO_CACHE1_PULL0);
++	int pull = nv_mask(dev, NV04_PFIFO_CACHE1_PULL0, 1, enable);
++
++	if (!enable) {
++		/* In some cases the PFIFO puller may be left in an
++		 * inconsistent state if you try to stop it when it's
++		 * busy translating handles. Sometimes you get a
++		 * PFIFO_CACHE_ERROR, sometimes it just fails silently
++		 * sending incorrect instance offsets to PGRAPH after
++		 * it's started up again. To avoid the latter we
++		 * invalidate the most recently calculated instance.
++		 */
++		if (!nv_wait(dev, NV04_PFIFO_CACHE1_PULL0,
++			     NV04_PFIFO_CACHE1_PULL0_HASH_BUSY, 0))
++			NV_ERROR(dev, "Timeout idling the PFIFO puller.\n");
++
++		if (nv_rd32(dev, NV04_PFIFO_CACHE1_PULL0) &
++		    NV04_PFIFO_CACHE1_PULL0_HASH_FAILED)
++			nv_wr32(dev, NV03_PFIFO_INTR_0,
++				NV_PFIFO_INTR_CACHE_ERROR);
+ 
+-	if (enable) {
+-		nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, pull | 1);
+-	} else {
+-		nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, pull & ~1);
+ 		nv_wr32(dev, NV04_PFIFO_CACHE1_HASH, 0);
+ 	}
+ 
+-	return !!(pull & 1);
++	return pull & 1;
+ }
+ 
+ int
+@@ -112,6 +106,12 @@ nv04_fifo_channel_id(struct drm_device *dev)
  			NV03_PFIFO_CACHE1_PUSH1_CHID_MASK;
  }
  
@@ -8356,14 +10136,23 @@ index 66fe559..bbb87ef 100644
  int
  nv04_fifo_create_context(struct nouveau_channel *chan)
  {
-@@ -131,18 +137,13 @@ nv04_fifo_create_context(struct nouveau_channel *chan)
+@@ -124,25 +124,20 @@ nv04_fifo_create_context(struct nouveau_channel *chan)
+ 						NV04_RAMFC__SIZE,
+ 						NVOBJ_FLAG_ZERO_ALLOC |
+ 						NVOBJ_FLAG_ZERO_FREE,
+-						NULL, &chan->ramfc);
++						&chan->ramfc);
+ 	if (ret)
+ 		return ret;
+ 
  	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
  
  	/* Setup initial state */
 -	dev_priv->engine.instmem.prepare_access(dev, true);
  	RAMFC_WR(DMA_PUT, chan->pushbuf_base);
  	RAMFC_WR(DMA_GET, chan->pushbuf_base);
- 	RAMFC_WR(DMA_INSTANCE, chan->pushbuf->instance >> 4);
+-	RAMFC_WR(DMA_INSTANCE, chan->pushbuf->instance >> 4);
++	RAMFC_WR(DMA_INSTANCE, chan->pushbuf->pinst >> 4);
  	RAMFC_WR(DMA_FETCH, (NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
  			     NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
  			     NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8 |
@@ -8376,7 +10165,16 @@ index 66fe559..bbb87ef 100644
  
  	/* enable the fifo dma operation */
  	nv_wr32(dev, NV04_PFIFO_MODE,
-@@ -169,8 +170,6 @@ nv04_fifo_do_load_context(struct drm_device *dev, int chid)
+@@ -160,7 +155,7 @@ nv04_fifo_destroy_context(struct nouveau_channel *chan)
+ 	nv_wr32(dev, NV04_PFIFO_MODE,
+ 		nv_rd32(dev, NV04_PFIFO_MODE) & ~(1 << chan->id));
+ 
+-	nouveau_gpuobj_ref_del(dev, &chan->ramfc);
++	nouveau_gpuobj_ref(NULL, &chan->ramfc);
+ }
+ 
+ static void
+@@ -169,8 +164,6 @@ nv04_fifo_do_load_context(struct drm_device *dev, int chid)
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
  	uint32_t fc = NV04_RAMFC(chid), tmp;
  
@@ -8385,7 +10183,7 @@ index 66fe559..bbb87ef 100644
  	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_PUT, nv_ri32(dev, fc + 0));
  	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_GET, nv_ri32(dev, fc + 4));
  	tmp = nv_ri32(dev, fc + 8);
-@@ -181,8 +180,6 @@ nv04_fifo_do_load_context(struct drm_device *dev, int chid)
+@@ -181,8 +174,6 @@ nv04_fifo_do_load_context(struct drm_device *dev, int chid)
  	nv_wr32(dev, NV04_PFIFO_CACHE1_ENGINE, nv_ri32(dev, fc + 20));
  	nv_wr32(dev, NV04_PFIFO_CACHE1_PULL1, nv_ri32(dev, fc + 24));
  
@@ -8394,7 +10192,7 @@ index 66fe559..bbb87ef 100644
  	nv_wr32(dev, NV03_PFIFO_CACHE1_GET, 0);
  	nv_wr32(dev, NV03_PFIFO_CACHE1_PUT, 0);
  }
-@@ -223,7 +220,6 @@ nv04_fifo_unload_context(struct drm_device *dev)
+@@ -223,7 +214,6 @@ nv04_fifo_unload_context(struct drm_device *dev)
  		return -EINVAL;
  	}
  
@@ -8402,7 +10200,7 @@ index 66fe559..bbb87ef 100644
  	RAMFC_WR(DMA_PUT, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_PUT));
  	RAMFC_WR(DMA_GET, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_GET));
  	tmp  = nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_DCOUNT) << 16;
-@@ -233,7 +229,6 @@ nv04_fifo_unload_context(struct drm_device *dev)
+@@ -233,7 +223,6 @@ nv04_fifo_unload_context(struct drm_device *dev)
  	RAMFC_WR(DMA_FETCH, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_FETCH));
  	RAMFC_WR(ENGINE, nv_rd32(dev, NV04_PFIFO_CACHE1_ENGINE));
  	RAMFC_WR(PULL1_ENGINE, nv_rd32(dev, NV04_PFIFO_CACHE1_PULL1));
@@ -8410,7 +10208,22 @@ index 66fe559..bbb87ef 100644
  
  	nv04_fifo_do_load_context(dev, pfifo->channels - 1);
  	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, pfifo->channels - 1);
-@@ -297,6 +292,7 @@ nv04_fifo_init(struct drm_device *dev)
+@@ -269,10 +258,10 @@ nv04_fifo_init_ramxx(struct drm_device *dev)
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+ 
+ 	nv_wr32(dev, NV03_PFIFO_RAMHT, (0x03 << 24) /* search 128 */ |
+-				       ((dev_priv->ramht_bits - 9) << 16) |
+-				       (dev_priv->ramht_offset >> 8));
+-	nv_wr32(dev, NV03_PFIFO_RAMRO, dev_priv->ramro_offset>>8);
+-	nv_wr32(dev, NV03_PFIFO_RAMFC, dev_priv->ramfc_offset >> 8);
++				       ((dev_priv->ramht->bits - 9) << 16) |
++				       (dev_priv->ramht->gpuobj->pinst >> 8));
++	nv_wr32(dev, NV03_PFIFO_RAMRO, dev_priv->ramro->pinst >> 8);
++	nv_wr32(dev, NV03_PFIFO_RAMFC, dev_priv->ramfc->pinst >> 8);
+ }
+ 
+ static void
+@@ -297,6 +286,7 @@ nv04_fifo_init(struct drm_device *dev)
  
  	nv04_fifo_init_intr(dev);
  	pfifo->enable(dev);
@@ -8442,30 +10255,158 @@ index 618355e..c897342 100644
  }
  
 diff --git a/drivers/gpu/drm/nouveau/nv04_instmem.c b/drivers/gpu/drm/nouveau/nv04_instmem.c
-index a3b9563..4408232 100644
+index a3b9563..0b5ae29 100644
 --- a/drivers/gpu/drm/nouveau/nv04_instmem.c
 +++ b/drivers/gpu/drm/nouveau/nv04_instmem.c
-@@ -49,10 +49,8 @@ nv04_instmem_determine_amount(struct drm_device *dev)
- 	NV_DEBUG(dev, "RAMIN size: %dKiB\n", dev_priv->ramin_rsvd_vram >> 10);
+@@ -1,6 +1,7 @@
+ #include "drmP.h"
+ #include "drm.h"
+ #include "nouveau_drv.h"
++#include "nouveau_ramht.h"
+ 
+ /* returns the size of fifo context */
+ static int
+@@ -17,104 +18,51 @@ nouveau_fifo_ctx_size(struct drm_device *dev)
+ 	return 32;
+ }
  
- 	/* Clear all of it, except the BIOS image that's in the first 64KiB */
+-static void
+-nv04_instmem_determine_amount(struct drm_device *dev)
+-{
+-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	int i;
+-
+-	/* Figure out how much instance memory we need */
+-	if (dev_priv->card_type >= NV_40) {
+-		/* We'll want more instance memory than this on some NV4x cards.
+-		 * There's a 16MB aperture to play with that maps onto the end
+-		 * of vram.  For now, only reserve a small piece until we know
+-		 * more about what each chipset requires.
+-		 */
+-		switch (dev_priv->chipset) {
+-		case 0x40:
+-		case 0x47:
+-		case 0x49:
+-		case 0x4b:
+-			dev_priv->ramin_rsvd_vram = (2 * 1024 * 1024);
+-			break;
+-		default:
+-			dev_priv->ramin_rsvd_vram = (1 * 1024 * 1024);
+-			break;
+-		}
+-	} else {
+-		/*XXX: what *are* the limits on <NV40 cards?
+-		 */
+-		dev_priv->ramin_rsvd_vram = (512 * 1024);
+-	}
+-	NV_DEBUG(dev, "RAMIN size: %dKiB\n", dev_priv->ramin_rsvd_vram >> 10);
+-
+-	/* Clear all of it, except the BIOS image that's in the first 64KiB */
 -	dev_priv->engine.instmem.prepare_access(dev, true);
- 	for (i = 64 * 1024; i < dev_priv->ramin_rsvd_vram; i += 4)
- 		nv_wi32(dev, i, 0x00000000);
+-	for (i = 64 * 1024; i < dev_priv->ramin_rsvd_vram; i += 4)
+-		nv_wi32(dev, i, 0x00000000);
 -	dev_priv->engine.instmem.finish_access(dev);
- }
+-}
+-
+-static void
+-nv04_instmem_configure_fixed_tables(struct drm_device *dev)
++int nv04_instmem_init(struct drm_device *dev)
+ {
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	struct nouveau_engine *engine = &dev_priv->engine;
+-
+-	/* FIFO hash table (RAMHT)
+-	 *   use 4k hash table at RAMIN+0x10000
+-	 *   TODO: extend the hash table
+-	 */
+-	dev_priv->ramht_offset = 0x10000;
+-	dev_priv->ramht_bits   = 9;
+-	dev_priv->ramht_size   = (1 << dev_priv->ramht_bits); /* nr entries */
+-	dev_priv->ramht_size  *= 8; /* 2 32-bit values per entry in RAMHT */
+-	NV_DEBUG(dev, "RAMHT offset=0x%x, size=%d\n", dev_priv->ramht_offset,
+-						      dev_priv->ramht_size);
+-
+-	/* FIFO runout table (RAMRO) - 512k at 0x11200 */
+-	dev_priv->ramro_offset = 0x11200;
+-	dev_priv->ramro_size   = 512;
+-	NV_DEBUG(dev, "RAMRO offset=0x%x, size=%d\n", dev_priv->ramro_offset,
+-						      dev_priv->ramro_size);
+-
+-	/* FIFO context table (RAMFC)
+-	 *   NV40  : Not sure exactly how to position RAMFC on some cards,
+-	 *           0x30002 seems to position it at RAMIN+0x20000 on these
+-	 *           cards.  RAMFC is 4kb (32 fifos, 128byte entries).
+-	 *   Others: Position RAMFC at RAMIN+0x11400
+-	 */
+-	dev_priv->ramfc_size = engine->fifo.channels *
+-						nouveau_fifo_ctx_size(dev);
++	struct nouveau_gpuobj *ramht = NULL;
++	u32 offset, length;
++	int ret;
++
++	/* RAMIN always available */
++	dev_priv->ramin_available = true;
++
++	/* Setup shared RAMHT */
++	ret = nouveau_gpuobj_new_fake(dev, 0x10000, ~0, 4096,
++				      NVOBJ_FLAG_ZERO_ALLOC, &ramht);
++	if (ret)
++		return ret;
++
++	ret = nouveau_ramht_new(dev, ramht, &dev_priv->ramht);
++	nouveau_gpuobj_ref(NULL, &ramht);
++	if (ret)
++		return ret;
++
++	/* And RAMRO */
++	ret = nouveau_gpuobj_new_fake(dev, 0x11200, ~0, 512,
++				      NVOBJ_FLAG_ZERO_ALLOC, &dev_priv->ramro);
++	if (ret)
++		return ret;
++
++	/* And RAMFC */
++	length = dev_priv->engine.fifo.channels * nouveau_fifo_ctx_size(dev);
+ 	switch (dev_priv->card_type) {
+ 	case NV_40:
+-		dev_priv->ramfc_offset = 0x20000;
++		offset = 0x20000;
+ 		break;
+-	case NV_30:
+-	case NV_20:
+-	case NV_10:
+-	case NV_04:
+ 	default:
+-		dev_priv->ramfc_offset = 0x11400;
++		offset = 0x11400;
+ 		break;
+ 	}
+-	NV_DEBUG(dev, "RAMFC offset=0x%x, size=%d\n", dev_priv->ramfc_offset,
+-						      dev_priv->ramfc_size);
+-}
  
- static void
-@@ -106,7 +104,7 @@ int nv04_instmem_init(struct drm_device *dev)
- {
- 	struct drm_nouveau_private *dev_priv = dev->dev_private;
- 	uint32_t offset;
+-int nv04_instmem_init(struct drm_device *dev)
+-{
+-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	uint32_t offset;
 -	int ret = 0;
-+	int ret;
++	ret = nouveau_gpuobj_new_fake(dev, offset, ~0, length,
++				      NVOBJ_FLAG_ZERO_ALLOC, &dev_priv->ramfc);
++	if (ret)
++		return ret;
+ 
+-	nv04_instmem_determine_amount(dev);
+-	nv04_instmem_configure_fixed_tables(dev);
+-
+-	/* Create a heap to manage RAMIN allocations, we don't allocate
+-	 * the space that was reserved for RAMHT/FC/RO.
+-	 */
+-	offset = dev_priv->ramfc_offset + dev_priv->ramfc_size;
++	/* Only allow space after RAMFC to be used for object allocation */
++	offset += length;
  
- 	nv04_instmem_determine_amount(dev);
- 	nv04_instmem_configure_fixed_tables(dev);
-@@ -129,14 +127,14 @@ int nv04_instmem_init(struct drm_device *dev)
+ 	/* It appears RAMRO (or something?) is controlled by 0x2220/0x2230
+ 	 * on certain NV4x chipsets as well as RAMFC.  When 0x2230 == 0
+@@ -129,69 +77,52 @@ int nv04_instmem_init(struct drm_device *dev)
  			offset = 0x40000;
  	}
  
@@ -8485,7 +10426,56 @@ index a3b9563..4408232 100644
  }
  
  void
-@@ -186,12 +184,7 @@ nv04_instmem_unbind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
+ nv04_instmem_takedown(struct drm_device *dev)
+ {
++	struct drm_nouveau_private *dev_priv = dev->dev_private;
++
++	nouveau_ramht_ref(NULL, &dev_priv->ramht, NULL);
++	nouveau_gpuobj_ref(NULL, &dev_priv->ramro);
++	nouveau_gpuobj_ref(NULL, &dev_priv->ramfc);
+ }
+ 
+ int
+-nv04_instmem_populate(struct drm_device *dev, struct nouveau_gpuobj *gpuobj, uint32_t *sz)
++nv04_instmem_populate(struct drm_device *dev, struct nouveau_gpuobj *gpuobj,
++		      uint32_t *sz)
+ {
+-	if (gpuobj->im_backing)
+-		return -EINVAL;
+-
+ 	return 0;
+ }
+ 
+ void
+ nv04_instmem_clear(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
+ {
+-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-
+-	if (gpuobj && gpuobj->im_backing) {
+-		if (gpuobj->im_bound)
+-			dev_priv->engine.instmem.unbind(dev, gpuobj);
+-		gpuobj->im_backing = NULL;
+-	}
+ }
+ 
+ int
+ nv04_instmem_bind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
+ {
+-	if (!gpuobj->im_pramin || gpuobj->im_bound)
+-		return -EINVAL;
+-
+-	gpuobj->im_bound = 1;
+ 	return 0;
+ }
+ 
+ int
+ nv04_instmem_unbind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
+ {
+-	if (gpuobj->im_bound == 0)
+-		return -EINVAL;
+-
+-	gpuobj->im_bound = 0;
+ 	return 0;
  }
  
  void
@@ -8734,18 +10724,41 @@ index c4e3404..0b5d012 100644
  	return ret;
  }
 diff --git a/drivers/gpu/drm/nouveau/nv10_fifo.c b/drivers/gpu/drm/nouveau/nv10_fifo.c
-index 7aeabf2..7a4069c 100644
+index 7aeabf2..f1b03ad 100644
 --- a/drivers/gpu/drm/nouveau/nv10_fifo.c
 +++ b/drivers/gpu/drm/nouveau/nv10_fifo.c
-@@ -55,7 +55,6 @@ nv10_fifo_create_context(struct nouveau_channel *chan)
+@@ -27,8 +27,9 @@
+ #include "drmP.h"
+ #include "drm.h"
+ #include "nouveau_drv.h"
++#include "nouveau_ramht.h"
+ 
+-#define NV10_RAMFC(c) (dev_priv->ramfc_offset + ((c) * NV10_RAMFC__SIZE))
++#define NV10_RAMFC(c) (dev_priv->ramfc->pinst + ((c) * NV10_RAMFC__SIZE))
+ #define NV10_RAMFC__SIZE ((dev_priv->chipset) >= 0x17 ? 64 : 32)
+ 
+ int
+@@ -48,17 +49,16 @@ nv10_fifo_create_context(struct nouveau_channel *chan)
+ 
+ 	ret = nouveau_gpuobj_new_fake(dev, NV10_RAMFC(chan->id), ~0,
+ 				      NV10_RAMFC__SIZE, NVOBJ_FLAG_ZERO_ALLOC |
+-				      NVOBJ_FLAG_ZERO_FREE, NULL, &chan->ramfc);
++				      NVOBJ_FLAG_ZERO_FREE, &chan->ramfc);
+ 	if (ret)
+ 		return ret;
+ 
  	/* Fill entries that are seen filled in dumps of nvidia driver just
  	 * after channel's is put into DMA mode
  	 */
 -	dev_priv->engine.instmem.prepare_access(dev, true);
  	nv_wi32(dev, fc +  0, chan->pushbuf_base);
  	nv_wi32(dev, fc +  4, chan->pushbuf_base);
- 	nv_wi32(dev, fc + 12, chan->pushbuf->instance >> 4);
-@@ -66,7 +65,6 @@ nv10_fifo_create_context(struct nouveau_channel *chan)
+-	nv_wi32(dev, fc + 12, chan->pushbuf->instance >> 4);
++	nv_wi32(dev, fc + 12, chan->pushbuf->pinst >> 4);
+ 	nv_wi32(dev, fc + 20, NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
+ 			      NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
+ 			      NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8 |
+@@ -66,7 +66,6 @@ nv10_fifo_create_context(struct nouveau_channel *chan)
  			      NV_PFIFO_CACHE1_BIG_ENDIAN |
  #endif
  			      0);
@@ -8753,7 +10766,16 @@ index 7aeabf2..7a4069c 100644
  
  	/* enable the fifo dma operation */
  	nv_wr32(dev, NV04_PFIFO_MODE,
-@@ -91,8 +89,6 @@ nv10_fifo_do_load_context(struct drm_device *dev, int chid)
+@@ -82,7 +81,7 @@ nv10_fifo_destroy_context(struct nouveau_channel *chan)
+ 	nv_wr32(dev, NV04_PFIFO_MODE,
+ 			nv_rd32(dev, NV04_PFIFO_MODE) & ~(1 << chan->id));
+ 
+-	nouveau_gpuobj_ref_del(dev, &chan->ramfc);
++	nouveau_gpuobj_ref(NULL, &chan->ramfc);
+ }
+ 
+ static void
+@@ -91,8 +90,6 @@ nv10_fifo_do_load_context(struct drm_device *dev, int chid)
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
  	uint32_t fc = NV10_RAMFC(chid), tmp;
  
@@ -8762,7 +10784,7 @@ index 7aeabf2..7a4069c 100644
  	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_PUT, nv_ri32(dev, fc + 0));
  	nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_GET, nv_ri32(dev, fc + 4));
  	nv_wr32(dev, NV10_PFIFO_CACHE1_REF_CNT, nv_ri32(dev, fc + 8));
-@@ -117,8 +113,6 @@ nv10_fifo_do_load_context(struct drm_device *dev, int chid)
+@@ -117,8 +114,6 @@ nv10_fifo_do_load_context(struct drm_device *dev, int chid)
  	nv_wr32(dev, NV10_PFIFO_CACHE1_DMA_SUBROUTINE, nv_ri32(dev, fc + 48));
  
  out:
@@ -8771,7 +10793,7 @@ index 7aeabf2..7a4069c 100644
  	nv_wr32(dev, NV03_PFIFO_CACHE1_GET, 0);
  	nv_wr32(dev, NV03_PFIFO_CACHE1_PUT, 0);
  }
-@@ -155,8 +149,6 @@ nv10_fifo_unload_context(struct drm_device *dev)
+@@ -155,8 +150,6 @@ nv10_fifo_unload_context(struct drm_device *dev)
  		return 0;
  	fc = NV10_RAMFC(chid);
  
@@ -8780,7 +10802,7 @@ index 7aeabf2..7a4069c 100644
  	nv_wi32(dev, fc +  0, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_PUT));
  	nv_wi32(dev, fc +  4, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_GET));
  	nv_wi32(dev, fc +  8, nv_rd32(dev, NV10_PFIFO_CACHE1_REF_CNT));
-@@ -179,8 +171,6 @@ nv10_fifo_unload_context(struct drm_device *dev)
+@@ -179,8 +172,6 @@ nv10_fifo_unload_context(struct drm_device *dev)
  	nv_wi32(dev, fc + 48, nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_GET));
  
  out:
@@ -8789,6 +10811,26 @@ index 7aeabf2..7a4069c 100644
  	nv10_fifo_do_load_context(dev, pfifo->channels - 1);
  	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, pfifo->channels - 1);
  	return 0;
+@@ -212,14 +203,14 @@ nv10_fifo_init_ramxx(struct drm_device *dev)
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+ 
+ 	nv_wr32(dev, NV03_PFIFO_RAMHT, (0x03 << 24) /* search 128 */ |
+-				       ((dev_priv->ramht_bits - 9) << 16) |
+-				       (dev_priv->ramht_offset >> 8));
+-	nv_wr32(dev, NV03_PFIFO_RAMRO, dev_priv->ramro_offset>>8);
++				       ((dev_priv->ramht->bits - 9) << 16) |
++				       (dev_priv->ramht->gpuobj->pinst >> 8));
++	nv_wr32(dev, NV03_PFIFO_RAMRO, dev_priv->ramro->pinst >> 8);
+ 
+ 	if (dev_priv->chipset < 0x17) {
+-		nv_wr32(dev, NV03_PFIFO_RAMFC, dev_priv->ramfc_offset >> 8);
++		nv_wr32(dev, NV03_PFIFO_RAMFC, dev_priv->ramfc->pinst >> 8);
+ 	} else {
+-		nv_wr32(dev, NV03_PFIFO_RAMFC, (dev_priv->ramfc_offset >> 8) |
++		nv_wr32(dev, NV03_PFIFO_RAMFC, (dev_priv->ramfc->pinst >> 8) |
+ 					       (1 << 16) /* 64 Bytes entry*/);
+ 		/* XXX nvidia blob set bit 18, 21,23 for nv20 & nv30 */
+ 	}
 diff --git a/drivers/gpu/drm/nouveau/nv10_gpio.c b/drivers/gpu/drm/nouveau/nv10_gpio.c
 new file mode 100644
 index 0000000..007fc29
@@ -9193,7 +11235,7 @@ index 2e58c33..0000000
 -	return 0;
 -}
 diff --git a/drivers/gpu/drm/nouveau/nv17_tv.c b/drivers/gpu/drm/nouveau/nv17_tv.c
-index 74c8803..703c188 100644
+index 74c8803..a3b8861 100644
 --- a/drivers/gpu/drm/nouveau/nv17_tv.c
 +++ b/drivers/gpu/drm/nouveau/nv17_tv.c
 @@ -37,6 +37,7 @@ static uint32_t nv42_tv_sample_load(struct drm_encoder *encoder)
@@ -9297,7 +11339,149 @@ index 74c8803..703c188 100644
  		NV_INFO(dev, "Load detected on output %c\n",
  			'@' + ffs(dcb->or));
  		return connector_status_connected;
-@@ -296,6 +326,9 @@ static bool nv17_tv_mode_fixup(struct drm_encoder *encoder,
+@@ -163,55 +193,56 @@ nv17_tv_detect(struct drm_encoder *encoder, struct drm_connector *connector)
+ 	}
+ }
+ 
+-static const struct {
+-	int hdisplay;
+-	int vdisplay;
+-} modes[] = {
+-	{ 640, 400 },
+-	{ 640, 480 },
+-	{ 720, 480 },
+-	{ 720, 576 },
+-	{ 800, 600 },
+-	{ 1024, 768 },
+-	{ 1280, 720 },
+-	{ 1280, 1024 },
+-	{ 1920, 1080 }
+-};
+-
+-static int nv17_tv_get_modes(struct drm_encoder *encoder,
+-			     struct drm_connector *connector)
++static int nv17_tv_get_ld_modes(struct drm_encoder *encoder,
++				struct drm_connector *connector)
+ {
+ 	struct nv17_tv_norm_params *tv_norm = get_tv_norm(encoder);
+-	struct drm_display_mode *mode;
+-	struct drm_display_mode *output_mode;
++	struct drm_display_mode *mode, *tv_mode;
+ 	int n = 0;
+-	int i;
+ 
+-	if (tv_norm->kind != CTV_ENC_MODE) {
+-		struct drm_display_mode *tv_mode;
++	for (tv_mode = nv17_tv_modes; tv_mode->hdisplay; tv_mode++) {
++		mode = drm_mode_duplicate(encoder->dev, tv_mode);
+ 
+-		for (tv_mode = nv17_tv_modes; tv_mode->hdisplay; tv_mode++) {
+-			mode = drm_mode_duplicate(encoder->dev, tv_mode);
++		mode->clock = tv_norm->tv_enc_mode.vrefresh *
++			mode->htotal / 1000 *
++			mode->vtotal / 1000;
+ 
+-			mode->clock = tv_norm->tv_enc_mode.vrefresh *
+-						mode->htotal / 1000 *
+-						mode->vtotal / 1000;
+-
+-			if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+-				mode->clock *= 2;
++		if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
++			mode->clock *= 2;
+ 
+-			if (mode->hdisplay == tv_norm->tv_enc_mode.hdisplay &&
+-			    mode->vdisplay == tv_norm->tv_enc_mode.vdisplay)
+-				mode->type |= DRM_MODE_TYPE_PREFERRED;
++		if (mode->hdisplay == tv_norm->tv_enc_mode.hdisplay &&
++		    mode->vdisplay == tv_norm->tv_enc_mode.vdisplay)
++			mode->type |= DRM_MODE_TYPE_PREFERRED;
+ 
+-			drm_mode_probed_add(connector, mode);
+-			n++;
+-		}
+-		return n;
++		drm_mode_probed_add(connector, mode);
++		n++;
+ 	}
+ 
+-	/* tv_norm->kind == CTV_ENC_MODE */
+-	output_mode = &tv_norm->ctv_enc_mode.mode;
++	return n;
++}
++
++static int nv17_tv_get_hd_modes(struct drm_encoder *encoder,
++				struct drm_connector *connector)
++{
++	struct nv17_tv_norm_params *tv_norm = get_tv_norm(encoder);
++	struct drm_display_mode *output_mode = &tv_norm->ctv_enc_mode.mode;
++	struct drm_display_mode *mode;
++	const struct {
++		int hdisplay;
++		int vdisplay;
++	} modes[] = {
++		{ 640, 400 },
++		{ 640, 480 },
++		{ 720, 480 },
++		{ 720, 576 },
++		{ 800, 600 },
++		{ 1024, 768 },
++		{ 1280, 720 },
++		{ 1280, 1024 },
++		{ 1920, 1080 }
++	};
++	int i, n = 0;
++
+ 	for (i = 0; i < ARRAY_SIZE(modes); i++) {
+ 		if (modes[i].hdisplay > output_mode->hdisplay ||
+ 		    modes[i].vdisplay > output_mode->vdisplay)
+@@ -221,11 +252,12 @@ static int nv17_tv_get_modes(struct drm_encoder *encoder,
+ 		    modes[i].vdisplay == output_mode->vdisplay) {
+ 			mode = drm_mode_duplicate(encoder->dev, output_mode);
+ 			mode->type |= DRM_MODE_TYPE_PREFERRED;
++
+ 		} else {
+ 			mode = drm_cvt_mode(encoder->dev, modes[i].hdisplay,
+-				modes[i].vdisplay, 60, false,
+-				output_mode->flags & DRM_MODE_FLAG_INTERLACE,
+-				false);
++					    modes[i].vdisplay, 60, false,
++					    (output_mode->flags &
++					     DRM_MODE_FLAG_INTERLACE), false);
+ 		}
+ 
+ 		/* CVT modes are sometimes unsuitable... */
+@@ -236,6 +268,7 @@ static int nv17_tv_get_modes(struct drm_encoder *encoder,
+ 					     - mode->hdisplay) * 9 / 10) & ~7;
+ 			mode->hsync_end = mode->hsync_start + 8;
+ 		}
++
+ 		if (output_mode->vdisplay >= 1024) {
+ 			mode->vtotal = output_mode->vtotal;
+ 			mode->vsync_start = output_mode->vsync_start;
+@@ -246,9 +279,21 @@ static int nv17_tv_get_modes(struct drm_encoder *encoder,
+ 		drm_mode_probed_add(connector, mode);
+ 		n++;
+ 	}
++
+ 	return n;
+ }
+ 
++static int nv17_tv_get_modes(struct drm_encoder *encoder,
++			     struct drm_connector *connector)
++{
++	struct nv17_tv_norm_params *tv_norm = get_tv_norm(encoder);
++
++	if (tv_norm->kind == CTV_ENC_MODE)
++		return nv17_tv_get_hd_modes(encoder, connector);
++	else
++		return nv17_tv_get_ld_modes(encoder, connector);
++}
++
+ static int nv17_tv_mode_valid(struct drm_encoder *encoder,
+ 			      struct drm_display_mode *mode)
+ {
+@@ -296,6 +341,9 @@ static bool nv17_tv_mode_fixup(struct drm_encoder *encoder,
  {
  	struct nv17_tv_norm_params *tv_norm = get_tv_norm(encoder);
  
@@ -9307,7 +11491,7 @@ index 74c8803..703c188 100644
  	if (tv_norm->kind == CTV_ENC_MODE)
  		adjusted_mode->clock = tv_norm->ctv_enc_mode.mode.clock;
  	else
-@@ -307,6 +340,8 @@ static bool nv17_tv_mode_fixup(struct drm_encoder *encoder,
+@@ -307,6 +355,8 @@ static bool nv17_tv_mode_fixup(struct drm_encoder *encoder,
  static void  nv17_tv_dpms(struct drm_encoder *encoder, int mode)
  {
  	struct drm_device *dev = encoder->dev;
@@ -9316,7 +11500,7 @@ index 74c8803..703c188 100644
  	struct nv17_tv_state *regs = &to_tv_enc(encoder)->state;
  	struct nv17_tv_norm_params *tv_norm = get_tv_norm(encoder);
  
-@@ -331,8 +366,8 @@ static void  nv17_tv_dpms(struct drm_encoder *encoder, int mode)
+@@ -331,8 +381,8 @@ static void  nv17_tv_dpms(struct drm_encoder *encoder, int mode)
  
  	nv_load_ptv(dev, regs, 200);
  
@@ -9327,7 +11511,7 @@ index 74c8803..703c188 100644
  
  	nv04_dac_update_dacclk(encoder, mode == DRM_MODE_DPMS_ON);
  }
-@@ -373,15 +408,10 @@ static void nv17_tv_prepare(struct drm_encoder *encoder)
+@@ -373,15 +423,10 @@ static void nv17_tv_prepare(struct drm_encoder *encoder)
  
  	}
  
@@ -9347,7 +11531,7 @@ index 74c8803..703c188 100644
  
  	/* Set the DACCLK register */
  	dacclk = (NVReadRAMDAC(dev, 0, dacclk_off) & ~0x30) | 0x1;
-@@ -744,8 +774,10 @@ static struct drm_encoder_funcs nv17_tv_funcs = {
+@@ -744,8 +789,10 @@ static struct drm_encoder_funcs nv17_tv_funcs = {
  	.destroy = nv17_tv_destroy,
  };
  
@@ -9359,7 +11543,7 @@ index 74c8803..703c188 100644
  	struct drm_encoder *encoder;
  	struct nv17_tv_encoder *tv_enc = NULL;
  
-@@ -774,5 +806,7 @@ int nv17_tv_create(struct drm_device *dev, struct dcb_entry *entry)
+@@ -774,5 +821,7 @@ int nv17_tv_create(struct drm_device *dev, struct dcb_entry *entry)
  	encoder->possible_crtcs = entry->heads;
  	encoder->possible_clones = 0;
  
@@ -9367,8 +11551,142 @@ index 74c8803..703c188 100644
 +	drm_mode_connector_attach_encoder(connector, encoder);
  	return 0;
  }
+diff --git a/drivers/gpu/drm/nouveau/nv17_tv.h b/drivers/gpu/drm/nouveau/nv17_tv.h
+index c00977c..6bf0384 100644
+--- a/drivers/gpu/drm/nouveau/nv17_tv.h
++++ b/drivers/gpu/drm/nouveau/nv17_tv.h
+@@ -127,7 +127,8 @@ void nv17_ctv_update_rescaler(struct drm_encoder *encoder);
+ 
+ /* TV hardware access functions */
+ 
+-static inline void nv_write_ptv(struct drm_device *dev, uint32_t reg, uint32_t val)
++static inline void nv_write_ptv(struct drm_device *dev, uint32_t reg,
++				uint32_t val)
+ {
+ 	nv_wr32(dev, reg, val);
+ }
+@@ -137,7 +138,8 @@ static inline uint32_t nv_read_ptv(struct drm_device *dev, uint32_t reg)
+ 	return nv_rd32(dev, reg);
+ }
+ 
+-static inline void nv_write_tv_enc(struct drm_device *dev, uint8_t reg, uint8_t val)
++static inline void nv_write_tv_enc(struct drm_device *dev, uint8_t reg,
++				   uint8_t val)
+ {
+ 	nv_write_ptv(dev, NV_PTV_TV_INDEX, reg);
+ 	nv_write_ptv(dev, NV_PTV_TV_DATA, val);
+@@ -149,8 +151,11 @@ static inline uint8_t nv_read_tv_enc(struct drm_device *dev, uint8_t reg)
+ 	return nv_read_ptv(dev, NV_PTV_TV_DATA);
+ }
+ 
+-#define nv_load_ptv(dev, state, reg) nv_write_ptv(dev, NV_PTV_OFFSET + 0x##reg, state->ptv_##reg)
+-#define nv_save_ptv(dev, state, reg) state->ptv_##reg = nv_read_ptv(dev, NV_PTV_OFFSET + 0x##reg)
+-#define nv_load_tv_enc(dev, state, reg) nv_write_tv_enc(dev, 0x##reg, state->tv_enc[0x##reg])
++#define nv_load_ptv(dev, state, reg) \
++	nv_write_ptv(dev, NV_PTV_OFFSET + 0x##reg, state->ptv_##reg)
++#define nv_save_ptv(dev, state, reg) \
++	state->ptv_##reg = nv_read_ptv(dev, NV_PTV_OFFSET + 0x##reg)
++#define nv_load_tv_enc(dev, state, reg) \
++	nv_write_tv_enc(dev, 0x##reg, state->tv_enc[0x##reg])
+ 
+ #endif
+diff --git a/drivers/gpu/drm/nouveau/nv17_tv_modes.c b/drivers/gpu/drm/nouveau/nv17_tv_modes.c
+index d64683d..9d3893c 100644
+--- a/drivers/gpu/drm/nouveau/nv17_tv_modes.c
++++ b/drivers/gpu/drm/nouveau/nv17_tv_modes.c
+@@ -336,12 +336,17 @@ static void tv_setup_filter(struct drm_encoder *encoder)
+ 			struct filter_params *p = &fparams[k][j];
+ 
+ 			for (i = 0; i < 7; i++) {
+-				int64_t c = (p->k1 + p->ki*i + p->ki2*i*i + p->ki3*i*i*i)
+-					+ (p->kr + p->kir*i + p->ki2r*i*i + p->ki3r*i*i*i)*rs[k]
+-					+ (p->kf + p->kif*i + p->ki2f*i*i + p->ki3f*i*i*i)*flicker
+-					+ (p->krf + p->kirf*i + p->ki2rf*i*i + p->ki3rf*i*i*i)*flicker*rs[k];
+-
+-				(*filters[k])[j][i] = (c + id5/2) >> 39 & (0x1 << 31 | 0x7f << 9);
++				int64_t c = (p->k1 + p->ki*i + p->ki2*i*i +
++					     p->ki3*i*i*i)
++					+ (p->kr + p->kir*i + p->ki2r*i*i +
++					   p->ki3r*i*i*i) * rs[k]
++					+ (p->kf + p->kif*i + p->ki2f*i*i +
++					   p->ki3f*i*i*i) * flicker
++					+ (p->krf + p->kirf*i + p->ki2rf*i*i +
++					   p->ki3rf*i*i*i) * flicker * rs[k];
++
++				(*filters[k])[j][i] = (c + id5/2) >> 39
++					& (0x1 << 31 | 0x7f << 9);
+ 			}
+ 		}
+ 	}
+@@ -349,7 +354,8 @@ static void tv_setup_filter(struct drm_encoder *encoder)
+ 
+ /* Hardware state saving/restoring */
+ 
+-static void tv_save_filter(struct drm_device *dev, uint32_t base, uint32_t regs[4][7])
++static void tv_save_filter(struct drm_device *dev, uint32_t base,
++			   uint32_t regs[4][7])
+ {
+ 	int i, j;
+ 	uint32_t offsets[] = { base, base + 0x1c, base + 0x40, base + 0x5c };
+@@ -360,7 +366,8 @@ static void tv_save_filter(struct drm_device *dev, uint32_t base, uint32_t regs[
+ 	}
+ }
+ 
+-static void tv_load_filter(struct drm_device *dev, uint32_t base, uint32_t regs[4][7])
++static void tv_load_filter(struct drm_device *dev, uint32_t base,
++			   uint32_t regs[4][7])
+ {
+ 	int i, j;
+ 	uint32_t offsets[] = { base, base + 0x1c, base + 0x40, base + 0x5c };
+@@ -504,10 +511,10 @@ void nv17_tv_update_properties(struct drm_encoder *encoder)
+ 		break;
+ 	}
+ 
+-	regs->tv_enc[0x20] = interpolate(0, tv_norm->tv_enc_mode.tv_enc[0x20], 255,
+-					 tv_enc->saturation);
+-	regs->tv_enc[0x22] = interpolate(0, tv_norm->tv_enc_mode.tv_enc[0x22], 255,
+-					 tv_enc->saturation);
++	regs->tv_enc[0x20] = interpolate(0, tv_norm->tv_enc_mode.tv_enc[0x20],
++					 255, tv_enc->saturation);
++	regs->tv_enc[0x22] = interpolate(0, tv_norm->tv_enc_mode.tv_enc[0x22],
++					 255, tv_enc->saturation);
+ 	regs->tv_enc[0x25] = tv_enc->hue * 255 / 100;
+ 
+ 	nv_load_ptv(dev, regs, 204);
+@@ -541,7 +548,8 @@ void nv17_ctv_update_rescaler(struct drm_encoder *encoder)
+ 	int head = nouveau_crtc(encoder->crtc)->index;
+ 	struct nv04_crtc_reg *regs = &dev_priv->mode_reg.crtc_reg[head];
+ 	struct drm_display_mode *crtc_mode = &encoder->crtc->mode;
+-	struct drm_display_mode *output_mode = &get_tv_norm(encoder)->ctv_enc_mode.mode;
++	struct drm_display_mode *output_mode =
++		&get_tv_norm(encoder)->ctv_enc_mode.mode;
+ 	int overscan, hmargin, vmargin, hratio, vratio;
+ 
+ 	/* The rescaler doesn't do the right thing for interlaced modes. */
+@@ -553,13 +561,15 @@ void nv17_ctv_update_rescaler(struct drm_encoder *encoder)
+ 	hmargin = (output_mode->hdisplay - crtc_mode->hdisplay) / 2;
+ 	vmargin = (output_mode->vdisplay - crtc_mode->vdisplay) / 2;
+ 
+-	hmargin = interpolate(0, min(hmargin, output_mode->hdisplay/20), hmargin,
+-			      overscan);
+-	vmargin = interpolate(0, min(vmargin, output_mode->vdisplay/20), vmargin,
+-			      overscan);
++	hmargin = interpolate(0, min(hmargin, output_mode->hdisplay/20),
++			      hmargin, overscan);
++	vmargin = interpolate(0, min(vmargin, output_mode->vdisplay/20),
++			      vmargin, overscan);
+ 
+-	hratio = crtc_mode->hdisplay * 0x800 / (output_mode->hdisplay - 2*hmargin);
+-	vratio = crtc_mode->vdisplay * 0x800 / (output_mode->vdisplay - 2*vmargin) & ~3;
++	hratio = crtc_mode->hdisplay * 0x800 /
++		(output_mode->hdisplay - 2*hmargin);
++	vratio = crtc_mode->vdisplay * 0x800 /
++		(output_mode->vdisplay - 2*vmargin) & ~3;
+ 
+ 	regs->fp_horiz_regs[FP_VALID_START] = hmargin;
+ 	regs->fp_horiz_regs[FP_VALID_END] = output_mode->hdisplay - hmargin - 1;
 diff --git a/drivers/gpu/drm/nouveau/nv20_graph.c b/drivers/gpu/drm/nouveau/nv20_graph.c
-index d6fc0a8..cc876ef 100644
+index d6fc0a8..93f0d8a 100644
 --- a/drivers/gpu/drm/nouveau/nv20_graph.c
 +++ b/drivers/gpu/drm/nouveau/nv20_graph.c
 @@ -37,49 +37,49 @@ nv20_graph_context_init(struct drm_device *dev, struct nouveau_gpuobj *ctx)
@@ -9935,7 +12253,7 @@ index d6fc0a8..cc876ef 100644
  }
  
  int
-@@ -370,68 +370,54 @@ nv20_graph_create_context(struct nouveau_channel *chan)
+@@ -370,68 +370,52 @@ nv20_graph_create_context(struct nouveau_channel *chan)
  {
  	struct drm_device *dev = chan->dev;
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -9989,48 +12307,66 @@ index d6fc0a8..cc876ef 100644
 -	ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, ctx_size, 16,
 -					  NVOBJ_FLAG_ZERO_ALLOC,
 -					  &chan->ramin_grctx);
-+	ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, pgraph->grctx_size,
-+				     16, NVOBJ_FLAG_ZERO_ALLOC,
-+				     &chan->ramin_grctx);
++	ret = nouveau_gpuobj_new(dev, chan, pgraph->grctx_size, 16,
++				 NVOBJ_FLAG_ZERO_ALLOC, &chan->ramin_grctx);
  	if (ret)
  		return ret;
  
  	/* Initialise default context values */
 -	dev_priv->engine.instmem.prepare_access(dev, true);
- 	ctx_init(dev, chan->ramin_grctx->gpuobj);
+-	ctx_init(dev, chan->ramin_grctx->gpuobj);
++	ctx_init(dev, chan->ramin_grctx);
  
  	/* nv20: nv_wo32(dev, chan->ramin_grctx->gpuobj, 10, chan->id<<24); */
 -	nv_wo32(dev, chan->ramin_grctx->gpuobj, idoffs,
 -					(chan->id << 24) | 0x1); /* CTX_USER */
-+	nv_wo32(chan->ramin_grctx->gpuobj, idoffs,
++	nv_wo32(chan->ramin_grctx, idoffs,
 +		(chan->id << 24) | 0x1); /* CTX_USER */
  
 -	nv_wo32(dev, dev_priv->ctx_table->gpuobj, chan->id,
 -			chan->ramin_grctx->instance >> 4);
 -
 -	dev_priv->engine.instmem.finish_access(dev);
-+	nv_wo32(pgraph->ctx_table->gpuobj, chan->id * 4,
-+		chan->ramin_grctx->instance >> 4);
++	nv_wo32(pgraph->ctx_table, chan->id * 4, chan->ramin_grctx->pinst >> 4);
  	return 0;
  }
  
-@@ -440,13 +426,12 @@ nv20_graph_destroy_context(struct nouveau_channel *chan)
+@@ -440,13 +424,10 @@ nv20_graph_destroy_context(struct nouveau_channel *chan)
  {
  	struct drm_device *dev = chan->dev;
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
 +	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
  
- 	if (chan->ramin_grctx)
- 		nouveau_gpuobj_ref_del(dev, &chan->ramin_grctx);
- 
+-	if (chan->ramin_grctx)
+-		nouveau_gpuobj_ref_del(dev, &chan->ramin_grctx);
+-
 -	dev_priv->engine.instmem.prepare_access(dev, true);
 -	nv_wo32(dev, dev_priv->ctx_table->gpuobj, chan->id, 0);
 -	dev_priv->engine.instmem.finish_access(dev);
-+	nv_wo32(pgraph->ctx_table->gpuobj, chan->id * 4, 0);
++	nouveau_gpuobj_ref(NULL, &chan->ramin_grctx);
++	nv_wo32(pgraph->ctx_table, chan->id * 4, 0);
  }
  
  int
-@@ -538,29 +523,44 @@ nv20_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+@@ -457,7 +438,7 @@ nv20_graph_load_context(struct nouveau_channel *chan)
+ 
+ 	if (!chan->ramin_grctx)
+ 		return -EINVAL;
+-	inst = chan->ramin_grctx->instance >> 4;
++	inst = chan->ramin_grctx->pinst >> 4;
+ 
+ 	nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_POINTER, inst);
+ 	nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_XFER,
+@@ -480,7 +461,7 @@ nv20_graph_unload_context(struct drm_device *dev)
+ 	chan = pgraph->channel(dev);
+ 	if (!chan)
+ 		return 0;
+-	inst = chan->ramin_grctx->instance >> 4;
++	inst = chan->ramin_grctx->pinst >> 4;
+ 
+ 	nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_POINTER, inst);
+ 	nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_XFER,
+@@ -538,29 +519,44 @@ nv20_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
  int
  nv20_graph_init(struct drm_device *dev)
  {
@@ -10069,28 +12405,29 @@ index d6fc0a8..cc876ef 100644
 -		dev_priv->ctx_table_size = 32 * 4;
 -		ret = nouveau_gpuobj_new_ref(dev, NULL, NULL, 0,
 -						  dev_priv->ctx_table_size, 16,
-+		ret = nouveau_gpuobj_new_ref(dev, NULL, NULL, 0, 32 * 4, 16,
- 						  NVOBJ_FLAG_ZERO_ALLOC,
+-						  NVOBJ_FLAG_ZERO_ALLOC,
 -						  &dev_priv->ctx_table);
-+						  &pgraph->ctx_table);
++		ret = nouveau_gpuobj_new(dev, NULL, 32 * 4, 16,
++					 NVOBJ_FLAG_ZERO_ALLOC,
++					 &pgraph->ctx_table);
  		if (ret)
  			return ret;
  	}
  
  	nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_TABLE,
 -		 dev_priv->ctx_table->instance >> 4);
-+		     pgraph->ctx_table->instance >> 4);
++		     pgraph->ctx_table->pinst >> 4);
  
  	nv20_graph_rdi(dev);
  
-@@ -644,34 +644,52 @@ void
+@@ -644,34 +640,52 @@ void
  nv20_graph_takedown(struct drm_device *dev)
  {
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
 +	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
  
 -	nouveau_gpuobj_ref_del(dev, &dev_priv->ctx_table);
-+	nouveau_gpuobj_ref_del(dev, &pgraph->ctx_table);
++	nouveau_gpuobj_ref(NULL, &pgraph->ctx_table);
  }
  
  int
@@ -10129,17 +12466,18 @@ index d6fc0a8..cc876ef 100644
 -		dev_priv->ctx_table_size = 32 * 4;
 -		ret = nouveau_gpuobj_new_ref(dev, NULL, NULL, 0,
 -						  dev_priv->ctx_table_size, 16,
-+		ret = nouveau_gpuobj_new_ref(dev, NULL, NULL, 0, 32 * 4, 16,
- 						  NVOBJ_FLAG_ZERO_ALLOC,
+-						  NVOBJ_FLAG_ZERO_ALLOC,
 -						  &dev_priv->ctx_table);
-+						  &pgraph->ctx_table);
++		ret = nouveau_gpuobj_new(dev, NULL, 32 * 4, 16,
++					 NVOBJ_FLAG_ZERO_ALLOC,
++					 &pgraph->ctx_table);
  		if (ret)
  			return ret;
  	}
  
  	nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_TABLE,
 -			dev_priv->ctx_table->instance >> 4);
-+		     pgraph->ctx_table->instance >> 4);
++		     pgraph->ctx_table->pinst >> 4);
  
  	nv_wr32(dev, NV03_PGRAPH_INTR   , 0xFFFFFFFF);
  	nv_wr32(dev, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF);
@@ -10245,25 +12583,60 @@ index 0000000..4a3f2f0
 +{
 +}
 diff --git a/drivers/gpu/drm/nouveau/nv40_fifo.c b/drivers/gpu/drm/nouveau/nv40_fifo.c
-index 500ccfd..2b67f18 100644
+index 500ccfd..d337b8b 100644
 --- a/drivers/gpu/drm/nouveau/nv40_fifo.c
 +++ b/drivers/gpu/drm/nouveau/nv40_fifo.c
-@@ -48,7 +48,6 @@ nv40_fifo_create_context(struct nouveau_channel *chan)
+@@ -27,8 +27,9 @@
+ #include "drmP.h"
+ #include "nouveau_drv.h"
+ #include "nouveau_drm.h"
++#include "nouveau_ramht.h"
+ 
+-#define NV40_RAMFC(c) (dev_priv->ramfc_offset + ((c) * NV40_RAMFC__SIZE))
++#define NV40_RAMFC(c) (dev_priv->ramfc->pinst + ((c) * NV40_RAMFC__SIZE))
+ #define NV40_RAMFC__SIZE 128
+ 
+ int
+@@ -42,16 +43,15 @@ nv40_fifo_create_context(struct nouveau_channel *chan)
+ 
+ 	ret = nouveau_gpuobj_new_fake(dev, NV40_RAMFC(chan->id), ~0,
+ 				      NV40_RAMFC__SIZE, NVOBJ_FLAG_ZERO_ALLOC |
+-				      NVOBJ_FLAG_ZERO_FREE, NULL, &chan->ramfc);
++				      NVOBJ_FLAG_ZERO_FREE, &chan->ramfc);
+ 	if (ret)
+ 		return ret;
  
  	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
  
 -	dev_priv->engine.instmem.prepare_access(dev, true);
  	nv_wi32(dev, fc +  0, chan->pushbuf_base);
  	nv_wi32(dev, fc +  4, chan->pushbuf_base);
- 	nv_wi32(dev, fc + 12, chan->pushbuf->instance >> 4);
-@@ -61,7 +60,6 @@ nv40_fifo_create_context(struct nouveau_channel *chan)
+-	nv_wi32(dev, fc + 12, chan->pushbuf->instance >> 4);
++	nv_wi32(dev, fc + 12, chan->pushbuf->pinst >> 4);
+ 	nv_wi32(dev, fc + 24, NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
+ 			      NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
+ 			      NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8 |
+@@ -59,9 +59,8 @@ nv40_fifo_create_context(struct nouveau_channel *chan)
+ 			      NV_PFIFO_CACHE1_BIG_ENDIAN |
+ #endif
  			      0x30000000 /* no idea.. */);
- 	nv_wi32(dev, fc + 56, chan->ramin_grctx->instance >> 4);
+-	nv_wi32(dev, fc + 56, chan->ramin_grctx->instance >> 4);
++	nv_wi32(dev, fc + 56, chan->ramin_grctx->pinst >> 4);
  	nv_wi32(dev, fc + 60, 0x0001FFFF);
 -	dev_priv->engine.instmem.finish_access(dev);
  
  	/* enable the fifo dma operation */
  	nv_wr32(dev, NV04_PFIFO_MODE,
+@@ -79,8 +78,7 @@ nv40_fifo_destroy_context(struct nouveau_channel *chan)
+ 	nv_wr32(dev, NV04_PFIFO_MODE,
+ 		nv_rd32(dev, NV04_PFIFO_MODE) & ~(1 << chan->id));
+ 
+-	if (chan->ramfc)
+-		nouveau_gpuobj_ref_del(dev, &chan->ramfc);
++	nouveau_gpuobj_ref(NULL, &chan->ramfc);
+ }
+ 
+ static void
 @@ -89,8 +87,6 @@ nv40_fifo_do_load_context(struct drm_device *dev, int chid)
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
  	uint32_t fc = NV40_RAMFC(chid), tmp, tmp2;
@@ -10298,19 +12671,54 @@ index 500ccfd..2b67f18 100644
  
  	nv40_fifo_do_load_context(dev, pfifo->channels - 1);
  	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1,
+@@ -249,9 +241,9 @@ nv40_fifo_init_ramxx(struct drm_device *dev)
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+ 
+ 	nv_wr32(dev, NV03_PFIFO_RAMHT, (0x03 << 24) /* search 128 */ |
+-				       ((dev_priv->ramht_bits - 9) << 16) |
+-				       (dev_priv->ramht_offset >> 8));
+-	nv_wr32(dev, NV03_PFIFO_RAMRO, dev_priv->ramro_offset>>8);
++				       ((dev_priv->ramht->bits - 9) << 16) |
++				       (dev_priv->ramht->gpuobj->pinst >> 8));
++	nv_wr32(dev, NV03_PFIFO_RAMRO, dev_priv->ramro->pinst >> 8);
+ 
+ 	switch (dev_priv->chipset) {
+ 	case 0x47:
+@@ -279,7 +271,7 @@ nv40_fifo_init_ramxx(struct drm_device *dev)
+ 		nv_wr32(dev, 0x2230, 0);
+ 		nv_wr32(dev, NV40_PFIFO_RAMFC,
+ 			((dev_priv->vram_size - 512 * 1024 +
+-			  dev_priv->ramfc_offset) >> 16) | (3 << 16));
++			  dev_priv->ramfc->pinst) >> 16) | (3 << 16));
+ 		break;
+ 	}
+ }
 diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c b/drivers/gpu/drm/nouveau/nv40_graph.c
-index 704a25d..f7b59ad 100644
+index 704a25d..2424289 100644
 --- a/drivers/gpu/drm/nouveau/nv40_graph.c
 +++ b/drivers/gpu/drm/nouveau/nv40_graph.c
-@@ -58,6 +58,7 @@ nv40_graph_create_context(struct nouveau_channel *chan)
+@@ -45,7 +45,7 @@ nv40_graph_channel(struct drm_device *dev)
+ 		struct nouveau_channel *chan = dev_priv->fifos[i];
+ 
+ 		if (chan && chan->ramin_grctx &&
+-		    chan->ramin_grctx->instance == inst)
++		    chan->ramin_grctx->pinst == inst)
+ 			return chan;
+ 	}
+ 
+@@ -58,36 +58,28 @@ nv40_graph_create_context(struct nouveau_channel *chan)
  	struct drm_device *dev = chan->dev;
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
  	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
 +	struct nouveau_grctx ctx = {};
  	int ret;
  
- 	ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, pgraph->grctx_size,
-@@ -67,20 +68,13 @@ nv40_graph_create_context(struct nouveau_channel *chan)
+-	ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, pgraph->grctx_size,
+-				     16, NVOBJ_FLAG_ZERO_ALLOC,
+-				     &chan->ramin_grctx);
++	ret = nouveau_gpuobj_new(dev, chan, pgraph->grctx_size, 16,
++				 NVOBJ_FLAG_ZERO_ALLOC, &chan->ramin_grctx);
+ 	if (ret)
  		return ret;
  
  	/* Initialise default context values */
@@ -10330,15 +12738,31 @@ index 704a25d..f7b59ad 100644
 -	dev_priv->engine.instmem.finish_access(dev);
 +	ctx.dev = chan->dev;
 +	ctx.mode = NOUVEAU_GRCTX_VALS;
-+	ctx.data = chan->ramin_grctx->gpuobj;
++	ctx.data = chan->ramin_grctx;
 +	nv40_grctx_init(&ctx);
 +
-+	nv_wo32(chan->ramin_grctx->gpuobj, 0,
-+		chan->ramin_grctx->gpuobj->im_pramin->start);
++	nv_wo32(chan->ramin_grctx, 0, chan->ramin_grctx->pinst);
  	return 0;
  }
  
-@@ -238,7 +232,8 @@ nv40_graph_init(struct drm_device *dev)
+ void
+ nv40_graph_destroy_context(struct nouveau_channel *chan)
+ {
+-	nouveau_gpuobj_ref_del(chan->dev, &chan->ramin_grctx);
++	nouveau_gpuobj_ref(NULL, &chan->ramin_grctx);
+ }
+ 
+ static int
+@@ -141,7 +133,7 @@ nv40_graph_load_context(struct nouveau_channel *chan)
+ 
+ 	if (!chan->ramin_grctx)
+ 		return -EINVAL;
+-	inst = chan->ramin_grctx->instance >> 4;
++	inst = chan->ramin_grctx->pinst >> 4;
+ 
+ 	ret = nv40_graph_transfer_context(dev, inst, 0);
+ 	if (ret)
+@@ -238,7 +230,8 @@ nv40_graph_init(struct drm_device *dev)
  	struct drm_nouveau_private *dev_priv =
  		(struct drm_nouveau_private *)dev->dev_private;
  	struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
@@ -10348,7 +12772,7 @@ index 704a25d..f7b59ad 100644
  	int i, j;
  
  	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) &
-@@ -246,32 +241,22 @@ nv40_graph_init(struct drm_device *dev)
+@@ -246,32 +239,22 @@ nv40_graph_init(struct drm_device *dev)
  	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) |
  			 NV_PMC_ENABLE_PGRAPH);
  
@@ -10394,7 +12818,7 @@ index 704a25d..f7b59ad 100644
  
  	/* No context present currently */
  	nv_wr32(dev, NV40_PGRAPH_CTXCTL_CUR, 0x00000000);
-@@ -407,7 +392,6 @@ nv40_graph_init(struct drm_device *dev)
+@@ -407,7 +390,6 @@ nv40_graph_init(struct drm_device *dev)
  
  void nv40_graph_takedown(struct drm_device *dev)
  {
@@ -10568,8 +12992,21 @@ index b4e4a3b..2423c92 100644
  }
  
  static bool
+diff --git a/drivers/gpu/drm/nouveau/nv50_cursor.c b/drivers/gpu/drm/nouveau/nv50_cursor.c
+index 03ad7ab..1b9ce30 100644
+--- a/drivers/gpu/drm/nouveau/nv50_cursor.c
++++ b/drivers/gpu/drm/nouveau/nv50_cursor.c
+@@ -147,7 +147,7 @@ nv50_cursor_fini(struct nouveau_crtc *nv_crtc)
+ 	NV_DEBUG_KMS(dev, "\n");
+ 
+ 	nv_wr32(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(idx), 0);
+-	if (!nv_wait(NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(idx),
++	if (!nv_wait(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(idx),
+ 		     NV50_PDISPLAY_CURSOR_CURSOR_CTRL2_STATUS, 0)) {
+ 		NV_ERROR(dev, "timeout: CURSOR_CTRL2_STATUS == 0\n");
+ 		NV_ERROR(dev, "CURSOR_CTRL2 = 0x%08x\n",
 diff --git a/drivers/gpu/drm/nouveau/nv50_dac.c b/drivers/gpu/drm/nouveau/nv50_dac.c
-index 1fd9537..1bc0859 100644
+index 1fd9537..875414b 100644
 --- a/drivers/gpu/drm/nouveau/nv50_dac.c
 +++ b/drivers/gpu/drm/nouveau/nv50_dac.c
 @@ -37,22 +37,31 @@
@@ -10608,6 +13045,24 @@ index 1fd9537..1bc0859 100644
  }
  
  static enum drm_connector_status
+@@ -70,7 +79,7 @@ nv50_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector)
+ 
+ 	nv_wr32(dev, NV50_PDISPLAY_DAC_DPMS_CTRL(or),
+ 		0x00150000 | NV50_PDISPLAY_DAC_DPMS_CTRL_PENDING);
+-	if (!nv_wait(NV50_PDISPLAY_DAC_DPMS_CTRL(or),
++	if (!nv_wait(dev, NV50_PDISPLAY_DAC_DPMS_CTRL(or),
+ 		     NV50_PDISPLAY_DAC_DPMS_CTRL_PENDING, 0)) {
+ 		NV_ERROR(dev, "timeout: DAC_DPMS_CTRL_PENDING(%d) == 0\n", or);
+ 		NV_ERROR(dev, "DAC_DPMS_CTRL(%d) = 0x%08x\n", or,
+@@ -121,7 +130,7 @@ nv50_dac_dpms(struct drm_encoder *encoder, int mode)
+ 	NV_DEBUG_KMS(dev, "or %d mode %d\n", or, mode);
+ 
+ 	/* wait for it to be done */
+-	if (!nv_wait(NV50_PDISPLAY_DAC_DPMS_CTRL(or),
++	if (!nv_wait(dev, NV50_PDISPLAY_DAC_DPMS_CTRL(or),
+ 		     NV50_PDISPLAY_DAC_DPMS_CTRL_PENDING, 0)) {
+ 		NV_ERROR(dev, "timeout: DAC_DPMS_CTRL_PENDING(%d) == 0\n", or);
+ 		NV_ERROR(dev, "DAC_DPMS_CTRL(%d) = 0x%08x\n", or,
 @@ -213,7 +222,8 @@ nv50_dac_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
  	uint32_t mode_ctl = 0, mode_ctl2 = 0;
  	int ret;
@@ -10679,10 +13134,33 @@ index 1fd9537..1bc0859 100644
  }
  
 diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
-index 580a5d1..435d2b7 100644
+index 580a5d1..11d366a 100644
 --- a/drivers/gpu/drm/nouveau/nv50_display.c
 +++ b/drivers/gpu/drm/nouveau/nv50_display.c
-@@ -42,6 +42,7 @@ nv50_evo_channel_del(struct nouveau_channel **pchan)
+@@ -30,8 +30,22 @@
+ #include "nouveau_connector.h"
+ #include "nouveau_fb.h"
+ #include "nouveau_fbcon.h"
++#include "nouveau_ramht.h"
+ #include "drm_crtc_helper.h"
+ 
++static inline int
++nv50_sor_nr(struct drm_device *dev)
++{
++	struct drm_nouveau_private *dev_priv = dev->dev_private;
++
++	if (dev_priv->chipset  < 0x90 ||
++	    dev_priv->chipset == 0x92 ||
++	    dev_priv->chipset == 0xa0)
++		return 2;
++
++	return 4;
++}
++
+ static void
+ nv50_evo_channel_del(struct nouveau_channel **pchan)
+ {
+@@ -42,6 +56,7 @@ nv50_evo_channel_del(struct nouveau_channel **pchan)
  	*pchan = NULL;
  
  	nouveau_gpuobj_channel_takedown(chan);
@@ -10690,18 +13168,11 @@ index 580a5d1..435d2b7 100644
  	nouveau_bo_ref(NULL, &chan->pushbuf_bo);
  
  	if (chan->user)
-@@ -71,14 +72,16 @@ nv50_evo_dmaobj_new(struct nouveau_channel *evo, uint32_t class, uint32_t name,
+@@ -65,21 +80,23 @@ nv50_evo_dmaobj_new(struct nouveau_channel *evo, uint32_t class, uint32_t name,
  		return ret;
- 	}
+ 	obj->engine = NVOBJ_ENGINE_DISPLAY;
  
--	dev_priv->engine.instmem.prepare_access(dev, true);
--	nv_wo32(dev, obj, 0, (tile_flags << 22) | (magic_flags << 16) | class);
--	nv_wo32(dev, obj, 1, limit);
--	nv_wo32(dev, obj, 2, offset);
--	nv_wo32(dev, obj, 3, 0x00000000);
--	nv_wo32(dev, obj, 4, 0x00000000);
--	nv_wo32(dev, obj, 5, 0x00010000);
--	dev_priv->engine.instmem.finish_access(dev);
+-	ret = nouveau_gpuobj_ref_add(dev, evo, name, obj, NULL);
 +	nv_wo32(obj,  0, (tile_flags << 22) | (magic_flags << 16) | class);
 +	nv_wo32(obj,  4, limit);
 +	nv_wo32(obj,  8, offset);
@@ -10712,21 +13183,79 @@ index 580a5d1..435d2b7 100644
 +	else
 +		nv_wo32(obj, 20, 0x00020000);
 +	dev_priv->engine.instmem.flush(dev);
++
++	ret = nouveau_ramht_insert(evo, name, obj);
++	nouveau_gpuobj_ref(NULL, &obj);
+ 	if (ret) {
+-		nouveau_gpuobj_del(dev, &obj);
+ 		return ret;
+ 	}
  
+-	dev_priv->engine.instmem.prepare_access(dev, true);
+-	nv_wo32(dev, obj, 0, (tile_flags << 22) | (magic_flags << 16) | class);
+-	nv_wo32(dev, obj, 1, limit);
+-	nv_wo32(dev, obj, 2, offset);
+-	nv_wo32(dev, obj, 3, 0x00000000);
+-	nv_wo32(dev, obj, 4, 0x00000000);
+-	nv_wo32(dev, obj, 5, 0x00010000);
+-	dev_priv->engine.instmem.finish_access(dev);
+-
  	return 0;
  }
-@@ -110,8 +113,8 @@ nv50_evo_channel_new(struct drm_device *dev, struct nouveau_channel **pchan)
+ 
+@@ -87,6 +104,7 @@ static int
+ nv50_evo_channel_new(struct drm_device *dev, struct nouveau_channel **pchan)
+ {
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
++	struct nouveau_gpuobj *ramht = NULL;
+ 	struct nouveau_channel *chan;
+ 	int ret;
+ 
+@@ -100,32 +118,35 @@ nv50_evo_channel_new(struct drm_device *dev, struct nouveau_channel **pchan)
+ 	chan->user_get = 4;
+ 	chan->user_put = 0;
+ 
+-	INIT_LIST_HEAD(&chan->ramht_refs);
+-
+-	ret = nouveau_gpuobj_new_ref(dev, NULL, NULL, 0, 32768, 0x1000,
+-				     NVOBJ_FLAG_ZERO_ALLOC, &chan->ramin);
++	ret = nouveau_gpuobj_new(dev, NULL, 32768, 0x1000,
++				 NVOBJ_FLAG_ZERO_ALLOC, &chan->ramin);
+ 	if (ret) {
+ 		NV_ERROR(dev, "Error allocating EVO channel memory: %d\n", ret);
+ 		nv50_evo_channel_del(pchan);
  		return ret;
  	}
  
 -	ret = nouveau_mem_init_heap(&chan->ramin_heap, chan->ramin->gpuobj->
 -				    im_pramin->start, 32768);
-+	ret = drm_mm_init(&chan->ramin_heap,
-+			  chan->ramin->gpuobj->im_pramin->start, 32768);
++	ret = drm_mm_init(&chan->ramin_heap, 0, 32768);
  	if (ret) {
  		NV_ERROR(dev, "Error initialising EVO PRAMIN heap: %d\n", ret);
  		nv50_evo_channel_del(pchan);
-@@ -179,13 +182,25 @@ nv50_evo_channel_new(struct drm_device *dev, struct nouveau_channel **pchan)
+ 		return ret;
+ 	}
+ 
+-	ret = nouveau_gpuobj_new_ref(dev, chan, chan, 0, 4096, 16,
+-				     0, &chan->ramht);
++	ret = nouveau_gpuobj_new(dev, chan, 4096, 16, 0, &ramht);
+ 	if (ret) {
+ 		NV_ERROR(dev, "Unable to allocate EVO RAMHT: %d\n", ret);
+ 		nv50_evo_channel_del(pchan);
+ 		return ret;
+ 	}
+ 
++	ret = nouveau_ramht_new(dev, ramht, &chan->ramht);
++	nouveau_gpuobj_ref(NULL, &ramht);
++	if (ret) {
++		nv50_evo_channel_del(pchan);
++		return ret;
++	}
++
+ 	if (dev_priv->chipset != 0x50) {
+ 		ret = nv50_evo_dmaobj_new(chan, 0x3d, NvEvoFB16, 0x70, 0x19,
+ 					  0, 0xffffffff);
+@@ -179,13 +200,25 @@ nv50_evo_channel_new(struct drm_device *dev, struct nouveau_channel **pchan)
  }
  
  int
@@ -10753,7 +13282,85 @@ index 580a5d1..435d2b7 100644
  	uint64_t start;
  	int ret, i;
  
-@@ -366,26 +381,13 @@ nv50_display_init(struct drm_device *dev)
+@@ -213,11 +246,11 @@ nv50_display_init(struct drm_device *dev)
+ 		nv_wr32(dev, 0x006101d0 + (i * 0x04), val);
+ 	}
+ 	/* SOR */
+-	for (i = 0; i < 4; i++) {
++	for (i = 0; i < nv50_sor_nr(dev); i++) {
+ 		val = nv_rd32(dev, 0x0061c000 + (i * 0x800));
+ 		nv_wr32(dev, 0x006101e0 + (i * 0x04), val);
+ 	}
+-	/* Something not yet in use, tv-out maybe. */
++	/* EXT */
+ 	for (i = 0; i < 3; i++) {
+ 		val = nv_rd32(dev, 0x0061e000 + (i * 0x800));
+ 		nv_wr32(dev, 0x006101f0 + (i * 0x04), val);
+@@ -246,7 +279,7 @@ nv50_display_init(struct drm_device *dev)
+ 	if (nv_rd32(dev, NV50_PDISPLAY_INTR_1) & 0x100) {
+ 		nv_wr32(dev, NV50_PDISPLAY_INTR_1, 0x100);
+ 		nv_wr32(dev, 0x006194e8, nv_rd32(dev, 0x006194e8) & ~1);
+-		if (!nv_wait(0x006194e8, 2, 0)) {
++		if (!nv_wait(dev, 0x006194e8, 2, 0)) {
+ 			NV_ERROR(dev, "timeout: (0x6194e8 & 2) != 0\n");
+ 			NV_ERROR(dev, "0x6194e8 = 0x%08x\n",
+ 						nv_rd32(dev, 0x6194e8));
+@@ -277,7 +310,8 @@ nv50_display_init(struct drm_device *dev)
+ 
+ 	nv_wr32(dev, NV50_PDISPLAY_CTRL_STATE, NV50_PDISPLAY_CTRL_STATE_ENABLE);
+ 	nv_wr32(dev, NV50_PDISPLAY_CHANNEL_STAT(0), 0x1000b03);
+-	if (!nv_wait(NV50_PDISPLAY_CHANNEL_STAT(0), 0x40000000, 0x40000000)) {
++	if (!nv_wait(dev, NV50_PDISPLAY_CHANNEL_STAT(0),
++		     0x40000000, 0x40000000)) {
+ 		NV_ERROR(dev, "timeout: (0x610200 & 0x40000000) == 0x40000000\n");
+ 		NV_ERROR(dev, "0x610200 = 0x%08x\n",
+ 			  nv_rd32(dev, NV50_PDISPLAY_CHANNEL_STAT(0)));
+@@ -286,7 +320,7 @@ nv50_display_init(struct drm_device *dev)
+ 
+ 	for (i = 0; i < 2; i++) {
+ 		nv_wr32(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i), 0x2000);
+-		if (!nv_wait(NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i),
++		if (!nv_wait(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i),
+ 			     NV50_PDISPLAY_CURSOR_CURSOR_CTRL2_STATUS, 0)) {
+ 			NV_ERROR(dev, "timeout: CURSOR_CTRL2_STATUS == 0\n");
+ 			NV_ERROR(dev, "CURSOR_CTRL2 = 0x%08x\n",
+@@ -296,7 +330,7 @@ nv50_display_init(struct drm_device *dev)
+ 
+ 		nv_wr32(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i),
+ 			NV50_PDISPLAY_CURSOR_CURSOR_CTRL2_ON);
+-		if (!nv_wait(NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i),
++		if (!nv_wait(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i),
+ 			     NV50_PDISPLAY_CURSOR_CURSOR_CTRL2_STATUS,
+ 			     NV50_PDISPLAY_CURSOR_CURSOR_CTRL2_STATUS_ACTIVE)) {
+ 			NV_ERROR(dev, "timeout: "
+@@ -307,7 +341,7 @@ nv50_display_init(struct drm_device *dev)
+ 		}
+ 	}
+ 
+-	nv_wr32(dev, NV50_PDISPLAY_OBJECTS, (evo->ramin->instance >> 8) | 9);
++	nv_wr32(dev, NV50_PDISPLAY_OBJECTS, (evo->ramin->vinst >> 8) | 9);
+ 
+ 	/* initialise fifo */
+ 	nv_wr32(dev, NV50_PDISPLAY_CHANNEL_DMA_CB(0),
+@@ -316,7 +350,7 @@ nv50_display_init(struct drm_device *dev)
+ 		NV50_PDISPLAY_CHANNEL_DMA_CB_VALID);
+ 	nv_wr32(dev, NV50_PDISPLAY_CHANNEL_UNK2(0), 0x00010000);
+ 	nv_wr32(dev, NV50_PDISPLAY_CHANNEL_UNK3(0), 0x00000002);
+-	if (!nv_wait(0x610200, 0x80000000, 0x00000000)) {
++	if (!nv_wait(dev, 0x610200, 0x80000000, 0x00000000)) {
+ 		NV_ERROR(dev, "timeout: (0x610200 & 0x80000000) == 0\n");
+ 		NV_ERROR(dev, "0x610200 = 0x%08x\n", nv_rd32(dev, 0x610200));
+ 		return -EBUSY;
+@@ -356,7 +390,7 @@ nv50_display_init(struct drm_device *dev)
+ 	BEGIN_RING(evo, 0, NV50_EVO_CRTC(0, UNK082C), 1);
+ 	OUT_RING(evo, 0);
+ 	FIRE_RING(evo);
+-	if (!nv_wait(0x640004, 0xffffffff, evo->dma.put << 2))
++	if (!nv_wait(dev, 0x640004, 0xffffffff, evo->dma.put << 2))
+ 		NV_ERROR(dev, "evo pushbuf stalled\n");
+ 
+ 	/* enable clock change interrupts. */
+@@ -366,26 +400,13 @@ nv50_display_init(struct drm_device *dev)
  					     NV50_PDISPLAY_INTR_EN_CLK_UNK40));
  
  	/* enable hotplug interrupts */
@@ -10781,7 +13388,33 @@ index 580a5d1..435d2b7 100644
  	}
  
  	return 0;
-@@ -465,6 +467,7 @@ int nv50_display_create(struct drm_device *dev)
+@@ -423,7 +444,7 @@ static int nv50_display_disable(struct drm_device *dev)
+ 			continue;
+ 
+ 		nv_wr32(dev, NV50_PDISPLAY_INTR_1, mask);
+-		if (!nv_wait(NV50_PDISPLAY_INTR_1, mask, mask)) {
++		if (!nv_wait(dev, NV50_PDISPLAY_INTR_1, mask, mask)) {
+ 			NV_ERROR(dev, "timeout: (0x610024 & 0x%08x) == "
+ 				      "0x%08x\n", mask, mask);
+ 			NV_ERROR(dev, "0x610024 = 0x%08x\n",
+@@ -433,14 +454,14 @@ static int nv50_display_disable(struct drm_device *dev)
+ 
+ 	nv_wr32(dev, NV50_PDISPLAY_CHANNEL_STAT(0), 0);
+ 	nv_wr32(dev, NV50_PDISPLAY_CTRL_STATE, 0);
+-	if (!nv_wait(NV50_PDISPLAY_CHANNEL_STAT(0), 0x1e0000, 0)) {
++	if (!nv_wait(dev, NV50_PDISPLAY_CHANNEL_STAT(0), 0x1e0000, 0)) {
+ 		NV_ERROR(dev, "timeout: (0x610200 & 0x1e0000) == 0\n");
+ 		NV_ERROR(dev, "0x610200 = 0x%08x\n",
+ 			  nv_rd32(dev, NV50_PDISPLAY_CHANNEL_STAT(0)));
+ 	}
+ 
+ 	for (i = 0; i < 3; i++) {
+-		if (!nv_wait(NV50_PDISPLAY_SOR_DPMS_STATE(i),
++		if (!nv_wait(dev, NV50_PDISPLAY_SOR_DPMS_STATE(i),
+ 			     NV50_PDISPLAY_SOR_DPMS_STATE_WAIT, 0)) {
+ 			NV_ERROR(dev, "timeout: SOR_DPMS_STATE_WAIT(%d) == 0\n", i);
+ 			NV_ERROR(dev, "SOR_DPMS_STATE(%d) = 0x%08x\n", i,
+@@ -465,6 +486,7 @@ int nv50_display_create(struct drm_device *dev)
  {
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
  	struct dcb_table *dcb = &dev_priv->vbios.dcb;
@@ -10789,7 +13422,7 @@ index 580a5d1..435d2b7 100644
  	int ret, i;
  
  	NV_DEBUG_KMS(dev, "\n");
-@@ -507,14 +510,18 @@ int nv50_display_create(struct drm_device *dev)
+@@ -507,14 +529,18 @@ int nv50_display_create(struct drm_device *dev)
  			continue;
  		}
  
@@ -10810,7 +13443,7 @@ index 580a5d1..435d2b7 100644
  			break;
  		default:
  			NV_WARN(dev, "DCB encoder %d unknown\n", entry->type);
-@@ -522,11 +529,13 @@ int nv50_display_create(struct drm_device *dev)
+@@ -522,11 +548,13 @@ int nv50_display_create(struct drm_device *dev)
  		}
  	}
  
@@ -10829,7 +13462,7 @@ index 580a5d1..435d2b7 100644
  	}
  
  	ret = nv50_display_init(dev);
-@@ -538,7 +547,8 @@ int nv50_display_create(struct drm_device *dev)
+@@ -538,7 +566,8 @@ int nv50_display_create(struct drm_device *dev)
  	return 0;
  }
  
@@ -10839,7 +13472,7 @@ index 580a5d1..435d2b7 100644
  {
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
  
-@@ -548,135 +558,30 @@ int nv50_display_destroy(struct drm_device *dev)
+@@ -548,135 +577,30 @@ int nv50_display_destroy(struct drm_device *dev)
  
  	nv50_display_disable(dev);
  	nv50_evo_channel_del(&dev_priv->evo);
@@ -10982,7 +13615,7 @@ index 580a5d1..435d2b7 100644
  	case OUTPUT_LVDS:
  		script = (mc >> 8) & 0xf;
  		if (bios->fp_no_ddc) {
-@@ -767,17 +672,88 @@ nv50_display_vblank_handler(struct drm_device *dev, uint32_t intr)
+@@ -767,17 +691,88 @@ nv50_display_vblank_handler(struct drm_device *dev, uint32_t intr)
  static void
  nv50_display_unk10_handler(struct drm_device *dev)
  {
@@ -11012,7 +13645,7 @@ index 580a5d1..435d2b7 100644
 +	crtc = ffs((unk30 & 0x00000180) >> 7) - 1;
 +	if (crtc < 0)
 +		goto ack;
- 
++
 +	/* Find which encoder was connected to the CRTC */
 +	for (i = 0; type == OUTPUT_ANY && i < 3; i++) {
 +		mc = nv_rd32(dev, NV50_PDISPLAY_DAC_MODE_CTRL_C(i));
@@ -11031,7 +13664,7 @@ index 580a5d1..435d2b7 100644
 +		or = i;
 +	}
 +
-+	for (i = 0; type == OUTPUT_ANY && i < 4; i++) {
++	for (i = 0; type == OUTPUT_ANY && i < nv50_sor_nr(dev); i++) {
 +		if (dev_priv->chipset  < 0x90 ||
 +		    dev_priv->chipset == 0x92 ||
 +		    dev_priv->chipset == 0xa0)
@@ -11072,12 +13705,12 @@ index 580a5d1..435d2b7 100644
 +			goto ack;
 +		}
 +	}
-+
+ 
 +	NV_ERROR(dev, "no dcb for %d %d 0x%08x\n", or, type, mc);
  ack:
  	nv_wr32(dev, NV50_PDISPLAY_INTR_1, NV50_PDISPLAY_INTR_1_CLK_UNK10);
  	nv_wr32(dev, 0x610030, 0x80000000);
-@@ -817,33 +793,103 @@ nv50_display_unk20_dp_hack(struct drm_device *dev, struct dcb_entry *dcb)
+@@ -817,33 +812,103 @@ nv50_display_unk20_dp_hack(struct drm_device *dev, struct dcb_entry *dcb)
  static void
  nv50_display_unk20_handler(struct drm_device *dev)
  {
@@ -11119,17 +13752,14 @@ index 580a5d1..435d2b7 100644
 -	pclk = nv_rd32(dev, NV50_PDISPLAY_CRTC_P(head, CLOCK)) & 0x3fffff;
 -	script = nv50_display_script_select(dev, dcbent, pclk);
 +	pclk  = nv_rd32(dev, NV50_PDISPLAY_CRTC_P(crtc, CLOCK)) & 0x003fffff;
- 
--	NV_DEBUG_KMS(dev, "head %d pxclk: %dKHz\n", head, pclk);
++
 +	/* Find which encoder is connected to the CRTC */
 +	for (i = 0; type == OUTPUT_ANY && i < 3; i++) {
 +		mc = nv_rd32(dev, NV50_PDISPLAY_DAC_MODE_CTRL_P(i));
 +		NV_DEBUG_KMS(dev, "DAC-%d mc: 0x%08x\n", i, mc);
 +		if (!(mc & (1 << crtc)))
 +			continue;
- 
--	if (dcbent->type != OUTPUT_DP)
--		nouveau_bios_run_display_table(dev, dcbent, 0, -2);
++
 +		switch ((mc & 0x00000f00) >> 8) {
 +		case 0: type = OUTPUT_ANALOG; break;
 +		case 1: type = OUTPUT_TV; break;
@@ -11137,21 +13767,18 @@ index 580a5d1..435d2b7 100644
 +			NV_ERROR(dev, "invalid mc, DAC-%d: 0x%08x\n", i, mc);
 +			goto ack;
 +		}
- 
--	nv50_crtc_set_clock(dev, head, pclk);
++
 +		or = i;
 +	}
- 
--	nouveau_bios_run_display_table(dev, dcbent, script, pclk);
-+	for (i = 0; type == OUTPUT_ANY && i < 4; i++) {
++
++	for (i = 0; type == OUTPUT_ANY && i < nv50_sor_nr(dev); i++) {
 +		if (dev_priv->chipset  < 0x90 ||
 +		    dev_priv->chipset == 0x92 ||
 +		    dev_priv->chipset == 0xa0)
 +			mc = nv_rd32(dev, NV50_PDISPLAY_SOR_MODE_CTRL_P(i));
 +		else
-+			mc = nv_rd32(dev, NV90_PDISPLAY_SOR_MODE_CTRL_P(i));
- 
--	nv50_display_unk20_dp_hack(dev, dcbent);
++			mc = nv_rd32(dev, NV90_PDISPLAY_SOR_MODE_CTRL_P(i));
++
 +		NV_DEBUG_KMS(dev, "SOR-%d mc: 0x%08x\n", i, mc);
 +		if (!(mc & (1 << crtc)))
 +			continue;
@@ -11167,39 +13794,45 @@ index 580a5d1..435d2b7 100644
 +			NV_ERROR(dev, "invalid mc, SOR-%d: 0x%08x\n", i, mc);
 +			goto ack;
 +		}
-+
+ 
+-	NV_DEBUG_KMS(dev, "head %d pxclk: %dKHz\n", head, pclk);
 +		or = i;
 +	}
-+
+ 
+-	if (dcbent->type != OUTPUT_DP)
+-		nouveau_bios_run_display_table(dev, dcbent, 0, -2);
 +	if (type == OUTPUT_ANY)
 +		goto ack;
-+
+ 
+-	nv50_crtc_set_clock(dev, head, pclk);
 +	/* Enable the encoder */
 +	for (i = 0; i < dev_priv->vbios.dcb.entries; i++) {
 +		dcb = &dev_priv->vbios.dcb.entry[i];
 +		if (dcb->type == type && (dcb->or & (1 << or)))
 +			break;
 +	}
-+
+ 
+-	nouveau_bios_run_display_table(dev, dcbent, script, pclk);
 +	if (i == dev_priv->vbios.dcb.entries) {
 +		NV_ERROR(dev, "no dcb for %d %d 0x%08x\n", or, type, mc);
 +		goto ack;
 +	}
  
+-	nv50_display_unk20_dp_hack(dev, dcbent);
++	script = nv50_display_script_select(dev, dcb, mc, pclk);
++	nouveau_bios_run_display_table(dev, dcb, script, pclk);
+ 
 -	tmp = nv_rd32(dev, NV50_PDISPLAY_CRTC_CLK_CTRL2(head));
 -	tmp &= ~0x000000f;
 -	nv_wr32(dev, NV50_PDISPLAY_CRTC_CLK_CTRL2(head), tmp);
-+	script = nv50_display_script_select(dev, dcb, mc, pclk);
-+	nouveau_bios_run_display_table(dev, dcb, script, pclk);
++	nv50_display_unk20_dp_hack(dev, dcb);
  
 -	if (dcbent->type != OUTPUT_ANALOG) {
-+	nv50_display_unk20_dp_hack(dev, dcb);
-+
 +	if (dcb->type != OUTPUT_ANALOG) {
  		tmp = nv_rd32(dev, NV50_PDISPLAY_SOR_CLK_CTRL2(or));
  		tmp &= ~0x00000f0f;
  		if (script & 0x0100)
-@@ -853,24 +899,61 @@ nv50_display_unk20_handler(struct drm_device *dev)
+@@ -853,24 +918,61 @@ nv50_display_unk20_handler(struct drm_device *dev)
  		nv_wr32(dev, NV50_PDISPLAY_DAC_CLK_CTRL2(or), 0);
  	}
  
@@ -11287,7 +13920,7 @@ index 581d405..c551f0b 100644
  int nv50_crtc_set_clock(struct drm_device *, int head, int pclk);
  
 diff --git a/drivers/gpu/drm/nouveau/nv50_fb.c b/drivers/gpu/drm/nouveau/nv50_fb.c
-index 32611bd..ad267c5 100644
+index 32611bd..594720b 100644
 --- a/drivers/gpu/drm/nouveau/nv50_fb.c
 +++ b/drivers/gpu/drm/nouveau/nv50_fb.c
 @@ -36,3 +36,42 @@ void
@@ -11323,7 +13956,7 @@ index 32611bd..ad267c5 100644
 +		if (!chan || !chan->ramin)
 +			continue;
 +
-+		if (chinst == chan->ramin->instance >> 12)
++		if (chinst == chan->ramin->vinst >> 12)
 +			break;
 +	}
 +
@@ -11333,29 +13966,54 @@ index 32611bd..ad267c5 100644
 +		trap[5] & 0xff, trap[4] & 0xffff, trap[3] & 0xffff,
 +		trap[0], ch, chinst);
 +}
+diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c
+index 6bf025c..6dcf048 100644
+--- a/drivers/gpu/drm/nouveau/nv50_fbcon.c
++++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c
+@@ -1,6 +1,7 @@
+ #include "drmP.h"
+ #include "nouveau_drv.h"
+ #include "nouveau_dma.h"
++#include "nouveau_ramht.h"
+ #include "nouveau_fbcon.h"
+ 
+ void
+@@ -193,7 +194,8 @@ nv50_fbcon_accel_init(struct fb_info *info)
+ 	if (ret)
+ 		return ret;
+ 
+-	ret = nouveau_gpuobj_ref_add(dev, dev_priv->channel, Nv2D, eng2d, NULL);
++	ret = nouveau_ramht_insert(dev_priv->channel, Nv2D, eng2d);
++	nouveau_gpuobj_ref(NULL, &eng2d);
+ 	if (ret)
+ 		return ret;
+ 
 diff --git a/drivers/gpu/drm/nouveau/nv50_fifo.c b/drivers/gpu/drm/nouveau/nv50_fifo.c
-index e20c0e2..38dbcda 100644
+index e20c0e2..a46a961 100644
 --- a/drivers/gpu/drm/nouveau/nv50_fifo.c
 +++ b/drivers/gpu/drm/nouveau/nv50_fifo.c
-@@ -28,41 +28,35 @@
+@@ -27,42 +27,37 @@
+ #include "drmP.h"
  #include "drm.h"
  #include "nouveau_drv.h"
- 
+-
 -struct nv50_fifo_priv {
 -	struct nouveau_gpuobj_ref *thingo[2];
 -	int cur_thingo;
 -};
 -
 -#define IS_G80 ((dev_priv->chipset & 0xf0) == 0x50)
--
++#include "nouveau_ramht.h"
+ 
  static void
 -nv50_fifo_init_thingo(struct drm_device *dev)
 +nv50_fifo_playlist_update(struct drm_device *dev)
  {
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
 -	struct nv50_fifo_priv *priv = dev_priv->engine.fifo.priv;
+-	struct nouveau_gpuobj_ref *cur;
 +	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
- 	struct nouveau_gpuobj_ref *cur;
++	struct nouveau_gpuobj *cur;
  	int i, nr;
  
  	NV_DEBUG(dev, "\n");
@@ -11371,14 +14029,15 @@ index e20c0e2..38dbcda 100644
 -		if (dev_priv->fifos[i] && dev_priv->fifos[i]->ramfc)
 -			nv_wo32(dev, cur->gpuobj, nr++, i);
 +		if (dev_priv->fifos[i] && dev_priv->fifos[i]->ramfc) {
-+			nv_wo32(cur->gpuobj, (nr * 4), i);
++			nv_wo32(cur, (nr * 4), i);
 +			nr++;
 +		}
  	}
 -	dev_priv->engine.instmem.finish_access(dev);
 +	dev_priv->engine.instmem.flush(dev);
  
- 	nv_wr32(dev, 0x32f4, cur->instance >> 12);
+-	nv_wr32(dev, 0x32f4, cur->instance >> 12);
++	nv_wr32(dev, 0x32f4, cur->vinst >> 12);
  	nv_wr32(dev, 0x32ec, nr);
  	nv_wr32(dev, 0x2500, 0x101);
  }
@@ -11390,7 +14049,7 @@ index e20c0e2..38dbcda 100644
  {
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
  	struct nouveau_channel *chan = dev_priv->fifos[channel];
-@@ -70,37 +64,28 @@ nv50_fifo_channel_enable(struct drm_device *dev, int channel, bool nt)
+@@ -70,37 +65,28 @@ nv50_fifo_channel_enable(struct drm_device *dev, int channel, bool nt)
  
  	NV_DEBUG(dev, "ch%d\n", channel);
  
@@ -11398,12 +14057,14 @@ index e20c0e2..38dbcda 100644
 -		return -EINVAL;
 -
 -	if (IS_G80)
+-		inst = chan->ramfc->instance >> 12;
 +	if (dev_priv->chipset == 0x50)
- 		inst = chan->ramfc->instance >> 12;
++		inst = chan->ramfc->vinst >> 12;
  	else
- 		inst = chan->ramfc->instance >> 8;
+-		inst = chan->ramfc->instance >> 8;
 -	nv_wr32(dev, NV50_PFIFO_CTX_TABLE(channel),
 -		 inst | NV50_PFIFO_CTX_TABLE_CHANNEL_ENABLED);
++		inst = chan->ramfc->vinst >> 8;
  
 -	if (!nt)
 -		nv50_fifo_init_thingo(dev);
@@ -11434,7 +14095,7 @@ index e20c0e2..38dbcda 100644
  }
  
  static void
-@@ -133,12 +118,12 @@ nv50_fifo_init_context_table(struct drm_device *dev)
+@@ -133,12 +119,12 @@ nv50_fifo_init_context_table(struct drm_device *dev)
  
  	for (i = 0; i < NV50_PFIFO_CTX_TABLE__SIZE; i++) {
  		if (dev_priv->fifos[i])
@@ -11450,7 +14111,7 @@ index e20c0e2..38dbcda 100644
  }
  
  static void
-@@ -162,41 +147,38 @@ nv50_fifo_init_regs(struct drm_device *dev)
+@@ -162,41 +148,38 @@ nv50_fifo_init_regs(struct drm_device *dev)
  	nv_wr32(dev, 0x3270, 0);
  
  	/* Enable dummy channels setup by nv50_instmem.c */
@@ -11483,28 +14144,30 @@ index e20c0e2..38dbcda 100644
 -		return -ENOMEM;
 -	dev_priv->engine.fifo.priv = priv;
 -
- 	ret = nouveau_gpuobj_new_ref(dev, NULL, NULL, 0, 128*4, 0x1000,
+-	ret = nouveau_gpuobj_new_ref(dev, NULL, NULL, 0, 128*4, 0x1000,
 -				     NVOBJ_FLAG_ZERO_ALLOC, &priv->thingo[0]);
-+				     NVOBJ_FLAG_ZERO_ALLOC,
-+				     &pfifo->playlist[0]);
++	ret = nouveau_gpuobj_new(dev, NULL, 128*4, 0x1000,
++				 NVOBJ_FLAG_ZERO_ALLOC,
++				 &pfifo->playlist[0]);
  	if (ret) {
 -		NV_ERROR(dev, "error creating thingo0: %d\n", ret);
 +		NV_ERROR(dev, "error creating playlist 0: %d\n", ret);
  		return ret;
  	}
  
- 	ret = nouveau_gpuobj_new_ref(dev, NULL, NULL, 0, 128*4, 0x1000,
+-	ret = nouveau_gpuobj_new_ref(dev, NULL, NULL, 0, 128*4, 0x1000,
 -				     NVOBJ_FLAG_ZERO_ALLOC, &priv->thingo[1]);
-+				     NVOBJ_FLAG_ZERO_ALLOC,
-+				     &pfifo->playlist[1]);
++	ret = nouveau_gpuobj_new(dev, NULL, 128*4, 0x1000,
++				 NVOBJ_FLAG_ZERO_ALLOC,
++				 &pfifo->playlist[1]);
  	if (ret) {
 -		NV_ERROR(dev, "error creating thingo1: %d\n", ret);
-+		nouveau_gpuobj_ref_del(dev, &pfifo->playlist[0]);
++		nouveau_gpuobj_ref(NULL, &pfifo->playlist[0]);
 +		NV_ERROR(dev, "error creating playlist 1: %d\n", ret);
  		return ret;
  	}
  
-@@ -216,18 +198,15 @@ void
+@@ -216,18 +199,15 @@ void
  nv50_fifo_takedown(struct drm_device *dev)
  {
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -11522,21 +14185,59 @@ index e20c0e2..38dbcda 100644
 -
 -	dev_priv->engine.fifo.priv = NULL;
 -	kfree(priv);
-+	nouveau_gpuobj_ref_del(dev, &pfifo->playlist[0]);
-+	nouveau_gpuobj_ref_del(dev, &pfifo->playlist[1]);
++	nouveau_gpuobj_ref(NULL, &pfifo->playlist[0]);
++	nouveau_gpuobj_ref(NULL, &pfifo->playlist[1]);
  }
  
  int
-@@ -248,7 +227,7 @@ nv50_fifo_create_context(struct nouveau_channel *chan)
+@@ -248,72 +228,61 @@ nv50_fifo_create_context(struct nouveau_channel *chan)
  
  	NV_DEBUG(dev, "ch%d\n", chan->id);
  
 -	if (IS_G80) {
+-		uint32_t ramin_poffset = chan->ramin->gpuobj->im_pramin->start;
+-		uint32_t ramin_voffset = chan->ramin->gpuobj->im_backing_start;
+-
+-		ret = nouveau_gpuobj_new_fake(dev, ramin_poffset, ramin_voffset,
+-					      0x100, NVOBJ_FLAG_ZERO_ALLOC |
+-					      NVOBJ_FLAG_ZERO_FREE, &ramfc,
 +	if (dev_priv->chipset == 0x50) {
- 		uint32_t ramin_poffset = chan->ramin->gpuobj->im_pramin->start;
- 		uint32_t ramin_voffset = chan->ramin->gpuobj->im_backing_start;
++		ret = nouveau_gpuobj_new_fake(dev, chan->ramin->pinst,
++					      chan->ramin->vinst, 0x100,
++					      NVOBJ_FLAG_ZERO_ALLOC |
++					      NVOBJ_FLAG_ZERO_FREE,
+ 					      &chan->ramfc);
+ 		if (ret)
+ 			return ret;
+ 
+-		ret = nouveau_gpuobj_new_fake(dev, ramin_poffset + 0x0400,
+-					      ramin_voffset + 0x0400, 4096,
+-					      0, NULL, &chan->cache);
++		ret = nouveau_gpuobj_new_fake(dev, chan->ramin->pinst + 0x0400,
++					      chan->ramin->vinst + 0x0400,
++					      4096, 0, &chan->cache);
+ 		if (ret)
+ 			return ret;
+ 	} else {
+-		ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, 0x100, 256,
+-					     NVOBJ_FLAG_ZERO_ALLOC |
+-					     NVOBJ_FLAG_ZERO_FREE,
+-					     &chan->ramfc);
++		ret = nouveau_gpuobj_new(dev, chan, 0x100, 256,
++					 NVOBJ_FLAG_ZERO_ALLOC |
++					 NVOBJ_FLAG_ZERO_FREE, &chan->ramfc);
+ 		if (ret)
+ 			return ret;
+-		ramfc = chan->ramfc->gpuobj;
  
-@@ -281,39 +260,31 @@ nv50_fifo_create_context(struct nouveau_channel *chan)
+-		ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, 4096, 1024,
+-					     0, &chan->cache);
++		ret = nouveau_gpuobj_new(dev, chan, 4096, 1024,
++					 0, &chan->cache);
+ 		if (ret)
+ 			return ret;
+ 	}
++	ramfc = chan->ramfc;
  
  	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
  
@@ -11561,10 +14262,10 @@ index e20c0e2..38dbcda 100644
 -
 -		nv_wo32(dev, ramfc, 0x88/4, chan->cache->instance >> 10);
 -		nv_wo32(dev, ramfc, 0x98/4, chan->ramin->instance >> 12);
-+	nv_wo32(ramfc, 0x48, chan->pushbuf->instance >> 4);
-+	nv_wo32(ramfc, 0x80, (0 << 27) /* 4KiB */ |
++	nv_wo32(ramfc, 0x48, chan->pushbuf->cinst >> 4);
++	nv_wo32(ramfc, 0x80, ((chan->ramht->bits - 9) << 27) |
 +			     (4 << 24) /* SEARCH_FULL */ |
-+			     (chan->ramht->instance >> 4));
++			     (chan->ramht->gpuobj->cinst >> 4));
 +	nv_wo32(ramfc, 0x44, 0x2101ffff);
 +	nv_wo32(ramfc, 0x60, 0x7fffffff);
 +	nv_wo32(ramfc, 0x40, 0x00000000);
@@ -11575,11 +14276,11 @@ index e20c0e2..38dbcda 100644
 +	nv_wo32(ramfc, 0x54, drm_order(chan->dma.ib_max + 1) << 16);
 +
 +	if (dev_priv->chipset != 0x50) {
-+		nv_wo32(chan->ramin->gpuobj, 0, chan->id);
-+		nv_wo32(chan->ramin->gpuobj, 4, chan->ramfc->instance >> 8);
++		nv_wo32(chan->ramin, 0, chan->id);
++		nv_wo32(chan->ramin, 4, chan->ramfc->vinst >> 8);
 +
-+		nv_wo32(ramfc, 0x88, chan->cache->instance >> 10);
-+		nv_wo32(ramfc, 0x98, chan->ramin->instance >> 12);
++		nv_wo32(ramfc, 0x88, chan->cache->vinst >> 10);
++		nv_wo32(ramfc, 0x98, chan->ramin->vinst >> 12);
  	}
  
 -	dev_priv->engine.instmem.finish_access(dev);
@@ -11598,11 +14299,20 @@ index e20c0e2..38dbcda 100644
  	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
  	return 0;
  }
-@@ -328,11 +299,12 @@ nv50_fifo_destroy_context(struct nouveau_channel *chan)
+@@ -322,20 +291,22 @@ void
+ nv50_fifo_destroy_context(struct nouveau_channel *chan)
+ {
+ 	struct drm_device *dev = chan->dev;
+-	struct nouveau_gpuobj_ref *ramfc = chan->ramfc;
++	struct nouveau_gpuobj *ramfc = NULL;
+ 
+ 	NV_DEBUG(dev, "ch%d\n", chan->id);
  
  	/* This will ensure the channel is seen as disabled. */
- 	chan->ramfc = NULL;
+-	chan->ramfc = NULL;
 -	nv50_fifo_channel_disable(dev, chan->id, false);
++	nouveau_gpuobj_ref(chan->ramfc, &ramfc);
++	nouveau_gpuobj_ref(NULL, &chan->ramfc);
 +	nv50_fifo_channel_disable(dev, chan->id);
  
  	/* Dummy channel, also used on ch 127 */
@@ -11611,9 +14321,22 @@ index e20c0e2..38dbcda 100644
 +		nv50_fifo_channel_disable(dev, 127);
 +	nv50_fifo_playlist_update(dev);
  
- 	nouveau_gpuobj_ref_del(dev, &ramfc);
- 	nouveau_gpuobj_ref_del(dev, &chan->cache);
-@@ -349,63 +321,59 @@ nv50_fifo_load_context(struct nouveau_channel *chan)
+-	nouveau_gpuobj_ref_del(dev, &ramfc);
+-	nouveau_gpuobj_ref_del(dev, &chan->cache);
++	nouveau_gpuobj_ref(NULL, &ramfc);
++	nouveau_gpuobj_ref(NULL, &chan->cache);
+ }
+ 
+ int
+@@ -343,69 +314,65 @@ nv50_fifo_load_context(struct nouveau_channel *chan)
+ {
+ 	struct drm_device *dev = chan->dev;
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	struct nouveau_gpuobj *ramfc = chan->ramfc->gpuobj;
+-	struct nouveau_gpuobj *cache = chan->cache->gpuobj;
++	struct nouveau_gpuobj *ramfc = chan->ramfc;
++	struct nouveau_gpuobj *cache = chan->cache;
+ 	int ptr, cnt;
  
  	NV_DEBUG(dev, "ch%d\n", chan->id);
  
@@ -11720,10 +14443,13 @@ index e20c0e2..38dbcda 100644
  	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, chan->id | (1<<16));
  	return 0;
  }
-@@ -434,64 +402,63 @@ nv50_fifo_unload_context(struct drm_device *dev)
- 	ramfc = chan->ramfc->gpuobj;
- 	cache = chan->cache->gpuobj;
- 
+@@ -431,67 +398,66 @@ nv50_fifo_unload_context(struct drm_device *dev)
+ 		return -EINVAL;
+ 	}
+ 	NV_DEBUG(dev, "ch%d\n", chan->id);
+-	ramfc = chan->ramfc->gpuobj;
+-	cache = chan->cache->gpuobj;
+-
 -	dev_priv->engine.instmem.prepare_access(dev, true);
 -
 -	nv_wo32(dev, ramfc, 0x00/4, nv_rd32(dev, 0x3330));
@@ -11759,6 +14485,9 @@ index e20c0e2..38dbcda 100644
 -	nv_wo32(dev, ramfc, 0x78/4, nv_rd32(dev, 0x2088));
 -	nv_wo32(dev, ramfc, 0x7c/4, nv_rd32(dev, 0x2058));
 -	nv_wo32(dev, ramfc, 0x80/4, nv_rd32(dev, 0x2210));
++	ramfc = chan->ramfc;
++	cache = chan->cache;
++
 +	nv_wo32(ramfc, 0x00, nv_rd32(dev, 0x3330));
 +	nv_wo32(ramfc, 0x04, nv_rd32(dev, 0x3334));
 +	nv_wo32(ramfc, 0x08, nv_rd32(dev, 0x3240));
@@ -11875,11 +14604,15 @@ index bb47ad7..b2fab2b 100644
 +	return 0;
 +}
 diff --git a/drivers/gpu/drm/nouveau/nv50_graph.c b/drivers/gpu/drm/nouveau/nv50_graph.c
-index b203d06..17a8d78 100644
+index b203d06..cbf5ae2 100644
 --- a/drivers/gpu/drm/nouveau/nv50_graph.c
 +++ b/drivers/gpu/drm/nouveau/nv50_graph.c
-@@ -30,8 +30,6 @@
- 
+@@ -27,11 +27,9 @@
+ #include "drmP.h"
+ #include "drm.h"
+ #include "nouveau_drv.h"
+-
++#include "nouveau_ramht.h"
  #include "nouveau_grctx.h"
  
 -#define IS_G80 ((dev_priv->chipset & 0xf0) == 0x50)
@@ -11955,19 +14688,43 @@ index b203d06..17a8d78 100644
  }
  
  void
-@@ -212,8 +205,9 @@ nv50_graph_create_context(struct nouveau_channel *chan)
+@@ -188,7 +181,7 @@ nv50_graph_channel(struct drm_device *dev)
+ 	/* Be sure we're not in the middle of a context switch or bad things
+ 	 * will happen, such as unloading the wrong pgraph context.
+ 	 */
+-	if (!nv_wait(0x400300, 0x00000001, 0x00000000))
++	if (!nv_wait(dev, 0x400300, 0x00000001, 0x00000000))
+ 		NV_ERROR(dev, "Ctxprog is still running\n");
+ 
+ 	inst = nv_rd32(dev, NV50_PGRAPH_CTXCTL_CUR);
+@@ -199,7 +192,7 @@ nv50_graph_channel(struct drm_device *dev)
+ 	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
+ 		struct nouveau_channel *chan = dev_priv->fifos[i];
+ 
+-		if (chan && chan->ramin && chan->ramin->instance == inst)
++		if (chan && chan->ramin && chan->ramin->vinst == inst)
+ 			return chan;
+ 	}
+ 
+@@ -211,44 +204,36 @@ nv50_graph_create_context(struct nouveau_channel *chan)
+ {
  	struct drm_device *dev = chan->dev;
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
- 	struct nouveau_gpuobj *ramin = chan->ramin->gpuobj;
+-	struct nouveau_gpuobj *ramin = chan->ramin->gpuobj;
 -	struct nouveau_gpuobj *ctx;
-+	struct nouveau_gpuobj *obj;
++	struct nouveau_gpuobj *ramin = chan->ramin;
  	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
 +	struct nouveau_grctx ctx = {};
  	int hdr, ret;
  
  	NV_DEBUG(dev, "ch%d\n", chan->id);
-@@ -223,32 +217,25 @@ nv50_graph_create_context(struct nouveau_channel *chan)
- 				     NVOBJ_FLAG_ZERO_FREE, &chan->ramin_grctx);
+ 
+-	ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, pgraph->grctx_size,
+-				     0x1000, NVOBJ_FLAG_ZERO_ALLOC |
+-				     NVOBJ_FLAG_ZERO_FREE, &chan->ramin_grctx);
++	ret = nouveau_gpuobj_new(dev, chan, pgraph->grctx_size, 0x1000,
++				 NVOBJ_FLAG_ZERO_ALLOC |
++				 NVOBJ_FLAG_ZERO_FREE, &chan->ramin_grctx);
  	if (ret)
  		return ret;
 -	ctx = chan->ramin_grctx->gpuobj;
@@ -11995,29 +14752,28 @@ index b203d06..17a8d78 100644
 -	}
 -	nv_wo32(dev, ctx, 0x00000/4, chan->ramin->instance >> 12);
 -	dev_priv->engine.instmem.finish_access(dev);
-+	obj = chan->ramin_grctx->gpuobj;
-+
+ 
 +	hdr = (dev_priv->chipset == 0x50) ? 0x200 : 0x20;
 +	nv_wo32(ramin, hdr + 0x00, 0x00190002);
-+	nv_wo32(ramin, hdr + 0x04, chan->ramin_grctx->instance +
++	nv_wo32(ramin, hdr + 0x04, chan->ramin_grctx->vinst +
 +				   pgraph->grctx_size - 1);
-+	nv_wo32(ramin, hdr + 0x08, chan->ramin_grctx->instance);
++	nv_wo32(ramin, hdr + 0x08, chan->ramin_grctx->vinst);
 +	nv_wo32(ramin, hdr + 0x0c, 0);
 +	nv_wo32(ramin, hdr + 0x10, 0);
 +	nv_wo32(ramin, hdr + 0x14, 0x00010000);
 +
 +	ctx.dev = chan->dev;
 +	ctx.mode = NOUVEAU_GRCTX_VALS;
-+	ctx.data = obj;
++	ctx.data = chan->ramin_grctx;
 +	nv50_grctx_init(&ctx);
 +
-+	nv_wo32(obj, 0x00000, chan->ramin->instance >> 12);
- 
++	nv_wo32(chan->ramin_grctx, 0x00000, chan->ramin->vinst >> 12);
++
 +	dev_priv->engine.instmem.flush(dev);
  	return 0;
  }
  
-@@ -257,17 +244,16 @@ nv50_graph_destroy_context(struct nouveau_channel *chan)
+@@ -257,19 +242,18 @@ nv50_graph_destroy_context(struct nouveau_channel *chan)
  {
  	struct drm_device *dev = chan->dev;
  	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -12026,17 +14782,51 @@ index b203d06..17a8d78 100644
  
  	NV_DEBUG(dev, "ch%d\n", chan->id);
  
- 	if (!chan->ramin || !chan->ramin->gpuobj)
+-	if (!chan->ramin || !chan->ramin->gpuobj)
++	if (!chan->ramin)
  		return;
  
 -	dev_priv->engine.instmem.prepare_access(dev, true);
  	for (i = hdr; i < hdr + 24; i += 4)
 -		nv_wo32(dev, chan->ramin->gpuobj, i/4, 0);
 -	dev_priv->engine.instmem.finish_access(dev);
-+		nv_wo32(chan->ramin->gpuobj, i, 0);
++		nv_wo32(chan->ramin, i, 0);
 +	dev_priv->engine.instmem.flush(dev);
  
- 	nouveau_gpuobj_ref_del(dev, &chan->ramin_grctx);
+-	nouveau_gpuobj_ref_del(dev, &chan->ramin_grctx);
++	nouveau_gpuobj_ref(NULL, &chan->ramin_grctx);
+ }
+ 
+ static int
+@@ -296,7 +280,7 @@ nv50_graph_do_load_context(struct drm_device *dev, uint32_t inst)
+ int
+ nv50_graph_load_context(struct nouveau_channel *chan)
+ {
+-	uint32_t inst = chan->ramin->instance >> 12;
++	uint32_t inst = chan->ramin->vinst >> 12;
+ 
+ 	NV_DEBUG(chan->dev, "ch%d\n", chan->id);
+ 	return nv50_graph_do_load_context(chan->dev, inst);
+@@ -341,15 +325,16 @@ static int
+ nv50_graph_nvsw_dma_vblsem(struct nouveau_channel *chan, int grclass,
+ 			   int mthd, uint32_t data)
+ {
+-	struct nouveau_gpuobj_ref *ref = NULL;
++	struct nouveau_gpuobj *gpuobj;
+ 
+-	if (nouveau_gpuobj_ref_find(chan, data, &ref))
++	gpuobj = nouveau_ramht_find(chan, data);
++	if (!gpuobj)
+ 		return -ENOENT;
+ 
+-	if (nouveau_notifier_offset(ref->gpuobj, NULL))
++	if (nouveau_notifier_offset(gpuobj, NULL))
+ 		return -EINVAL;
+ 
+-	chan->nvsw.vblsem = ref->gpuobj;
++	chan->nvsw.vblsem = gpuobj;
+ 	chan->nvsw.vblsem_offset = ~0;
+ 	return 0;
  }
 diff --git a/drivers/gpu/drm/nouveau/nv50_grctx.c b/drivers/gpu/drm/nouveau/nv50_grctx.c
 index 42a8fb2..336aab2 100644
@@ -15837,78 +18627,531 @@ index 42a8fb2..336aab2 100644
  			size = (ctx->ctxvals_pos-offset)/8;
  	}
 diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c
-index 5f21df3..092057b 100644
+index 5f21df3..c0eef78 100644
 --- a/drivers/gpu/drm/nouveau/nv50_instmem.c
 +++ b/drivers/gpu/drm/nouveau/nv50_instmem.c
-@@ -35,8 +35,6 @@ struct nv50_instmem_priv {
- 	struct nouveau_gpuobj_ref *pramin_pt;
- 	struct nouveau_gpuobj_ref *pramin_bar;
- 	struct nouveau_gpuobj_ref *fb_bar;
+@@ -32,41 +32,87 @@
+ struct nv50_instmem_priv {
+ 	uint32_t save1700[5]; /* 0x1700->0x1710 */
+ 
+-	struct nouveau_gpuobj_ref *pramin_pt;
+-	struct nouveau_gpuobj_ref *pramin_bar;
+-	struct nouveau_gpuobj_ref *fb_bar;
 -
 -	bool last_access_wr;
++	struct nouveau_gpuobj *pramin_pt;
++	struct nouveau_gpuobj *pramin_bar;
++	struct nouveau_gpuobj *fb_bar;
  };
  
- #define NV50_INSTMEM_PAGE_SHIFT 12
-@@ -141,13 +139,15 @@ nv50_instmem_init(struct drm_device *dev)
- 	chan->file_priv = (struct drm_file *)-2;
- 	dev_priv->fifos[0] = dev_priv->fifos[127] = chan;
+-#define NV50_INSTMEM_PAGE_SHIFT 12
+-#define NV50_INSTMEM_PAGE_SIZE  (1 << NV50_INSTMEM_PAGE_SHIFT)
+-#define NV50_INSTMEM_PT_SIZE(a)	(((a) >> 12) << 3)
++static void
++nv50_channel_del(struct nouveau_channel **pchan)
++{
++	struct nouveau_channel *chan;
  
-+	INIT_LIST_HEAD(&chan->ramht_refs);
+-/*NOTE: - Assumes 0x1700 already covers the correct MiB of PRAMIN
+- */
+-#define BAR0_WI32(g, o, v) do {                                   \
+-	uint32_t offset;                                          \
+-	if ((g)->im_backing) {                                    \
+-		offset = (g)->im_backing_start;                   \
+-	} else {                                                  \
+-		offset  = chan->ramin->gpuobj->im_backing_start;  \
+-		offset += (g)->im_pramin->start;                  \
+-	}                                                         \
+-	offset += (o);                                            \
+-	nv_wr32(dev, NV_RAMIN + (offset & 0xfffff), (v));              \
+-} while (0)
++	chan = *pchan;
++	*pchan = NULL;
++	if (!chan)
++		return;
++
++	nouveau_gpuobj_ref(NULL, &chan->ramfc);
++	nouveau_gpuobj_ref(NULL, &chan->vm_pd);
++	if (chan->ramin_heap.free_stack.next)
++		drm_mm_takedown(&chan->ramin_heap);
++	nouveau_gpuobj_ref(NULL, &chan->ramin);
++	kfree(chan);
++}
++
++static int
++nv50_channel_new(struct drm_device *dev, u32 size,
++		 struct nouveau_channel **pchan)
++{
++	struct drm_nouveau_private *dev_priv = dev->dev_private;
++	u32 pgd = (dev_priv->chipset == 0x50) ? 0x1400 : 0x0200;
++	u32  fc = (dev_priv->chipset == 0x50) ? 0x0000 : 0x4200;
++	struct nouveau_channel *chan;
++	int ret;
++
++	chan = kzalloc(sizeof(*chan), GFP_KERNEL);
++	if (!chan)
++		return -ENOMEM;
++	chan->dev = dev;
++
++	ret = nouveau_gpuobj_new(dev, NULL, size, 0x1000, 0, &chan->ramin);
++	if (ret) {
++		nv50_channel_del(&chan);
++		return ret;
++	}
++
++	ret = drm_mm_init(&chan->ramin_heap, 0x6000, chan->ramin->size);
++	if (ret) {
++		nv50_channel_del(&chan);
++		return ret;
++	}
++
++	ret = nouveau_gpuobj_new_fake(dev, chan->ramin->pinst == ~0 ? ~0 :
++				      chan->ramin->pinst + pgd,
++				      chan->ramin->vinst + pgd,
++				      0x4000, NVOBJ_FLAG_ZERO_ALLOC,
++				      &chan->vm_pd);
++	if (ret) {
++		nv50_channel_del(&chan);
++		return ret;
++	}
++
++	ret = nouveau_gpuobj_new_fake(dev, chan->ramin->pinst == ~0 ? ~0 :
++				      chan->ramin->pinst + fc,
++				      chan->ramin->vinst + fc, 0x100,
++				      NVOBJ_FLAG_ZERO_ALLOC, &chan->ramfc);
++	if (ret) {
++		nv50_channel_del(&chan);
++		return ret;
++	}
 +
- 	/* Channel's PRAMIN object + heap */
- 	ret = nouveau_gpuobj_new_fake(dev, 0, c_offset, c_size, 0,
- 							NULL, &chan->ramin);
++	*pchan = chan;
++	return 0;
++}
+ 
+ int
+ nv50_instmem_init(struct drm_device *dev)
+ {
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	struct nouveau_channel *chan;
+-	uint32_t c_offset, c_size, c_ramfc, c_vmpd, c_base, pt_size;
+-	uint32_t save_nv001700;
+-	uint64_t v;
+ 	struct nv50_instmem_priv *priv;
++	struct nouveau_channel *chan;
+ 	int ret, i;
++	u32 tmp;
+ 
+ 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ 	if (!priv)
+@@ -77,215 +123,113 @@ nv50_instmem_init(struct drm_device *dev)
+ 	for (i = 0x1700; i <= 0x1710; i += 4)
+ 		priv->save1700[(i-0x1700)/4] = nv_rd32(dev, i);
+ 
+-	/* Reserve the last MiB of VRAM, we should probably try to avoid
+-	 * setting up the below tables over the top of the VBIOS image at
+-	 * some point.
+-	 */
+-	dev_priv->ramin_rsvd_vram = 1 << 20;
+-	c_offset = dev_priv->vram_size - dev_priv->ramin_rsvd_vram;
+-	c_size   = 128 << 10;
+-	c_vmpd   = ((dev_priv->chipset & 0xf0) == 0x50) ? 0x1400 : 0x200;
+-	c_ramfc  = ((dev_priv->chipset & 0xf0) == 0x50) ? 0x0 : 0x20;
+-	c_base   = c_vmpd + 0x4000;
+-	pt_size  = NV50_INSTMEM_PT_SIZE(dev_priv->ramin_size);
+-
+-	NV_DEBUG(dev, " Rsvd VRAM base: 0x%08x\n", c_offset);
+-	NV_DEBUG(dev, "    VBIOS image: 0x%08x\n",
+-				(nv_rd32(dev, 0x619f04) & ~0xff) << 8);
+-	NV_DEBUG(dev, "  Aperture size: %d MiB\n", dev_priv->ramin_size >> 20);
+-	NV_DEBUG(dev, "        PT size: %d KiB\n", pt_size >> 10);
+-
+-	/* Determine VM layout, we need to do this first to make sure
+-	 * we allocate enough memory for all the page tables.
+-	 */
+-	dev_priv->vm_gart_base = roundup(NV50_VM_BLOCK, NV50_VM_BLOCK);
+-	dev_priv->vm_gart_size = NV50_VM_BLOCK;
+-
+-	dev_priv->vm_vram_base = dev_priv->vm_gart_base + dev_priv->vm_gart_size;
+-	dev_priv->vm_vram_size = dev_priv->vram_size;
+-	if (dev_priv->vm_vram_size > NV50_VM_MAX_VRAM)
+-		dev_priv->vm_vram_size = NV50_VM_MAX_VRAM;
+-	dev_priv->vm_vram_size = roundup(dev_priv->vm_vram_size, NV50_VM_BLOCK);
+-	dev_priv->vm_vram_pt_nr = dev_priv->vm_vram_size / NV50_VM_BLOCK;
+-
+-	dev_priv->vm_end = dev_priv->vm_vram_base + dev_priv->vm_vram_size;
+-
+-	NV_DEBUG(dev, "NV50VM: GART 0x%016llx-0x%016llx\n",
+-		 dev_priv->vm_gart_base,
+-		 dev_priv->vm_gart_base + dev_priv->vm_gart_size - 1);
+-	NV_DEBUG(dev, "NV50VM: VRAM 0x%016llx-0x%016llx\n",
+-		 dev_priv->vm_vram_base,
+-		 dev_priv->vm_vram_base + dev_priv->vm_vram_size - 1);
+-
+-	c_size += dev_priv->vm_vram_pt_nr * (NV50_VM_BLOCK / 65536 * 8);
+-
+-	/* Map BAR0 PRAMIN aperture over the memory we want to use */
+-	save_nv001700 = nv_rd32(dev, NV50_PUNK_BAR0_PRAMIN);
+-	nv_wr32(dev, NV50_PUNK_BAR0_PRAMIN, (c_offset >> 16));
+-
+-	/* Create a fake channel, and use it as our "dummy" channels 0/127.
+-	 * The main reason for creating a channel is so we can use the gpuobj
+-	 * code.  However, it's probably worth noting that NVIDIA also setup
+-	 * their channels 0/127 with the same values they configure here.
+-	 * So, there may be some other reason for doing this.
+-	 *
+-	 * Have to create the entire channel manually, as the real channel
+-	 * creation code assumes we have PRAMIN access, and we don't until
+-	 * we're done here.
+-	 */
+-	chan = kzalloc(sizeof(*chan), GFP_KERNEL);
+-	if (!chan)
++	/* Global PRAMIN heap */
++	ret = drm_mm_init(&dev_priv->ramin_heap, 0, dev_priv->ramin_size);
++	if (ret) {
++		NV_ERROR(dev, "Failed to init RAMIN heap\n");
+ 		return -ENOMEM;
+-	chan->id = 0;
+-	chan->dev = dev;
+-	chan->file_priv = (struct drm_file *)-2;
+-	dev_priv->fifos[0] = dev_priv->fifos[127] = chan;
++	}
+ 
+-	/* Channel's PRAMIN object + heap */
+-	ret = nouveau_gpuobj_new_fake(dev, 0, c_offset, c_size, 0,
+-							NULL, &chan->ramin);
++	/* we need a channel to plug into the hw to control the BARs */
++	ret = nv50_channel_new(dev, 128*1024, &dev_priv->fifos[0]);
  	if (ret)
  		return ret;
++	chan = dev_priv->fifos[127] = dev_priv->fifos[0];
  
 -	if (nouveau_mem_init_heap(&chan->ramin_heap, c_base, c_size - c_base))
-+	if (drm_mm_init(&chan->ramin_heap, c_base, c_size - c_base))
- 		return -ENOMEM;
+-		return -ENOMEM;
+-
+-	/* RAMFC + zero channel's PRAMIN up to start of VM pagedir */
+-	ret = nouveau_gpuobj_new_fake(dev, c_ramfc, c_offset + c_ramfc,
+-						0x4000, 0, NULL, &chan->ramfc);
++	/* allocate page table for PRAMIN BAR */
++	ret = nouveau_gpuobj_new(dev, chan, (dev_priv->ramin_size >> 12) * 8,
++				 0x1000, NVOBJ_FLAG_ZERO_ALLOC,
++				 &priv->pramin_pt);
+ 	if (ret)
+ 		return ret;
  
- 	/* RAMFC + zero channel's PRAMIN up to start of VM pagedir */
-@@ -262,30 +262,25 @@ nv50_instmem_init(struct drm_device *dev)
+-	for (i = 0; i < c_vmpd; i += 4)
+-		BAR0_WI32(chan->ramin->gpuobj, i, 0);
++	nv_wo32(chan->vm_pd, 0x0000, priv->pramin_pt->vinst | 0x63);
++	nv_wo32(chan->vm_pd, 0x0004, 0);
  
- 	/* Assume that praying isn't enough, check that we can re-read the
- 	 * entire fake channel back from the PRAMIN BAR */
--	dev_priv->engine.instmem.prepare_access(dev, false);
- 	for (i = 0; i < c_size; i += 4) {
- 		if (nv_rd32(dev, NV_RAMIN + i) != nv_ri32(dev, i)) {
- 			NV_ERROR(dev, "Error reading back PRAMIN at 0x%08x\n",
- 									i);
--			dev_priv->engine.instmem.finish_access(dev);
- 			return -EINVAL;
- 		}
+-	/* VM page directory */
+-	ret = nouveau_gpuobj_new_fake(dev, c_vmpd, c_offset + c_vmpd,
+-					   0x4000, 0, &chan->vm_pd, NULL);
++	/* DMA object for PRAMIN BAR */
++	ret = nouveau_gpuobj_new(dev, chan, 6*4, 16, 0, &priv->pramin_bar);
+ 	if (ret)
+ 		return ret;
+-	for (i = 0; i < 0x4000; i += 8) {
+-		BAR0_WI32(chan->vm_pd, i + 0x00, 0x00000000);
+-		BAR0_WI32(chan->vm_pd, i + 0x04, 0x00000000);
+-	}
+-
+-	/* PRAMIN page table, cheat and map into VM at 0x0000000000.
+-	 * We map the entire fake channel into the start of the PRAMIN BAR
+-	 */
+-	ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, pt_size, 0x1000,
+-				     0, &priv->pramin_pt);
++	nv_wo32(priv->pramin_bar, 0x00, 0x7fc00000);
++	nv_wo32(priv->pramin_bar, 0x04, dev_priv->ramin_size - 1);
++	nv_wo32(priv->pramin_bar, 0x08, 0x00000000);
++	nv_wo32(priv->pramin_bar, 0x0c, 0x00000000);
++	nv_wo32(priv->pramin_bar, 0x10, 0x00000000);
++	nv_wo32(priv->pramin_bar, 0x14, 0x00000000);
++
++	/* map channel into PRAMIN, gpuobj didn't do it for us */
++	ret = nv50_instmem_bind(dev, chan->ramin);
+ 	if (ret)
+ 		return ret;
+ 
+-	v = c_offset | 1;
+-	if (dev_priv->vram_sys_base) {
+-		v += dev_priv->vram_sys_base;
+-		v |= 0x30;
+-	}
++	/* poke regs... */
++	nv_wr32(dev, 0x001704, 0x00000000 | (chan->ramin->vinst >> 12));
++	nv_wr32(dev, 0x001704, 0x40000000 | (chan->ramin->vinst >> 12));
++	nv_wr32(dev, 0x00170c, 0x80000000 | (priv->pramin_bar->cinst >> 4));
+ 
+-	i = 0;
+-	while (v < dev_priv->vram_sys_base + c_offset + c_size) {
+-		BAR0_WI32(priv->pramin_pt->gpuobj, i + 0, lower_32_bits(v));
+-		BAR0_WI32(priv->pramin_pt->gpuobj, i + 4, upper_32_bits(v));
+-		v += 0x1000;
+-		i += 8;
++	tmp = nv_ri32(dev, 0);
++	nv_wi32(dev, 0, ~tmp);
++	if (nv_ri32(dev, 0) != ~tmp) {
++		NV_ERROR(dev, "PRAMIN readback failed\n");
++		return -EIO;
  	}
--	dev_priv->engine.instmem.finish_access(dev);
++	nv_wi32(dev, 0, tmp);
  
- 	nv_wr32(dev, NV50_PUNK_BAR0_PRAMIN, save_nv001700);
+-	while (i < pt_size) {
+-		BAR0_WI32(priv->pramin_pt->gpuobj, i + 0, 0x00000000);
+-		BAR0_WI32(priv->pramin_pt->gpuobj, i + 4, 0x00000000);
+-		i += 8;
+-	}
++	dev_priv->ramin_available = true;
++
++	/* Determine VM layout */
++	dev_priv->vm_gart_base = roundup(NV50_VM_BLOCK, NV50_VM_BLOCK);
++	dev_priv->vm_gart_size = NV50_VM_BLOCK;
++
++	dev_priv->vm_vram_base = dev_priv->vm_gart_base + dev_priv->vm_gart_size;
++	dev_priv->vm_vram_size = dev_priv->vram_size;
++	if (dev_priv->vm_vram_size > NV50_VM_MAX_VRAM)
++		dev_priv->vm_vram_size = NV50_VM_MAX_VRAM;
++	dev_priv->vm_vram_size = roundup(dev_priv->vm_vram_size, NV50_VM_BLOCK);
++	dev_priv->vm_vram_pt_nr = dev_priv->vm_vram_size / NV50_VM_BLOCK;
++
++	dev_priv->vm_end = dev_priv->vm_vram_base + dev_priv->vm_vram_size;
+ 
+-	BAR0_WI32(chan->vm_pd, 0x00, priv->pramin_pt->instance | 0x63);
+-	BAR0_WI32(chan->vm_pd, 0x04, 0x00000000);
++	NV_DEBUG(dev, "NV50VM: GART 0x%016llx-0x%016llx\n",
++		 dev_priv->vm_gart_base,
++		 dev_priv->vm_gart_base + dev_priv->vm_gart_size - 1);
++	NV_DEBUG(dev, "NV50VM: VRAM 0x%016llx-0x%016llx\n",
++		 dev_priv->vm_vram_base,
++		 dev_priv->vm_vram_base + dev_priv->vm_vram_size - 1);
+ 
+ 	/* VRAM page table(s), mapped into VM at +1GiB  */
+ 	for (i = 0; i < dev_priv->vm_vram_pt_nr; i++) {
+-		ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0,
+-					     NV50_VM_BLOCK/65536*8, 0, 0,
+-					     &chan->vm_vram_pt[i]);
++		ret = nouveau_gpuobj_new(dev, NULL, NV50_VM_BLOCK / 0x10000 * 8,
++					 0, NVOBJ_FLAG_ZERO_ALLOC,
++					 &chan->vm_vram_pt[i]);
+ 		if (ret) {
+-			NV_ERROR(dev, "Error creating VRAM page tables: %d\n",
+-									ret);
++			NV_ERROR(dev, "Error creating VRAM PGT: %d\n", ret);
+ 			dev_priv->vm_vram_pt_nr = i;
+ 			return ret;
+ 		}
+-		dev_priv->vm_vram_pt[i] = chan->vm_vram_pt[i]->gpuobj;
++		dev_priv->vm_vram_pt[i] = chan->vm_vram_pt[i];
+ 
+-		for (v = 0; v < dev_priv->vm_vram_pt[i]->im_pramin->size;
+-								v += 4)
+-			BAR0_WI32(dev_priv->vm_vram_pt[i], v, 0);
+-
+-		BAR0_WI32(chan->vm_pd, 0x10 + (i*8),
+-			  chan->vm_vram_pt[i]->instance | 0x61);
+-		BAR0_WI32(chan->vm_pd, 0x14 + (i*8), 0);
++		nv_wo32(chan->vm_pd, 0x10 + (i*8),
++			chan->vm_vram_pt[i]->vinst | 0x61);
++		nv_wo32(chan->vm_pd, 0x14 + (i*8), 0);
+ 	}
  
- 	/* Global PRAMIN heap */
+-	/* DMA object for PRAMIN BAR */
+-	ret = nouveau_gpuobj_new_ref(dev, chan, chan, 0, 6*4, 16, 0,
+-							&priv->pramin_bar);
+-	if (ret)
+-		return ret;
+-	BAR0_WI32(priv->pramin_bar->gpuobj, 0x00, 0x7fc00000);
+-	BAR0_WI32(priv->pramin_bar->gpuobj, 0x04, dev_priv->ramin_size - 1);
+-	BAR0_WI32(priv->pramin_bar->gpuobj, 0x08, 0x00000000);
+-	BAR0_WI32(priv->pramin_bar->gpuobj, 0x0c, 0x00000000);
+-	BAR0_WI32(priv->pramin_bar->gpuobj, 0x10, 0x00000000);
+-	BAR0_WI32(priv->pramin_bar->gpuobj, 0x14, 0x00000000);
+-
+ 	/* DMA object for FB BAR */
+-	ret = nouveau_gpuobj_new_ref(dev, chan, chan, 0, 6*4, 16, 0,
+-							&priv->fb_bar);
++	ret = nouveau_gpuobj_new(dev, chan, 6*4, 16, 0, &priv->fb_bar);
+ 	if (ret)
+ 		return ret;
+-	BAR0_WI32(priv->fb_bar->gpuobj, 0x00, 0x7fc00000);
+-	BAR0_WI32(priv->fb_bar->gpuobj, 0x04, 0x40000000 +
+-					      drm_get_resource_len(dev, 1) - 1);
+-	BAR0_WI32(priv->fb_bar->gpuobj, 0x08, 0x40000000);
+-	BAR0_WI32(priv->fb_bar->gpuobj, 0x0c, 0x00000000);
+-	BAR0_WI32(priv->fb_bar->gpuobj, 0x10, 0x00000000);
+-	BAR0_WI32(priv->fb_bar->gpuobj, 0x14, 0x00000000);
+-
+-	/* Poke the relevant regs, and pray it works :) */
+-	nv_wr32(dev, NV50_PUNK_BAR_CFG_BASE, (chan->ramin->instance >> 12));
+-	nv_wr32(dev, NV50_PUNK_UNK1710, 0);
+-	nv_wr32(dev, NV50_PUNK_BAR_CFG_BASE, (chan->ramin->instance >> 12) |
+-					 NV50_PUNK_BAR_CFG_BASE_VALID);
+-	nv_wr32(dev, NV50_PUNK_BAR1_CTXDMA, (priv->fb_bar->instance >> 4) |
+-					NV50_PUNK_BAR1_CTXDMA_VALID);
+-	nv_wr32(dev, NV50_PUNK_BAR3_CTXDMA, (priv->pramin_bar->instance >> 4) |
+-					NV50_PUNK_BAR3_CTXDMA_VALID);
+-
++	nv_wo32(priv->fb_bar, 0x00, 0x7fc00000);
++	nv_wo32(priv->fb_bar, 0x04, 0x40000000 +
++				    pci_resource_len(dev->pdev, 1) - 1);
++	nv_wo32(priv->fb_bar, 0x08, 0x40000000);
++	nv_wo32(priv->fb_bar, 0x0c, 0x00000000);
++	nv_wo32(priv->fb_bar, 0x10, 0x00000000);
++	nv_wo32(priv->fb_bar, 0x14, 0x00000000);
++
++	nv_wr32(dev, 0x001708, 0x80000000 | (priv->fb_bar->cinst >> 4));
+ 	for (i = 0; i < 8; i++)
+ 		nv_wr32(dev, 0x1900 + (i*4), 0);
+ 
+-	/* Assume that praying isn't enough, check that we can re-read the
+-	 * entire fake channel back from the PRAMIN BAR */
+-	dev_priv->engine.instmem.prepare_access(dev, false);
+-	for (i = 0; i < c_size; i += 4) {
+-		if (nv_rd32(dev, NV_RAMIN + i) != nv_ri32(dev, i)) {
+-			NV_ERROR(dev, "Error reading back PRAMIN at 0x%08x\n",
+-									i);
+-			dev_priv->engine.instmem.finish_access(dev);
+-			return -EINVAL;
+-		}
+-	}
+-	dev_priv->engine.instmem.finish_access(dev);
+-
+-	nv_wr32(dev, NV50_PUNK_BAR0_PRAMIN, save_nv001700);
+-
+-	/* Global PRAMIN heap */
 -	if (nouveau_mem_init_heap(&dev_priv->ramin_heap,
 -				  c_size, dev_priv->ramin_size - c_size)) {
 -		dev_priv->ramin_heap = NULL;
-+	if (drm_mm_init(&dev_priv->ramin_heap, c_size, dev_priv->ramin_size - c_size)) {
- 		NV_ERROR(dev, "Failed to init RAMIN heap\n");
- 	}
- 
- 	/*XXX: incorrect, but needed to make hash func "work" */
- 	dev_priv->ramht_offset = 0x10000;
- 	dev_priv->ramht_bits   = 9;
+-		NV_ERROR(dev, "Failed to init RAMIN heap\n");
+-	}
+-
+-	/*XXX: incorrect, but needed to make hash func "work" */
+-	dev_priv->ramht_offset = 0x10000;
+-	dev_priv->ramht_bits   = 9;
 -	dev_priv->ramht_size   = (1 << dev_priv->ramht_bits);
-+	dev_priv->ramht_size   = (1 << dev_priv->ramht_bits) * 8;
  	return 0;
  }
  
-@@ -321,7 +316,7 @@ nv50_instmem_takedown(struct drm_device *dev)
- 		nouveau_gpuobj_del(dev, &chan->vm_pd);
- 		nouveau_gpuobj_ref_del(dev, &chan->ramfc);
- 		nouveau_gpuobj_ref_del(dev, &chan->ramin);
+@@ -302,29 +246,24 @@ nv50_instmem_takedown(struct drm_device *dev)
+ 	if (!priv)
+ 		return;
+ 
++	dev_priv->ramin_available = false;
++
+ 	/* Restore state from before init */
+ 	for (i = 0x1700; i <= 0x1710; i += 4)
+ 		nv_wr32(dev, i, priv->save1700[(i - 0x1700) / 4]);
+ 
+-	nouveau_gpuobj_ref_del(dev, &priv->fb_bar);
+-	nouveau_gpuobj_ref_del(dev, &priv->pramin_bar);
+-	nouveau_gpuobj_ref_del(dev, &priv->pramin_pt);
++	nouveau_gpuobj_ref(NULL, &priv->fb_bar);
++	nouveau_gpuobj_ref(NULL, &priv->pramin_bar);
++	nouveau_gpuobj_ref(NULL, &priv->pramin_pt);
+ 
+ 	/* Destroy dummy channel */
+ 	if (chan) {
+-		for (i = 0; i < dev_priv->vm_vram_pt_nr; i++) {
+-			nouveau_gpuobj_ref_del(dev, &chan->vm_vram_pt[i]);
+-			dev_priv->vm_vram_pt[i] = NULL;
+-		}
++		for (i = 0; i < dev_priv->vm_vram_pt_nr; i++)
++			nouveau_gpuobj_ref(NULL, &chan->vm_vram_pt[i]);
+ 		dev_priv->vm_vram_pt_nr = 0;
+ 
+-		nouveau_gpuobj_del(dev, &chan->vm_pd);
+-		nouveau_gpuobj_ref_del(dev, &chan->ramfc);
+-		nouveau_gpuobj_ref_del(dev, &chan->ramin);
 -		nouveau_mem_takedown(&chan->ramin_heap);
-+		drm_mm_takedown(&chan->ramin_heap);
+-
+-		dev_priv->fifos[0] = dev_priv->fifos[127] = NULL;
+-		kfree(chan);
++		nv50_channel_del(&dev_priv->fifos[0]);
++		dev_priv->fifos[127] = NULL;
+ 	}
+ 
+ 	dev_priv->engine.instmem.priv = NULL;
+@@ -336,14 +275,14 @@ nv50_instmem_suspend(struct drm_device *dev)
+ {
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+ 	struct nouveau_channel *chan = dev_priv->fifos[0];
+-	struct nouveau_gpuobj *ramin = chan->ramin->gpuobj;
++	struct nouveau_gpuobj *ramin = chan->ramin;
+ 	int i;
+ 
+-	ramin->im_backing_suspend = vmalloc(ramin->im_pramin->size);
++	ramin->im_backing_suspend = vmalloc(ramin->size);
+ 	if (!ramin->im_backing_suspend)
+ 		return -ENOMEM;
+ 
+-	for (i = 0; i < ramin->im_pramin->size; i += 4)
++	for (i = 0; i < ramin->size; i += 4)
+ 		ramin->im_backing_suspend[i/4] = nv_ri32(dev, i);
+ 	return 0;
+ }
+@@ -354,23 +293,25 @@ nv50_instmem_resume(struct drm_device *dev)
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+ 	struct nv50_instmem_priv *priv = dev_priv->engine.instmem.priv;
+ 	struct nouveau_channel *chan = dev_priv->fifos[0];
+-	struct nouveau_gpuobj *ramin = chan->ramin->gpuobj;
++	struct nouveau_gpuobj *ramin = chan->ramin;
+ 	int i;
+ 
+-	nv_wr32(dev, NV50_PUNK_BAR0_PRAMIN, (ramin->im_backing_start >> 16));
+-	for (i = 0; i < ramin->im_pramin->size; i += 4)
+-		BAR0_WI32(ramin, i, ramin->im_backing_suspend[i/4]);
++	dev_priv->ramin_available = false;
++	dev_priv->ramin_base = ~0;
++	for (i = 0; i < ramin->size; i += 4)
++		nv_wo32(ramin, i, ramin->im_backing_suspend[i/4]);
++	dev_priv->ramin_available = true;
+ 	vfree(ramin->im_backing_suspend);
+ 	ramin->im_backing_suspend = NULL;
+ 
+ 	/* Poke the relevant regs, and pray it works :) */
+-	nv_wr32(dev, NV50_PUNK_BAR_CFG_BASE, (chan->ramin->instance >> 12));
++	nv_wr32(dev, NV50_PUNK_BAR_CFG_BASE, (chan->ramin->vinst >> 12));
+ 	nv_wr32(dev, NV50_PUNK_UNK1710, 0);
+-	nv_wr32(dev, NV50_PUNK_BAR_CFG_BASE, (chan->ramin->instance >> 12) |
++	nv_wr32(dev, NV50_PUNK_BAR_CFG_BASE, (chan->ramin->vinst >> 12) |
+ 					 NV50_PUNK_BAR_CFG_BASE_VALID);
+-	nv_wr32(dev, NV50_PUNK_BAR1_CTXDMA, (priv->fb_bar->instance >> 4) |
++	nv_wr32(dev, NV50_PUNK_BAR1_CTXDMA, (priv->fb_bar->cinst >> 4) |
+ 					NV50_PUNK_BAR1_CTXDMA_VALID);
+-	nv_wr32(dev, NV50_PUNK_BAR3_CTXDMA, (priv->pramin_bar->instance >> 4) |
++	nv_wr32(dev, NV50_PUNK_BAR3_CTXDMA, (priv->pramin_bar->cinst >> 4) |
+ 					NV50_PUNK_BAR3_CTXDMA_VALID);
+ 
+ 	for (i = 0; i < 8; i++)
+@@ -386,7 +327,7 @@ nv50_instmem_populate(struct drm_device *dev, struct nouveau_gpuobj *gpuobj,
+ 	if (gpuobj->im_backing)
+ 		return -EINVAL;
+ 
+-	*sz = ALIGN(*sz, NV50_INSTMEM_PAGE_SIZE);
++	*sz = ALIGN(*sz, 4096);
+ 	if (*sz == 0)
+ 		return -EINVAL;
+ 
+@@ -404,9 +345,7 @@ nv50_instmem_populate(struct drm_device *dev, struct nouveau_gpuobj *gpuobj,
+ 		return ret;
+ 	}
+ 
+-	gpuobj->im_backing_start = gpuobj->im_backing->bo.mem.mm_node->start;
+-	gpuobj->im_backing_start <<= PAGE_SHIFT;
+-
++	gpuobj->vinst = gpuobj->im_backing->bo.mem.mm_node->start << PAGE_SHIFT;
+ 	return 0;
+ }
+ 
+@@ -429,23 +368,23 @@ nv50_instmem_bind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
+ {
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+ 	struct nv50_instmem_priv *priv = dev_priv->engine.instmem.priv;
+-	struct nouveau_gpuobj *pramin_pt = priv->pramin_pt->gpuobj;
++	struct nouveau_gpuobj *pramin_pt = priv->pramin_pt;
+ 	uint32_t pte, pte_end;
+ 	uint64_t vram;
  
- 		dev_priv->fifos[0] = dev_priv->fifos[127] = NULL;
- 		kfree(chan);
-@@ -436,14 +431,14 @@ nv50_instmem_bind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
  	if (!gpuobj->im_backing || !gpuobj->im_pramin || gpuobj->im_bound)
  		return -EINVAL;
  
@@ -15918,14 +19161,18 @@ index 5f21df3..092057b 100644
  
  	pte     = (gpuobj->im_pramin->start >> 12) << 1;
  	pte_end = ((gpuobj->im_pramin->size >> 12) << 1) + pte;
- 	vram    = gpuobj->im_backing_start;
+-	vram    = gpuobj->im_backing_start;
++	vram    = gpuobj->vinst;
  
 -	NV_DEBUG(dev, "pramin=0x%llx, pte=%d, pte_end=%d\n",
 +	NV_DEBUG(dev, "pramin=0x%lx, pte=%d, pte_end=%d\n",
  		 gpuobj->im_pramin->start, pte, pte_end);
- 	NV_DEBUG(dev, "first vram page: 0x%08x\n", gpuobj->im_backing_start);
+-	NV_DEBUG(dev, "first vram page: 0x%08x\n", gpuobj->im_backing_start);
++	NV_DEBUG(dev, "first vram page: 0x%010llx\n", gpuobj->vinst);
  
-@@ -453,27 +448,16 @@ nv50_instmem_bind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
+ 	vram |= 1;
+ 	if (dev_priv->vram_sys_base) {
+@@ -453,27 +392,16 @@ nv50_instmem_bind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
  		vram |= 0x30;
  	}
  
@@ -15933,11 +19180,8 @@ index 5f21df3..092057b 100644
  	while (pte < pte_end) {
 -		nv_wo32(dev, pramin_pt, pte++, lower_32_bits(vram));
 -		nv_wo32(dev, pramin_pt, pte++, upper_32_bits(vram));
-+		nv_wo32(pramin_pt, (pte * 4) + 0, lower_32_bits(vram));
-+		nv_wo32(pramin_pt, (pte * 4) + 4, upper_32_bits(vram));
- 		vram += NV50_INSTMEM_PAGE_SIZE;
-+		pte += 2;
- 	}
+-		vram += NV50_INSTMEM_PAGE_SIZE;
+-	}
 -	dev_priv->engine.instmem.finish_access(dev);
 -
 -	nv_wr32(dev, 0x100c80, 0x00040001);
@@ -15945,7 +19189,11 @@ index 5f21df3..092057b 100644
 -		NV_ERROR(dev, "timeout: (0x100c80 & 1) == 0 (1)\n");
 -		NV_ERROR(dev, "0x100c80 = 0x%08x\n", nv_rd32(dev, 0x100c80));
 -		return -EBUSY;
--	}
++		nv_wo32(pramin_pt, (pte * 4) + 0, lower_32_bits(vram));
++		nv_wo32(pramin_pt, (pte * 4) + 4, upper_32_bits(vram));
++		vram += 0x1000;
++		pte += 2;
+ 	}
 +	dev_priv->engine.instmem.flush(dev);
  
 -	nv_wr32(dev, 0x100c80, 0x00060001);
@@ -15959,7 +19207,14 @@ index 5f21df3..092057b 100644
  
  	gpuobj->im_bound = 1;
  	return 0;
-@@ -492,36 +476,37 @@ nv50_instmem_unbind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
+@@ -489,39 +417,44 @@ nv50_instmem_unbind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj)
+ 	if (gpuobj->im_bound == 0)
+ 		return -EINVAL;
+ 
++	/* can happen during late takedown */
++	if (unlikely(!dev_priv->ramin_available))
++		return 0;
++
  	pte     = (gpuobj->im_pramin->start >> 12) << 1;
  	pte_end = ((gpuobj->im_pramin->size >> 12) << 1) + pte;
  
@@ -15967,8 +19222,8 @@ index 5f21df3..092057b 100644
  	while (pte < pte_end) {
 -		nv_wo32(dev, priv->pramin_pt->gpuobj, pte++, 0x00000000);
 -		nv_wo32(dev, priv->pramin_pt->gpuobj, pte++, 0x00000000);
-+		nv_wo32(priv->pramin_pt->gpuobj, (pte * 4) + 0, 0x00000000);
-+		nv_wo32(priv->pramin_pt->gpuobj, (pte * 4) + 4, 0x00000000);
++		nv_wo32(priv->pramin_pt, (pte * 4) + 0, 0x00000000);
++		nv_wo32(priv->pramin_pt, (pte * 4) + 4, 0x00000000);
 +		pte += 2;
  	}
 -	dev_priv->engine.instmem.finish_access(dev);
@@ -15987,7 +19242,7 @@ index 5f21df3..092057b 100644
 -
 -	priv->last_access_wr = write;
 +	nv_wr32(dev, 0x00330c, 0x00000001);
-+	if (!nv_wait(0x00330c, 0x00000002, 0x00000000))
++	if (!nv_wait(dev, 0x00330c, 0x00000002, 0x00000000))
 +		NV_ERROR(dev, "PRAMIN flush timeout\n");
  }
  
@@ -16004,7 +19259,7 @@ index 5f21df3..092057b 100644
 -			NV_ERROR(dev, "PRAMIN flush timeout\n");
 -	}
 +	nv_wr32(dev, 0x070000, 0x00000001);
-+	if (!nv_wait(0x070000, 0x00000002, 0x00000000))
++	if (!nv_wait(dev, 0x070000, 0x00000002, 0x00000000))
 +		NV_ERROR(dev, "PRAMIN flush timeout\n");
  }
  
@@ -16012,11 +19267,11 @@ index 5f21df3..092057b 100644
 +nv50_vm_flush(struct drm_device *dev, int engine)
 +{
 +	nv_wr32(dev, 0x100c80, (engine << 16) | 1);
-+	if (!nv_wait(0x100c80, 0x00000001, 0x00000000))
++	if (!nv_wait(dev, 0x100c80, 0x00000001, 0x00000000))
 +		NV_ERROR(dev, "vm flush timeout: engine %d\n", engine);
 +}
 diff --git a/drivers/gpu/drm/nouveau/nv50_sor.c b/drivers/gpu/drm/nouveau/nv50_sor.c
-index 812778d..bcd4cf8 100644
+index 812778d..b4a5ecb 100644
 --- a/drivers/gpu/drm/nouveau/nv50_sor.c
 +++ b/drivers/gpu/drm/nouveau/nv50_sor.c
 @@ -37,52 +37,32 @@
@@ -16104,7 +19359,24 @@ index 812778d..bcd4cf8 100644
  		    nvenc->dcb->or != nv_encoder->dcb->or)
  			continue;
  
-@@ -133,8 +115,22 @@ nv50_sor_dpms(struct drm_encoder *encoder, int mode)
+@@ -110,7 +92,7 @@ nv50_sor_dpms(struct drm_encoder *encoder, int mode)
+ 	}
+ 
+ 	/* wait for it to be done */
+-	if (!nv_wait(NV50_PDISPLAY_SOR_DPMS_CTRL(or),
++	if (!nv_wait(dev, NV50_PDISPLAY_SOR_DPMS_CTRL(or),
+ 		     NV50_PDISPLAY_SOR_DPMS_CTRL_PENDING, 0)) {
+ 		NV_ERROR(dev, "timeout: SOR_DPMS_CTRL_PENDING(%d) == 0\n", or);
+ 		NV_ERROR(dev, "SOR_DPMS_CTRL(%d) = 0x%08x\n", or,
+@@ -126,15 +108,29 @@ nv50_sor_dpms(struct drm_encoder *encoder, int mode)
+ 
+ 	nv_wr32(dev, NV50_PDISPLAY_SOR_DPMS_CTRL(or), val |
+ 		NV50_PDISPLAY_SOR_DPMS_CTRL_PENDING);
+-	if (!nv_wait(NV50_PDISPLAY_SOR_DPMS_STATE(or),
++	if (!nv_wait(dev, NV50_PDISPLAY_SOR_DPMS_STATE(or),
+ 		     NV50_PDISPLAY_SOR_DPMS_STATE_WAIT, 0)) {
+ 		NV_ERROR(dev, "timeout: SOR_DPMS_STATE_WAIT(%d) == 0\n", or);
+ 		NV_ERROR(dev, "SOR_DPMS_STATE(%d) = 0x%08x\n", or,
  			 nv_rd32(dev, NV50_PDISPLAY_SOR_DPMS_STATE(or)));
  	}
  
@@ -16264,10 +19536,10 @@ index 0000000..26a9960
 +}
 diff --git a/drivers/gpu/drm/nouveau/nvc0_fifo.c b/drivers/gpu/drm/nouveau/nvc0_fifo.c
 new file mode 100644
-index 0000000..45ca994
+index 0000000..2cdb7c3
 --- /dev/null
 +++ b/drivers/gpu/drm/nouveau/nvc0_fifo.c
-@@ -0,0 +1,95 @@
+@@ -0,0 +1,89 @@
 +/*
 + * Copyright 2010 Red Hat Inc.
 + *
@@ -16313,12 +19585,6 @@ index 0000000..45ca994
 +}
 +
 +bool
-+nvc0_fifo_cache_flush(struct drm_device *dev)
-+{
-+	return true;
-+}
-+
-+bool
 +nvc0_fifo_cache_pull(struct drm_device *dev, bool enable)
 +{
 +	return false;
@@ -16445,10 +19711,10 @@ index 0000000..edf2b21
 +}
 diff --git a/drivers/gpu/drm/nouveau/nvc0_instmem.c b/drivers/gpu/drm/nouveau/nvc0_instmem.c
 new file mode 100644
-index 0000000..9238c73
+index 0000000..152d8e8
 --- /dev/null
 +++ b/drivers/gpu/drm/nouveau/nvc0_instmem.c
-@@ -0,0 +1,234 @@
+@@ -0,0 +1,229 @@
 +/*
 + * Copyright 2010 Red Hat Inc.
 + *
@@ -16501,8 +19767,7 @@ index 0000000..9238c73
 +		return ret;
 +	}
 +
-+	gpuobj->im_backing_start = gpuobj->im_backing->bo.mem.mm_node->start;
-+	gpuobj->im_backing_start <<= PAGE_SHIFT;
++	gpuobj->vinst = gpuobj->im_backing->bo.mem.mm_node->start << PAGE_SHIFT;
 +	return 0;
 +}
 +
@@ -16535,11 +19800,11 @@ index 0000000..9238c73
 +
 +	pte     = gpuobj->im_pramin->start >> 12;
 +	pte_end = (gpuobj->im_pramin->size >> 12) + pte;
-+	vram    = gpuobj->im_backing_start;
++	vram    = gpuobj->vinst;
 +
 +	NV_DEBUG(dev, "pramin=0x%lx, pte=%d, pte_end=%d\n",
 +		 gpuobj->im_pramin->start, pte, pte_end);
-+	NV_DEBUG(dev, "first vram page: 0x%08x\n", gpuobj->im_backing_start);
++	NV_DEBUG(dev, "first vram page: 0x%010llx\n", gpuobj->vinst);
 +
 +	while (pte < pte_end) {
 +		nv_wr32(dev, 0x702000 + (pte * 8), (vram >> 8) | 1);
@@ -16585,7 +19850,7 @@ index 0000000..9238c73
 +nvc0_instmem_flush(struct drm_device *dev)
 +{
 +	nv_wr32(dev, 0x070000, 1);
-+	if (!nv_wait(0x070000, 0x00000002, 0x00000000))
++	if (!nv_wait(dev, 0x070000, 0x00000002, 0x00000000))
 +		NV_ERROR(dev, "PRAMIN flush timeout\n");
 +}
 +
@@ -16672,10 +19937,6 @@ index 0000000..9238c73
 +		return -ENOMEM;
 +	}
 +
-+	/*XXX: incorrect, but needed to make hash func "work" */
-+	dev_priv->ramht_offset = 0x10000;
-+	dev_priv->ramht_bits   = 9;
-+	dev_priv->ramht_size   = (1 << dev_priv->ramht_bits) * 8;
 +	return 0;
 +}
 +
diff --git a/kernel.spec b/kernel.spec
index 4d2cd3e..134f6f3 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -48,7 +48,7 @@ Summary: The Linux kernel
 # reset this by hand to 1 (or to 0 and then use rpmdev-bumpspec).
 # scripts/rebase.sh should be made to do that for you, actually.
 #
-%global baserelease 22
+%global baserelease 23
 %global fedora_build %{baserelease}
 
 # base_sublevel is the kernel version we're starting with and patching
@@ -1921,6 +1921,9 @@ fi
 # and build.
 
 %changelog
+* Wed Sep 08 2010 Ben Skeggs <bskeggs at redhat.com> 2.6.35.4-23
+- nouveau: handle certain GPU errors better, AGP + misc fixes
+
 * Tue Sep 07 2010 Dave Jones <davej at redhat.com> 2.6.35.4-22
 - Disable hung task checker, it only ever causes false positives. (#630777)