[kernel/f13/master] nouveau: handle certain GPU errors better

Ben Skeggs bskeggs at fedoraproject.org
Mon Sep 27 02:42:45 UTC 2010


commit 35a02f9ac3e95445e0efc9958081a9e8b9c2c0d5
Author: Ben Skeggs <bskeggs at redhat.com>
Date:   Mon Sep 27 12:39:17 2010 +1000

    nouveau: handle certain GPU errors better

 drm-nouveau-pusher-intr.patch |   78 +++++++++++++++++++++++++++++++++++++++++
 kernel.spec                   |    7 +++-
 2 files changed, 84 insertions(+), 1 deletions(-)
---
diff --git a/drm-nouveau-pusher-intr.patch b/drm-nouveau-pusher-intr.patch
new file mode 100644
index 0000000..a2eff99
--- /dev/null
+++ b/drm-nouveau-pusher-intr.patch
@@ -0,0 +1,78 @@
+From ef0744eff71f519db7641313d1588289f5cb49d2 Mon Sep 17 00:00:00 2001
+From: Ben Skeggs <bskeggs at redhat.com>
+Date: Wed, 8 Sep 2010 15:40:30 +1000
+Subject: [PATCH] drm/nouveau: handle fifo pusher errors better
+
+The most important part of this change is that we now instruct PFIFO to
+drop all pending fetches, rather than attempting to skip a single dword
+and hoping that things would magically sort themselves out - they usually
+don't, and we end up with PFIFO being completely hung.
+
+This commit also adds somewhat more useful logging when these exceptions
+occur.
+
+Signed-off-by: Ben Skeggs <bskeggs at redhat.com>
+---
+ drivers/gpu/drm/nouveau/nouveau_irq.c |   45 +++++++++++++++++++++++++++------
+ 1 files changed, 37 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/gpu/drm/nouveau/nouveau_irq.c b/drivers/gpu/drm/nouveau/nouveau_irq.c
+index 53360f1..a0f31e4 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_irq.c
++++ b/drivers/gpu/drm/nouveau/nouveau_irq.c
+@@ -200,16 +200,45 @@ nouveau_fifo_irq_handler(struct drm_device *dev)
+		}
+
+		if (status & NV_PFIFO_INTR_DMA_PUSHER) {
+-			NV_INFO(dev, "PFIFO_DMA_PUSHER - Ch %d\n", chid);
++			u32 get = nv_rd32(dev, 0x003244);
++			u32 put = nv_rd32(dev, 0x003240);
++			u32 push = nv_rd32(dev, 0x003220);
++			u32 state = nv_rd32(dev, 0x003228);
++
++			if (dev_priv->card_type == NV_50) {
++				u32 ho_get = nv_rd32(dev, 0x003328);
++				u32 ho_put = nv_rd32(dev, 0x003320);
++				u32 ib_get = nv_rd32(dev, 0x003334);
++				u32 ib_put = nv_rd32(dev, 0x003330);
++
++				NV_INFO(dev, "PFIFO_DMA_PUSHER - Ch %d Get 0x%02x%08x "
++					     "Put 0x%02x%08x IbGet 0x%08x IbPut 0x%08x "
++					     "State 0x%08x Push 0x%08x\n",
++					chid, ho_get, get, ho_put, put, ib_get, ib_put,
++					state, push);
++
++				/* METHOD_COUNT, in DMA_STATE on earlier chipsets */
++				nv_wr32(dev, 0x003364, 0x00000000);
++				if (get != put || ho_get != ho_put) {
++					nv_wr32(dev, 0x003244, put);
++					nv_wr32(dev, 0x003328, ho_put);
++				} else
++				if (ib_get != ib_put) {
++					nv_wr32(dev, 0x003334, ib_put);
++				}
++			} else {
++				NV_INFO(dev, "PFIFO_DMA_PUSHER - Ch %d Get 0x%08x "
++					     "Put 0x%08x State 0x%08x Push 0x%08x\n",
++					chid, get, put, state, push);
+
+-			status &= ~NV_PFIFO_INTR_DMA_PUSHER;
+-			nv_wr32(dev, NV03_PFIFO_INTR_0,
+-						NV_PFIFO_INTR_DMA_PUSHER);
++				if (get != put)
++					nv_wr32(dev, 0x003244, put);
++			}
+
+-			nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_STATE, 0x00000000);
+-			if (nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_PUT) != get)
+-				nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_GET,
+-								get + 4);
++			nv_wr32(dev, 0x003228, 0x00000000);
++			nv_wr32(dev, 0x003220, 0x00000001);
++			nv_wr32(dev, 0x002100, NV_PFIFO_INTR_DMA_PUSHER);
++			status &= ~NV_PFIFO_INTR_DMA_PUSHER;
+		}
+
+		if (status & NV_PFIFO_INTR_SEMAPHORE) {
+--
+1.7.2.2
diff --git a/kernel.spec b/kernel.spec
index 90d684f..e2e4369 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -48,7 +48,7 @@ Summary: The Linux kernel
 # reset this by hand to 1 (or to 0 and then use rpmdev-bumpspec).
 # scripts/rebase.sh should be made to do that for you, actually.
 #
-%global baserelease 57
+%global baserelease 58
 %global fedora_build %{baserelease}
 
 # base_sublevel is the kernel version we're starting with and patching
@@ -714,6 +714,7 @@ Patch1830: drm-i915-explosion-following-oom-in-do_execbuffer.patch
 Patch1900: linux-2.6-intel-iommu-igfx.patch
 Patch1901: drm-nouveau-acpi-edid-fix.patch
 Patch1902: agp-intel-use-the-correct-mask-to-detect-i830-aperture-size.patch
+Patch1903: drm-nouveau-pusher-intr.patch
 # radeon
 
 # linux1394 git patches
@@ -1406,6 +1407,7 @@ ApplyPatch drm-nouveau-race-fix.patch
 ApplyPatch drm-nouveau-nva3-noaccel.patch
 ApplyPatch drm-nouveau-nv50-crtc-update-delay.patch
 ApplyPatch drm-nouveau-acpi-edid-fix.patch
+ApplyPatch drm-nouveau-pusher-intr.patch
 
 ApplyPatch drm-intel-big-hammer.patch
 ApplyOptionalPatch drm-intel-next.patch
@@ -2162,6 +2164,9 @@ fi
 
 
 %changelog
+* Mon Sep 27 2010 Ben Skeggs <bskeggs at redhat.com> 2.6.34.7-58
+- nouveau: better handling of certain GPU errors
+
 * Fri Sep 24 2010 Chuck Ebbert <cebbert at redhat.com>
 - Fix typo in previous Xen fix that causes boot failure.
 


More information about the scm-commits mailing list