[kernel] Add two patches to fix stalls in khugepaged (rhbz 735946)

Josh Boyer jwboyer at fedoraproject.org
Mon Oct 17 17:34:54 UTC 2011


commit 922059227ce012ccb60adaefee0e4237f46bee46
Author: Josh Boyer <jwboyer at redhat.com>
Date:   Mon Oct 17 13:24:14 2011 -0400

    Add two patches to fix stalls in khugepaged (rhbz 735946)

 ...imit-direct-reclaim-for-higher-order-allo.patch |   54 +++++++++++++
 ...claim-compaction-if-compaction-can-procee.patch |   81 ++++++++++++++++++++
 TODO                                               |    1 +
 kernel.spec                                        |   11 +++
 4 files changed, 147 insertions(+), 0 deletions(-)
---
diff --git a/0001-mm-vmscan-Limit-direct-reclaim-for-higher-order-allo.patch b/0001-mm-vmscan-Limit-direct-reclaim-for-higher-order-allo.patch
new file mode 100644
index 0000000..77777f0
--- /dev/null
+++ b/0001-mm-vmscan-Limit-direct-reclaim-for-higher-order-allo.patch
@@ -0,0 +1,54 @@
+From 6b7025ea927d290a59d2772828435c1893f0267f Mon Sep 17 00:00:00 2001
+From: Rik van Riel <riel at redhat.com>
+Date: Fri, 7 Oct 2011 16:17:22 +0100
+Subject: [PATCH 1/2] mm: vmscan: Limit direct reclaim for higher order
+ allocations
+
+When suffering from memory fragmentation due to unfreeable pages,
+THP page faults will repeatedly try to compact memory.  Due to the
+unfreeable pages, compaction fails.
+
+Needless to say, at that point page reclaim also fails to create
+free contiguous 2MB areas.  However, that doesn't stop the current
+code from trying, over and over again, and freeing a minimum of 4MB
+(2UL << sc->order pages) at every single invocation.
+
+This resulted in my 12GB system having 2-3GB free memory, a
+corresponding amount of used swap and very sluggish response times.
+
+This can be avoided by having the direct reclaim code not reclaim from
+zones that already have plenty of free memory available for compaction.
+
+If compaction still fails due to unmovable memory, doing additional
+reclaim will only hurt the system, not help.
+
+Signed-off-by: Rik van Riel <riel at redhat.com>
+Signed-off-by: Mel Gorman <mgorman at suse.de>
+---
+ mm/vmscan.c |   10 ++++++++++
+ 1 files changed, 10 insertions(+), 0 deletions(-)
+
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 6072d74..8c03534 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2022,6 +2022,16 @@ static void shrink_zones(int priority, struct zonelist *zonelist,
+ 				continue;
+ 			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+ 				continue;	/* Let kswapd poll it */
++			if (COMPACTION_BUILD) {
++				/*
++				 * If we already have plenty of memory free
++				 * for compaction, don't free any more.
++				 */
++				if (sc->order > PAGE_ALLOC_COSTLY_ORDER &&
++					(compaction_suitable(zone, sc->order) ||
++					 compaction_deferred(zone)))
++					continue;
++			}
+ 			/*
+ 			 * This steals pages from memory cgroups over softlimit
+ 			 * and returns the number of reclaimed pages and
+-- 
+1.7.6.4
+
diff --git a/0002-mm-Abort-reclaim-compaction-if-compaction-can-procee.patch b/0002-mm-Abort-reclaim-compaction-if-compaction-can-procee.patch
new file mode 100644
index 0000000..e74b64d
--- /dev/null
+++ b/0002-mm-Abort-reclaim-compaction-if-compaction-can-procee.patch
@@ -0,0 +1,81 @@
+From c01043c9aa51a63bd01c60e53494ca4a7e994542 Mon Sep 17 00:00:00 2001
+From: Mel Gorman <mgorman at suse.de>
+Date: Fri, 7 Oct 2011 16:17:23 +0100
+Subject: [PATCH 2/2] mm: Abort reclaim/compaction if compaction can proceed
+
+If compaction can proceed, shrink_zones() stops doing any work but
+the callers still call shrink_slab(), raise the priority and potentially
+sleep.  This patch aborts direct reclaim/compaction entirely if
+compaction can proceed.
+
+Signed-off-by: Mel Gorman <mgorman at suse.de>
+---
+ mm/vmscan.c |   20 ++++++++++++++++----
+ 1 files changed, 16 insertions(+), 4 deletions(-)
+
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 8c03534..b295a38 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2000,14 +2000,19 @@ restart:
+  *
+  * If a zone is deemed to be full of pinned pages then just give it a light
+  * scan then give up on it.
++ *
++ * This function returns true if a zone is being reclaimed for a costly
++ * high-order allocation and compaction is either ready to begin or deferred.
++ * This indicates to the caller that it should retry the allocation or fail.
+  */
+-static void shrink_zones(int priority, struct zonelist *zonelist,
++static bool shrink_zones(int priority, struct zonelist *zonelist,
+ 					struct scan_control *sc)
+ {
+ 	struct zoneref *z;
+ 	struct zone *zone;
+ 	unsigned long nr_soft_reclaimed;
+ 	unsigned long nr_soft_scanned;
++	bool should_abort_reclaim = false;
+ 
+ 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
+ 					gfp_zone(sc->gfp_mask), sc->nodemask) {
+@@ -2025,12 +2030,15 @@ static void shrink_zones(int priority, struct zonelist *zonelist,
+ 			if (COMPACTION_BUILD) {
+ 				/*
+ 				 * If we already have plenty of memory free
+-				 * for compaction, don't free any more.
++				 * for compaction in this zone, don't free any
++				 * more.
+ 				 */
+ 				if (sc->order > PAGE_ALLOC_COSTLY_ORDER &&
+ 					(compaction_suitable(zone, sc->order) ||
+-					 compaction_deferred(zone)))
++					 compaction_deferred(zone))) {
++					should_abort_reclaim = true;
+ 					continue;
++				}
+ 			}
+ 			/*
+ 			 * This steals pages from memory cgroups over softlimit
+@@ -2049,6 +2057,8 @@ static void shrink_zones(int priority, struct zonelist *zonelist,
+ 
+ 		shrink_zone(priority, zone, sc);
+ 	}
++
++	return should_abort_reclaim;
+ }
+ 
+ static bool zone_reclaimable(struct zone *zone)
+@@ -2113,7 +2123,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
+ 		sc->nr_scanned = 0;
+ 		if (!priority)
+ 			disable_swap_token(sc->mem_cgroup);
+-		shrink_zones(priority, zonelist, sc);
++		if (shrink_zones(priority, zonelist, sc))
++			break;
++
+ 		/*
+ 		 * Don't shrink slabs when reclaiming memory from
+ 		 * over limit cgroups
+-- 
+1.7.6.4
+
diff --git a/TODO b/TODO
index ff6db7d..647d480 100644
--- a/TODO
+++ b/TODO
@@ -17,6 +17,7 @@
 * add-macbookair41-keyboard.patch
 * ucvideo-fix-crash-when-linking-entities.patch
 * mmc-Always-check-for-lower-base-frequency-quirk-for-.patch (also CC'd stable)
+* 000[12]-mm-*
 
 **** Other stuff that should go upstream (in decreasing likelyhood) ************************************
 
diff --git a/kernel.spec b/kernel.spec
index 9d8f211..ced743b 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -741,6 +741,10 @@ Patch21001: arm-smsc-support-reading-mac-address-from-device-tree.patch
 #rhbz #722509
 Patch21002: mmc-Always-check-for-lower-base-frequency-quirk-for-.patch
 
+#rhbz #735946
+Patch21020: 0001-mm-vmscan-Limit-direct-reclaim-for-higher-order-allo.patch
+Patch21021: 0002-mm-Abort-reclaim-compaction-if-compaction-can-procee.patch
+
 %endif
 
 BuildRoot: %{_tmppath}/kernel-%{KVERREL}-root
@@ -1358,6 +1362,10 @@ ApplyPatch mmc-Always-check-for-lower-base-frequency-quirk-for-.patch
 # utrace.
 ApplyPatch utrace.patch
 
+#rhbz #735946
+ApplyPatch 0001-mm-vmscan-Limit-direct-reclaim-for-higher-order-allo.patch
+ApplyPatch 0002-mm-Abort-reclaim-compaction-if-compaction-can-procee.patch
+
 # END OF PATCH APPLICATIONS
 
 %endif
@@ -2066,6 +2074,9 @@ fi
 #                 ||----w |
 #                 ||     ||
 %changelog
+* Mon Oct 17 2011 Josh Boyer <jwboyer at redhat.com>
+- Add two patches to fix stalls in khugepaged (rhbz 735946)
+
 * Fri Oct 14 2011 Dave Jones <davej at redhat.com>
 - Disable CONFIG_ACPI_PROCFS_POWER which is supposed to be going away soon.
 


More information about the scm-commits mailing list