[kernel/f16] Add patch to fix cpu pinning after suspend/resume (rhbz 714271)

Josh Boyer jwboyer at fedoraproject.org
Wed Jul 25 12:07:17 UTC 2012


commit 0e04e7c4764f828f62c0b6d61f1b0cb7e862e0a7
Author: Josh Boyer <jwboyer at redhat.com>
Date:   Tue Jul 24 12:59:03 2012 -0400

    Add patch to fix cpu pinning after suspend/resume (rhbz 714271)

 ...pusets-suspend-Dont-modify-cpusets-during.patch |  139 ++++++++++++++++++++
 kernel.spec                                        |   11 ++-
 2 files changed, 149 insertions(+), 1 deletions(-)
---
diff --git a/CPU-hotplug-cpusets-suspend-Dont-modify-cpusets-during.patch b/CPU-hotplug-cpusets-suspend-Dont-modify-cpusets-during.patch
new file mode 100644
index 0000000..f57ecee
--- /dev/null
+++ b/CPU-hotplug-cpusets-suspend-Dont-modify-cpusets-during.patch
@@ -0,0 +1,139 @@
+From 0c1508129adc051fabaf8debefea79baa2f1a81b Mon Sep 17 00:00:00 2001
+From: "Srivatsa S. Bhat" <srivatsa.bhat at linux.vnet.ibm.com>
+Date: Thu, 24 May 2012 19:46:26 +0530
+Subject: [PATCH] CPU hotplug, cpusets, suspend: Don't modify cpusets during
+ suspend/resume
+
+In the event of CPU hotplug, the kernel modifies the cpusets' cpus_allowed
+masks as and when necessary to ensure that the tasks belonging to the cpusets
+have some place (online CPUs) to run on. And regular CPU hotplug is
+destructive in the sense that the kernel doesn't remember the original cpuset
+configurations set by the user, across hotplug operations.
+
+However, suspend/resume (which uses CPU hotplug) is a special case in which
+the kernel has the responsibility to restore the system (during resume), to
+exactly the same state it was in before suspend.
+
+In order to achieve that, do the following:
+
+1. Don't modify cpusets during suspend/resume. At all.
+   In particular, don't move the tasks from one cpuset to another, and
+   don't modify any cpuset's cpus_allowed mask. So, simply ignore cpusets
+   during the CPU hotplug operations that are carried out in the
+   suspend/resume path.
+
+2. However, cpusets and sched domains are related. We just want to avoid
+   altering cpusets alone. So, to keep the sched domains updated, build
+   a single sched domain (containing all active cpus) during each of the
+   CPU hotplug operations carried out in s/r path, effectively ignoring
+   the cpusets' cpus_allowed masks.
+
+   (Since userspace is frozen while doing all this, it will go unnoticed.)
+
+3. During the last CPU online operation during resume, build the sched
+   domains by looking up the (unaltered) cpusets' cpus_allowed masks.
+   That will bring back the system to the same original state as it was in
+   before suspend.
+
+Ultimately, this will not only solve the cpuset problem related to
+suspend/resume (i.e., restore the cpusets to exactly what they were before
+suspend, by not touching them at all) but also speed up suspend/resume because
+we avoid running cpuset update code for every CPU being offlined/onlined.
+
+Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat at linux.vnet.ibm.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra at chello.nl>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Andrew Morton <akpm at linux-foundation.org>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Link: http://lkml.kernel.org/r/20120524141611.3692.20155.stgit@srivatsabhat.in.ibm.com
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+---
+ kernel/cpuset.c     |    3 +++
+ kernel/sched/core.c |   40 ++++++++++++++++++++++++++++++++++++----
+ 2 files changed, 39 insertions(+), 4 deletions(-)
+
+--- linux-3.4.6-3.1.fc17.noarch.orig/kernel/cpuset.c
++++ linux-3.4.6-3.1.fc17.noarch/kernel/cpuset.c
+@@ -2065,6 +2065,9 @@ static void scan_for_empty_cpusets(struc
+  * (of no affect) on systems that are actively using CPU hotplug
+  * but making no active use of cpusets.
+  *
++ * The only exception to this is suspend/resume, where we don't
++ * modify cpusets at all.
++ *
+  * This routine ensures that top_cpuset.cpus_allowed tracks
+  * cpu_active_mask on each CPU hotplug (cpuhp) event.
+  *
+--- linux-3.4.6-3.1.fc17.noarch.orig/kernel/sched/core.c
++++ linux-3.4.6-3.1.fc17.noarch/kernel/sched/core.c
+@@ -6931,34 +6931,66 @@ int __init sched_create_sysfs_power_savi
+ }
+ #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
+ 
++static int num_cpus_frozen;	/* used to mark begin/end of suspend/resume */
++
+ /*
+  * Update cpusets according to cpu_active mask.  If cpusets are
+  * disabled, cpuset_update_active_cpus() becomes a simple wrapper
+  * around partition_sched_domains().
++ *
++ * If we come here as part of a suspend/resume, don't touch cpusets because we
++ * want to restore them to their original state upon resume anyway.
+  */
+ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
+ 			     void *hcpu)
+ {
+-	switch (action & ~CPU_TASKS_FROZEN) {
++	switch (action) {
++	case CPU_ONLINE_FROZEN:
++	case CPU_DOWN_FAILED_FROZEN:
++
++		/*
++		 * num_cpus_frozen tracks how many CPUs are involved in the
++		 * suspend/resume sequence. As long as this is not the last
++		 * online operation in the resume sequence, just build a single
++		 * sched domain, ignoring cpusets.
++		 */
++		num_cpus_frozen--;
++		if (likely(num_cpus_frozen)) {
++			partition_sched_domains(1, NULL, NULL);
++			break;
++		}
++
++		/*
++		 * This is the last CPU online operation. So fall through and
++		 * restore the original sched domains by considering the
++		 * cpuset configurations.
++		 */
++
+ 	case CPU_ONLINE:
+ 	case CPU_DOWN_FAILED:
+ 		cpuset_update_active_cpus();
+-		return NOTIFY_OK;
++		break;
+ 	default:
+ 		return NOTIFY_DONE;
+ 	}
++	return NOTIFY_OK;
+ }
+ 
+ static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
+ 			       void *hcpu)
+ {
+-	switch (action & ~CPU_TASKS_FROZEN) {
++	switch (action) {
+ 	case CPU_DOWN_PREPARE:
+ 		cpuset_update_active_cpus();
+-		return NOTIFY_OK;
++		break;
++	case CPU_DOWN_PREPARE_FROZEN:
++		num_cpus_frozen++;
++		partition_sched_domains(1, NULL, NULL);
++		break;
+ 	default:
+ 		return NOTIFY_DONE;
+ 	}
++	return NOTIFY_OK;
+ }
+ 
+ void __init sched_init_smp(void)
diff --git a/kernel.spec b/kernel.spec
index 420764e..2069a06 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -54,7 +54,7 @@ Summary: The Linux kernel
 # For non-released -rc kernels, this will be appended after the rcX and
 # gitX tags, so a 3 here would become part of release "0.rcX.gitX.3"
 #
-%global baserelease 1
+%global baserelease 2
 %global fedora_build %{baserelease}
 
 # base_sublevel is the kernel version we're starting with and patching
@@ -761,6 +761,9 @@ Patch22056: crypto-aesni-intel-fix-wrong-kfree-pointer.patch
 #rhbz 772730
 Patch22058: ACPI-AC-check-the-return-value-of-power_supply_register.patch
 
+#rhbz 714271
+Patch22060: CPU-hotplug-cpusets-suspend-Dont-modify-cpusets-during.patch
+
 # END OF PATCH DEFINITIONS
 
 %endif
@@ -1409,6 +1412,9 @@ ApplyPatch crypto-aesni-intel-fix-wrong-kfree-pointer.patch
 #rhbz 772730
 ApplyPatch ACPI-AC-check-the-return-value-of-power_supply_register.patch
 
+#rhbz 714271
+ApplyPatch CPU-hotplug-cpusets-suspend-Dont-modify-cpusets-during.patch
+
 # END OF PATCH APPLICATIONS
 
 %endif
@@ -2147,6 +2153,9 @@ fi
 # and build.
 
 %changelog
+* Wed Jul 25 2012 Josh Boyer <jwboyer at redhat.com>
+- Add patch to fix cpu pinning after suspend/resume (rhbz 714271)
+
 * Thu Jul 19 2012 Josh Boyer <jwboyer at redhat.com> - 3.4.6-1
 - Linux v3.4.6
 


More information about the scm-commits mailing list