aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> 2012-05-24 10:16:26 -0400
committer: Ingo Molnar <mingo@kernel.org> 2012-07-24 07:53:14 -0400
commit: d35be8bab9b0ce44bed4b9453f86ebf64062721e (patch)
tree: 295823c4f0d50be5b30714a083de658e1a4185e6 /kernel
parent: ee08d1284ea9235b29bd2d9b7493b4b4cf3da09c (diff)
CPU hotplug, cpusets, suspend: Don't modify cpusets during suspend/resume
In the event of CPU hotplug, the kernel modifies the cpusets' cpus_allowed masks as and when necessary to ensure that the tasks belonging to the cpusets have some place (online CPUs) to run on. And regular CPU hotplug is destructive in the sense that the kernel doesn't remember the original cpuset configurations set by the user, across hotplug operations.

However, suspend/resume (which uses CPU hotplug) is a special case in which the kernel has the responsibility to restore the system (during resume), to exactly the same state it was in before suspend.

In order to achieve that, do the following:

1. Don't modify cpusets during suspend/resume. At all. In particular, don't move the tasks from one cpuset to another, and don't modify any cpuset's cpus_allowed mask. So, simply ignore cpusets during the CPU hotplug operations that are carried out in the suspend/resume path.

2. However, cpusets and sched domains are related. We just want to avoid altering cpusets alone. So, to keep the sched domains updated, build a single sched domain (containing all active cpus) during each of the CPU hotplug operations carried out in s/r path, effectively ignoring the cpusets' cpus_allowed masks. (Since userspace is frozen while doing all this, it will go unnoticed.)

3. During the last CPU online operation during resume, build the sched domains by looking up the (unaltered) cpusets' cpus_allowed masks. That will bring back the system to the same original state as it was in before suspend.

Ultimately, this will not only solve the cpuset problem related to suspend/resume (i.e., restores the cpusets to exactly what they were before suspend, by not touching them at all) but also speeds up suspend/resume because we avoid running cpuset update code for every CPU being offlined/onlined.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20120524141611.3692.20155.stgit@srivatsabhat.in.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/cpuset.c 3
-rw-r--r-- kernel/sched/core.c 40
2 files changed, 39 insertions, 4 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8c8bd652dd12..746d1eeb5dbe 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2054,6 +2054,9 @@ static void scan_for_empty_cpusets(struct cpuset *root)
2054 * (of no affect) on systems that are actively using CPU hotplug 2054 * (of no affect) on systems that are actively using CPU hotplug
2055 * but making no active use of cpusets. 2055 * but making no active use of cpusets.
2056 * 2056 *
2057 * The only exception to this is suspend/resume, where we don't
2058 * modify cpusets at all.
2059 *
2057 * This routine ensures that top_cpuset.cpus_allowed tracks 2060 * This routine ensures that top_cpuset.cpus_allowed tracks
2058 * cpu_active_mask on each CPU hotplug (cpuhp) event. 2061 * cpu_active_mask on each CPU hotplug (cpuhp) event.
2059 * 2062 *
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 468bdd44c1ba..4c1d80c6b318 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7097,34 +7097,66 @@ match2:
7097 mutex_unlock(&sched_domains_mutex); 7097 mutex_unlock(&sched_domains_mutex);
7098} 7098}
7099 7099
7100static int num_cpus_frozen; /* used to mark begin/end of suspend/resume */
7101
7100/* 7102/*
7101 * Update cpusets according to cpu_active mask. If cpusets are 7103 * Update cpusets according to cpu_active mask. If cpusets are
7102 * disabled, cpuset_update_active_cpus() becomes a simple wrapper 7104 * disabled, cpuset_update_active_cpus() becomes a simple wrapper
7103 * around partition_sched_domains(). 7105 * around partition_sched_domains().
7106 *
7107 * If we come here as part of a suspend/resume, don't touch cpusets because we
7108 * want to restore it back to its original state upon resume anyway.
7104 */ 7109 */
7105static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action, 7110static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
7106 void *hcpu) 7111 void *hcpu)
7107{ 7112{
7108 switch (action & ~CPU_TASKS_FROZEN) { 7113 switch (action) {
7114 case CPU_ONLINE_FROZEN:
7115 case CPU_DOWN_FAILED_FROZEN:
7116
7117 /*
7118 * num_cpus_frozen tracks how many CPUs are involved in suspend
7119 * resume sequence. As long as this is not the last online
7120 * operation in the resume sequence, just build a single sched
7121 * domain, ignoring cpusets.
7122 */
7123 num_cpus_frozen--;
7124 if (likely(num_cpus_frozen)) {
7125 partition_sched_domains(1, NULL, NULL);
7126 break;
7127 }
7128
7129 /*
7130 * This is the last CPU online operation. So fall through and
7131 * restore the original sched domains by considering the
7132 * cpuset configurations.
7133 */
7134
7109 case CPU_ONLINE: 7135 case CPU_ONLINE:
7110 case CPU_DOWN_FAILED: 7136 case CPU_DOWN_FAILED:
7111 cpuset_update_active_cpus(); 7137 cpuset_update_active_cpus();
7112 return NOTIFY_OK; 7138 break;
7113 default: 7139 default:
7114 return NOTIFY_DONE; 7140 return NOTIFY_DONE;
7115 } 7141 }
7142 return NOTIFY_OK;
7116} 7143}
7117 7144
7118static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action, 7145static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
7119 void *hcpu) 7146 void *hcpu)
7120{ 7147{
7121 switch (action & ~CPU_TASKS_FROZEN) { 7148 switch (action) {
7122 case CPU_DOWN_PREPARE: 7149 case CPU_DOWN_PREPARE:
7123 cpuset_update_active_cpus(); 7150 cpuset_update_active_cpus();
7124 return NOTIFY_OK; 7151 break;
7152 case CPU_DOWN_PREPARE_FROZEN:
7153 num_cpus_frozen++;
7154 partition_sched_domains(1, NULL, NULL);
7155 break;
7125 default: 7156 default:
7126 return NOTIFY_DONE; 7157 return NOTIFY_DONE;
7127 } 7158 }
7159 return NOTIFY_OK;
7128} 7160}
7129 7161
7130void __init sched_init_smp(void) 7162void __init sched_init_smp(void)