aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Cliff Wickman <cpw@sgi.com> 2007-10-19 02:40:46 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-10-19 14:53:44 -0400
commit 470fd646444c65a5d062a371f5ec8dcedee61239 (patch)
tree 59b923486d4a95efa07c4b2ad7cb0b1fcc3f3c88
parent bd89aabc6761de1c35b154fe6f914a445d301510 (diff)
hotplug cpu: migrate a task within its cpuset
When a cpu is disabled, move_task_off_dead_cpu() is called for tasks that have been running on that cpu.

Currently, such a task is migrated:
 1) to any cpu on the same node as the disabled cpu, which is both online and among that task's cpus_allowed
 2) to any cpu which is both online and among that task's cpus_allowed

It is typical of a multithreaded application running on a large NUMA system to have its tasks confined to a cpuset so as to cluster them near the memory that they share. Furthermore, it is typical to explicitly place such a task on a specific cpu in that cpuset. And in that case the task's cpus_allowed includes only a single cpu.

This patch would insert a preference to migrate such a task to some cpu within its cpuset (and set its cpus_allowed to its entire cpuset).

With this patch, the task is migrated:
 1) to any cpu on the same node as the disabled cpu, which is both online and among that task's cpus_allowed
 2) to any online cpu within the task's cpuset
 3) to any cpu which is both online and among that task's cpus_allowed

In order to do this, move_task_off_dead_cpu() must make a call to cpuset_cpus_allowed_locked(), a new subset of cpuset_cpus_allowed(), that will not block. (name change - per Oleg's suggestion)

Calls are made to cpuset_lock() and cpuset_unlock() in migration_call() to hold the cpuset mutex during the whole migrate_live_tasks() and migrate_dead_tasks() procedure.

[akpm@linux-foundation.org: build fix]
[pj@sgi.com: Fix indentation and spacing]
Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- Documentation/cpu-hotplug.txt 4
-rw-r--r-- include/linux/cpuset.h 5
-rw-r--r-- kernel/cpuset.c 15
-rw-r--r-- kernel/sched.c 12
4 files changed, 33 insertions, 3 deletions
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index b6d24c22274b..a741f658a3c9 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -220,7 +220,9 @@ A: The following happen, listed in no particular order :-)
220 CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the 220 CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the
221 CPU is being offlined while tasks are frozen due to a suspend operation in 221 CPU is being offlined while tasks are frozen due to a suspend operation in
222 progress 222 progress
223- All process is migrated away from this outgoing CPU to a new CPU 223- All processes are migrated away from this outgoing CPU to new CPUs.
224 The new CPU is chosen from each process' current cpuset, which may be
225 a subset of all online CPUs.
224- All interrupts targeted to this CPU is migrated to a new CPU 226- All interrupts targeted to this CPU is migrated to a new CPU
225- timers/bottom half/task lets are also migrated to a new CPU 227- timers/bottom half/task lets are also migrated to a new CPU
226- Once all services are migrated, kernel calls an arch specific routine 228- Once all services are migrated, kernel calls an arch specific routine
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 31adfde1c95f..ecae585ec3da 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -21,6 +21,7 @@ extern int cpuset_init_early(void);
21extern int cpuset_init(void); 21extern int cpuset_init(void);
22extern void cpuset_init_smp(void); 22extern void cpuset_init_smp(void);
23extern cpumask_t cpuset_cpus_allowed(struct task_struct *p); 23extern cpumask_t cpuset_cpus_allowed(struct task_struct *p);
24extern cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p);
24extern nodemask_t cpuset_mems_allowed(struct task_struct *p); 25extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
25#define cpuset_current_mems_allowed (current->mems_allowed) 26#define cpuset_current_mems_allowed (current->mems_allowed)
26void cpuset_init_current_mems_allowed(void); 27void cpuset_init_current_mems_allowed(void);
@@ -87,6 +88,10 @@ static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p)
87{ 88{
88 return cpu_possible_map; 89 return cpu_possible_map;
89} 90}
91static inline cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p)
92{
93 return cpu_possible_map;
94}
90 95
91static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) 96static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
92{ 97{
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index fa31cb9f9898..50f5dc463688 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1818,10 +1818,23 @@ cpumask_t cpuset_cpus_allowed(struct task_struct *tsk)
1818 cpumask_t mask; 1818 cpumask_t mask;
1819 1819
1820 mutex_lock(&callback_mutex); 1820 mutex_lock(&callback_mutex);
1821 mask = cpuset_cpus_allowed_locked(tsk);
1822 mutex_unlock(&callback_mutex);
1823
1824 return mask;
1825}
1826
1827/**
1828 * cpuset_cpus_allowed_locked - return cpus_allowed mask from a task's cpuset.
1829 * Must be called with callback_mutex held.
1830 **/
1831cpumask_t cpuset_cpus_allowed_locked(struct task_struct *tsk)
1832{
1833 cpumask_t mask;
1834
1821 task_lock(tsk); 1835 task_lock(tsk);
1822 guarantee_online_cpus(task_cs(tsk), &mask); 1836 guarantee_online_cpus(task_cs(tsk), &mask);
1823 task_unlock(tsk); 1837 task_unlock(tsk);
1824 mutex_unlock(&callback_mutex);
1825 1838
1826 return mask; 1839 return mask;
1827} 1840}
diff --git a/kernel/sched.c b/kernel/sched.c
index a7e30462600f..4071306e1088 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5160,8 +5160,16 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
5160 5160
5161 /* No more Mr. Nice Guy. */ 5161 /* No more Mr. Nice Guy. */
5162 if (dest_cpu == NR_CPUS) { 5162 if (dest_cpu == NR_CPUS) {
5163 cpumask_t cpus_allowed = cpuset_cpus_allowed_locked(p);
5164 /*
5165 * Try to stay on the same cpuset, where the
5166 * current cpuset may be a subset of all cpus.
5167 * The cpuset_cpus_allowed_locked() variant of
5168 * cpuset_cpus_allowed() will not block. It must be
5169 * called within calls to cpuset_lock/cpuset_unlock.
5170 */
5163 rq = task_rq_lock(p, &flags); 5171 rq = task_rq_lock(p, &flags);
5164 cpus_setall(p->cpus_allowed); 5172 p->cpus_allowed = cpus_allowed;
5165 dest_cpu = any_online_cpu(p->cpus_allowed); 5173 dest_cpu = any_online_cpu(p->cpus_allowed);
5166 task_rq_unlock(rq, &flags); 5174 task_rq_unlock(rq, &flags);
5167 5175
@@ -5527,6 +5535,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5527 5535
5528 case CPU_DEAD: 5536 case CPU_DEAD:
5529 case CPU_DEAD_FROZEN: 5537 case CPU_DEAD_FROZEN:
5538 cpuset_lock(); /* around calls to cpuset_cpus_allowed_locked() */
5530 migrate_live_tasks(cpu); 5539 migrate_live_tasks(cpu);
5531 rq = cpu_rq(cpu); 5540 rq = cpu_rq(cpu);
5532 kthread_stop(rq->migration_thread); 5541 kthread_stop(rq->migration_thread);
@@ -5540,6 +5549,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5540 rq->idle->sched_class = &idle_sched_class; 5549 rq->idle->sched_class = &idle_sched_class;
5541 migrate_dead_tasks(cpu); 5550 migrate_dead_tasks(cpu);
5542 spin_unlock_irq(&rq->lock); 5551 spin_unlock_irq(&rq->lock);
5552 cpuset_unlock();
5543 migrate_nr_uninterruptible(rq); 5553 migrate_nr_uninterruptible(rq);
5544 BUG_ON(rq->nr_running != 0); 5554 BUG_ON(rq->nr_running != 0);
5545 5555