diff options
author | Cliff Wickman <cpw@sgi.com> | 2007-10-19 02:40:46 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-19 14:53:44 -0400 |
commit | 470fd646444c65a5d062a371f5ec8dcedee61239 (patch) | |
tree | 59b923486d4a95efa07c4b2ad7cb0b1fcc3f3c88 | |
parent | bd89aabc6761de1c35b154fe6f914a445d301510 (diff) |
hotplug cpu: migrate a task within its cpuset
When a cpu is disabled, move_task_off_dead_cpu() is called for tasks that have
been running on that cpu.
Currently, such a task is migrated:
1) to any cpu on the same node as the disabled cpu, which is both online
and among that task's cpus_allowed
2) to any cpu which is both online and among that task's cpus_allowed
It is typical of a multithreaded application running on a large NUMA system to
have its tasks confined to a cpuset so as to cluster them near the memory that
they share. Furthermore, it is typical to explicitly place such a task on a
specific cpu in that cpuset. And in that case the task's cpus_allowed
includes only a single cpu.
This patch inserts a preference to migrate such a task to some cpu within
its cpuset (and to set its cpus_allowed to its entire cpuset).
With this patch, such a task is migrated:
1) to any cpu on the same node as the disabled cpu, which is both online
and among that task's cpus_allowed
2) to any online cpu within the task's cpuset
3) to any cpu which is both online and among that task's cpus_allowed
In order to do this, move_task_off_dead_cpu() must make a call to
cpuset_cpus_allowed_locked(), a new subset of cpuset_cpus_allowed(), that will
not block. (name change - per Oleg's suggestion)
Calls are made to cpuset_lock() and cpuset_unlock() in migration_call() to hold
the cpuset mutex during the whole migrate_live_tasks() and
migrate_dead_tasks() procedure.
[akpm@linux-foundation.org: build fix]
[pj@sgi.com: Fix indentation and spacing]
Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/cpu-hotplug.txt | 4 | ||||
-rw-r--r-- | include/linux/cpuset.h | 5 | ||||
-rw-r--r-- | kernel/cpuset.c | 15 | ||||
-rw-r--r-- | kernel/sched.c | 12 |
4 files changed, 33 insertions, 3 deletions
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index b6d24c22274b..a741f658a3c9 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt | |||
@@ -220,7 +220,9 @@ A: The following happen, listed in no particular order :-) | |||
220 | CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the | 220 | CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the |
221 | CPU is being offlined while tasks are frozen due to a suspend operation in | 221 | CPU is being offlined while tasks are frozen due to a suspend operation in |
222 | progress | 222 | progress |
223 | - All process is migrated away from this outgoing CPU to a new CPU | 223 | - All processes are migrated away from this outgoing CPU to new CPUs. |
224 | The new CPU is chosen from each process' current cpuset, which may be | ||
225 | a subset of all online CPUs. | ||
224 | - All interrupts targeted to this CPU is migrated to a new CPU | 226 | - All interrupts targeted to this CPU is migrated to a new CPU |
225 | - timers/bottom half/task lets are also migrated to a new CPU | 227 | - timers/bottom half/task lets are also migrated to a new CPU |
226 | - Once all services are migrated, kernel calls an arch specific routine | 228 | - Once all services are migrated, kernel calls an arch specific routine |
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 31adfde1c95f..ecae585ec3da 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h | |||
@@ -21,6 +21,7 @@ extern int cpuset_init_early(void); | |||
21 | extern int cpuset_init(void); | 21 | extern int cpuset_init(void); |
22 | extern void cpuset_init_smp(void); | 22 | extern void cpuset_init_smp(void); |
23 | extern cpumask_t cpuset_cpus_allowed(struct task_struct *p); | 23 | extern cpumask_t cpuset_cpus_allowed(struct task_struct *p); |
24 | extern cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p); | ||
24 | extern nodemask_t cpuset_mems_allowed(struct task_struct *p); | 25 | extern nodemask_t cpuset_mems_allowed(struct task_struct *p); |
25 | #define cpuset_current_mems_allowed (current->mems_allowed) | 26 | #define cpuset_current_mems_allowed (current->mems_allowed) |
26 | void cpuset_init_current_mems_allowed(void); | 27 | void cpuset_init_current_mems_allowed(void); |
@@ -87,6 +88,10 @@ static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p) | |||
87 | { | 88 | { |
88 | return cpu_possible_map; | 89 | return cpu_possible_map; |
89 | } | 90 | } |
91 | static inline cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p) | ||
92 | { | ||
93 | return cpu_possible_map; | ||
94 | } | ||
90 | 95 | ||
91 | static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) | 96 | static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) |
92 | { | 97 | { |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index fa31cb9f9898..50f5dc463688 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -1818,10 +1818,23 @@ cpumask_t cpuset_cpus_allowed(struct task_struct *tsk) | |||
1818 | cpumask_t mask; | 1818 | cpumask_t mask; |
1819 | 1819 | ||
1820 | mutex_lock(&callback_mutex); | 1820 | mutex_lock(&callback_mutex); |
1821 | mask = cpuset_cpus_allowed_locked(tsk); | ||
1822 | mutex_unlock(&callback_mutex); | ||
1823 | |||
1824 | return mask; | ||
1825 | } | ||
1826 | |||
1827 | /** | ||
1828 | * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset. | ||
1829 | * Must be called with callback_mutex held. | ||
1830 | **/ | ||
1831 | cpumask_t cpuset_cpus_allowed_locked(struct task_struct *tsk) | ||
1832 | { | ||
1833 | cpumask_t mask; | ||
1834 | |||
1821 | task_lock(tsk); | 1835 | task_lock(tsk); |
1822 | guarantee_online_cpus(task_cs(tsk), &mask); | 1836 | guarantee_online_cpus(task_cs(tsk), &mask); |
1823 | task_unlock(tsk); | 1837 | task_unlock(tsk); |
1824 | mutex_unlock(&callback_mutex); | ||
1825 | 1838 | ||
1826 | return mask; | 1839 | return mask; |
1827 | } | 1840 | } |
diff --git a/kernel/sched.c b/kernel/sched.c index a7e30462600f..4071306e1088 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -5160,8 +5160,16 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) | |||
5160 | 5160 | ||
5161 | /* No more Mr. Nice Guy. */ | 5161 | /* No more Mr. Nice Guy. */ |
5162 | if (dest_cpu == NR_CPUS) { | 5162 | if (dest_cpu == NR_CPUS) { |
5163 | cpumask_t cpus_allowed = cpuset_cpus_allowed_locked(p); | ||
5164 | /* | ||
5165 | * Try to stay on the same cpuset, where the | ||
5166 | * current cpuset may be a subset of all cpus. | ||
5167 | * The cpuset_cpus_allowed_locked() variant of | ||
5168 | * cpuset_cpus_allowed() will not block. It must be | ||
5169 | * called within calls to cpuset_lock/cpuset_unlock. | ||
5170 | */ | ||
5163 | rq = task_rq_lock(p, &flags); | 5171 | rq = task_rq_lock(p, &flags); |
5164 | cpus_setall(p->cpus_allowed); | 5172 | p->cpus_allowed = cpus_allowed; |
5165 | dest_cpu = any_online_cpu(p->cpus_allowed); | 5173 | dest_cpu = any_online_cpu(p->cpus_allowed); |
5166 | task_rq_unlock(rq, &flags); | 5174 | task_rq_unlock(rq, &flags); |
5167 | 5175 | ||
@@ -5527,6 +5535,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
5527 | 5535 | ||
5528 | case CPU_DEAD: | 5536 | case CPU_DEAD: |
5529 | case CPU_DEAD_FROZEN: | 5537 | case CPU_DEAD_FROZEN: |
5538 | cpuset_lock(); /* around calls to cpuset_cpus_allowed_lock() */ | ||
5530 | migrate_live_tasks(cpu); | 5539 | migrate_live_tasks(cpu); |
5531 | rq = cpu_rq(cpu); | 5540 | rq = cpu_rq(cpu); |
5532 | kthread_stop(rq->migration_thread); | 5541 | kthread_stop(rq->migration_thread); |
@@ -5540,6 +5549,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
5540 | rq->idle->sched_class = &idle_sched_class; | 5549 | rq->idle->sched_class = &idle_sched_class; |
5541 | migrate_dead_tasks(cpu); | 5550 | migrate_dead_tasks(cpu); |
5542 | spin_unlock_irq(&rq->lock); | 5551 | spin_unlock_irq(&rq->lock); |
5552 | cpuset_unlock(); | ||
5543 | migrate_nr_uninterruptible(rq); | 5553 | migrate_nr_uninterruptible(rq); |
5544 | BUG_ON(rq->nr_running != 0); | 5554 | BUG_ON(rq->nr_running != 0); |
5545 | 5555 | ||