diff options
author | Tang Chen <tangchen@cn.fujitsu.com> | 2013-02-22 19:33:33 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-23 20:50:13 -0500 |
commit | aa00d89c2780d72d082a015e8cbb751e65fb30ee (patch) | |
tree | a326cbf62ab21d76af7c82c0038a985db77e69b1 /kernel | |
parent | e13fe8695c57fed678877a9f3f8e99fc637ff4fb (diff) |
sched: do not use cpu_to_node() to find an offlined cpu's node.
If a cpu is offline, its nid will be set to -1, and cpu_to_node(cpu)
will return -1. As a result, cpumask_of_node(nid) will return NULL. In
this case, find_next_bit() in for_each_cpu will get a NULL pointer and
cause panic.
Here is a call trace:
Call Trace:
<IRQ>
select_fallback_rq+0x71/0x190
try_to_wake_up+0x2cb/0x2f0
wake_up_process+0x15/0x20
hrtimer_wakeup+0x22/0x30
__run_hrtimer+0x83/0x320
hrtimer_interrupt+0x106/0x280
smp_apic_timer_interrupt+0x69/0x99
apic_timer_interrupt+0x6f/0x80
There is a hrtimer process sleeping, whose cpu has already been
offlined. When it is waken up, it tries to find another cpu to run, and
get a -1 nid. As a result, cpumask_of_node(-1) returns NULL, and causes
ernel panic.
This patch fixes this problem by judging if the nid is -1. If nid is
not -1, a cpu on the same node will be picked. Else, a online cpu on
another node will be picked.
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Jiang Liu <liuj97@gmail.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched/core.c | 28 |
1 files changed, 19 insertions, 9 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3a673a3b0c6b..053dfd7692d1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -1132,18 +1132,28 @@ EXPORT_SYMBOL_GPL(kick_process); | |||
1132 | */ | 1132 | */ |
1133 | static int select_fallback_rq(int cpu, struct task_struct *p) | 1133 | static int select_fallback_rq(int cpu, struct task_struct *p) |
1134 | { | 1134 | { |
1135 | const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu)); | 1135 | int nid = cpu_to_node(cpu); |
1136 | const struct cpumask *nodemask = NULL; | ||
1136 | enum { cpuset, possible, fail } state = cpuset; | 1137 | enum { cpuset, possible, fail } state = cpuset; |
1137 | int dest_cpu; | 1138 | int dest_cpu; |
1138 | 1139 | ||
1139 | /* Look for allowed, online CPU in same node. */ | 1140 | /* |
1140 | for_each_cpu(dest_cpu, nodemask) { | 1141 | * If the node that the cpu is on has been offlined, cpu_to_node() |
1141 | if (!cpu_online(dest_cpu)) | 1142 | * will return -1. There is no cpu on the node, and we should |
1142 | continue; | 1143 | * select the cpu on the other node. |
1143 | if (!cpu_active(dest_cpu)) | 1144 | */ |
1144 | continue; | 1145 | if (nid != -1) { |
1145 | if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) | 1146 | nodemask = cpumask_of_node(nid); |
1146 | return dest_cpu; | 1147 | |
1148 | /* Look for allowed, online CPU in same node. */ | ||
1149 | for_each_cpu(dest_cpu, nodemask) { | ||
1150 | if (!cpu_online(dest_cpu)) | ||
1151 | continue; | ||
1152 | if (!cpu_active(dest_cpu)) | ||
1153 | continue; | ||
1154 | if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) | ||
1155 | return dest_cpu; | ||
1156 | } | ||
1147 | } | 1157 | } |
1148 | 1158 | ||
1149 | for (;;) { | 1159 | for (;;) { |