aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/stop_machine.h20
-rw-r--r--kernel/cpu.c13
-rw-r--r--kernel/stop_machine.c293
3 files changed, 136 insertions, 190 deletions
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 18af011c13af..36c2c7284eb3 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -17,13 +17,12 @@
17 * @data: the data ptr for the @fn() 17 * @data: the data ptr for the @fn()
18 * @cpu: if @cpu == n, run @fn() on cpu n 18 * @cpu: if @cpu == n, run @fn() on cpu n
19 * if @cpu == NR_CPUS, run @fn() on any cpu 19 * if @cpu == NR_CPUS, run @fn() on any cpu
20 * if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and then 20 * if @cpu == ALL_CPUS, run @fn() on every online CPU.
21 * concurrently on all the other cpus
22 * 21 *
23 * Description: This causes a thread to be scheduled on every other cpu, 22 * Description: This causes a thread to be scheduled on every cpu,
24 * each of which disables interrupts, and finally interrupts are disabled 23 * each of which disables interrupts. The result is that noone is
25 * on the current CPU. The result is that noone is holding a spinlock 24 * holding a spinlock or inside any other preempt-disabled region when
26 * or inside any other preempt-disabled region when @fn() runs. 25 * @fn() runs.
27 * 26 *
28 * This can be thought of as a very heavy write lock, equivalent to 27 * This can be thought of as a very heavy write lock, equivalent to
29 * grabbing every spinlock in the kernel. */ 28 * grabbing every spinlock in the kernel. */
@@ -35,13 +34,10 @@ int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
35 * @data: the data ptr for the @fn 34 * @data: the data ptr for the @fn
36 * @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS. 35 * @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS.
37 * 36 *
38 * Description: This is a special version of the above, which returns the 37 * Description: This is a special version of the above, which assumes cpus
39 * thread which has run @fn(): kthread_stop will return the return value 38 * won't come or go while it's being called. Used by hotplug cpu.
40 * of @fn(). Used by hotplug cpu.
41 */ 39 */
42struct task_struct *__stop_machine_run(int (*fn)(void *), void *data, 40int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
43 unsigned int cpu);
44
45#else 41#else
46 42
47static inline int stop_machine_run(int (*fn)(void *), void *data, 43static inline int stop_machine_run(int (*fn)(void *), void *data,
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 10ba5f1004a5..cf79bb911371 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -216,7 +216,6 @@ static int __ref take_cpu_down(void *_param)
216static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) 216static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
217{ 217{
218 int err, nr_calls = 0; 218 int err, nr_calls = 0;
219 struct task_struct *p;
220 cpumask_t old_allowed, tmp; 219 cpumask_t old_allowed, tmp;
221 void *hcpu = (void *)(long)cpu; 220 void *hcpu = (void *)(long)cpu;
222 unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; 221 unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
@@ -250,19 +249,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
250 cpu_clear(cpu, tmp); 249 cpu_clear(cpu, tmp);
251 set_cpus_allowed_ptr(current, &tmp); 250 set_cpus_allowed_ptr(current, &tmp);
252 251
253 p = __stop_machine_run(take_cpu_down, &tcd_param, cpu); 252 err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
254 253
255 if (IS_ERR(p) || cpu_online(cpu)) { 254 if (err || cpu_online(cpu)) {
256 /* CPU didn't die: tell everyone. Can't complain. */ 255 /* CPU didn't die: tell everyone. Can't complain. */
257 if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, 256 if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
258 hcpu) == NOTIFY_BAD) 257 hcpu) == NOTIFY_BAD)
259 BUG(); 258 BUG();
260 259
261 if (IS_ERR(p)) { 260 goto out_allowed;
262 err = PTR_ERR(p);
263 goto out_allowed;
264 }
265 goto out_thread;
266 } 261 }
267 262
268 /* Wait for it to sleep (leaving idle task). */ 263 /* Wait for it to sleep (leaving idle task). */
@@ -279,8 +274,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
279 274
280 check_for_tasks(cpu); 275 check_for_tasks(cpu);
281 276
282out_thread:
283 err = kthread_stop(p);
284out_allowed: 277out_allowed:
285 set_cpus_allowed_ptr(current, &old_allowed); 278 set_cpus_allowed_ptr(current, &old_allowed);
286out_release: 279out_release:
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index a473bd0cb71b..35882dccc943 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -1,4 +1,4 @@
1/* Copyright 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation. 1/* Copyright 2008, 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
2 * GPL v2 and any later version. 2 * GPL v2 and any later version.
3 */ 3 */
4#include <linux/cpu.h> 4#include <linux/cpu.h>
@@ -13,220 +13,177 @@
13#include <asm/atomic.h> 13#include <asm/atomic.h>
14#include <asm/uaccess.h> 14#include <asm/uaccess.h>
15 15
16/* Since we effect priority and affinity (both of which are visible 16/* This controls the threads on each CPU. */
17 * to, and settable by outside processes) we do indirection via a
18 * kthread. */
19
20/* Thread to stop each CPU in user context. */
21enum stopmachine_state { 17enum stopmachine_state {
22 STOPMACHINE_WAIT, 18 /* Dummy starting state for thread. */
19 STOPMACHINE_NONE,
20 /* Awaiting everyone to be scheduled. */
23 STOPMACHINE_PREPARE, 21 STOPMACHINE_PREPARE,
22 /* Disable interrupts. */
24 STOPMACHINE_DISABLE_IRQ, 23 STOPMACHINE_DISABLE_IRQ,
24 /* Run the function */
25 STOPMACHINE_RUN, 25 STOPMACHINE_RUN,
26 /* Exit */
26 STOPMACHINE_EXIT, 27 STOPMACHINE_EXIT,
27}; 28};
29static enum stopmachine_state state;
28 30
29struct stop_machine_data { 31struct stop_machine_data {
30 int (*fn)(void *); 32 int (*fn)(void *);
31 void *data; 33 void *data;
32 struct completion done; 34 int fnret;
33 int run_all; 35};
34} smdata;
35 36
36static enum stopmachine_state stopmachine_state; 37/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
37static unsigned int stopmachine_num_threads; 38static unsigned int num_threads;
38static atomic_t stopmachine_thread_ack; 39static atomic_t thread_ack;
40static struct completion finished;
41static DEFINE_MUTEX(lock);
39 42
40static int stopmachine(void *cpu) 43static void set_state(enum stopmachine_state newstate)
41{ 44{
42 int irqs_disabled = 0; 45 /* Reset ack counter. */
43 int prepared = 0; 46 atomic_set(&thread_ack, num_threads);
44 int ran = 0; 47 smp_wmb();
45 cpumask_of_cpu_ptr(cpumask, (int)(long)cpu); 48 state = newstate;
46
47 set_cpus_allowed_ptr(current, cpumask);
48
49 /* Ack: we are alive */
50 smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
51 atomic_inc(&stopmachine_thread_ack);
52
53 /* Simple state machine */
54 while (stopmachine_state != STOPMACHINE_EXIT) {
55 if (stopmachine_state == STOPMACHINE_DISABLE_IRQ
56 && !irqs_disabled) {
57 local_irq_disable();
58 hard_irq_disable();
59 irqs_disabled = 1;
60 /* Ack: irqs disabled. */
61 smp_mb(); /* Must read state first. */
62 atomic_inc(&stopmachine_thread_ack);
63 } else if (stopmachine_state == STOPMACHINE_PREPARE
64 && !prepared) {
65 /* Everyone is in place, hold CPU. */
66 preempt_disable();
67 prepared = 1;
68 smp_mb(); /* Must read state first. */
69 atomic_inc(&stopmachine_thread_ack);
70 } else if (stopmachine_state == STOPMACHINE_RUN && !ran) {
71 smdata.fn(smdata.data);
72 ran = 1;
73 smp_mb(); /* Must read state first. */
74 atomic_inc(&stopmachine_thread_ack);
75 }
76 /* Yield in first stage: migration threads need to
77 * help our sisters onto their CPUs. */
78 if (!prepared && !irqs_disabled)
79 yield();
80 cpu_relax();
81 }
82
83 /* Ack: we are exiting. */
84 smp_mb(); /* Must read state first. */
85 atomic_inc(&stopmachine_thread_ack);
86
87 if (irqs_disabled)
88 local_irq_enable();
89 if (prepared)
90 preempt_enable();
91
92 return 0;
93} 49}
94 50
95/* Change the thread state */ 51/* Last one to ack a state moves to the next state. */
96static void stopmachine_set_state(enum stopmachine_state state) 52static void ack_state(void)
97{ 53{
98 atomic_set(&stopmachine_thread_ack, 0); 54 if (atomic_dec_and_test(&thread_ack)) {
99 smp_wmb(); 55 /* If we're the last one to ack the EXIT, we're finished. */
100 stopmachine_state = state; 56 if (state == STOPMACHINE_EXIT)
101 while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) 57 complete(&finished);
102 cpu_relax(); 58 else
59 set_state(state + 1);
60 }
103} 61}
104 62
105static int stop_machine(void) 63/* This is the actual thread which stops the CPU. It exits by itself rather
64 * than waiting for kthread_stop(), because it's easier for hotplug CPU. */
65static int stop_cpu(struct stop_machine_data *smdata)
106{ 66{
107 int i, ret = 0; 67 enum stopmachine_state curstate = STOPMACHINE_NONE;
108 68 int uninitialized_var(ret);
109 atomic_set(&stopmachine_thread_ack, 0);
110 stopmachine_num_threads = 0;
111 stopmachine_state = STOPMACHINE_WAIT;
112 69
113 for_each_online_cpu(i) { 70 /* Simple state machine */
114 if (i == raw_smp_processor_id()) 71 do {
115 continue; 72 /* Chill out and ensure we re-read stopmachine_state. */
116 ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
117 if (ret < 0)
118 break;
119 stopmachine_num_threads++;
120 }
121
122 /* Wait for them all to come to life. */
123 while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) {
124 yield();
125 cpu_relax(); 73 cpu_relax();
126 } 74 if (state != curstate) {
127 75 curstate = state;
128 /* If some failed, kill them all. */ 76 switch (curstate) {
129 if (ret < 0) { 77 case STOPMACHINE_DISABLE_IRQ:
130 stopmachine_set_state(STOPMACHINE_EXIT); 78 local_irq_disable();
131 return ret; 79 hard_irq_disable();
132 } 80 break;
133 81 case STOPMACHINE_RUN:
134 /* Now they are all started, make them hold the CPUs, ready. */ 82 /* |= allows error detection if functions on
135 preempt_disable(); 83 * multiple CPUs. */
136 stopmachine_set_state(STOPMACHINE_PREPARE); 84 smdata->fnret |= smdata->fn(smdata->data);
137 85 break;
138 /* Make them disable irqs. */ 86 default:
139 local_irq_disable(); 87 break;
140 hard_irq_disable(); 88 }
141 stopmachine_set_state(STOPMACHINE_DISABLE_IRQ); 89 ack_state();
142 90 }
143 return 0; 91 } while (curstate != STOPMACHINE_EXIT);
144}
145 92
146static void restart_machine(void)
147{
148 stopmachine_set_state(STOPMACHINE_EXIT);
149 local_irq_enable(); 93 local_irq_enable();
150 preempt_enable_no_resched(); 94 do_exit(0);
151} 95}
152 96
153static void run_other_cpus(void) 97/* Callback for CPUs which aren't supposed to do anything. */
98static int chill(void *unused)
154{ 99{
155 stopmachine_set_state(STOPMACHINE_RUN); 100 return 0;
156} 101}
157 102
158static int do_stop(void *_smdata) 103int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
159{ 104{
160 struct stop_machine_data *smdata = _smdata; 105 int i, err;
161 int ret; 106 struct stop_machine_data active, idle;
107 struct task_struct **threads;
108
109 active.fn = fn;
110 active.data = data;
111 active.fnret = 0;
112 idle.fn = chill;
113 idle.data = NULL;
114
115 /* If they don't care which cpu fn runs on, just pick one. */
116 if (cpu == NR_CPUS)
117 cpu = any_online_cpu(cpu_online_map);
118
119 /* This could be too big for stack on large machines. */
120 threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL);
121 if (!threads)
122 return -ENOMEM;
123
124 /* Set up initial state. */
125 mutex_lock(&lock);
126 init_completion(&finished);
127 num_threads = num_online_cpus();
128 set_state(STOPMACHINE_PREPARE);
162 129
163 ret = stop_machine(); 130 for_each_online_cpu(i) {
164 if (ret == 0) { 131 struct stop_machine_data *smdata;
165 ret = smdata->fn(smdata->data); 132 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
166 if (smdata->run_all)
167 run_other_cpus();
168 restart_machine();
169 }
170 133
171 /* We're done: you can kthread_stop us now */ 134 if (cpu == ALL_CPUS || i == cpu)
172 complete(&smdata->done); 135 smdata = &active;
136 else
137 smdata = &idle;
138
139 threads[i] = kthread_create((void *)stop_cpu, smdata, "kstop%u",
140 i);
141 if (IS_ERR(threads[i])) {
142 err = PTR_ERR(threads[i]);
143 threads[i] = NULL;
144 goto kill_threads;
145 }
173 146
174 /* Wait for kthread_stop */ 147 /* Place it onto correct cpu. */
175 set_current_state(TASK_INTERRUPTIBLE); 148 kthread_bind(threads[i], i);
176 while (!kthread_should_stop()) {
177 schedule();
178 set_current_state(TASK_INTERRUPTIBLE);
179 }
180 __set_current_state(TASK_RUNNING);
181 return ret;
182}
183 149
184struct task_struct *__stop_machine_run(int (*fn)(void *), void *data, 150 /* Make it highest prio. */
185 unsigned int cpu) 151 if (sched_setscheduler_nocheck(threads[i], SCHED_FIFO, &param))
186{ 152 BUG();
187 static DEFINE_MUTEX(stopmachine_mutex); 153 }
188 struct stop_machine_data smdata;
189 struct task_struct *p;
190 154
191 mutex_lock(&stopmachine_mutex); 155 /* We've created all the threads. Wake them all: hold this CPU so one
156 * doesn't hit this CPU until we're ready. */
157 cpu = get_cpu();
158 for_each_online_cpu(i)
159 wake_up_process(threads[i]);
192 160
193 smdata.fn = fn; 161 /* This will release the thread on our CPU. */
194 smdata.data = data; 162 put_cpu();
195 smdata.run_all = (cpu == ALL_CPUS) ? 1 : 0; 163 wait_for_completion(&finished);
196 init_completion(&smdata.done); 164 mutex_unlock(&lock);
197 165
198 smp_wmb(); /* make sure other cpus see smdata updates */ 166 kfree(threads);
199 167
200 /* If they don't care which CPU fn runs on, bind to any online one. */ 168 return active.fnret;
201 if (cpu == NR_CPUS || cpu == ALL_CPUS)
202 cpu = raw_smp_processor_id();
203 169
204 p = kthread_create(do_stop, &smdata, "kstopmachine"); 170kill_threads:
205 if (!IS_ERR(p)) { 171 for_each_online_cpu(i)
206 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 172 if (threads[i])
173 kthread_stop(threads[i]);
174 mutex_unlock(&lock);
207 175
208 /* One high-prio thread per cpu. We'll do this one. */ 176 kfree(threads);
209 sched_setscheduler_nocheck(p, SCHED_FIFO, &param); 177 return err;
210 kthread_bind(p, cpu);
211 wake_up_process(p);
212 wait_for_completion(&smdata.done);
213 }
214 mutex_unlock(&stopmachine_mutex);
215 return p;
216} 178}
217 179
218int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu) 180int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
219{ 181{
220 struct task_struct *p;
221 int ret; 182 int ret;
222 183
223 /* No CPUs can come up or down during this. */ 184 /* No CPUs can come up or down during this. */
224 get_online_cpus(); 185 get_online_cpus();
225 p = __stop_machine_run(fn, data, cpu); 186 ret = __stop_machine_run(fn, data, cpu);
226 if (!IS_ERR(p))
227 ret = kthread_stop(p);
228 else
229 ret = PTR_ERR(p);
230 put_online_cpus(); 187 put_online_cpus();
231 188
232 return ret; 189 return ret;