author    Peter Zijlstra <peterz@infradead.org>  2013-10-07 06:29:15 -0400
committer Ingo Molnar <mingo@kernel.org>         2013-10-09 06:40:45 -0400
commit    1be0bd77c5dd7c903f46abf52f9a3650face3c1d (patch)
tree      e8b2a6c742951e8a9f98a53a285ab85b79437e4d /kernel/stop_machine.c
parent    25cbbef1924299249756bc4030fcb2436c019813 (diff)
stop_machine: Introduce stop_two_cpus()
Introduce stop_two_cpus() in order to allow controlled swapping of two
tasks. It repurposes the stop_machine() state machine but only stops
the two cpus, which we can do with on-stack structures and thereby
avoid machine-wide synchronization issues.

The ordering of CPUs is important to avoid deadlocks. If unordered,
two cpus calling stop_two_cpus() on each other simultaneously would
attempt to queue works in opposite orders on each CPU, causing an
AB-BA style deadlock. By always having the lowest-numbered CPU do the
queueing of works, we guarantee that works are always queued in the
same order, and deadlocks are avoided.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
[ Implemented deadlock avoidance. ]
Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Link: http://lkml.kernel.org/r/1381141781-10992-38-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
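The deadlock-avoidance argument above is the classic lock-ordering
discipline. As a minimal illustration, here is a userspace pthreads
sketch of the same rule (hypothetical code, not part of the patch; the
queue_lock array and queue_two_works() are invented for the example):
whichever caller queues, the two per-CPU locks are always taken in
ascending CPU order, so two concurrent cross-stops can never each hold
one lock while waiting for the other.

/*
 * Userspace sketch of the ordering rule (hypothetical, not kernel
 * code).  Taking both "queue locks" in ascending CPU order makes the
 * AB-BA interleaving impossible: both contenders try lock lo first,
 * and only the winner proceeds to lock hi.
 */
#include <pthread.h>

#define NR_CPUS 4

static pthread_mutex_t queue_lock[NR_CPUS] = {
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};

static void queue_two_works(unsigned int cpu1, unsigned int cpu2)
{
        unsigned int lo = cpu1 < cpu2 ? cpu1 : cpu2;
        unsigned int hi = cpu1 < cpu2 ? cpu2 : cpu1;

        pthread_mutex_lock(&queue_lock[lo]);    /* lowest CPU first... */
        pthread_mutex_lock(&queue_lock[hi]);    /* ...then the other */
        /* ...enqueue one stop-work on each CPU here... */
        pthread_mutex_unlock(&queue_lock[hi]);
        pthread_mutex_unlock(&queue_lock[lo]);
}

The patch enforces the same invariant without nested locks: it funnels
the queueing of both works through min(cpu1, cpu2) via
smp_call_function_single(), with irqs and preemption disabled, so the
two works are always queued back to back and in the same order.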
Diffstat (limited to 'kernel/stop_machine.c')
-rw-r--r--  kernel/stop_machine.c | 272
1 file changed, 174 insertions(+), 98 deletions(-)
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index c09f2955ae30..32a6c44d8f78 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -115,6 +115,166 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
         return done.executed ? done.ret : -ENOENT;
 }
 
+/* This controls the threads on each CPU. */
+enum multi_stop_state {
+        /* Dummy starting state for thread. */
+        MULTI_STOP_NONE,
+        /* Awaiting everyone to be scheduled. */
+        MULTI_STOP_PREPARE,
+        /* Disable interrupts. */
+        MULTI_STOP_DISABLE_IRQ,
+        /* Run the function */
+        MULTI_STOP_RUN,
+        /* Exit */
+        MULTI_STOP_EXIT,
+};
+
+struct multi_stop_data {
+        int (*fn)(void *);
+        void *data;
+        /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
+        unsigned int num_threads;
+        const struct cpumask *active_cpus;
+
+        enum multi_stop_state state;
+        atomic_t thread_ack;
+};
+
+static void set_state(struct multi_stop_data *msdata,
+                      enum multi_stop_state newstate)
+{
+        /* Reset ack counter. */
+        atomic_set(&msdata->thread_ack, msdata->num_threads);
+        smp_wmb();
+        msdata->state = newstate;
+}
+
+/* Last one to ack a state moves to the next state. */
+static void ack_state(struct multi_stop_data *msdata)
+{
+        if (atomic_dec_and_test(&msdata->thread_ack))
+                set_state(msdata, msdata->state + 1);
+}
+
+/* This is the cpu_stop function which stops the CPU. */
+static int multi_cpu_stop(void *data)
+{
+        struct multi_stop_data *msdata = data;
+        enum multi_stop_state curstate = MULTI_STOP_NONE;
+        int cpu = smp_processor_id(), err = 0;
+        unsigned long flags;
+        bool is_active;
+
+        /*
+         * When called from stop_machine_from_inactive_cpu(), irq might
+         * already be disabled. Save the state and restore it on exit.
+         */
+        local_save_flags(flags);
+
+        if (!msdata->active_cpus)
+                is_active = cpu == cpumask_first(cpu_online_mask);
+        else
+                is_active = cpumask_test_cpu(cpu, msdata->active_cpus);
+
+        /* Simple state machine */
+        do {
+                /* Chill out and ensure we re-read multi_stop_state. */
+                cpu_relax();
+                if (msdata->state != curstate) {
+                        curstate = msdata->state;
+                        switch (curstate) {
+                        case MULTI_STOP_DISABLE_IRQ:
+                                local_irq_disable();
+                                hard_irq_disable();
+                                break;
+                        case MULTI_STOP_RUN:
+                                if (is_active)
+                                        err = msdata->fn(msdata->data);
+                                break;
+                        default:
+                                break;
+                        }
+                        ack_state(msdata);
+                }
+        } while (curstate != MULTI_STOP_EXIT);
+
+        local_irq_restore(flags);
+        return err;
+}
+
+struct irq_cpu_stop_queue_work_info {
+        int cpu1;
+        int cpu2;
+        struct cpu_stop_work *work1;
+        struct cpu_stop_work *work2;
+};
+
+/*
+ * This function is always run with irqs and preemption disabled.
+ * This guarantees that both work1 and work2 get queued, before
+ * our local migrate thread gets the chance to preempt us.
+ */
+static void irq_cpu_stop_queue_work(void *arg)
+{
+        struct irq_cpu_stop_queue_work_info *info = arg;
+        cpu_stop_queue_work(info->cpu1, info->work1);
+        cpu_stop_queue_work(info->cpu2, info->work2);
+}
+
+/**
+ * stop_two_cpus - stops two cpus
+ * @cpu1: the cpu to stop
+ * @cpu2: the other cpu to stop
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Stops both the current and specified CPU and runs @fn on one of them.
+ *
+ * returns when both are completed.
+ */
+int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
+{
+        int call_cpu;
+        struct cpu_stop_done done;
+        struct cpu_stop_work work1, work2;
+        struct irq_cpu_stop_queue_work_info call_args;
+        struct multi_stop_data msdata = {
+                .fn = fn,
+                .data = arg,
+                .num_threads = 2,
+                .active_cpus = cpumask_of(cpu1),
+        };
+
+        work1 = work2 = (struct cpu_stop_work){
+                .fn = multi_cpu_stop,
+                .arg = &msdata,
+                .done = &done
+        };
+
+        call_args = (struct irq_cpu_stop_queue_work_info){
+                .cpu1 = cpu1,
+                .cpu2 = cpu2,
+                .work1 = &work1,
+                .work2 = &work2,
+        };
+
+        cpu_stop_init_done(&done, 2);
+        set_state(&msdata, MULTI_STOP_PREPARE);
+
+        /*
+         * Queuing needs to be done by the lowest numbered CPU, to ensure
+         * that works are always queued in the same order on every CPU.
+         * This prevents deadlocks.
+         */
+        call_cpu = min(cpu1, cpu2);
+
+        smp_call_function_single(call_cpu, &irq_cpu_stop_queue_work,
+                                 &call_args, 0);
+
+        wait_for_completion(&done.completion);
+        return done.executed ? done.ret : -ENOENT;
+}
+
 /**
  * stop_one_cpu_nowait - stop a cpu but don't wait for completion
  * @cpu: cpu to stop
@@ -359,98 +519,14 @@ early_initcall(cpu_stop_init);
 
 #ifdef CONFIG_STOP_MACHINE
 
-/* This controls the threads on each CPU. */
-enum stopmachine_state {
-        /* Dummy starting state for thread. */
-        STOPMACHINE_NONE,
-        /* Awaiting everyone to be scheduled. */
-        STOPMACHINE_PREPARE,
-        /* Disable interrupts. */
-        STOPMACHINE_DISABLE_IRQ,
-        /* Run the function */
-        STOPMACHINE_RUN,
-        /* Exit */
-        STOPMACHINE_EXIT,
-};
-
-struct stop_machine_data {
-        int (*fn)(void *);
-        void *data;
-        /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
-        unsigned int num_threads;
-        const struct cpumask *active_cpus;
-
-        enum stopmachine_state state;
-        atomic_t thread_ack;
-};
-
-static void set_state(struct stop_machine_data *smdata,
-                      enum stopmachine_state newstate)
-{
-        /* Reset ack counter. */
-        atomic_set(&smdata->thread_ack, smdata->num_threads);
-        smp_wmb();
-        smdata->state = newstate;
-}
-
-/* Last one to ack a state moves to the next state. */
-static void ack_state(struct stop_machine_data *smdata)
-{
-        if (atomic_dec_and_test(&smdata->thread_ack))
-                set_state(smdata, smdata->state + 1);
-}
-
-/* This is the cpu_stop function which stops the CPU. */
-static int stop_machine_cpu_stop(void *data)
-{
-        struct stop_machine_data *smdata = data;
-        enum stopmachine_state curstate = STOPMACHINE_NONE;
-        int cpu = smp_processor_id(), err = 0;
-        unsigned long flags;
-        bool is_active;
-
-        /*
-         * When called from stop_machine_from_inactive_cpu(), irq might
-         * already be disabled. Save the state and restore it on exit.
-         */
-        local_save_flags(flags);
-
-        if (!smdata->active_cpus)
-                is_active = cpu == cpumask_first(cpu_online_mask);
-        else
-                is_active = cpumask_test_cpu(cpu, smdata->active_cpus);
-
-        /* Simple state machine */
-        do {
-                /* Chill out and ensure we re-read stopmachine_state. */
-                cpu_relax();
-                if (smdata->state != curstate) {
-                        curstate = smdata->state;
-                        switch (curstate) {
-                        case STOPMACHINE_DISABLE_IRQ:
-                                local_irq_disable();
-                                hard_irq_disable();
-                                break;
-                        case STOPMACHINE_RUN:
-                                if (is_active)
-                                        err = smdata->fn(smdata->data);
-                                break;
-                        default:
-                                break;
-                        }
-                        ack_state(smdata);
-                }
-        } while (curstate != STOPMACHINE_EXIT);
-
-        local_irq_restore(flags);
-        return err;
-}
-
 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
-        struct stop_machine_data smdata = { .fn = fn, .data = data,
-                                            .num_threads = num_online_cpus(),
-                                            .active_cpus = cpus };
+        struct multi_stop_data msdata = {
+                .fn = fn,
+                .data = data,
+                .num_threads = num_online_cpus(),
+                .active_cpus = cpus,
+        };
 
         if (!stop_machine_initialized) {
                 /*
@@ -461,7 +537,7 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
                 unsigned long flags;
                 int ret;
 
-                WARN_ON_ONCE(smdata.num_threads != 1);
+                WARN_ON_ONCE(msdata.num_threads != 1);
 
                 local_irq_save(flags);
                 hard_irq_disable();
@@ -472,8 +548,8 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
         }
 
         /* Set the initial state and stop all online cpus. */
-        set_state(&smdata, STOPMACHINE_PREPARE);
-        return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
+        set_state(&msdata, MULTI_STOP_PREPARE);
+        return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
 }
 
 int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
@@ -513,25 +589,25 @@ EXPORT_SYMBOL_GPL(stop_machine);
 int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
                                    const struct cpumask *cpus)
 {
-        struct stop_machine_data smdata = { .fn = fn, .data = data,
-                                            .active_cpus = cpus };
+        struct multi_stop_data msdata = { .fn = fn, .data = data,
+                                          .active_cpus = cpus };
         struct cpu_stop_done done;
         int ret;
 
         /* Local CPU must be inactive and CPU hotplug in progress. */
         BUG_ON(cpu_active(raw_smp_processor_id()));
-        smdata.num_threads = num_active_cpus() + 1;     /* +1 for local */
+        msdata.num_threads = num_active_cpus() + 1;     /* +1 for local */
 
         /* No proper task established and can't sleep - busy wait for lock. */
         while (!mutex_trylock(&stop_cpus_mutex))
                 cpu_relax();
 
         /* Schedule work on other CPUs and execute directly for local CPU */
-        set_state(&smdata, STOPMACHINE_PREPARE);
+        set_state(&msdata, MULTI_STOP_PREPARE);
         cpu_stop_init_done(&done, num_active_cpus());
-        queue_stop_cpus_work(cpu_active_mask, stop_machine_cpu_stop, &smdata,
+        queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
                              &done);
-        ret = stop_machine_cpu_stop(&smdata);
+        ret = multi_cpu_stop(&msdata);
 
         /* Busy wait for completion. */
         while (!completion_done(&done.completion))
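For illustration, a caller of the new primitive could look like the
sketch below (hypothetical; task_swap_fn and the CPU numbers are
invented for the example). Note that stop_two_cpus() sets .active_cpus
to cpumask_of(cpu1), so @fn runs on the stopper of the first CPU
argument while the other CPU spins through the same state machine with
interrupts disabled.

/* Hypothetical usage sketch, not part of this patch. */
static int task_swap_fn(void *arg)
{
        /*
         * Runs on cpu1's stopper thread; both cpus are stopped and
         * have interrupts disabled while this executes.
         */
        return 0;       /* propagated as stop_two_cpus()'s return value */
}

static int example_swap(void)
{
        /* Stop cpus 0 and 1 and run task_swap_fn on cpu 0. */
        return stop_two_cpus(0, 1, task_swap_fn, NULL);
}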