Diffstat (limited to 'kernel/stop_machine.c')
 -rw-r--r--  kernel/stop_machine.c | 303
 1 file changed, 203 insertions(+), 100 deletions(-)
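At a glance, this patch turns the old stop_machine state machine into a shared multi_cpu_stop() helper, adds a stop_two_cpus() primitive that stops exactly two CPUs and runs a function on one of them, and replaces the preempt_disable()/preempt_enable() window in queue_stop_cpus_work() with the new stop_cpus_lock lglock so the two paths cannot queue their stopper works in conflicting orders. The sketch below is purely illustrative and is not part of the patch: it only shows how a caller might use the new stop_two_cpus() interface, with made-up names (two_cpu_sync_fn, sync_state_on), and it assumes the corresponding declaration of stop_two_cpus() in <linux/stop_machine.h>, which lies outside this file-limited diffstat.

#include <linux/errno.h>
#include <linux/stop_machine.h>

/*
 * Illustrative sketch only -- not part of this patch.  The callback runs
 * on cpu1 (stop_two_cpus() sets active_cpus to cpumask_of(cpu1)) while
 * both CPUs are stopped with interrupts disabled, so it must be short
 * and must not sleep.
 */
static int two_cpu_sync_fn(void *arg)
{
	int *payload = arg;	/* hypothetical caller-provided state */

	(*payload)++;
	return 0;		/* becomes stop_two_cpus()'s return value */
}

static int sync_state_on(unsigned int cpu1, unsigned int cpu2)
{
	int payload = 0;

	/*
	 * Returns the callback's result, or -ENOENT when either CPU is
	 * not active (e.g. it is in the middle of being unplugged).
	 */
	return stop_two_cpus(cpu1, cpu2, two_cpu_sync_fn, &payload);
}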
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index c09f2955ae30..84571e09c907 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -20,6 +20,7 @@
 #include <linux/kallsyms.h>
 #include <linux/smpboot.h>
 #include <linux/atomic.h>
+#include <linux/lglock.h>
 
 /*
  * Structure to determine completion condition and record errors.  May
@@ -43,6 +44,14 @@ static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
 static DEFINE_PER_CPU(struct task_struct *, cpu_stopper_task);
 static bool stop_machine_initialized = false;
 
+/*
+ * Avoids a race between stop_two_cpus and global stop_cpus, where
+ * the stoppers could get queued up in reverse order, leading to
+ * system deadlock. Using an lglock means stop_two_cpus remains
+ * relatively cheap.
+ */
+DEFINE_STATIC_LGLOCK(stop_cpus_lock);
+
 static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
 {
 	memset(done, 0, sizeof(*done));
@@ -115,6 +124,184 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
 	return done.executed ? done.ret : -ENOENT;
 }
 
+/* This controls the threads on each CPU. */
+enum multi_stop_state {
+	/* Dummy starting state for thread. */
+	MULTI_STOP_NONE,
+	/* Awaiting everyone to be scheduled. */
+	MULTI_STOP_PREPARE,
+	/* Disable interrupts. */
+	MULTI_STOP_DISABLE_IRQ,
+	/* Run the function */
+	MULTI_STOP_RUN,
+	/* Exit */
+	MULTI_STOP_EXIT,
+};
+
+struct multi_stop_data {
+	int			(*fn)(void *);
+	void			*data;
+	/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
+	unsigned int		num_threads;
+	const struct cpumask	*active_cpus;
+
+	enum multi_stop_state	state;
+	atomic_t		thread_ack;
+};
+
+static void set_state(struct multi_stop_data *msdata,
+		      enum multi_stop_state newstate)
+{
+	/* Reset ack counter. */
+	atomic_set(&msdata->thread_ack, msdata->num_threads);
+	smp_wmb();
+	msdata->state = newstate;
+}
+
+/* Last one to ack a state moves to the next state. */
+static void ack_state(struct multi_stop_data *msdata)
+{
+	if (atomic_dec_and_test(&msdata->thread_ack))
+		set_state(msdata, msdata->state + 1);
+}
+
+/* This is the cpu_stop function which stops the CPU. */
+static int multi_cpu_stop(void *data)
+{
+	struct multi_stop_data *msdata = data;
+	enum multi_stop_state curstate = MULTI_STOP_NONE;
+	int cpu = smp_processor_id(), err = 0;
+	unsigned long flags;
+	bool is_active;
+
+	/*
+	 * When called from stop_machine_from_inactive_cpu(), irq might
+	 * already be disabled. Save the state and restore it on exit.
+	 */
+	local_save_flags(flags);
+
+	if (!msdata->active_cpus)
+		is_active = cpu == cpumask_first(cpu_online_mask);
+	else
+		is_active = cpumask_test_cpu(cpu, msdata->active_cpus);
+
+	/* Simple state machine */
+	do {
+		/* Chill out and ensure we re-read multi_stop_state. */
+		cpu_relax();
+		if (msdata->state != curstate) {
+			curstate = msdata->state;
+			switch (curstate) {
+			case MULTI_STOP_DISABLE_IRQ:
+				local_irq_disable();
+				hard_irq_disable();
+				break;
+			case MULTI_STOP_RUN:
+				if (is_active)
+					err = msdata->fn(msdata->data);
+				break;
+			default:
+				break;
+			}
+			ack_state(msdata);
+		}
+	} while (curstate != MULTI_STOP_EXIT);
+
+	local_irq_restore(flags);
+	return err;
+}
+
+struct irq_cpu_stop_queue_work_info {
+	int cpu1;
+	int cpu2;
+	struct cpu_stop_work *work1;
+	struct cpu_stop_work *work2;
+};
+
+/*
+ * This function is always run with irqs and preemption disabled.
+ * This guarantees that both work1 and work2 get queued, before
+ * our local migrate thread gets the chance to preempt us.
+ */
+static void irq_cpu_stop_queue_work(void *arg)
+{
+	struct irq_cpu_stop_queue_work_info *info = arg;
+	cpu_stop_queue_work(info->cpu1, info->work1);
+	cpu_stop_queue_work(info->cpu2, info->work2);
+}
+
+/**
+ * stop_two_cpus - stops two cpus
+ * @cpu1: the cpu to stop
+ * @cpu2: the other cpu to stop
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Stops both the current and specified CPU and runs @fn on one of them.
+ *
+ * returns when both are completed.
+ */
+int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
+{
+	struct cpu_stop_done done;
+	struct cpu_stop_work work1, work2;
+	struct irq_cpu_stop_queue_work_info call_args;
+	struct multi_stop_data msdata;
+
+	preempt_disable();
+	msdata = (struct multi_stop_data){
+		.fn = fn,
+		.data = arg,
+		.num_threads = 2,
+		.active_cpus = cpumask_of(cpu1),
+	};
+
+	work1 = work2 = (struct cpu_stop_work){
+		.fn = multi_cpu_stop,
+		.arg = &msdata,
+		.done = &done
+	};
+
+	call_args = (struct irq_cpu_stop_queue_work_info){
+		.cpu1 = cpu1,
+		.cpu2 = cpu2,
+		.work1 = &work1,
+		.work2 = &work2,
+	};
+
+	cpu_stop_init_done(&done, 2);
+	set_state(&msdata, MULTI_STOP_PREPARE);
+
+	/*
+	 * If we observe both CPUs active we know _cpu_down() cannot yet have
+	 * queued its stop_machine works and therefore ours will get executed
+	 * first. Or its not either one of our CPUs that's getting unplugged,
+	 * in which case we don't care.
+	 *
+	 * This relies on the stopper workqueues to be FIFO.
+	 */
+	if (!cpu_active(cpu1) || !cpu_active(cpu2)) {
+		preempt_enable();
+		return -ENOENT;
+	}
+
+	lg_local_lock(&stop_cpus_lock);
+	/*
+	 * Queuing needs to be done by the lowest numbered CPU, to ensure
+	 * that works are always queued in the same order on every CPU.
+	 * This prevents deadlocks.
+	 */
+	smp_call_function_single(min(cpu1, cpu2),
+				 &irq_cpu_stop_queue_work,
+				 &call_args, 0);
+	lg_local_unlock(&stop_cpus_lock);
+	preempt_enable();
+
+	wait_for_completion(&done.completion);
+
+	return done.executed ? done.ret : -ENOENT;
+}
+
 /**
  * stop_one_cpu_nowait - stop a cpu but don't wait for completion
  * @cpu: cpu to stop
@@ -159,10 +346,10 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
 	 * preempted by a stopper which might wait for other stoppers
 	 * to enter @fn which can lead to deadlock.
 	 */
-	preempt_disable();
+	lg_global_lock(&stop_cpus_lock);
 	for_each_cpu(cpu, cpumask)
 		cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu));
-	preempt_enable();
+	lg_global_unlock(&stop_cpus_lock);
 }
 
 static int __stop_cpus(const struct cpumask *cpumask,
@@ -359,98 +546,14 @@ early_initcall(cpu_stop_init);
 
 #ifdef CONFIG_STOP_MACHINE
 
-/* This controls the threads on each CPU. */
-enum stopmachine_state {
-	/* Dummy starting state for thread. */
-	STOPMACHINE_NONE,
-	/* Awaiting everyone to be scheduled. */
-	STOPMACHINE_PREPARE,
-	/* Disable interrupts. */
-	STOPMACHINE_DISABLE_IRQ,
-	/* Run the function */
-	STOPMACHINE_RUN,
-	/* Exit */
-	STOPMACHINE_EXIT,
-};
-
-struct stop_machine_data {
-	int			(*fn)(void *);
-	void			*data;
-	/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
-	unsigned int		num_threads;
-	const struct cpumask	*active_cpus;
-
-	enum stopmachine_state	state;
-	atomic_t		thread_ack;
-};
-
-static void set_state(struct stop_machine_data *smdata,
-		      enum stopmachine_state newstate)
-{
-	/* Reset ack counter. */
-	atomic_set(&smdata->thread_ack, smdata->num_threads);
-	smp_wmb();
-	smdata->state = newstate;
-}
-
-/* Last one to ack a state moves to the next state. */
-static void ack_state(struct stop_machine_data *smdata)
-{
-	if (atomic_dec_and_test(&smdata->thread_ack))
-		set_state(smdata, smdata->state + 1);
-}
-
-/* This is the cpu_stop function which stops the CPU. */
-static int stop_machine_cpu_stop(void *data)
-{
-	struct stop_machine_data *smdata = data;
-	enum stopmachine_state curstate = STOPMACHINE_NONE;
-	int cpu = smp_processor_id(), err = 0;
-	unsigned long flags;
-	bool is_active;
-
-	/*
-	 * When called from stop_machine_from_inactive_cpu(), irq might
-	 * already be disabled. Save the state and restore it on exit.
-	 */
-	local_save_flags(flags);
-
-	if (!smdata->active_cpus)
-		is_active = cpu == cpumask_first(cpu_online_mask);
-	else
-		is_active = cpumask_test_cpu(cpu, smdata->active_cpus);
-
-	/* Simple state machine */
-	do {
-		/* Chill out and ensure we re-read stopmachine_state. */
-		cpu_relax();
-		if (smdata->state != curstate) {
-			curstate = smdata->state;
-			switch (curstate) {
-			case STOPMACHINE_DISABLE_IRQ:
-				local_irq_disable();
-				hard_irq_disable();
-				break;
-			case STOPMACHINE_RUN:
-				if (is_active)
-					err = smdata->fn(smdata->data);
-				break;
-			default:
-				break;
-			}
-			ack_state(smdata);
-		}
-	} while (curstate != STOPMACHINE_EXIT);
-
-	local_irq_restore(flags);
-	return err;
-}
-
 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
-	struct stop_machine_data smdata = { .fn = fn, .data = data,
-					    .num_threads = num_online_cpus(),
-					    .active_cpus = cpus };
+	struct multi_stop_data msdata = {
+		.fn = fn,
+		.data = data,
+		.num_threads = num_online_cpus(),
+		.active_cpus = cpus,
+	};
 
 	if (!stop_machine_initialized) {
 		/*
@@ -461,7 +564,7 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 		unsigned long flags;
 		int ret;
 
-		WARN_ON_ONCE(smdata.num_threads != 1);
+		WARN_ON_ONCE(msdata.num_threads != 1);
 
 		local_irq_save(flags);
 		hard_irq_disable();
@@ -472,8 +575,8 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 	}
 
 	/* Set the initial state and stop all online cpus. */
-	set_state(&smdata, STOPMACHINE_PREPARE);
-	return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
+	set_state(&msdata, MULTI_STOP_PREPARE);
+	return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
 }
 
 int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
@@ -513,25 +616,25 @@ EXPORT_SYMBOL_GPL(stop_machine);
 int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
 				   const struct cpumask *cpus)
 {
-	struct stop_machine_data smdata = { .fn = fn, .data = data,
-					    .active_cpus = cpus };
+	struct multi_stop_data msdata = { .fn = fn, .data = data,
+					  .active_cpus = cpus };
 	struct cpu_stop_done done;
 	int ret;
 
 	/* Local CPU must be inactive and CPU hotplug in progress. */
 	BUG_ON(cpu_active(raw_smp_processor_id()));
-	smdata.num_threads = num_active_cpus() + 1;	/* +1 for local */
+	msdata.num_threads = num_active_cpus() + 1;	/* +1 for local */
 
 	/* No proper task established and can't sleep - busy wait for lock. */
 	while (!mutex_trylock(&stop_cpus_mutex))
 		cpu_relax();
 
 	/* Schedule work on other CPUs and execute directly for local CPU */
-	set_state(&smdata, STOPMACHINE_PREPARE);
+	set_state(&msdata, MULTI_STOP_PREPARE);
 	cpu_stop_init_done(&done, num_active_cpus());
-	queue_stop_cpus_work(cpu_active_mask, stop_machine_cpu_stop, &smdata,
+	queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
 			     &done);
-	ret = stop_machine_cpu_stop(&smdata);
+	ret = multi_cpu_stop(&msdata);
 
 	/* Busy wait for completion. */
 	while (!completion_done(&done.completion))
