Diffstat (limited to 'kernel/rcu/update.c')

 -rw-r--r--  kernel/rcu/update.c | 345
 1 file changed, 344 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 4056d7992a6c..3ef8ba58694e 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -47,6 +47,8 @@
 #include <linux/hardirq.h>
 #include <linux/delay.h>
 #include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/tick.h>
 
 #define CREATE_TRACE_POINTS
 
@@ -91,7 +93,7 @@ void __rcu_read_unlock(void) | |||
91 | barrier(); /* critical section before exit code. */ | 93 | barrier(); /* critical section before exit code. */ |
92 | t->rcu_read_lock_nesting = INT_MIN; | 94 | t->rcu_read_lock_nesting = INT_MIN; |
93 | barrier(); /* assign before ->rcu_read_unlock_special load */ | 95 | barrier(); /* assign before ->rcu_read_unlock_special load */ |
94 | if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) | 96 | if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special.s))) |
95 | rcu_read_unlock_special(t); | 97 | rcu_read_unlock_special(t); |
96 | barrier(); /* ->rcu_read_unlock_special load before assign */ | 98 | barrier(); /* ->rcu_read_unlock_special load before assign */ |
97 | t->rcu_read_lock_nesting = 0; | 99 | t->rcu_read_lock_nesting = 0; |
@@ -137,6 +139,38 @@ int notrace debug_lockdep_rcu_enabled(void)
 EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
 
 /**
+ * rcu_read_lock_held() - might we be in RCU read-side critical section?
+ *
+ * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
+ * read-side critical section.  In the absence of CONFIG_DEBUG_LOCK_ALLOC,
+ * this assumes we are in an RCU read-side critical section unless it can
+ * prove otherwise.  This is useful for debug checks in functions that
+ * require that they be called within an RCU read-side critical section.
+ *
+ * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
+ * and while lockdep is disabled.
+ *
+ * Note that rcu_read_lock() and the matching rcu_read_unlock() must
+ * occur in the same context; for example, it is illegal to invoke
+ * rcu_read_unlock() in process context if the matching rcu_read_lock()
+ * was invoked from within an irq handler.
+ *
+ * Note that rcu_read_lock() is disallowed if the CPU is either idle or
+ * offline from an RCU perspective, so check for those as well.
+ */
+int rcu_read_lock_held(void)
+{
+	if (!debug_lockdep_rcu_enabled())
+		return 1;
+	if (!rcu_is_watching())
+		return 0;
+	if (!rcu_lockdep_current_cpu_online())
+		return 0;
+	return lock_is_held(&rcu_lock_map);
+}
+EXPORT_SYMBOL_GPL(rcu_read_lock_held);
+
+/**
  * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
  *
  * Check for bottom half being disabled, which covers both the
@@ -347,3 +381,312 @@ static int __init check_cpu_stall_init(void)
 early_initcall(check_cpu_stall_init);
 
 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
+
+#ifdef CONFIG_TASKS_RCU
+
+/*
+ * Simple variant of RCU whose quiescent states are voluntary context switch,
+ * user-space execution, and idle.  As such, grace periods can take one good
+ * long time.  There are no read-side primitives similar to rcu_read_lock()
+ * and rcu_read_unlock() because this implementation is intended to get
+ * the system into a safe state for some of the manipulations involved in
+ * tracing and the like.  Finally, this implementation does not support
+ * high call_rcu_tasks() rates from multiple CPUs.  If this is required,
+ * per-CPU callback lists will be needed.
+ */
+
+/* Global list of callbacks and associated lock. */
+static struct rcu_head *rcu_tasks_cbs_head;
+static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
+static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq);
+static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
+
+/* Track exiting tasks in order to allow them to be waited for. */
+DEFINE_SRCU(tasks_rcu_exit_srcu);
+
+/* Control stall timeouts.  Disable with <= 0, otherwise jiffies till stall. */
+static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 10;
+module_param(rcu_task_stall_timeout, int, 0644);
+
+static void rcu_spawn_tasks_kthread(void);
+
+/*
+ * Post an RCU-tasks callback.  First call must be from process context
+ * after the scheduler is fully operational.
+ */
+void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
+{
+	unsigned long flags;
+	bool needwake;
+
+	rhp->next = NULL;
+	rhp->func = func;
+	raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
+	needwake = !rcu_tasks_cbs_head;
+	*rcu_tasks_cbs_tail = rhp;
+	rcu_tasks_cbs_tail = &rhp->next;
+	raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
+	if (needwake) {
+		rcu_spawn_tasks_kthread();
+		wake_up(&rcu_tasks_cbs_wq);
+	}
+}
+EXPORT_SYMBOL_GPL(call_rcu_tasks);
+
+/**
+ * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full rcu-tasks
+ * grace period has elapsed, in other words after all currently
+ * executing rcu-tasks read-side critical sections have completed.  These
+ * read-side critical sections are delimited by calls to schedule(),
+ * cond_resched_rcu_qs(), idle execution, userspace execution, calls
+ * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
+ *
+ * This is a very specialized primitive, intended only for a few uses in
+ * tracing and other situations requiring manipulation of function
+ * preambles and profiling hooks.  The synchronize_rcu_tasks() function
+ * is not (yet) intended for heavy use from multiple CPUs.
+ *
+ * Note that this guarantee implies further memory-ordering guarantees.
+ * On systems with more than one CPU, when synchronize_rcu_tasks() returns,
+ * each CPU is guaranteed to have executed a full memory barrier since the
+ * end of its last RCU-tasks read-side critical section whose beginning
+ * preceded the call to synchronize_rcu_tasks().  In addition, each CPU
+ * having an RCU-tasks read-side critical section that extends beyond
+ * the return from synchronize_rcu_tasks() is guaranteed to have executed
+ * a full memory barrier after the beginning of synchronize_rcu_tasks()
+ * and before the beginning of that RCU-tasks read-side critical section.
+ * Note that these guarantees include CPUs that are offline, idle, or
+ * executing in user mode, as well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked synchronize_rcu_tasks(), which returned
+ * to its caller on CPU B, then both CPU A and CPU B are guaranteed
+ * to have executed a full memory barrier during the execution of
+ * synchronize_rcu_tasks() -- even if CPU A and CPU B are the same CPU
+ * (but again only if the system has more than one CPU).
+ */
+void synchronize_rcu_tasks(void)
+{
+	/* Complain if the scheduler has not started. */
+	rcu_lockdep_assert(rcu_scheduler_active,
+			   "synchronize_rcu_tasks called too soon");
+
+	/* Wait for the grace period. */
+	wait_rcu_gp(call_rcu_tasks);
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);
+
+/**
+ * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
+ *
+ * Although the current implementation is guaranteed to wait, it is not
+ * obligated to do so, for example, if there are no pending callbacks.
+ */
+void rcu_barrier_tasks(void)
+{
+	/* There is only one callback queue, so this is easy.  ;-) */
+	synchronize_rcu_tasks();
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
+
+/* See if tasks are still holding out, complain if so. */
+static void check_holdout_task(struct task_struct *t,
+			       bool needreport, bool *firstreport)
+{
+	int cpu;
+
+	if (!ACCESS_ONCE(t->rcu_tasks_holdout) ||
+	    t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) ||
+	    !ACCESS_ONCE(t->on_rq) ||
+	    (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
+	     !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
+		ACCESS_ONCE(t->rcu_tasks_holdout) = false;
+		list_del_init(&t->rcu_tasks_holdout_list);
+		put_task_struct(t);
+		return;
+	}
+	if (!needreport)
+		return;
+	if (*firstreport) {
+		pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
+		*firstreport = false;
+	}
+	cpu = task_cpu(t);
+	pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
+		 t, ".I"[is_idle_task(t)],
+		 "N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
+		 t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
+		 t->rcu_tasks_idle_cpu, cpu);
+	sched_show_task(t);
+}
+
+/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
+static int __noreturn rcu_tasks_kthread(void *arg)
+{
+	unsigned long flags;
+	struct task_struct *g, *t;
+	unsigned long lastreport;
+	struct rcu_head *list;
+	struct rcu_head *next;
+	LIST_HEAD(rcu_tasks_holdouts);
+
+	/* FIXME: Add housekeeping affinity. */
+
+	/*
+	 * Each pass through the following loop makes one check for
+	 * newly arrived callbacks, and, if there are some, waits for
+	 * one RCU-tasks grace period and then invokes the callbacks.
+	 * This loop is terminated by the system going down.  ;-)
+	 */
+	for (;;) {
+
+		/* Pick up any new callbacks. */
+		raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
+		list = rcu_tasks_cbs_head;
+		rcu_tasks_cbs_head = NULL;
+		rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
+		raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
+
+		/* If there were none, wait a bit and start over. */
+		if (!list) {
+			wait_event_interruptible(rcu_tasks_cbs_wq,
+						 rcu_tasks_cbs_head);
+			if (!rcu_tasks_cbs_head) {
+				WARN_ON(signal_pending(current));
+				schedule_timeout_interruptible(HZ/10);
+			}
+			continue;
+		}
+
+		/*
+		 * Wait for all pre-existing t->on_rq and t->nvcsw
+		 * transitions to complete.  Invoking synchronize_sched()
+		 * suffices because all these transitions occur with
+		 * interrupts disabled.  Without this synchronize_sched(),
+		 * a read-side critical section that started before the
+		 * grace period might be incorrectly seen as having started
+		 * after the grace period.
+		 *
+		 * This synchronize_sched() also dispenses with the
+		 * need for a memory barrier on the first store to
+		 * ->rcu_tasks_holdout, as it forces the store to happen
+		 * after the beginning of the grace period.
+		 */
+		synchronize_sched();
+
+		/*
+		 * There were callbacks, so we need to wait for an
+		 * RCU-tasks grace period.  Start off by scanning
+		 * the task list for tasks that are not already
+		 * voluntarily blocked.  Mark these tasks and make
+		 * a list of them in rcu_tasks_holdouts.
+		 */
+		rcu_read_lock();
+		for_each_process_thread(g, t) {
+			if (t != current && ACCESS_ONCE(t->on_rq) &&
+			    !is_idle_task(t)) {
+				get_task_struct(t);
+				t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw);
+				ACCESS_ONCE(t->rcu_tasks_holdout) = true;
+				list_add(&t->rcu_tasks_holdout_list,
+					 &rcu_tasks_holdouts);
+			}
+		}
+		rcu_read_unlock();
+
+		/*
+		 * Wait for tasks that are in the process of exiting.
+		 * This does only part of the job, ensuring that all
+		 * tasks that were previously exiting reach the point
+		 * where they have disabled preemption, allowing the
+		 * later synchronize_sched() to finish the job.
+		 */
+		synchronize_srcu(&tasks_rcu_exit_srcu);
+
+		/*
+		 * Each pass through the following loop scans the list
+		 * of holdout tasks, removing any that are no longer
+		 * holdouts.  When the list is empty, we are done.
+		 */
+		lastreport = jiffies;
+		while (!list_empty(&rcu_tasks_holdouts)) {
+			bool firstreport;
+			bool needreport;
+			int rtst;
+			struct task_struct *t1;
+
+			schedule_timeout_interruptible(HZ);
+			rtst = ACCESS_ONCE(rcu_task_stall_timeout);
+			needreport = rtst > 0 &&
+				     time_after(jiffies, lastreport + rtst);
+			if (needreport)
+				lastreport = jiffies;
+			firstreport = true;
+			WARN_ON(signal_pending(current));
+			list_for_each_entry_safe(t, t1, &rcu_tasks_holdouts,
+						 rcu_tasks_holdout_list) {
+				check_holdout_task(t, needreport, &firstreport);
+				cond_resched();
+			}
+		}
+
+		/*
+		 * Because ->on_rq and ->nvcsw are not guaranteed
+		 * to have full memory barriers prior to them in the
+		 * schedule() path, memory reordering on other CPUs could
+		 * cause their RCU-tasks read-side critical sections to
+		 * extend past the end of the grace period.  However,
+		 * because these ->nvcsw updates are carried out with
+		 * interrupts disabled, we can use synchronize_sched()
+		 * to force the needed ordering on all such CPUs.
+		 *
+		 * This synchronize_sched() also confines all
+		 * ->rcu_tasks_holdout accesses to be within the grace
+		 * period, avoiding the need for memory barriers for
+		 * ->rcu_tasks_holdout accesses.
+		 *
+		 * In addition, this synchronize_sched() waits for exiting
+		 * tasks to complete their final preempt_disable() region
+		 * of execution, cleaning up after the synchronize_srcu()
+		 * above.
+		 */
+		synchronize_sched();
+
+		/* Invoke the callbacks. */
+		while (list) {
+			next = list->next;
+			local_bh_disable();
+			list->func(list);
+			local_bh_enable();
+			list = next;
+			cond_resched();
+		}
+		schedule_timeout_uninterruptible(HZ/10);
+	}
+}
+
+/* Spawn rcu_tasks_kthread() at first call to call_rcu_tasks(). */
+static void rcu_spawn_tasks_kthread(void)
+{
+	static DEFINE_MUTEX(rcu_tasks_kthread_mutex);
+	static struct task_struct *rcu_tasks_kthread_ptr;
+	struct task_struct *t;
+
+	if (ACCESS_ONCE(rcu_tasks_kthread_ptr)) {
+		smp_mb(); /* Ensure caller sees full kthread. */
+		return;
+	}
+	mutex_lock(&rcu_tasks_kthread_mutex);
+	if (rcu_tasks_kthread_ptr) {
+		mutex_unlock(&rcu_tasks_kthread_mutex);
+		return;
+	}
+	t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
+	BUG_ON(IS_ERR(t));
+	smp_mb(); /* Ensure others see full kthread. */
+	ACCESS_ONCE(rcu_tasks_kthread_ptr) = t;
+	mutex_unlock(&rcu_tasks_kthread_mutex);
+}
+
+#endif /* #ifdef CONFIG_TASKS_RCU */
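
For context, not part of the patch: the kernel-doc for rcu_read_lock_held() describes its use in debug checks. A minimal sketch follows, showing the common pattern of passing rcu_read_lock_held() as the lockdep condition to rcu_dereference_check(); the my_cfg/my_cfg_lock names are hypothetical and only for illustration.

	#include <linux/spinlock.h>
	#include <linux/rcupdate.h>
	#include <linux/lockdep.h>

	struct my_cfg {
		int val;
	};

	/* Hypothetical RCU-protected pointer and the lock that updaters hold. */
	static DEFINE_SPINLOCK(my_cfg_lock);
	static struct my_cfg __rcu *my_cfg;

	/*
	 * Accessor usable by both readers and updaters: lockdep splats unless
	 * the caller is in an RCU read-side critical section (rcu_read_lock_held())
	 * or holds my_cfg_lock.
	 */
	static struct my_cfg *my_cfg_access(void)
	{
		return rcu_dereference_check(my_cfg,
					     rcu_read_lock_held() ||
					     lockdep_is_held(&my_cfg_lock));
	}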
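
Also for context, not part of the patch: the comments above describe call_rcu_tasks() and synchronize_rcu_tasks() as tools for retiring code (for example, a tracing trampoline) that some task might still be executing. Below is a minimal sketch of such a user under assumed names; struct my_tramp and the my_tramp_* helpers are hypothetical, and only call_rcu_tasks() and synchronize_rcu_tasks() come from this patch.

	#include <linux/slab.h>
	#include <linux/rcupdate.h>

	/* Hypothetical per-call-site trampoline descriptor with an embedded rcu_head. */
	struct my_tramp {
		struct rcu_head rh;
		/* ... pointer to the executable stub, usage counts, etc. ... */
	};

	/* Invoked once no task can still be running in the trampoline. */
	static void my_tramp_free_cb(struct rcu_head *rhp)
	{
		struct my_tramp *tp = container_of(rhp, struct my_tramp, rh);

		/* A real user would also release the executable stub here. */
		kfree(tp);
	}

	/* Asynchronous retirement: queue the descriptor for a tasks-RCU grace period. */
	static void my_tramp_retire(struct my_tramp *tp)
	{
		call_rcu_tasks(&tp->rh, my_tramp_free_cb);
	}

	/* Synchronous slow path: wait out the grace period, then free directly. */
	static void my_tramp_retire_sync(struct my_tramp *tp)
	{
		synchronize_rcu_tasks();
		kfree(tp);
	}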