Diffstat (limited to 'kernel/rcu/update.c')
-rw-r--r--  kernel/rcu/update.c  345
1 file changed, 344 insertions, 1 deletion
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 4056d7992a6c..3ef8ba58694e 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -47,6 +47,8 @@
 #include <linux/hardirq.h>
 #include <linux/delay.h>
 #include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/tick.h>
 
 #define CREATE_TRACE_POINTS
 
@@ -91,7 +93,7 @@ void __rcu_read_unlock(void)
 	barrier();  /* critical section before exit code. */
 	t->rcu_read_lock_nesting = INT_MIN;
 	barrier();  /* assign before ->rcu_read_unlock_special load */
-	if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
+	if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special.s)))
 		rcu_read_unlock_special(t);
 	barrier();  /* ->rcu_read_unlock_special load before assign */
 	t->rcu_read_lock_nesting = 0;
@@ -137,6 +139,38 @@ int notrace debug_lockdep_rcu_enabled(void)
 EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
 
 /**
+ * rcu_read_lock_held() - might we be in RCU read-side critical section?
+ *
+ * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
+ * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC,
+ * this assumes we are in an RCU read-side critical section unless it can
+ * prove otherwise. This is useful for debug checks in functions that
+ * require that they be called within an RCU read-side critical section.
+ *
+ * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
+ * and while lockdep is disabled.
+ *
+ * Note that rcu_read_lock() and the matching rcu_read_unlock() must
+ * occur in the same context, for example, it is illegal to invoke
+ * rcu_read_unlock() in process context if the matching rcu_read_lock()
+ * was invoked from within an irq handler.
+ *
+ * Note that rcu_read_lock() is disallowed if the CPU is either idle or
+ * offline from an RCU perspective, so check for those as well.
+ */
+int rcu_read_lock_held(void)
+{
+	if (!debug_lockdep_rcu_enabled())
+		return 1;
+	if (!rcu_is_watching())
+		return 0;
+	if (!rcu_lockdep_current_cpu_online())
+		return 0;
+	return lock_is_held(&rcu_lock_map);
+}
+EXPORT_SYMBOL_GPL(rcu_read_lock_held);
+
+/**
  * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
  *
  * Check for bottom half being disabled, which covers both the
@@ -347,3 +381,312 @@ static int __init check_cpu_stall_init(void)
 early_initcall(check_cpu_stall_init);
 
 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
+
+#ifdef CONFIG_TASKS_RCU
+
+/*
+ * Simple variant of RCU whose quiescent states are voluntary context switch,
+ * user-space execution, and idle. As such, grace periods can take one good
+ * long time. There are no read-side primitives similar to rcu_read_lock()
+ * and rcu_read_unlock() because this implementation is intended to get
+ * the system into a safe state for some of the manipulations involved in
+ * tracing and the like. Finally, this implementation does not support
+ * high call_rcu_tasks() rates from multiple CPUs. If this is required,
+ * per-CPU callback lists will be needed.
+ */
+
+/* Global list of callbacks and associated lock. */
+static struct rcu_head *rcu_tasks_cbs_head;
+static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
+static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq);
+static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
+
+/* Track exiting tasks in order to allow them to be waited for. */
+DEFINE_SRCU(tasks_rcu_exit_srcu);
+
+/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */
+static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 10;
+module_param(rcu_task_stall_timeout, int, 0644);
+
+static void rcu_spawn_tasks_kthread(void);
+
+/*
+ * Post an RCU-tasks callback. First call must be from process context
+ * after the scheduler is fully operational.
+ */
+void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
+{
+	unsigned long flags;
+	bool needwake;
+
+	rhp->next = NULL;
+	rhp->func = func;
+	raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
+	needwake = !rcu_tasks_cbs_head;
+	*rcu_tasks_cbs_tail = rhp;
+	rcu_tasks_cbs_tail = &rhp->next;
+	raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
+	if (needwake) {
+		rcu_spawn_tasks_kthread();
+		wake_up(&rcu_tasks_cbs_wq);
+	}
+}
+EXPORT_SYMBOL_GPL(call_rcu_tasks);
+
+/**
+ * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full rcu-tasks
+ * grace period has elapsed, in other words after all currently
+ * executing rcu-tasks read-side critical sections have elapsed. These
+ * read-side critical sections are delimited by calls to schedule(),
+ * cond_resched_rcu_qs(), idle execution, userspace execution, calls
+ * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
+ *
+ * This is a very specialized primitive, intended only for a few uses in
+ * tracing and other situations requiring manipulation of function
+ * preambles and profiling hooks. The synchronize_rcu_tasks() function
+ * is not (yet) intended for heavy use from multiple CPUs.
+ *
+ * Note that this guarantee implies further memory-ordering guarantees.
+ * On systems with more than one CPU, when synchronize_rcu_tasks() returns,
+ * each CPU is guaranteed to have executed a full memory barrier since the
+ * end of its last RCU-tasks read-side critical section whose beginning
+ * preceded the call to synchronize_rcu_tasks(). In addition, each CPU
+ * having an RCU-tasks read-side critical section that extends beyond
+ * the return from synchronize_rcu_tasks() is guaranteed to have executed
+ * a full memory barrier after the beginning of synchronize_rcu_tasks()
+ * and before the beginning of that RCU-tasks read-side critical section.
+ * Note that these guarantees include CPUs that are offline, idle, or
+ * executing in user mode, as well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked synchronize_rcu_tasks(), which returned
+ * to its caller on CPU B, then both CPU A and CPU B are guaranteed
+ * to have executed a full memory barrier during the execution of
+ * synchronize_rcu_tasks() -- even if CPU A and CPU B are the same CPU
+ * (but again only if the system has more than one CPU).
+ */
+void synchronize_rcu_tasks(void)
+{
+	/* Complain if the scheduler has not started. */
+	rcu_lockdep_assert(!rcu_scheduler_active,
+			   "synchronize_rcu_tasks called too soon");
+
+	/* Wait for the grace period. */
+	wait_rcu_gp(call_rcu_tasks);
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);
+
+/**
+ * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
+ *
+ * Although the current implementation is guaranteed to wait, it is not
+ * obligated to, for example, if there are no pending callbacks.
+ */
+void rcu_barrier_tasks(void)
+{
+	/* There is only one callback queue, so this is easy. ;-) */
+	synchronize_rcu_tasks();
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
+
+/* See if tasks are still holding out, complain if so. */
+static void check_holdout_task(struct task_struct *t,
+			       bool needreport, bool *firstreport)
+{
+	int cpu;
+
+	if (!ACCESS_ONCE(t->rcu_tasks_holdout) ||
+	    t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) ||
+	    !ACCESS_ONCE(t->on_rq) ||
+	    (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
+	     !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
+		ACCESS_ONCE(t->rcu_tasks_holdout) = false;
+		list_del_init(&t->rcu_tasks_holdout_list);
+		put_task_struct(t);
+		return;
+	}
+	if (!needreport)
+		return;
+	if (*firstreport) {
+		pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
+		*firstreport = false;
+	}
+	cpu = task_cpu(t);
+	pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
+		 t, ".I"[is_idle_task(t)],
+		 "N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
+		 t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
+		 t->rcu_tasks_idle_cpu, cpu);
+	sched_show_task(t);
+}
+
+/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
+static int __noreturn rcu_tasks_kthread(void *arg)
+{
+	unsigned long flags;
+	struct task_struct *g, *t;
+	unsigned long lastreport;
+	struct rcu_head *list;
+	struct rcu_head *next;
+	LIST_HEAD(rcu_tasks_holdouts);
+
+	/* FIXME: Add housekeeping affinity. */
+
+	/*
+	 * Each pass through the following loop makes one check for
+	 * newly arrived callbacks, and, if there are some, waits for
+	 * one RCU-tasks grace period and then invokes the callbacks.
+	 * This loop is terminated by the system going down. ;-)
+	 */
+	for (;;) {
+
+		/* Pick up any new callbacks. */
+		raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
+		list = rcu_tasks_cbs_head;
+		rcu_tasks_cbs_head = NULL;
+		rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
+		raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
+
+		/* If there were none, wait a bit and start over. */
+		if (!list) {
+			wait_event_interruptible(rcu_tasks_cbs_wq,
+						 rcu_tasks_cbs_head);
+			if (!rcu_tasks_cbs_head) {
+				WARN_ON(signal_pending(current));
+				schedule_timeout_interruptible(HZ/10);
+			}
+			continue;
+		}
+
+		/*
+		 * Wait for all pre-existing t->on_rq and t->nvcsw
+		 * transitions to complete. Invoking synchronize_sched()
+		 * suffices because all these transitions occur with
+		 * interrupts disabled. Without this synchronize_sched(),
+		 * a read-side critical section that started before the
+		 * grace period might be incorrectly seen as having started
+		 * after the grace period.
+		 *
+		 * This synchronize_sched() also dispenses with the
+		 * need for a memory barrier on the first store to
+		 * ->rcu_tasks_holdout, as it forces the store to happen
+		 * after the beginning of the grace period.
+		 */
+		synchronize_sched();
+
+		/*
+		 * There were callbacks, so we need to wait for an
+		 * RCU-tasks grace period. Start off by scanning
+		 * the task list for tasks that are not already
+		 * voluntarily blocked. Mark these tasks and make
+		 * a list of them in rcu_tasks_holdouts.
+		 */
+		rcu_read_lock();
+		for_each_process_thread(g, t) {
+			if (t != current && ACCESS_ONCE(t->on_rq) &&
+			    !is_idle_task(t)) {
+				get_task_struct(t);
+				t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw);
+				ACCESS_ONCE(t->rcu_tasks_holdout) = true;
+				list_add(&t->rcu_tasks_holdout_list,
+					 &rcu_tasks_holdouts);
+			}
+		}
+		rcu_read_unlock();
+
+		/*
+		 * Wait for tasks that are in the process of exiting.
+		 * This does only part of the job, ensuring that all
+		 * tasks that were previously exiting reach the point
+		 * where they have disabled preemption, allowing the
+		 * later synchronize_sched() to finish the job.
+		 */
+		synchronize_srcu(&tasks_rcu_exit_srcu);
+
+		/*
+		 * Each pass through the following loop scans the list
+		 * of holdout tasks, removing any that are no longer
+		 * holdouts. When the list is empty, we are done.
+		 */
+		lastreport = jiffies;
+		while (!list_empty(&rcu_tasks_holdouts)) {
+			bool firstreport;
+			bool needreport;
+			int rtst;
+			struct task_struct *t1;
+
+			schedule_timeout_interruptible(HZ);
+			rtst = ACCESS_ONCE(rcu_task_stall_timeout);
+			needreport = rtst > 0 &&
+				     time_after(jiffies, lastreport + rtst);
+			if (needreport)
+				lastreport = jiffies;
+			firstreport = true;
+			WARN_ON(signal_pending(current));
+			list_for_each_entry_safe(t, t1, &rcu_tasks_holdouts,
+						 rcu_tasks_holdout_list) {
+				check_holdout_task(t, needreport, &firstreport);
+				cond_resched();
+			}
+		}
+
+		/*
+		 * Because ->on_rq and ->nvcsw are not guaranteed
+		 * to have full memory barriers prior to them in the
+		 * schedule() path, memory reordering on other CPUs could
+		 * cause their RCU-tasks read-side critical sections to
+		 * extend past the end of the grace period. However,
+		 * because these ->nvcsw updates are carried out with
+		 * interrupts disabled, we can use synchronize_sched()
+		 * to force the needed ordering on all such CPUs.
+		 *
+		 * This synchronize_sched() also confines all
+		 * ->rcu_tasks_holdout accesses to be within the grace
+		 * period, avoiding the need for memory barriers for
+		 * ->rcu_tasks_holdout accesses.
+		 *
+		 * In addition, this synchronize_sched() waits for exiting
+		 * tasks to complete their final preempt_disable() region
+		 * of execution, cleaning up after the synchronize_srcu()
+		 * above.
+		 */
+		synchronize_sched();
+
+		/* Invoke the callbacks. */
+		while (list) {
+			next = list->next;
+			local_bh_disable();
+			list->func(list);
+			local_bh_enable();
+			list = next;
+			cond_resched();
+		}
+		schedule_timeout_uninterruptible(HZ/10);
+	}
+}
+
+/* Spawn rcu_tasks_kthread() at first call to call_rcu_tasks(). */
+static void rcu_spawn_tasks_kthread(void)
+{
+	static DEFINE_MUTEX(rcu_tasks_kthread_mutex);
+	static struct task_struct *rcu_tasks_kthread_ptr;
+	struct task_struct *t;
+
+	if (ACCESS_ONCE(rcu_tasks_kthread_ptr)) {
+		smp_mb(); /* Ensure caller sees full kthread. */
+		return;
+	}
+	mutex_lock(&rcu_tasks_kthread_mutex);
+	if (rcu_tasks_kthread_ptr) {
+		mutex_unlock(&rcu_tasks_kthread_mutex);
+		return;
+	}
+	t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
+	BUG_ON(IS_ERR(t));
+	smp_mb(); /* Ensure others see full kthread. */
+	ACCESS_ONCE(rcu_tasks_kthread_ptr) = t;
+	mutex_unlock(&rcu_tasks_kthread_mutex);
+}
+
+#endif /* #ifdef CONFIG_TASKS_RCU */
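
As a usage illustration only, and not part of the patch above, the sketch below shows how a caller might rely on call_rcu_tasks() to defer freeing memory that running tasks may still be executing from, for example a dynamically generated trampoline. The struct my_tramp type, its fields, and the my_tramp_* helpers are hypothetical names invented for this sketch; the call_rcu_tasks() declaration is assumed to come from <linux/rcupdate.h> as added elsewhere in this series.

/*
 * Hypothetical usage sketch (not part of this patch): defer freeing of a
 * buffer that tasks might still be executing in, such as a dynamically
 * generated trampoline.  All type and function names below are made up.
 */
#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_tramp {
	void *insn;		/* executable buffer tasks may be running in */
	struct rcu_head rh;	/* passed to call_rcu_tasks() */
};

static void my_tramp_free_cb(struct rcu_head *rhp)
{
	struct my_tramp *tp = container_of(rhp, struct my_tramp, rh);

	/* By now, no task can still be executing inside tp->insn. */
	kfree(tp->insn);
	kfree(tp);
}

static void my_tramp_release(struct my_tramp *tp)
{
	/*
	 * Every task must pass through a voluntary context switch,
	 * user-space execution, or idle before my_tramp_free_cb() runs.
	 */
	call_rcu_tasks(&tp->rh, my_tramp_free_cb);
}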
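
Similarly, a blocking teardown path could use synchronize_rcu_tasks() and rcu_barrier_tasks() directly. Again a hedged sketch rather than anything from the patch: my_unpatch_all() and my_patch_teardown() are hypothetical stand-ins for whatever removes the last entry point into the code being reclaimed.

/* Hypothetical teardown sketch (not part of this patch). */
#include <linux/rcupdate.h>

static void my_unpatch_all(void);	/* hypothetical: removes all entry points */

static void my_patch_teardown(void)
{
	/* Make the patched code unreachable for new execution. */
	my_unpatch_all();

	/*
	 * Wait for every task already inside the patched code to leave it
	 * via a voluntary context switch, user-space execution, or idle.
	 */
	synchronize_rcu_tasks();

	/* Also wait for callbacks previously posted via call_rcu_tasks(). */
	rcu_barrier_tasks();
}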