path: root/kernel/rcu
author	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2014-06-27 16:42:20 -0400
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2014-09-07 19:27:19 -0400
commit	8315f42295d2667a7f942f154b73a86fd7cb2227 (patch)
tree	67057935dada3305e0dab95f546359b40cc29b96 /kernel/rcu
parent	11ed7f934cb807f26da09547b5946c2e534d1dac (diff)
rcu: Add call_rcu_tasks()
This commit adds a new RCU-tasks flavor of RCU, which provides
call_rcu_tasks().  This RCU flavor's quiescent states are voluntary
context switch (not preemption!) and userspace execution (not the idle
loop -- use some sort of schedule_on_each_cpu() if you need to handle
the idle tasks).  Note that unlike other RCU flavors, these quiescent
states occur in tasks, not necessarily CPUs.  Includes fixes from
Steven Rostedt.

This RCU flavor is assumed to have very infrequent latency-tolerant
updaters.  This assumption permits significant simplifications,
including a single global callback list protected by a single global
lock, along with a single list containing all tasks that have not yet
passed through a quiescent state.  If experience shows this assumption
to be incorrect, the required additional complexity will be added.

Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
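[Editor's note: for context, here is a minimal caller-side sketch (not part of this
commit) of how code such as the tracing subsystem might use the new primitive to
defer freeing an object until every task has passed through an RCU-tasks quiescent
state.  The struct and helper names below are hypothetical; only call_rcu_tasks()
itself, declared in <linux/rcupdate.h> by the full patch series, is real.]

#include <linux/rcupdate.h>
#include <linux/slab.h>

/* Hypothetical object whose code/data may still be in use by tasks. */
struct old_trampoline {
	struct rcu_head rh;
	/* ... trampoline pages, bookkeeping, etc. ... */
};

/* RCU-tasks callback: runs after an RCU-tasks grace period. */
static void free_old_trampoline(struct rcu_head *rhp)
{
	struct old_trampoline *tp = container_of(rhp, struct old_trampoline, rh);

	kfree(tp);
}

static void retire_trampoline(struct old_trampoline *tp)
{
	/*
	 * The callback is invoked only once every non-idle task has done a
	 * voluntary context switch or run in userspace, so no task can still
	 * be executing within tp when free_old_trampoline() is called.
	 */
	call_rcu_tasks(&tp->rh, free_old_trampoline);
}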
Diffstat (limited to 'kernel/rcu')
-rw-r--r--	kernel/rcu/tiny.c	2
-rw-r--r--	kernel/rcu/tree.c	2
-rw-r--r--	kernel/rcu/update.c	171
3 files changed, 175 insertions, 0 deletions
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index d9efcc13008c..717f00854fc0 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -254,6 +254,8 @@ void rcu_check_callbacks(int cpu, int user)
 		rcu_sched_qs(cpu);
 	else if (!in_softirq())
 		rcu_bh_qs(cpu);
+	if (user)
+		rcu_note_voluntary_context_switch(current);
 }
 
 /*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1b70cb6fbe3c..8ad91d1e317d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2410,6 +2410,8 @@ void rcu_check_callbacks(int cpu, int user)
 	rcu_preempt_check_callbacks(cpu);
 	if (rcu_pending(cpu))
 		invoke_rcu_core();
+	if (user)
+		rcu_note_voluntary_context_switch(current);
 	trace_rcu_utilization(TPS("End scheduler-tick"));
 }
 
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 4056d7992a6c..19b3dacb0753 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -47,6 +47,7 @@
 #include <linux/hardirq.h>
 #include <linux/delay.h>
 #include <linux/module.h>
+#include <linux/kthread.h>
 
 #define CREATE_TRACE_POINTS
 
@@ -347,3 +348,173 @@ static int __init check_cpu_stall_init(void)
 early_initcall(check_cpu_stall_init);
 
 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
+
+#ifdef CONFIG_TASKS_RCU
+
+/*
+ * Simple variant of RCU whose quiescent states are voluntary context switch,
+ * user-space execution, and idle.  As such, grace periods can take one good
+ * long time.  There are no read-side primitives similar to rcu_read_lock()
+ * and rcu_read_unlock() because this implementation is intended to get
+ * the system into a safe state for some of the manipulations involved in
+ * tracing and the like.  Finally, this implementation does not support
+ * high call_rcu_tasks() rates from multiple CPUs.  If this is required,
+ * per-CPU callback lists will be needed.
+ */
+
+/* Global list of callbacks and associated lock. */
+static struct rcu_head *rcu_tasks_cbs_head;
+static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
+static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
+
+/* Post an RCU-tasks callback. */
+void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
+{
+	unsigned long flags;
+
+	rhp->next = NULL;
+	rhp->func = func;
+	raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
+	*rcu_tasks_cbs_tail = rhp;
+	rcu_tasks_cbs_tail = &rhp->next;
+	raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
+}
+EXPORT_SYMBOL_GPL(call_rcu_tasks);
+
+/* See if the current task has stopped holding out, remove from list if so. */
+static void check_holdout_task(struct task_struct *t)
+{
+	if (!ACCESS_ONCE(t->rcu_tasks_holdout) ||
+	    t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) ||
+	    !ACCESS_ONCE(t->on_rq)) {
+		ACCESS_ONCE(t->rcu_tasks_holdout) = false;
+		list_del_rcu(&t->rcu_tasks_holdout_list);
+		put_task_struct(t);
+	}
+}
+
+/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
+static int __noreturn rcu_tasks_kthread(void *arg)
+{
+	unsigned long flags;
+	struct task_struct *g, *t;
+	struct rcu_head *list;
+	struct rcu_head *next;
+	LIST_HEAD(rcu_tasks_holdouts);
+
+	/* FIXME: Add housekeeping affinity. */
+
+	/*
+	 * Each pass through the following loop makes one check for
+	 * newly arrived callbacks, and, if there are some, waits for
+	 * one RCU-tasks grace period and then invokes the callbacks.
+	 * This loop is terminated by the system going down.  ;-)
+	 */
+	for (;;) {
+
+		/* Pick up any new callbacks. */
+		raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
+		list = rcu_tasks_cbs_head;
+		rcu_tasks_cbs_head = NULL;
+		rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
+		raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
+
+		/* If there were none, wait a bit and start over. */
+		if (!list) {
+			schedule_timeout_interruptible(HZ);
+			WARN_ON(signal_pending(current));
+			continue;
+		}
+
+		/*
+		 * Wait for all pre-existing t->on_rq and t->nvcsw
+		 * transitions to complete.  Invoking synchronize_sched()
+		 * suffices because all these transitions occur with
+		 * interrupts disabled.  Without this synchronize_sched(),
+		 * a read-side critical section that started before the
+		 * grace period might be incorrectly seen as having started
+		 * after the grace period.
+		 *
+		 * This synchronize_sched() also dispenses with the
+		 * need for a memory barrier on the first store to
+		 * ->rcu_tasks_holdout, as it forces the store to happen
+		 * after the beginning of the grace period.
+		 */
+		synchronize_sched();
+
+		/*
+		 * There were callbacks, so we need to wait for an
+		 * RCU-tasks grace period.  Start off by scanning
+		 * the task list for tasks that are not already
+		 * voluntarily blocked.  Mark these tasks and make
+		 * a list of them in rcu_tasks_holdouts.
+		 */
+		rcu_read_lock();
+		for_each_process_thread(g, t) {
+			if (t != current && ACCESS_ONCE(t->on_rq) &&
+			    !is_idle_task(t)) {
+				get_task_struct(t);
+				t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw);
+				ACCESS_ONCE(t->rcu_tasks_holdout) = true;
+				list_add(&t->rcu_tasks_holdout_list,
+					 &rcu_tasks_holdouts);
+			}
+		}
+		rcu_read_unlock();
+
+		/*
+		 * Each pass through the following loop scans the list
+		 * of holdout tasks, removing any that are no longer
+		 * holdouts.  When the list is empty, we are done.
+		 */
+		while (!list_empty(&rcu_tasks_holdouts)) {
+			schedule_timeout_interruptible(HZ);
+			WARN_ON(signal_pending(current));
+			rcu_read_lock();
+			list_for_each_entry_rcu(t, &rcu_tasks_holdouts,
+						rcu_tasks_holdout_list)
+				check_holdout_task(t);
+			rcu_read_unlock();
+		}
+
+		/*
+		 * Because ->on_rq and ->nvcsw are not guaranteed
+		 * to have full memory barriers prior to them in the
+		 * schedule() path, memory reordering on other CPUs could
+		 * cause their RCU-tasks read-side critical sections to
+		 * extend past the end of the grace period.  However,
+		 * because these ->nvcsw updates are carried out with
+		 * interrupts disabled, we can use synchronize_sched()
+		 * to force the needed ordering on all such CPUs.
+		 *
+		 * This synchronize_sched() also confines all
+		 * ->rcu_tasks_holdout accesses to be within the grace
+		 * period, avoiding the need for memory barriers for
+		 * ->rcu_tasks_holdout accesses.
+		 */
+		synchronize_sched();
+
+		/* Invoke the callbacks. */
+		while (list) {
+			next = list->next;
+			local_bh_disable();
+			list->func(list);
+			local_bh_enable();
+			list = next;
+			cond_resched();
+		}
+	}
+}
+
+/* Spawn rcu_tasks_kthread() at boot time. */
+static int __init rcu_spawn_tasks_kthread(void)
+{
+	struct task_struct __maybe_unused *t;
+
+	t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
+	BUG_ON(IS_ERR(t));
+	return 0;
+}
+early_initcall(rcu_spawn_tasks_kthread);
+
+#endif /* #ifdef CONFIG_TASKS_RCU */
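
[Editor's note: the enqueue/drain pattern above -- a singly linked callback list
with a tail pointer, appended to under a lock and detached in one batch by the
kthread -- can be illustrated outside the kernel.  The following user-space
sketch substitutes a pthread mutex for rcu_tasks_cbs_lock; every name in it is
illustrative only and appears nowhere in the patch.]

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

struct cb {
	struct cb *next;
	void (*func)(struct cb *cbp);
};

static struct cb *cb_head;
static struct cb **cb_tail = &cb_head;
static pthread_mutex_t cb_lock = PTHREAD_MUTEX_INITIALIZER;

/* Producer side: append one callback in O(1) via the tail pointer. */
static void post_cb(struct cb *cbp, void (*func)(struct cb *))
{
	cbp->next = NULL;
	cbp->func = func;
	pthread_mutex_lock(&cb_lock);
	*cb_tail = cbp;
	cb_tail = &cbp->next;
	pthread_mutex_unlock(&cb_lock);
}

/*
 * Consumer side: detach the entire pending list under the lock, then invoke
 * the callbacks with the lock dropped, mirroring rcu_tasks_kthread().
 */
static void run_cbs(void)
{
	struct cb *list, *next;

	pthread_mutex_lock(&cb_lock);
	list = cb_head;
	cb_head = NULL;
	cb_tail = &cb_head;
	pthread_mutex_unlock(&cb_lock);

	while (list) {
		next = list->next;
		list->func(list);
		list = next;
	}
}

static void hello(struct cb *cbp)
{
	(void)cbp;
	printf("callback invoked\n");
}

int main(void)
{
	static struct cb c;

	post_cb(&c, hello);
	run_cbs();
	return 0;
}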