author     Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2014-09-16 13:10:44 -0400
committer  Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2014-09-16 13:10:44 -0400
commit     96b4672703ed4538c7fc25de36df4415a0ee237c (patch)
tree       e5bb8f4c3eb41c5741a7b232cff8e502f6509fc3
parent     e98d06dd6cd791b5138b0fc6c14a9c0b4d1f2e72 (diff)
parent     a53dd6a65668850493cce94395c1b88a015eb338 (diff)
Merge branch 'rcu-tasks.2014.09.10a' into HEAD
rcu-tasks.2014.09.10a: Add RCU-tasks flavor of RCU.
-rw-r--r--  Documentation/RCU/stallwarn.txt                              |  33
-rw-r--r--  Documentation/kernel-parameters.txt                          |   5
-rw-r--r--  fs/file.c                                                    |   2
-rw-r--r--  include/linux/init_task.h                                    |  12
-rw-r--r--  include/linux/rcupdate.h                                     |  57
-rw-r--r--  include/linux/rcutiny.h                                      |   2
-rw-r--r--  include/linux/sched.h                                        |  39
-rw-r--r--  init/Kconfig                                                 |  10
-rw-r--r--  kernel/exit.c                                                |   3
-rw-r--r--  kernel/rcu/rcutorture.c                                      |  54
-rw-r--r--  kernel/rcu/tiny.c                                            |  12
-rw-r--r--  kernel/rcu/tree.c                                            |  50
-rw-r--r--  kernel/rcu/tree.h                                            |   2
-rw-r--r--  kernel/rcu/tree_plugin.h                                     |  77
-rw-r--r--  kernel/rcu/update.c                                          | 313
-rw-r--r--  kernel/softirq.c                                             |   2
-rw-r--r--  mm/mlock.c                                                   |   2
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/CFLIST        |   3
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TASKS01       |   9
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot  |   1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TASKS02       |   5
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot  |   1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TASKS03       |  13
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot  |   1
24 files changed, 614 insertions, 94 deletions
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index 68fe3ad27015..ef5a2fd4ff70 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -56,8 +56,20 @@ RCU_STALL_RAT_DELAY
56 two jiffies. (This is a cpp macro, not a kernel configuration 56 two jiffies. (This is a cpp macro, not a kernel configuration
57 parameter.) 57 parameter.)
58 58
59When a CPU detects that it is stalling, it will print a message similar 59rcupdate.rcu_task_stall_timeout
60to the following: 60
61 This boot/sysfs parameter controls the RCU-tasks stall warning
62 interval. A value of zero or less suppresses RCU-tasks stall
63 warnings. A positive value sets the stall-warning interval
64 in jiffies. An RCU-tasks stall warning starts with the line:
65
66 INFO: rcu_tasks detected stalls on tasks:
67
68 And continues with the output of sched_show_task() for each
69 task stalling the current RCU-tasks grace period.
70
71For non-RCU-tasks flavors of RCU, when a CPU detects that it is stalling,
72it will print a message similar to the following:
61 73
62INFO: rcu_sched_state detected stall on CPU 5 (t=2500 jiffies) 74INFO: rcu_sched_state detected stall on CPU 5 (t=2500 jiffies)
63 75
@@ -174,8 +186,12 @@ o A CPU looping with preemption disabled. This condition can
174o A CPU looping with bottom halves disabled. This condition can 186o A CPU looping with bottom halves disabled. This condition can
175 result in RCU-sched and RCU-bh stalls. 187 result in RCU-sched and RCU-bh stalls.
176 188
177o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel 189o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the
178 without invoking schedule(). 190 kernel without invoking schedule(). Note that cond_resched()
191 does not necessarily prevent RCU CPU stall warnings. Therefore,
192 if the looping in the kernel is really expected and desirable
193 behavior, you might need to replace some of the cond_resched()
194 calls with calls to cond_resched_rcu_qs().
179 195
180o A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might 196o A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might
181 happen to preempt a low-priority task in the middle of an RCU 197 happen to preempt a low-priority task in the middle of an RCU
@@ -208,11 +224,10 @@ o A hardware failure. This is quite unlikely, but has occurred
208 This resulted in a series of RCU CPU stall warnings, eventually 224 This resulted in a series of RCU CPU stall warnings, eventually
209 leading to the realization that the CPU had failed. 225 leading to the realization that the CPU had failed.
210 226
211The RCU, RCU-sched, and RCU-bh implementations have CPU stall warning. 227The RCU, RCU-sched, RCU-bh, and RCU-tasks implementations have CPU stall
212SRCU does not have its own CPU stall warnings, but its calls to 228warning. Note that SRCU does -not- have CPU stall warnings. Please note
213synchronize_sched() will result in RCU-sched detecting RCU-sched-related 229that RCU only detects CPU stalls when there is a grace period in progress.
214CPU stalls. Please note that RCU only detects CPU stalls when there is 230No grace period, no CPU stall warnings.
215a grace period in progress. No grace period, no CPU stall warnings.
216 231
217To diagnose the cause of the stall, inspect the stack traces. 232To diagnose the cause of the stall, inspect the stack traces.
218The offending function will usually be near the top of the stack. 233The offending function will usually be near the top of the stack.
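
The new stallwarn.txt text above notes that cond_resched() does not necessarily prevent RCU CPU stall warnings and suggests switching expected long-running loops to cond_resched_rcu_qs(). A minimal sketch of that substitution follows; the table-scanning loop and its helpers are hypothetical and not part of this patch:

	#include <linux/rcupdate.h>	/* cond_resched_rcu_qs() */
	#include <linux/sched.h>

	/*
	 * Hypothetical long-running scan over a large table.  The looping
	 * is expected and desirable, so each pass explicitly reports a
	 * potential quiescent state (including to RCU-tasks) instead of
	 * relying on cond_resched() alone.
	 */
	static void scan_all_buckets(struct bucket *table, int nr_buckets)
	{
		int i;

		for (i = 0; i < nr_buckets; i++) {
			process_bucket(&table[i]);	/* hypothetical helper */
			cond_resched_rcu_qs();		/* was: cond_resched() */
		}
	}

This is the same one-line substitution the patch applies to fs/file.c, mm/mlock.c, and several RCU-internal loops further down.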
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 0a104be4ad86..e1147bc62633 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3000,6 +3000,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
3000 rcupdate.rcu_cpu_stall_timeout= [KNL] 3000 rcupdate.rcu_cpu_stall_timeout= [KNL]
3001 Set timeout for RCU CPU stall warning messages. 3001 Set timeout for RCU CPU stall warning messages.
3002 3002
3003 rcupdate.rcu_task_stall_timeout= [KNL]
3004 Set timeout in jiffies for RCU task stall warning
3005 messages. Disable with a value less than or equal
3006 to zero.
3007
3003 rdinit= [KNL] 3008 rdinit= [KNL]
3004 Format: <full_path> 3009 Format: <full_path>
3005 Run specified binary instead of /init from the ramdisk, 3010 Run specified binary instead of /init from the ramdisk,
diff --git a/fs/file.c b/fs/file.c
index 66923fe3176e..1cafc4c9275b 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -367,7 +367,7 @@ static struct fdtable *close_files(struct files_struct * files)
367 struct file * file = xchg(&fdt->fd[i], NULL); 367 struct file * file = xchg(&fdt->fd[i], NULL);
368 if (file) { 368 if (file) {
369 filp_close(file, files); 369 filp_close(file, files);
370 cond_resched(); 370 cond_resched_rcu_qs();
371 } 371 }
372 } 372 }
373 i++; 373 i++;
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 2bb4c4f3531a..77fc43f8fb72 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -111,12 +111,21 @@ extern struct group_info init_groups;
111#ifdef CONFIG_PREEMPT_RCU 111#ifdef CONFIG_PREEMPT_RCU
112#define INIT_TASK_RCU_PREEMPT(tsk) \ 112#define INIT_TASK_RCU_PREEMPT(tsk) \
113 .rcu_read_lock_nesting = 0, \ 113 .rcu_read_lock_nesting = 0, \
114 .rcu_read_unlock_special = 0, \ 114 .rcu_read_unlock_special.s = 0, \
115 .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ 115 .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \
116 INIT_TASK_RCU_TREE_PREEMPT() 116 INIT_TASK_RCU_TREE_PREEMPT()
117#else 117#else
118#define INIT_TASK_RCU_PREEMPT(tsk) 118#define INIT_TASK_RCU_PREEMPT(tsk)
119#endif 119#endif
120#ifdef CONFIG_TASKS_RCU
121#define INIT_TASK_RCU_TASKS(tsk) \
122 .rcu_tasks_holdout = false, \
123 .rcu_tasks_holdout_list = \
124 LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list), \
125 .rcu_tasks_idle_cpu = -1,
126#else
127#define INIT_TASK_RCU_TASKS(tsk)
128#endif
120 129
121extern struct cred init_cred; 130extern struct cred init_cred;
122 131
@@ -224,6 +233,7 @@ extern struct task_group root_task_group;
224 INIT_FTRACE_GRAPH \ 233 INIT_FTRACE_GRAPH \
225 INIT_TRACE_RECURSION \ 234 INIT_TRACE_RECURSION \
226 INIT_TASK_RCU_PREEMPT(tsk) \ 235 INIT_TASK_RCU_PREEMPT(tsk) \
236 INIT_TASK_RCU_TASKS(tsk) \
227 INIT_CPUSET_SEQ(tsk) \ 237 INIT_CPUSET_SEQ(tsk) \
228 INIT_RT_MUTEXES(tsk) \ 238 INIT_RT_MUTEXES(tsk) \
229 INIT_VTIME(tsk) \ 239 INIT_VTIME(tsk) \
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 334ff89aada0..5cafd60c1ee4 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -55,6 +55,7 @@ enum rcutorture_type {
55 RCU_FLAVOR, 55 RCU_FLAVOR,
56 RCU_BH_FLAVOR, 56 RCU_BH_FLAVOR,
57 RCU_SCHED_FLAVOR, 57 RCU_SCHED_FLAVOR,
58 RCU_TASKS_FLAVOR,
58 SRCU_FLAVOR, 59 SRCU_FLAVOR,
59 INVALID_RCU_FLAVOR 60 INVALID_RCU_FLAVOR
60}; 61};
@@ -197,6 +198,28 @@ void call_rcu_sched(struct rcu_head *head,
197 198
198void synchronize_sched(void); 199void synchronize_sched(void);
199 200
201/**
202 * call_rcu_tasks() - Queue an RCU callback for invocation after a task-based grace period
203 * @head: structure to be used for queueing the RCU updates.
204 * @func: actual callback function to be invoked after the grace period
205 *
206 * The callback function will be invoked some time after a full grace
207 * period elapses, in other words after all currently executing RCU
208 * read-side critical sections have completed. call_rcu_tasks() assumes
209 * that the read-side critical sections end at a voluntary context
210 * switch (not a preemption!), entry into idle, or transition to usermode
211 * execution. As such, there are no read-side primitives analogous to
212 * rcu_read_lock() and rcu_read_unlock() because this primitive is intended
213 * to determine that all tasks have passed through a safe state, not so
214 * much for data-structure synchronization.
215 *
216 * See the description of call_rcu() for more detailed information on
217 * memory ordering guarantees.
218 */
219void call_rcu_tasks(struct rcu_head *head, void (*func)(struct rcu_head *head));
220void synchronize_rcu_tasks(void);
221void rcu_barrier_tasks(void);
222
200#ifdef CONFIG_PREEMPT_RCU 223#ifdef CONFIG_PREEMPT_RCU
201 224
202void __rcu_read_lock(void); 225void __rcu_read_lock(void);
@@ -238,8 +261,8 @@ static inline int rcu_preempt_depth(void)
238 261
239/* Internal to kernel */ 262/* Internal to kernel */
240void rcu_init(void); 263void rcu_init(void);
241void rcu_sched_qs(int cpu); 264void rcu_sched_qs(void);
242void rcu_bh_qs(int cpu); 265void rcu_bh_qs(void);
243void rcu_check_callbacks(int cpu, int user); 266void rcu_check_callbacks(int cpu, int user);
244struct notifier_block; 267struct notifier_block;
245void rcu_idle_enter(void); 268void rcu_idle_enter(void);
@@ -302,6 +325,36 @@ static inline void rcu_init_nohz(void)
302 rcu_irq_exit(); \ 325 rcu_irq_exit(); \
303 } while (0) 326 } while (0)
304 327
328/*
329 * Note a voluntary context switch for RCU-tasks benefit. This is a
330 * macro rather than an inline function to avoid #include hell.
331 */
332#ifdef CONFIG_TASKS_RCU
333#define TASKS_RCU(x) x
334extern struct srcu_struct tasks_rcu_exit_srcu;
335#define rcu_note_voluntary_context_switch(t) \
336 do { \
337 if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \
338 ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \
339 } while (0)
340#else /* #ifdef CONFIG_TASKS_RCU */
341#define TASKS_RCU(x) do { } while (0)
342#define rcu_note_voluntary_context_switch(t) do { } while (0)
343#endif /* #else #ifdef CONFIG_TASKS_RCU */
344
345/**
346 * cond_resched_rcu_qs - Report potential quiescent states to RCU
347 *
348 * This macro resembles cond_resched(), except that it is defined to
349 * report potential quiescent states to RCU-tasks even if the cond_resched()
350 * machinery were to be shut off, as some advocate for PREEMPT kernels.
351 */
352#define cond_resched_rcu_qs() \
353do { \
354 rcu_note_voluntary_context_switch(current); \
355 cond_resched(); \
356} while (0)
357
305#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) 358#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP)
306bool __rcu_is_watching(void); 359bool __rcu_is_watching(void);
307#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */ 360#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */
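
Taken together, the call_rcu_tasks(), synchronize_rcu_tasks(), and rcu_barrier_tasks() declarations above form the entire update-side API of the new flavor; there are deliberately no read-side primitives. A hedged usage sketch follows, with the trampoline structure and function names invented for illustration (they are not part of this patch): a tracer that must not free a trampoline while some task might still be running through it can defer the free past an RCU-tasks grace period.

	#include <linux/kernel.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	/* Hypothetical wrapper; only the rcu_head member is required by the API. */
	struct tramp {
		struct rcu_head rh;	/* passed to call_rcu_tasks() */
		void *buf;		/* memory a task might still be using */
	};

	static void tramp_free_cb(struct rcu_head *rhp)
	{
		struct tramp *tp = container_of(rhp, struct tramp, rh);

		/*
		 * Runs only after every task has passed through a voluntary
		 * context switch, entered idle, or executed in usermode.
		 */
		kfree(tp->buf);
		kfree(tp);
	}

	static void retire_trampoline(struct tramp *tp)
	{
		/* Asynchronous: returns immediately, frees later. */
		call_rcu_tasks(&tp->rh, tramp_free_cb);
	}

	static void retire_trampoline_sync(struct tramp *tp)
	{
		/*
		 * Synchronous alternative: blocks for a full RCU-tasks
		 * grace period, which can be a long time.
		 */
		synchronize_rcu_tasks();
		kfree(tp->buf);
		kfree(tp);
	}

A caller using the asynchronous form would typically also invoke rcu_barrier_tasks() before unloading any code containing its callback functions.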
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index d40a6a451330..38cc5b1e252d 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -80,7 +80,7 @@ static inline void kfree_call_rcu(struct rcu_head *head,
80 80
81static inline void rcu_note_context_switch(int cpu) 81static inline void rcu_note_context_switch(int cpu)
82{ 82{
83 rcu_sched_qs(cpu); 83 rcu_sched_qs();
84} 84}
85 85
86/* 86/*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5c2c885ee52b..42888d715fb1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1212,6 +1212,13 @@ struct sched_dl_entity {
1212 struct hrtimer dl_timer; 1212 struct hrtimer dl_timer;
1213}; 1213};
1214 1214
1215union rcu_special {
1216 struct {
1217 bool blocked;
1218 bool need_qs;
1219 } b;
1220 short s;
1221};
1215struct rcu_node; 1222struct rcu_node;
1216 1223
1217enum perf_event_task_context { 1224enum perf_event_task_context {
@@ -1264,12 +1271,18 @@ struct task_struct {
1264 1271
1265#ifdef CONFIG_PREEMPT_RCU 1272#ifdef CONFIG_PREEMPT_RCU
1266 int rcu_read_lock_nesting; 1273 int rcu_read_lock_nesting;
1267 char rcu_read_unlock_special; 1274 union rcu_special rcu_read_unlock_special;
1268 struct list_head rcu_node_entry; 1275 struct list_head rcu_node_entry;
1269#endif /* #ifdef CONFIG_PREEMPT_RCU */ 1276#endif /* #ifdef CONFIG_PREEMPT_RCU */
1270#ifdef CONFIG_TREE_PREEMPT_RCU 1277#ifdef CONFIG_TREE_PREEMPT_RCU
1271 struct rcu_node *rcu_blocked_node; 1278 struct rcu_node *rcu_blocked_node;
1272#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 1279#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
1280#ifdef CONFIG_TASKS_RCU
1281 unsigned long rcu_tasks_nvcsw;
1282 bool rcu_tasks_holdout;
1283 struct list_head rcu_tasks_holdout_list;
1284 int rcu_tasks_idle_cpu;
1285#endif /* #ifdef CONFIG_TASKS_RCU */
1273 1286
1274#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 1287#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
1275 struct sched_info sched_info; 1288 struct sched_info sched_info;
@@ -1999,29 +2012,21 @@ extern void task_clear_jobctl_trapping(struct task_struct *task);
1999extern void task_clear_jobctl_pending(struct task_struct *task, 2012extern void task_clear_jobctl_pending(struct task_struct *task,
2000 unsigned int mask); 2013 unsigned int mask);
2001 2014
2002#ifdef CONFIG_PREEMPT_RCU
2003
2004#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
2005#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
2006
2007static inline void rcu_copy_process(struct task_struct *p) 2015static inline void rcu_copy_process(struct task_struct *p)
2008{ 2016{
2017#ifdef CONFIG_PREEMPT_RCU
2009 p->rcu_read_lock_nesting = 0; 2018 p->rcu_read_lock_nesting = 0;
2010 p->rcu_read_unlock_special = 0; 2019 p->rcu_read_unlock_special.s = 0;
2011#ifdef CONFIG_TREE_PREEMPT_RCU
2012 p->rcu_blocked_node = NULL; 2020 p->rcu_blocked_node = NULL;
2013#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
2014 INIT_LIST_HEAD(&p->rcu_node_entry); 2021 INIT_LIST_HEAD(&p->rcu_node_entry);
2022#endif /* #ifdef CONFIG_PREEMPT_RCU */
2023#ifdef CONFIG_TASKS_RCU
2024 p->rcu_tasks_holdout = false;
2025 INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
2026 p->rcu_tasks_idle_cpu = -1;
2027#endif /* #ifdef CONFIG_TASKS_RCU */
2015} 2028}
2016 2029
2017#else
2018
2019static inline void rcu_copy_process(struct task_struct *p)
2020{
2021}
2022
2023#endif
2024
2025static inline void tsk_restore_flags(struct task_struct *task, 2030static inline void tsk_restore_flags(struct task_struct *task,
2026 unsigned long orig_flags, unsigned long flags) 2031 unsigned long orig_flags, unsigned long flags)
2027{ 2032{
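
The new union rcu_special above exists so that the two per-task flags can be updated individually through ->b while a single load or store of ->s tests or clears both at once, which is how __rcu_read_unlock() and the task-initialization paths in this patch use it. A standalone C illustration of the aliasing, assuming as the kernel does that both bool members fit within the short:

	#include <stdbool.h>
	#include <stdio.h>
	#include <string.h>

	/* User-space mirror of the kernel's union rcu_special, for illustration only. */
	union rcu_special {
		struct {
			bool blocked;
			bool need_qs;
		} b;
		short s;
	};

	int main(void)
	{
		union rcu_special rs;

		memset(&rs, 0, sizeof(rs));
		rs.b.blocked = true;		/* set one flag by itself */
		printf("special work pending? %s\n", rs.s ? "yes" : "no");
		rs.s = 0;			/* clear every flag with one store */
		return 0;
	}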
diff --git a/init/Kconfig b/init/Kconfig
index 64ee4d967786..4fe5500b998f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -507,6 +507,16 @@ config PREEMPT_RCU
507 This option enables preemptible-RCU code that is common between 507 This option enables preemptible-RCU code that is common between
508 TREE_PREEMPT_RCU and, in the old days, TINY_PREEMPT_RCU. 508 TREE_PREEMPT_RCU and, in the old days, TINY_PREEMPT_RCU.
509 509
510config TASKS_RCU
511 bool "Task-based RCU implementation using voluntary context switch"
512 default n
513 help
514 This option enables a task-based RCU implementation that uses
515 only voluntary context switch (not preemption!), idle, and
516 user-mode execution as quiescent states.
517
518 If unsure, say N.
519
510config RCU_STALL_COMMON 520config RCU_STALL_COMMON
511 def_bool ( TREE_RCU || TREE_PREEMPT_RCU || RCU_TRACE ) 521 def_bool ( TREE_RCU || TREE_PREEMPT_RCU || RCU_TRACE )
512 help 522 help
diff --git a/kernel/exit.c b/kernel/exit.c
index 32c58f7433a3..d13f2eec4bb8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -667,6 +667,7 @@ void do_exit(long code)
667{ 667{
668 struct task_struct *tsk = current; 668 struct task_struct *tsk = current;
669 int group_dead; 669 int group_dead;
670 TASKS_RCU(int tasks_rcu_i);
670 671
671 profile_task_exit(tsk); 672 profile_task_exit(tsk);
672 673
@@ -775,6 +776,7 @@ void do_exit(long code)
775 */ 776 */
776 flush_ptrace_hw_breakpoint(tsk); 777 flush_ptrace_hw_breakpoint(tsk);
777 778
779 TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
778 exit_notify(tsk, group_dead); 780 exit_notify(tsk, group_dead);
779 proc_exit_connector(tsk); 781 proc_exit_connector(tsk);
780#ifdef CONFIG_NUMA 782#ifdef CONFIG_NUMA
@@ -814,6 +816,7 @@ void do_exit(long code)
814 if (tsk->nr_dirtied) 816 if (tsk->nr_dirtied)
815 __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); 817 __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
816 exit_rcu(); 818 exit_rcu();
819 TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));
817 820
818 /* 821 /*
819 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed 822 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 0bcd53adac73..6d1509500d2b 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -612,6 +612,52 @@ static struct rcu_torture_ops sched_ops = {
612 .name = "sched" 612 .name = "sched"
613}; 613};
614 614
615#ifdef CONFIG_TASKS_RCU
616
617/*
618 * Definitions for RCU-tasks torture testing.
619 */
620
621static int tasks_torture_read_lock(void)
622{
623 return 0;
624}
625
626static void tasks_torture_read_unlock(int idx)
627{
628}
629
630static void rcu_tasks_torture_deferred_free(struct rcu_torture *p)
631{
632 call_rcu_tasks(&p->rtort_rcu, rcu_torture_cb);
633}
634
635static struct rcu_torture_ops tasks_ops = {
636 .ttype = RCU_TASKS_FLAVOR,
637 .init = rcu_sync_torture_init,
638 .readlock = tasks_torture_read_lock,
639 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
640 .readunlock = tasks_torture_read_unlock,
641 .completed = rcu_no_completed,
642 .deferred_free = rcu_tasks_torture_deferred_free,
643 .sync = synchronize_rcu_tasks,
644 .exp_sync = synchronize_rcu_tasks,
645 .call = call_rcu_tasks,
646 .cb_barrier = rcu_barrier_tasks,
647 .fqs = NULL,
648 .stats = NULL,
649 .irq_capable = 1,
650 .name = "tasks"
651};
652
653#define RCUTORTURE_TASKS_OPS &tasks_ops,
654
655#else /* #ifdef CONFIG_TASKS_RCU */
656
657#define RCUTORTURE_TASKS_OPS
658
659#endif /* #else #ifdef CONFIG_TASKS_RCU */
660
615/* 661/*
616 * RCU torture priority-boost testing. Runs one real-time thread per 662 * RCU torture priority-boost testing. Runs one real-time thread per
617 * CPU for moderate bursts, repeatedly registering RCU callbacks and 663 * CPU for moderate bursts, repeatedly registering RCU callbacks and
@@ -678,7 +724,7 @@ static int rcu_torture_boost(void *arg)
678 } 724 }
679 call_rcu_time = jiffies; 725 call_rcu_time = jiffies;
680 } 726 }
681 cond_resched(); 727 cond_resched_rcu_qs();
682 stutter_wait("rcu_torture_boost"); 728 stutter_wait("rcu_torture_boost");
683 if (torture_must_stop()) 729 if (torture_must_stop())
684 goto checkwait; 730 goto checkwait;
@@ -1082,7 +1128,7 @@ rcu_torture_reader(void *arg)
1082 __this_cpu_inc(rcu_torture_batch[completed]); 1128 __this_cpu_inc(rcu_torture_batch[completed]);
1083 preempt_enable(); 1129 preempt_enable();
1084 cur_ops->readunlock(idx); 1130 cur_ops->readunlock(idx);
1085 cond_resched(); 1131 cond_resched_rcu_qs();
1086 stutter_wait("rcu_torture_reader"); 1132 stutter_wait("rcu_torture_reader");
1087 } while (!torture_must_stop()); 1133 } while (!torture_must_stop());
1088 if (irqreader && cur_ops->irq_capable) { 1134 if (irqreader && cur_ops->irq_capable) {
@@ -1344,7 +1390,8 @@ static int rcu_torture_barrier_cbs(void *arg)
1344 if (atomic_dec_and_test(&barrier_cbs_count)) 1390 if (atomic_dec_and_test(&barrier_cbs_count))
1345 wake_up(&barrier_wq); 1391 wake_up(&barrier_wq);
1346 } while (!torture_must_stop()); 1392 } while (!torture_must_stop());
1347 cur_ops->cb_barrier(); 1393 if (cur_ops->cb_barrier != NULL)
1394 cur_ops->cb_barrier();
1348 destroy_rcu_head_on_stack(&rcu); 1395 destroy_rcu_head_on_stack(&rcu);
1349 torture_kthread_stopping("rcu_torture_barrier_cbs"); 1396 torture_kthread_stopping("rcu_torture_barrier_cbs");
1350 return 0; 1397 return 0;
@@ -1585,6 +1632,7 @@ rcu_torture_init(void)
1585 int firsterr = 0; 1632 int firsterr = 0;
1586 static struct rcu_torture_ops *torture_ops[] = { 1633 static struct rcu_torture_ops *torture_ops[] = {
1587 &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops, 1634 &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops,
1635 RCUTORTURE_TASKS_OPS
1588 }; 1636 };
1589 1637
1590 if (!torture_init_begin(torture_type, verbose, &rcutorture_runnable)) 1638 if (!torture_init_begin(torture_type, verbose, &rcutorture_runnable))
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index 4a55a2416e3c..c0623fc47125 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -72,7 +72,7 @@ static void rcu_idle_enter_common(long long newval)
72 current->pid, current->comm, 72 current->pid, current->comm,
73 idle->pid, idle->comm); /* must be idle task! */ 73 idle->pid, idle->comm); /* must be idle task! */
74 } 74 }
75 rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ 75 rcu_sched_qs(); /* implies rcu_bh_inc() */
76 barrier(); 76 barrier();
77 rcu_dynticks_nesting = newval; 77 rcu_dynticks_nesting = newval;
78} 78}
@@ -217,7 +217,7 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
217 * are at it, given that any rcu quiescent state is also an rcu_bh 217 * are at it, given that any rcu quiescent state is also an rcu_bh
218 * quiescent state. Use "+" instead of "||" to defeat short circuiting. 218 * quiescent state. Use "+" instead of "||" to defeat short circuiting.
219 */ 219 */
220void rcu_sched_qs(int cpu) 220void rcu_sched_qs(void)
221{ 221{
222 unsigned long flags; 222 unsigned long flags;
223 223
@@ -231,7 +231,7 @@ void rcu_sched_qs(int cpu)
231/* 231/*
232 * Record an rcu_bh quiescent state. 232 * Record an rcu_bh quiescent state.
233 */ 233 */
234void rcu_bh_qs(int cpu) 234void rcu_bh_qs(void)
235{ 235{
236 unsigned long flags; 236 unsigned long flags;
237 237
@@ -251,9 +251,11 @@ void rcu_check_callbacks(int cpu, int user)
251{ 251{
252 RCU_TRACE(check_cpu_stalls()); 252 RCU_TRACE(check_cpu_stalls());
253 if (user || rcu_is_cpu_rrupt_from_idle()) 253 if (user || rcu_is_cpu_rrupt_from_idle())
254 rcu_sched_qs(cpu); 254 rcu_sched_qs();
255 else if (!in_softirq()) 255 else if (!in_softirq())
256 rcu_bh_qs(cpu); 256 rcu_bh_qs();
257 if (user)
258 rcu_note_voluntary_context_switch(current);
257} 259}
258 260
259/* 261/*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index be0d0a1b7129..d7a3b13bc94c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -197,22 +197,24 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
197 * one since the start of the grace period, this just sets a flag. 197 * one since the start of the grace period, this just sets a flag.
198 * The caller must have disabled preemption. 198 * The caller must have disabled preemption.
199 */ 199 */
200void rcu_sched_qs(int cpu) 200void rcu_sched_qs(void)
201{ 201{
202 struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); 202 if (!__this_cpu_read(rcu_sched_data.passed_quiesce)) {
203 203 trace_rcu_grace_period(TPS("rcu_sched"),
204 if (rdp->passed_quiesce == 0) 204 __this_cpu_read(rcu_sched_data.gpnum),
205 trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs")); 205 TPS("cpuqs"));
206 rdp->passed_quiesce = 1; 206 __this_cpu_write(rcu_sched_data.passed_quiesce, 1);
207 }
207} 208}
208 209
209void rcu_bh_qs(int cpu) 210void rcu_bh_qs(void)
210{ 211{
211 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); 212 if (!__this_cpu_read(rcu_bh_data.passed_quiesce)) {
212 213 trace_rcu_grace_period(TPS("rcu_bh"),
213 if (rdp->passed_quiesce == 0) 214 __this_cpu_read(rcu_bh_data.gpnum),
214 trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs")); 215 TPS("cpuqs"));
215 rdp->passed_quiesce = 1; 216 __this_cpu_write(rcu_bh_data.passed_quiesce, 1);
217 }
216} 218}
217 219
218static DEFINE_PER_CPU(int, rcu_sched_qs_mask); 220static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
@@ -287,7 +289,7 @@ static void rcu_momentary_dyntick_idle(void)
287void rcu_note_context_switch(int cpu) 289void rcu_note_context_switch(int cpu)
288{ 290{
289 trace_rcu_utilization(TPS("Start context switch")); 291 trace_rcu_utilization(TPS("Start context switch"));
290 rcu_sched_qs(cpu); 292 rcu_sched_qs();
291 rcu_preempt_note_context_switch(cpu); 293 rcu_preempt_note_context_switch(cpu);
292 if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) 294 if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
293 rcu_momentary_dyntick_idle(); 295 rcu_momentary_dyntick_idle();
@@ -535,6 +537,7 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
535 atomic_inc(&rdtp->dynticks); 537 atomic_inc(&rdtp->dynticks);
536 smp_mb__after_atomic(); /* Force ordering with next sojourn. */ 538 smp_mb__after_atomic(); /* Force ordering with next sojourn. */
537 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 539 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
540 rcu_dynticks_task_enter();
538 541
539 /* 542 /*
540 * It is illegal to enter an extended quiescent state while 543 * It is illegal to enter an extended quiescent state while
@@ -651,6 +654,7 @@ void rcu_irq_exit(void)
651static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval, 654static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
652 int user) 655 int user)
653{ 656{
657 rcu_dynticks_task_exit();
654 smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */ 658 smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */
655 atomic_inc(&rdtp->dynticks); 659 atomic_inc(&rdtp->dynticks);
656 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 660 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
@@ -1656,7 +1660,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
1656 rnp->level, rnp->grplo, 1660 rnp->level, rnp->grplo,
1657 rnp->grphi, rnp->qsmask); 1661 rnp->grphi, rnp->qsmask);
1658 raw_spin_unlock_irq(&rnp->lock); 1662 raw_spin_unlock_irq(&rnp->lock);
1659 cond_resched(); 1663 cond_resched_rcu_qs();
1660 } 1664 }
1661 1665
1662 mutex_unlock(&rsp->onoff_mutex); 1666 mutex_unlock(&rsp->onoff_mutex);
@@ -1746,7 +1750,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1746 /* smp_mb() provided by prior unlock-lock pair. */ 1750 /* smp_mb() provided by prior unlock-lock pair. */
1747 nocb += rcu_future_gp_cleanup(rsp, rnp); 1751 nocb += rcu_future_gp_cleanup(rsp, rnp);
1748 raw_spin_unlock_irq(&rnp->lock); 1752 raw_spin_unlock_irq(&rnp->lock);
1749 cond_resched(); 1753 cond_resched_rcu_qs();
1750 } 1754 }
1751 rnp = rcu_get_root(rsp); 1755 rnp = rcu_get_root(rsp);
1752 raw_spin_lock_irq(&rnp->lock); 1756 raw_spin_lock_irq(&rnp->lock);
@@ -1795,7 +1799,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
1795 /* Locking provides needed memory barrier. */ 1799 /* Locking provides needed memory barrier. */
1796 if (rcu_gp_init(rsp)) 1800 if (rcu_gp_init(rsp))
1797 break; 1801 break;
1798 cond_resched(); 1802 cond_resched_rcu_qs();
1799 WARN_ON(signal_pending(current)); 1803 WARN_ON(signal_pending(current));
1800 trace_rcu_grace_period(rsp->name, 1804 trace_rcu_grace_period(rsp->name,
1801 ACCESS_ONCE(rsp->gpnum), 1805 ACCESS_ONCE(rsp->gpnum),
@@ -1838,10 +1842,10 @@ static int __noreturn rcu_gp_kthread(void *arg)
1838 trace_rcu_grace_period(rsp->name, 1842 trace_rcu_grace_period(rsp->name,
1839 ACCESS_ONCE(rsp->gpnum), 1843 ACCESS_ONCE(rsp->gpnum),
1840 TPS("fqsend")); 1844 TPS("fqsend"));
1841 cond_resched(); 1845 cond_resched_rcu_qs();
1842 } else { 1846 } else {
1843 /* Deal with stray signal. */ 1847 /* Deal with stray signal. */
1844 cond_resched(); 1848 cond_resched_rcu_qs();
1845 WARN_ON(signal_pending(current)); 1849 WARN_ON(signal_pending(current));
1846 trace_rcu_grace_period(rsp->name, 1850 trace_rcu_grace_period(rsp->name,
1847 ACCESS_ONCE(rsp->gpnum), 1851 ACCESS_ONCE(rsp->gpnum),
@@ -2401,8 +2405,8 @@ void rcu_check_callbacks(int cpu, int user)
2401 * at least not while the corresponding CPU is online. 2405 * at least not while the corresponding CPU is online.
2402 */ 2406 */
2403 2407
2404 rcu_sched_qs(cpu); 2408 rcu_sched_qs();
2405 rcu_bh_qs(cpu); 2409 rcu_bh_qs();
2406 2410
2407 } else if (!in_softirq()) { 2411 } else if (!in_softirq()) {
2408 2412
@@ -2413,11 +2417,13 @@ void rcu_check_callbacks(int cpu, int user)
2413 * critical section, so note it. 2417 * critical section, so note it.
2414 */ 2418 */
2415 2419
2416 rcu_bh_qs(cpu); 2420 rcu_bh_qs();
2417 } 2421 }
2418 rcu_preempt_check_callbacks(cpu); 2422 rcu_preempt_check_callbacks(cpu);
2419 if (rcu_pending(cpu)) 2423 if (rcu_pending(cpu))
2420 invoke_rcu_core(); 2424 invoke_rcu_core();
2425 if (user)
2426 rcu_note_voluntary_context_switch(current);
2421 trace_rcu_utilization(TPS("End scheduler-tick")); 2427 trace_rcu_utilization(TPS("End scheduler-tick"));
2422} 2428}
2423 2429
@@ -2440,7 +2446,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
2440 struct rcu_node *rnp; 2446 struct rcu_node *rnp;
2441 2447
2442 rcu_for_each_leaf_node(rsp, rnp) { 2448 rcu_for_each_leaf_node(rsp, rnp) {
2443 cond_resched(); 2449 cond_resched_rcu_qs();
2444 mask = 0; 2450 mask = 0;
2445 raw_spin_lock_irqsave(&rnp->lock, flags); 2451 raw_spin_lock_irqsave(&rnp->lock, flags);
2446 smp_mb__after_unlock_lock(); 2452 smp_mb__after_unlock_lock();
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index ffedcb9d42dc..d03764652d91 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -615,6 +615,8 @@ static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
615static void rcu_bind_gp_kthread(void); 615static void rcu_bind_gp_kthread(void);
616static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp); 616static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
617static bool rcu_nohz_full_cpu(struct rcu_state *rsp); 617static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
618static void rcu_dynticks_task_enter(void);
619static void rcu_dynticks_task_exit(void);
618 620
619#endif /* #ifndef RCU_TREE_NONCORE */ 621#endif /* #ifndef RCU_TREE_NONCORE */
620 622
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 59318ea32bc8..e2c5910546f6 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -128,18 +128,19 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
128 * not in a quiescent state. There might be any number of tasks blocked 128 * not in a quiescent state. There might be any number of tasks blocked
129 * while in an RCU read-side critical section. 129 * while in an RCU read-side critical section.
130 * 130 *
131 * Unlike the other rcu_*_qs() functions, callers to this function 131 * As with the other rcu_*_qs() functions, callers to this function
132 * must disable irqs in order to protect the assignment to 132 * must disable preemption.
133 * ->rcu_read_unlock_special. 133 */
134 */ 134static void rcu_preempt_qs(void)
135static void rcu_preempt_qs(int cpu) 135{
136{ 136 if (!__this_cpu_read(rcu_preempt_data.passed_quiesce)) {
137 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); 137 trace_rcu_grace_period(TPS("rcu_preempt"),
138 138 __this_cpu_read(rcu_preempt_data.gpnum),
139 if (rdp->passed_quiesce == 0) 139 TPS("cpuqs"));
140 trace_rcu_grace_period(TPS("rcu_preempt"), rdp->gpnum, TPS("cpuqs")); 140 __this_cpu_write(rcu_preempt_data.passed_quiesce, 1);
141 rdp->passed_quiesce = 1; 141 barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */
142 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; 142 current->rcu_read_unlock_special.b.need_qs = false;
143 }
143} 144}
144 145
145/* 146/*
@@ -163,14 +164,14 @@ static void rcu_preempt_note_context_switch(int cpu)
163 struct rcu_node *rnp; 164 struct rcu_node *rnp;
164 165
165 if (t->rcu_read_lock_nesting > 0 && 166 if (t->rcu_read_lock_nesting > 0 &&
166 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { 167 !t->rcu_read_unlock_special.b.blocked) {
167 168
168 /* Possibly blocking in an RCU read-side critical section. */ 169 /* Possibly blocking in an RCU read-side critical section. */
169 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); 170 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
170 rnp = rdp->mynode; 171 rnp = rdp->mynode;
171 raw_spin_lock_irqsave(&rnp->lock, flags); 172 raw_spin_lock_irqsave(&rnp->lock, flags);
172 smp_mb__after_unlock_lock(); 173 smp_mb__after_unlock_lock();
173 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; 174 t->rcu_read_unlock_special.b.blocked = true;
174 t->rcu_blocked_node = rnp; 175 t->rcu_blocked_node = rnp;
175 176
176 /* 177 /*
@@ -212,7 +213,7 @@ static void rcu_preempt_note_context_switch(int cpu)
212 : rnp->gpnum + 1); 213 : rnp->gpnum + 1);
213 raw_spin_unlock_irqrestore(&rnp->lock, flags); 214 raw_spin_unlock_irqrestore(&rnp->lock, flags);
214 } else if (t->rcu_read_lock_nesting < 0 && 215 } else if (t->rcu_read_lock_nesting < 0 &&
215 t->rcu_read_unlock_special) { 216 t->rcu_read_unlock_special.s) {
216 217
217 /* 218 /*
218 * Complete exit from RCU read-side critical section on 219 * Complete exit from RCU read-side critical section on
@@ -230,9 +231,7 @@ static void rcu_preempt_note_context_switch(int cpu)
230 * grace period, then the fact that the task has been enqueued 231 * grace period, then the fact that the task has been enqueued
231 * means that we continue to block the current grace period. 232 * means that we continue to block the current grace period.
232 */ 233 */
233 local_irq_save(flags); 234 rcu_preempt_qs();
234 rcu_preempt_qs(cpu);
235 local_irq_restore(flags);
236} 235}
237 236
238/* 237/*
@@ -313,7 +312,7 @@ void rcu_read_unlock_special(struct task_struct *t)
313 bool drop_boost_mutex = false; 312 bool drop_boost_mutex = false;
314#endif /* #ifdef CONFIG_RCU_BOOST */ 313#endif /* #ifdef CONFIG_RCU_BOOST */
315 struct rcu_node *rnp; 314 struct rcu_node *rnp;
316 int special; 315 union rcu_special special;
317 316
318 /* NMI handlers cannot block and cannot safely manipulate state. */ 317 /* NMI handlers cannot block and cannot safely manipulate state. */
319 if (in_nmi()) 318 if (in_nmi())
@@ -323,12 +322,13 @@ void rcu_read_unlock_special(struct task_struct *t)
323 322
324 /* 323 /*
325 * If RCU core is waiting for this CPU to exit critical section, 324 * If RCU core is waiting for this CPU to exit critical section,
326 * let it know that we have done so. 325 * let it know that we have done so. Because irqs are disabled,
326 * t->rcu_read_unlock_special cannot change.
327 */ 327 */
328 special = t->rcu_read_unlock_special; 328 special = t->rcu_read_unlock_special;
329 if (special & RCU_READ_UNLOCK_NEED_QS) { 329 if (special.b.need_qs) {
330 rcu_preempt_qs(smp_processor_id()); 330 rcu_preempt_qs();
331 if (!t->rcu_read_unlock_special) { 331 if (!t->rcu_read_unlock_special.s) {
332 local_irq_restore(flags); 332 local_irq_restore(flags);
333 return; 333 return;
334 } 334 }
@@ -341,8 +341,8 @@ void rcu_read_unlock_special(struct task_struct *t)
341 } 341 }
342 342
343 /* Clean up if blocked during RCU read-side critical section. */ 343 /* Clean up if blocked during RCU read-side critical section. */
344 if (special & RCU_READ_UNLOCK_BLOCKED) { 344 if (special.b.blocked) {
345 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; 345 t->rcu_read_unlock_special.b.blocked = false;
346 346
347 /* 347 /*
348 * Remove this task from the list it blocked on. The 348 * Remove this task from the list it blocked on. The
@@ -626,12 +626,13 @@ static void rcu_preempt_check_callbacks(int cpu)
626 struct task_struct *t = current; 626 struct task_struct *t = current;
627 627
628 if (t->rcu_read_lock_nesting == 0) { 628 if (t->rcu_read_lock_nesting == 0) {
629 rcu_preempt_qs(cpu); 629 rcu_preempt_qs();
630 return; 630 return;
631 } 631 }
632 if (t->rcu_read_lock_nesting > 0 && 632 if (t->rcu_read_lock_nesting > 0 &&
633 per_cpu(rcu_preempt_data, cpu).qs_pending) 633 per_cpu(rcu_preempt_data, cpu).qs_pending &&
634 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; 634 !per_cpu(rcu_preempt_data, cpu).passed_quiesce)
635 t->rcu_read_unlock_special.b.need_qs = true;
635} 636}
636 637
637#ifdef CONFIG_RCU_BOOST 638#ifdef CONFIG_RCU_BOOST
@@ -915,7 +916,7 @@ void exit_rcu(void)
915 return; 916 return;
916 t->rcu_read_lock_nesting = 1; 917 t->rcu_read_lock_nesting = 1;
917 barrier(); 918 barrier();
918 t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED; 919 t->rcu_read_unlock_special.b.blocked = true;
919 __rcu_read_unlock(); 920 __rcu_read_unlock();
920} 921}
921 922
@@ -1816,7 +1817,7 @@ static int rcu_oom_notify(struct notifier_block *self,
1816 get_online_cpus(); 1817 get_online_cpus();
1817 for_each_online_cpu(cpu) { 1818 for_each_online_cpu(cpu) {
1818 smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1); 1819 smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
1819 cond_resched(); 1820 cond_resched_rcu_qs();
1820 } 1821 }
1821 put_online_cpus(); 1822 put_online_cpus();
1822 1823
@@ -3162,3 +3163,19 @@ static void rcu_bind_gp_kthread(void)
3162 housekeeping_affine(current); 3163 housekeeping_affine(current);
3163#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ 3164#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
3164} 3165}
3166
3167/* Record the current task on dyntick-idle entry. */
3168static void rcu_dynticks_task_enter(void)
3169{
3170#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
3171 ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id();
3172#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
3173}
3174
3175/* Record no current task on dyntick-idle exit. */
3176static void rcu_dynticks_task_exit(void)
3177{
3178#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
3179 ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1;
3180#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
3181}
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index ea8ea7b16e11..3ef8ba58694e 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -47,6 +47,8 @@
47#include <linux/hardirq.h> 47#include <linux/hardirq.h>
48#include <linux/delay.h> 48#include <linux/delay.h>
49#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/kthread.h>
51#include <linux/tick.h>
50 52
51#define CREATE_TRACE_POINTS 53#define CREATE_TRACE_POINTS
52 54
@@ -91,7 +93,7 @@ void __rcu_read_unlock(void)
91 barrier(); /* critical section before exit code. */ 93 barrier(); /* critical section before exit code. */
92 t->rcu_read_lock_nesting = INT_MIN; 94 t->rcu_read_lock_nesting = INT_MIN;
93 barrier(); /* assign before ->rcu_read_unlock_special load */ 95 barrier(); /* assign before ->rcu_read_unlock_special load */
94 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) 96 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special.s)))
95 rcu_read_unlock_special(t); 97 rcu_read_unlock_special(t);
96 barrier(); /* ->rcu_read_unlock_special load before assign */ 98 barrier(); /* ->rcu_read_unlock_special load before assign */
97 t->rcu_read_lock_nesting = 0; 99 t->rcu_read_lock_nesting = 0;
@@ -379,3 +381,312 @@ static int __init check_cpu_stall_init(void)
379early_initcall(check_cpu_stall_init); 381early_initcall(check_cpu_stall_init);
380 382
381#endif /* #ifdef CONFIG_RCU_STALL_COMMON */ 383#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
384
385#ifdef CONFIG_TASKS_RCU
386
387/*
388 * Simple variant of RCU whose quiescent states are voluntary context switch,
389 * user-space execution, and idle. As such, grace periods can take one good
390 * long time. There are no read-side primitives similar to rcu_read_lock()
391 * and rcu_read_unlock() because this implementation is intended to get
392 * the system into a safe state for some of the manipulations involved in
393 * tracing and the like. Finally, this implementation does not support
394 * high call_rcu_tasks() rates from multiple CPUs. If this is required,
395 * per-CPU callback lists will be needed.
396 */
397
398/* Global list of callbacks and associated lock. */
399static struct rcu_head *rcu_tasks_cbs_head;
400static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
401static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq);
402static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
403
404/* Track exiting tasks in order to allow them to be waited for. */
405DEFINE_SRCU(tasks_rcu_exit_srcu);
406
407/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */
408static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 10;
409module_param(rcu_task_stall_timeout, int, 0644);
410
411static void rcu_spawn_tasks_kthread(void);
412
413/*
414 * Post an RCU-tasks callback. First call must be from process context
415 * after the scheduler is fully operational.
416 */
417void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
418{
419 unsigned long flags;
420 bool needwake;
421
422 rhp->next = NULL;
423 rhp->func = func;
424 raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
425 needwake = !rcu_tasks_cbs_head;
426 *rcu_tasks_cbs_tail = rhp;
427 rcu_tasks_cbs_tail = &rhp->next;
428 raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
429 if (needwake) {
430 rcu_spawn_tasks_kthread();
431 wake_up(&rcu_tasks_cbs_wq);
432 }
433}
434EXPORT_SYMBOL_GPL(call_rcu_tasks);
435
436/**
437 * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
438 *
439 * Control will return to the caller some time after a full rcu-tasks
440 * grace period has elapsed, in other words after all currently
441 * executing rcu-tasks read-side critical sections have elapsed. These
442 * read-side critical sections are delimited by calls to schedule(),
443 * cond_resched_rcu_qs(), idle execution, userspace execution, calls
444 * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
445 *
446 * This is a very specialized primitive, intended only for a few uses in
447 * tracing and other situations requiring manipulation of function
448 * preambles and profiling hooks. The synchronize_rcu_tasks() function
449 * is not (yet) intended for heavy use from multiple CPUs.
450 *
451 * Note that this guarantee implies further memory-ordering guarantees.
452 * On systems with more than one CPU, when synchronize_rcu_tasks() returns,
453 * each CPU is guaranteed to have executed a full memory barrier since the
454 * end of its last RCU-tasks read-side critical section whose beginning
455 * preceded the call to synchronize_rcu_tasks(). In addition, each CPU
456 * having an RCU-tasks read-side critical section that extends beyond
457 * the return from synchronize_rcu_tasks() is guaranteed to have executed
458 * a full memory barrier after the beginning of synchronize_rcu_tasks()
459 * and before the beginning of that RCU-tasks read-side critical section.
460 * Note that these guarantees include CPUs that are offline, idle, or
461 * executing in user mode, as well as CPUs that are executing in the kernel.
462 *
463 * Furthermore, if CPU A invoked synchronize_rcu_tasks(), which returned
464 * to its caller on CPU B, then both CPU A and CPU B are guaranteed
465 * to have executed a full memory barrier during the execution of
466 * synchronize_rcu_tasks() -- even if CPU A and CPU B are the same CPU
467 * (but again only if the system has more than one CPU).
468 */
469void synchronize_rcu_tasks(void)
470{
471 /* Complain if the scheduler has not started. */
472 rcu_lockdep_assert(!rcu_scheduler_active,
473 "synchronize_rcu_tasks called too soon");
474
475 /* Wait for the grace period. */
476 wait_rcu_gp(call_rcu_tasks);
477}
478EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);
479
480/**
481 * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
482 *
483 * Although the current implementation is guaranteed to wait, it is not
484 * obligated to, for example, if there are no pending callbacks.
485 */
486void rcu_barrier_tasks(void)
487{
488 /* There is only one callback queue, so this is easy. ;-) */
489 synchronize_rcu_tasks();
490}
491EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
492
493/* See if tasks are still holding out, complain if so. */
494static void check_holdout_task(struct task_struct *t,
495 bool needreport, bool *firstreport)
496{
497 int cpu;
498
499 if (!ACCESS_ONCE(t->rcu_tasks_holdout) ||
500 t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) ||
501 !ACCESS_ONCE(t->on_rq) ||
502 (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
503 !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
504 ACCESS_ONCE(t->rcu_tasks_holdout) = false;
505 list_del_init(&t->rcu_tasks_holdout_list);
506 put_task_struct(t);
507 return;
508 }
509 if (!needreport)
510 return;
511 if (*firstreport) {
512 pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
513 *firstreport = false;
514 }
515 cpu = task_cpu(t);
516 pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
517 t, ".I"[is_idle_task(t)],
518 "N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
519 t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
520 t->rcu_tasks_idle_cpu, cpu);
521 sched_show_task(t);
522}
523
524/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
525static int __noreturn rcu_tasks_kthread(void *arg)
526{
527 unsigned long flags;
528 struct task_struct *g, *t;
529 unsigned long lastreport;
530 struct rcu_head *list;
531 struct rcu_head *next;
532 LIST_HEAD(rcu_tasks_holdouts);
533
534 /* FIXME: Add housekeeping affinity. */
535
536 /*
537 * Each pass through the following loop makes one check for
538 * newly arrived callbacks, and, if there are some, waits for
539 * one RCU-tasks grace period and then invokes the callbacks.
540 * This loop is terminated by the system going down. ;-)
541 */
542 for (;;) {
543
544 /* Pick up any new callbacks. */
545 raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
546 list = rcu_tasks_cbs_head;
547 rcu_tasks_cbs_head = NULL;
548 rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
549 raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
550
551 /* If there were none, wait a bit and start over. */
552 if (!list) {
553 wait_event_interruptible(rcu_tasks_cbs_wq,
554 rcu_tasks_cbs_head);
555 if (!rcu_tasks_cbs_head) {
556 WARN_ON(signal_pending(current));
557 schedule_timeout_interruptible(HZ/10);
558 }
559 continue;
560 }
561
562 /*
563 * Wait for all pre-existing t->on_rq and t->nvcsw
564 * transitions to complete. Invoking synchronize_sched()
565 * suffices because all these transitions occur with
566 * interrupts disabled. Without this synchronize_sched(),
567 * a read-side critical section that started before the
568 * grace period might be incorrectly seen as having started
569 * after the grace period.
570 *
571 * This synchronize_sched() also dispenses with the
572 * need for a memory barrier on the first store to
573 * ->rcu_tasks_holdout, as it forces the store to happen
574 * after the beginning of the grace period.
575 */
576 synchronize_sched();
577
578 /*
579 * There were callbacks, so we need to wait for an
580 * RCU-tasks grace period. Start off by scanning
581 * the task list for tasks that are not already
582 * voluntarily blocked. Mark these tasks and make
583 * a list of them in rcu_tasks_holdouts.
584 */
585 rcu_read_lock();
586 for_each_process_thread(g, t) {
587 if (t != current && ACCESS_ONCE(t->on_rq) &&
588 !is_idle_task(t)) {
589 get_task_struct(t);
590 t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw);
591 ACCESS_ONCE(t->rcu_tasks_holdout) = true;
592 list_add(&t->rcu_tasks_holdout_list,
593 &rcu_tasks_holdouts);
594 }
595 }
596 rcu_read_unlock();
597
598 /*
599 * Wait for tasks that are in the process of exiting.
600 * This does only part of the job, ensuring that all
601 * tasks that were previously exiting reach the point
602 * where they have disabled preemption, allowing the
603 * later synchronize_sched() to finish the job.
604 */
605 synchronize_srcu(&tasks_rcu_exit_srcu);
606
607 /*
608 * Each pass through the following loop scans the list
609 * of holdout tasks, removing any that are no longer
610 * holdouts. When the list is empty, we are done.
611 */
612 lastreport = jiffies;
613 while (!list_empty(&rcu_tasks_holdouts)) {
614 bool firstreport;
615 bool needreport;
616 int rtst;
617 struct task_struct *t1;
618
619 schedule_timeout_interruptible(HZ);
620 rtst = ACCESS_ONCE(rcu_task_stall_timeout);
621 needreport = rtst > 0 &&
622 time_after(jiffies, lastreport + rtst);
623 if (needreport)
624 lastreport = jiffies;
625 firstreport = true;
626 WARN_ON(signal_pending(current));
627 list_for_each_entry_safe(t, t1, &rcu_tasks_holdouts,
628 rcu_tasks_holdout_list) {
629 check_holdout_task(t, needreport, &firstreport);
630 cond_resched();
631 }
632 }
633
634 /*
635 * Because ->on_rq and ->nvcsw are not guaranteed
636 * to have full memory barriers prior to them in the
637 * schedule() path, memory reordering on other CPUs could
638 * cause their RCU-tasks read-side critical sections to
639 * extend past the end of the grace period. However,
640 * because these ->nvcsw updates are carried out with
641 * interrupts disabled, we can use synchronize_sched()
642 * to force the needed ordering on all such CPUs.
643 *
644 * This synchronize_sched() also confines all
645 * ->rcu_tasks_holdout accesses to be within the grace
646 * period, avoiding the need for memory barriers for
647 * ->rcu_tasks_holdout accesses.
648 *
649 * In addition, this synchronize_sched() waits for exiting
650 * tasks to complete their final preempt_disable() region
651 * of execution, cleaning up after the synchronize_srcu()
652 * above.
653 */
654 synchronize_sched();
655
656 /* Invoke the callbacks. */
657 while (list) {
658 next = list->next;
659 local_bh_disable();
660 list->func(list);
661 local_bh_enable();
662 list = next;
663 cond_resched();
664 }
665 schedule_timeout_uninterruptible(HZ/10);
666 }
667}
668
669/* Spawn rcu_tasks_kthread() at first call to call_rcu_tasks(). */
670static void rcu_spawn_tasks_kthread(void)
671{
672 static DEFINE_MUTEX(rcu_tasks_kthread_mutex);
673 static struct task_struct *rcu_tasks_kthread_ptr;
674 struct task_struct *t;
675
676 if (ACCESS_ONCE(rcu_tasks_kthread_ptr)) {
677 smp_mb(); /* Ensure caller sees full kthread. */
678 return;
679 }
680 mutex_lock(&rcu_tasks_kthread_mutex);
681 if (rcu_tasks_kthread_ptr) {
682 mutex_unlock(&rcu_tasks_kthread_mutex);
683 return;
684 }
685 t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
686 BUG_ON(IS_ERR(t));
687 smp_mb(); /* Ensure others see full kthread. */
688 ACCESS_ONCE(rcu_tasks_kthread_ptr) = t;
689 mutex_unlock(&rcu_tasks_kthread_mutex);
690}
691
692#endif /* #ifdef CONFIG_TASKS_RCU */
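
Because the implementation above keeps a single global callback list serviced by one kthread, any module that posts callbacks with call_rcu_tasks() must wait for them to drain before its callback code can go away. A minimal hedged sketch of the usual unload ordering (the module itself is hypothetical):

	#include <linux/module.h>
	#include <linux/rcupdate.h>

	static int __init tasks_user_init(void)
	{
		return 0;	/* would start posting call_rcu_tasks() callbacks */
	}
	module_init(tasks_user_init);

	static void __exit tasks_user_exit(void)
	{
		/*
		 * Wait for all previously queued RCU-tasks callbacks to be
		 * invoked so that none can run after this module is freed.
		 * With the current single-list implementation this maps onto
		 * synchronize_rcu_tasks(), as rcu_barrier_tasks() shows above.
		 */
		rcu_barrier_tasks();
	}
	module_exit(tasks_user_exit);

	MODULE_LICENSE("GPL");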
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 5918d227730f..348ec763b104 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -278,7 +278,7 @@ restart:
278 pending >>= softirq_bit; 278 pending >>= softirq_bit;
279 } 279 }
280 280
281 rcu_bh_qs(smp_processor_id()); 281 rcu_bh_qs();
282 local_irq_disable(); 282 local_irq_disable();
283 283
284 pending = local_softirq_pending(); 284 pending = local_softirq_pending();
diff --git a/mm/mlock.c b/mm/mlock.c
index ce84cb0b83ef..ab3150c26711 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -789,7 +789,7 @@ static int do_mlockall(int flags)
789 789
790 /* Ignore errors */ 790 /* Ignore errors */
791 mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); 791 mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
792 cond_resched(); 792 cond_resched_rcu_qs();
793 } 793 }
794out: 794out:
795 return 0; 795 return 0;
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
index cd3d29cb0a47..a3a1a05a2b5c 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
@@ -11,3 +11,6 @@ SRCU-N
11SRCU-P 11SRCU-P
12TINY01 12TINY01
13TINY02 13TINY02
14TASKS01
15TASKS02
16TASKS03
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
new file mode 100644
index 000000000000..97f0a0b27ef7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
@@ -0,0 +1,9 @@
1CONFIG_SMP=y
2CONFIG_NR_CPUS=2
3CONFIG_HOTPLUG_CPU=y
4CONFIG_PREEMPT_NONE=n
5CONFIG_PREEMPT_VOLUNTARY=n
6CONFIG_PREEMPT=y
7CONFIG_DEBUG_LOCK_ALLOC=y
8CONFIG_PROVE_RCU=y
9CONFIG_TASKS_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot
new file mode 100644
index 000000000000..cd2a188eeb6d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot
@@ -0,0 +1 @@
rcutorture.torture_type=tasks
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
new file mode 100644
index 000000000000..696d2ea74d13
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
@@ -0,0 +1,5 @@
1CONFIG_SMP=n
2CONFIG_PREEMPT_NONE=y
3CONFIG_PREEMPT_VOLUNTARY=n
4CONFIG_PREEMPT=n
5CONFIG_TASKS_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
new file mode 100644
index 000000000000..cd2a188eeb6d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
@@ -0,0 +1 @@
rcutorture.torture_type=tasks
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
new file mode 100644
index 000000000000..9c60da5b5d1d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
@@ -0,0 +1,13 @@
1CONFIG_SMP=y
2CONFIG_NR_CPUS=2
3CONFIG_HOTPLUG_CPU=n
4CONFIG_SUSPEND=n
5CONFIG_HIBERNATION=n
6CONFIG_PREEMPT_NONE=n
7CONFIG_PREEMPT_VOLUNTARY=n
8CONFIG_PREEMPT=y
9CONFIG_TASKS_RCU=y
10CONFIG_HZ_PERIODIC=n
11CONFIG_NO_HZ_IDLE=n
12CONFIG_NO_HZ_FULL=y
13CONFIG_NO_HZ_FULL_ALL=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot
new file mode 100644
index 000000000000..cd2a188eeb6d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot
@@ -0,0 +1 @@
rcutorture.torture_type=tasks