author    Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2014-09-16 13:10:44 -0400
committer Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2014-09-16 13:10:44 -0400
commit    96b4672703ed4538c7fc25de36df4415a0ee237c (patch)
tree      e5bb8f4c3eb41c5741a7b232cff8e502f6509fc3 /kernel
parent    e98d06dd6cd791b5138b0fc6c14a9c0b4d1f2e72 (diff)
parent    a53dd6a65668850493cce94395c1b88a015eb338 (diff)

Merge branch 'rcu-tasks.2014.09.10a' into HEAD

rcu-tasks.2014.09.10a: Add RCU-tasks flavor of RCU.
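
This merge exposes three new update-side primitives: call_rcu_tasks(), synchronize_rcu_tasks(), and rcu_barrier_tasks(). As orientation before the diff, here is a minimal caller-side sketch of the intended use case (freeing code, such as a tracing trampoline, that some task might still be preempted inside); the my_trampoline structure and function names are illustrative only, not part of this patch:

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_trampoline {			/* hypothetical example structure */
	struct rcu_head rh;
	void *text;			/* generated code tasks may be executing */
};

static void my_trampoline_free(struct rcu_head *rhp)
{
	struct my_trampoline *tp = container_of(rhp, struct my_trampoline, rh);

	/* No task can still be preempted inside tp->text at this point. */
	kfree(tp->text);
	kfree(tp);
}

static void my_trampoline_retire(struct my_trampoline *tp)
{
	/*
	 * Asynchronous wait: the callback runs only after every task has
	 * passed through a voluntary context switch, usermode execution,
	 * or the idle loop.
	 */
	call_rcu_tasks(&tp->rh, my_trampoline_free);
}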
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/exit.c              3
-rw-r--r--  kernel/rcu/rcutorture.c   54
-rw-r--r--  kernel/rcu/tiny.c         12
-rw-r--r--  kernel/rcu/tree.c         50
-rw-r--r--  kernel/rcu/tree.h          2
-rw-r--r--  kernel/rcu/tree_plugin.h  77
-rw-r--r--  kernel/rcu/update.c      313
-rw-r--r--  kernel/softirq.c           2
8 files changed, 451 insertions, 62 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 32c58f7433a3..d13f2eec4bb8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -667,6 +667,7 @@ void do_exit(long code)
 {
 	struct task_struct *tsk = current;
 	int group_dead;
+	TASKS_RCU(int tasks_rcu_i);
 
 	profile_task_exit(tsk);
 
@@ -775,6 +776,7 @@ void do_exit(long code)
 	 */
 	flush_ptrace_hw_breakpoint(tsk);
 
+	TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
 	exit_notify(tsk, group_dead);
 	proc_exit_connector(tsk);
 #ifdef CONFIG_NUMA
@@ -814,6 +816,7 @@ void do_exit(long code)
 	if (tsk->nr_dirtied)
 		__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
 	exit_rcu();
+	TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));
 
 	/*
 	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
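
The TASKS_RCU() wrapper used in these do_exit() hunks compiles to nothing unless CONFIG_TASKS_RCU=y, so the exit path pays no cost on kernels without the new flavor. The definition lives in include/linux/rcupdate.h rather than in this diff; a plausible sketch of what these hunks assume is:

#ifdef CONFIG_TASKS_RCU
#define TASKS_RCU(x) x
extern struct srcu_struct tasks_rcu_exit_srcu;
#else /* #ifdef CONFIG_TASKS_RCU */
#define TASKS_RCU(x) do { } while (0)
#endif /* #else #ifdef CONFIG_TASKS_RCU */

The SRCU read-side section taken here lets rcu_tasks_kthread() wait (via synchronize_srcu()) for tasks that are part-way through exiting, which is why the unlock is issued only after exit_rcu().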
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 0bcd53adac73..6d1509500d2b 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -612,6 +612,52 @@ static struct rcu_torture_ops sched_ops = {
 	.name		= "sched"
 };
 
+#ifdef CONFIG_TASKS_RCU
+
+/*
+ * Definitions for RCU-tasks torture testing.
+ */
+
+static int tasks_torture_read_lock(void)
+{
+	return 0;
+}
+
+static void tasks_torture_read_unlock(int idx)
+{
+}
+
+static void rcu_tasks_torture_deferred_free(struct rcu_torture *p)
+{
+	call_rcu_tasks(&p->rtort_rcu, rcu_torture_cb);
+}
+
+static struct rcu_torture_ops tasks_ops = {
+	.ttype		= RCU_TASKS_FLAVOR,
+	.init		= rcu_sync_torture_init,
+	.readlock	= tasks_torture_read_lock,
+	.read_delay	= rcu_read_delay,  /* just reuse rcu's version. */
+	.readunlock	= tasks_torture_read_unlock,
+	.completed	= rcu_no_completed,
+	.deferred_free	= rcu_tasks_torture_deferred_free,
+	.sync		= synchronize_rcu_tasks,
+	.exp_sync	= synchronize_rcu_tasks,
+	.call		= call_rcu_tasks,
+	.cb_barrier	= rcu_barrier_tasks,
+	.fqs		= NULL,
+	.stats		= NULL,
+	.irq_capable	= 1,
+	.name		= "tasks"
+};
+
+#define RCUTORTURE_TASKS_OPS &tasks_ops,
+
+#else /* #ifdef CONFIG_TASKS_RCU */
+
+#define RCUTORTURE_TASKS_OPS
+
+#endif /* #else #ifdef CONFIG_TASKS_RCU */
+
 /*
  * RCU torture priority-boost testing.  Runs one real-time thread per
  * CPU for moderate bursts, repeatedly registering RCU callbacks and
@@ -678,7 +724,7 @@ static int rcu_torture_boost(void *arg)
 				}
 				call_rcu_time = jiffies;
 			}
-			cond_resched();
+			cond_resched_rcu_qs();
 			stutter_wait("rcu_torture_boost");
 			if (torture_must_stop())
 				goto checkwait;
@@ -1082,7 +1128,7 @@ rcu_torture_reader(void *arg)
 		__this_cpu_inc(rcu_torture_batch[completed]);
 		preempt_enable();
 		cur_ops->readunlock(idx);
-		cond_resched();
+		cond_resched_rcu_qs();
 		stutter_wait("rcu_torture_reader");
 	} while (!torture_must_stop());
 	if (irqreader && cur_ops->irq_capable) {
@@ -1344,7 +1390,8 @@ static int rcu_torture_barrier_cbs(void *arg)
 		if (atomic_dec_and_test(&barrier_cbs_count))
 			wake_up(&barrier_wq);
 	} while (!torture_must_stop());
-	cur_ops->cb_barrier();
+	if (cur_ops->cb_barrier != NULL)
+		cur_ops->cb_barrier();
 	destroy_rcu_head_on_stack(&rcu);
 	torture_kthread_stopping("rcu_torture_barrier_cbs");
 	return 0;
@@ -1585,6 +1632,7 @@ rcu_torture_init(void)
 	int firsterr = 0;
 	static struct rcu_torture_ops *torture_ops[] = {
 		&rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops,
+		RCUTORTURE_TASKS_OPS
 	};
 
 	if (!torture_init_begin(torture_type, verbose, &rcutorture_runnable))
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index 4a55a2416e3c..c0623fc47125 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -72,7 +72,7 @@ static void rcu_idle_enter_common(long long newval)
			  current->pid, current->comm,
			  idle->pid, idle->comm); /* must be idle task! */
 	}
-	rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
+	rcu_sched_qs(); /* implies rcu_bh_inc() */
 	barrier();
 	rcu_dynticks_nesting = newval;
 }
@@ -217,7 +217,7 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
  * are at it, given that any rcu quiescent state is also an rcu_bh
  * quiescent state.  Use "+" instead of "||" to defeat short circuiting.
  */
-void rcu_sched_qs(int cpu)
+void rcu_sched_qs(void)
 {
 	unsigned long flags;
 
@@ -231,7 +231,7 @@ void rcu_sched_qs(int cpu)
 /*
  * Record an rcu_bh quiescent state.
  */
-void rcu_bh_qs(int cpu)
+void rcu_bh_qs(void)
 {
 	unsigned long flags;
 
@@ -251,9 +251,11 @@ void rcu_check_callbacks(int cpu, int user)
 {
 	RCU_TRACE(check_cpu_stalls());
 	if (user || rcu_is_cpu_rrupt_from_idle())
-		rcu_sched_qs(cpu);
+		rcu_sched_qs();
 	else if (!in_softirq())
-		rcu_bh_qs(cpu);
+		rcu_bh_qs();
+	if (user)
+		rcu_note_voluntary_context_switch(current);
 }
 
 /*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index be0d0a1b7129..d7a3b13bc94c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -197,22 +197,24 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
  * one since the start of the grace period, this just sets a flag.
  * The caller must have disabled preemption.
  */
-void rcu_sched_qs(int cpu)
+void rcu_sched_qs(void)
 {
-	struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
-
-	if (rdp->passed_quiesce == 0)
-		trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs"));
-	rdp->passed_quiesce = 1;
+	if (!__this_cpu_read(rcu_sched_data.passed_quiesce)) {
+		trace_rcu_grace_period(TPS("rcu_sched"),
+				       __this_cpu_read(rcu_sched_data.gpnum),
+				       TPS("cpuqs"));
+		__this_cpu_write(rcu_sched_data.passed_quiesce, 1);
+	}
 }
 
-void rcu_bh_qs(int cpu)
+void rcu_bh_qs(void)
 {
-	struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
-
-	if (rdp->passed_quiesce == 0)
-		trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs"));
-	rdp->passed_quiesce = 1;
+	if (!__this_cpu_read(rcu_bh_data.passed_quiesce)) {
+		trace_rcu_grace_period(TPS("rcu_bh"),
+				       __this_cpu_read(rcu_bh_data.gpnum),
+				       TPS("cpuqs"));
+		__this_cpu_write(rcu_bh_data.passed_quiesce, 1);
+	}
 }
 
 static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
@@ -287,7 +289,7 @@ static void rcu_momentary_dyntick_idle(void)
 void rcu_note_context_switch(int cpu)
 {
 	trace_rcu_utilization(TPS("Start context switch"));
-	rcu_sched_qs(cpu);
+	rcu_sched_qs();
 	rcu_preempt_note_context_switch(cpu);
 	if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
 		rcu_momentary_dyntick_idle();
@@ -535,6 +537,7 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
 	atomic_inc(&rdtp->dynticks);
 	smp_mb__after_atomic(); /* Force ordering with next sojourn. */
 	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+	rcu_dynticks_task_enter();
 
 	/*
 	 * It is illegal to enter an extended quiescent state while
@@ -651,6 +654,7 @@ void rcu_irq_exit(void)
 static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
 				int user)
 {
+	rcu_dynticks_task_exit();
 	smp_mb__before_atomic();  /* Force ordering w/previous sojourn. */
 	atomic_inc(&rdtp->dynticks);
 	/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
@@ -1656,7 +1660,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
					    rnp->level, rnp->grplo,
					    rnp->grphi, rnp->qsmask);
 		raw_spin_unlock_irq(&rnp->lock);
-		cond_resched();
+		cond_resched_rcu_qs();
 	}
 
 	mutex_unlock(&rsp->onoff_mutex);
@@ -1746,7 +1750,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 		/* smp_mb() provided by prior unlock-lock pair. */
 		nocb += rcu_future_gp_cleanup(rsp, rnp);
 		raw_spin_unlock_irq(&rnp->lock);
-		cond_resched();
+		cond_resched_rcu_qs();
 	}
 	rnp = rcu_get_root(rsp);
 	raw_spin_lock_irq(&rnp->lock);
@@ -1795,7 +1799,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 			/* Locking provides needed memory barrier. */
 			if (rcu_gp_init(rsp))
 				break;
-			cond_resched();
+			cond_resched_rcu_qs();
 			WARN_ON(signal_pending(current));
 			trace_rcu_grace_period(rsp->name,
					       ACCESS_ONCE(rsp->gpnum),
@@ -1838,10 +1842,10 @@ static int __noreturn rcu_gp_kthread(void *arg)
 				trace_rcu_grace_period(rsp->name,
						       ACCESS_ONCE(rsp->gpnum),
						       TPS("fqsend"));
-				cond_resched();
+				cond_resched_rcu_qs();
 			} else {
 				/* Deal with stray signal. */
-				cond_resched();
+				cond_resched_rcu_qs();
 				WARN_ON(signal_pending(current));
 				trace_rcu_grace_period(rsp->name,
						       ACCESS_ONCE(rsp->gpnum),
@@ -2401,8 +2405,8 @@ void rcu_check_callbacks(int cpu, int user)
 		 * at least not while the corresponding CPU is online.
 		 */
 
-		rcu_sched_qs(cpu);
-		rcu_bh_qs(cpu);
+		rcu_sched_qs();
+		rcu_bh_qs();
 
 	} else if (!in_softirq()) {
 
@@ -2413,11 +2417,13 @@ void rcu_check_callbacks(int cpu, int user)
 		 * critical section, so note it.
 		 */
 
-		rcu_bh_qs(cpu);
+		rcu_bh_qs();
 	}
 	rcu_preempt_check_callbacks(cpu);
 	if (rcu_pending(cpu))
 		invoke_rcu_core();
+	if (user)
+		rcu_note_voluntary_context_switch(current);
 	trace_rcu_utilization(TPS("End scheduler-tick"));
 }
 
@@ -2440,7 +2446,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
 	struct rcu_node *rnp;
 
 	rcu_for_each_leaf_node(rsp, rnp) {
-		cond_resched();
+		cond_resched_rcu_qs();
 		mask = 0;
 		raw_spin_lock_irqsave(&rnp->lock, flags);
 		smp_mb__after_unlock_lock();
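
The repeated cond_resched() to cond_resched_rcu_qs() conversions above make RCU's own long-running loops (grace-period initialization and cleanup, the grace-period kthread, and force_qs_rnp()) report quiescent states instead of silently extending RCU-tasks grace periods. The helper itself is added elsewhere in this series; a sketch of the assumed definition is:

/*
 * Sketch only: report a voluntary-context-switch-style quiescent state
 * to RCU-tasks, then offer to reschedule as cond_resched() would.
 */
#define cond_resched_rcu_qs() \
do { \
	rcu_note_voluntary_context_switch(current); \
	cond_resched(); \
} while (0)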
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index ffedcb9d42dc..d03764652d91 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -615,6 +615,8 @@ static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
615static void rcu_bind_gp_kthread(void); 615static void rcu_bind_gp_kthread(void);
616static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp); 616static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
617static bool rcu_nohz_full_cpu(struct rcu_state *rsp); 617static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
618static void rcu_dynticks_task_enter(void);
619static void rcu_dynticks_task_exit(void);
618 620
619#endif /* #ifndef RCU_TREE_NONCORE */ 621#endif /* #ifndef RCU_TREE_NONCORE */
620 622
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 59318ea32bc8..e2c5910546f6 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -128,18 +128,19 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
  * not in a quiescent state.  There might be any number of tasks blocked
  * while in an RCU read-side critical section.
  *
- * Unlike the other rcu_*_qs() functions, callers to this function
- * must disable irqs in order to protect the assignment to
- * ->rcu_read_unlock_special.
+ * As with the other rcu_*_qs() functions, callers to this function
+ * must disable preemption.
  */
-static void rcu_preempt_qs(int cpu)
+static void rcu_preempt_qs(void)
 {
-	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
-
-	if (rdp->passed_quiesce == 0)
-		trace_rcu_grace_period(TPS("rcu_preempt"), rdp->gpnum, TPS("cpuqs"));
-	rdp->passed_quiesce = 1;
-	current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+	if (!__this_cpu_read(rcu_preempt_data.passed_quiesce)) {
+		trace_rcu_grace_period(TPS("rcu_preempt"),
+				       __this_cpu_read(rcu_preempt_data.gpnum),
+				       TPS("cpuqs"));
+		__this_cpu_write(rcu_preempt_data.passed_quiesce, 1);
+		barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */
+		current->rcu_read_unlock_special.b.need_qs = false;
+	}
 }
 
 /*
@@ -163,14 +164,14 @@ static void rcu_preempt_note_context_switch(int cpu)
 	struct rcu_node *rnp;
 
 	if (t->rcu_read_lock_nesting > 0 &&
-	    (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
+	    !t->rcu_read_unlock_special.b.blocked) {
 
 		/* Possibly blocking in an RCU read-side critical section. */
 		rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
 		rnp = rdp->mynode;
 		raw_spin_lock_irqsave(&rnp->lock, flags);
 		smp_mb__after_unlock_lock();
-		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
+		t->rcu_read_unlock_special.b.blocked = true;
 		t->rcu_blocked_node = rnp;
 
 		/*
@@ -212,7 +213,7 @@ static void rcu_preempt_note_context_switch(int cpu)
					       : rnp->gpnum + 1);
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	} else if (t->rcu_read_lock_nesting < 0 &&
-		   t->rcu_read_unlock_special) {
+		   t->rcu_read_unlock_special.s) {
 
 		/*
 		 * Complete exit from RCU read-side critical section on
@@ -230,9 +231,7 @@ static void rcu_preempt_note_context_switch(int cpu)
 	 * grace period, then the fact that the task has been enqueued
 	 * means that we continue to block the current grace period.
 	 */
-	local_irq_save(flags);
-	rcu_preempt_qs(cpu);
-	local_irq_restore(flags);
+	rcu_preempt_qs();
 }
 
 /*
@@ -313,7 +312,7 @@ void rcu_read_unlock_special(struct task_struct *t)
 	bool drop_boost_mutex = false;
 #endif /* #ifdef CONFIG_RCU_BOOST */
 	struct rcu_node *rnp;
-	int special;
+	union rcu_special special;
 
 	/* NMI handlers cannot block and cannot safely manipulate state. */
 	if (in_nmi())
@@ -323,12 +322,13 @@ void rcu_read_unlock_special(struct task_struct *t)
 
 	/*
 	 * If RCU core is waiting for this CPU to exit critical section,
-	 * let it know that we have done so.
+	 * let it know that we have done so.  Because irqs are disabled,
+	 * t->rcu_read_unlock_special cannot change.
 	 */
 	special = t->rcu_read_unlock_special;
-	if (special & RCU_READ_UNLOCK_NEED_QS) {
-		rcu_preempt_qs(smp_processor_id());
-		if (!t->rcu_read_unlock_special) {
+	if (special.b.need_qs) {
+		rcu_preempt_qs();
+		if (!t->rcu_read_unlock_special.s) {
 			local_irq_restore(flags);
 			return;
 		}
@@ -341,8 +341,8 @@ void rcu_read_unlock_special(struct task_struct *t)
 	}
 
 	/* Clean up if blocked during RCU read-side critical section. */
-	if (special & RCU_READ_UNLOCK_BLOCKED) {
-		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
+	if (special.b.blocked) {
+		t->rcu_read_unlock_special.b.blocked = false;
 
 		/*
 		 * Remove this task from the list it blocked on.  The
@@ -626,12 +626,13 @@ static void rcu_preempt_check_callbacks(int cpu)
 	struct task_struct *t = current;
 
 	if (t->rcu_read_lock_nesting == 0) {
-		rcu_preempt_qs(cpu);
+		rcu_preempt_qs();
 		return;
 	}
 	if (t->rcu_read_lock_nesting > 0 &&
-	    per_cpu(rcu_preempt_data, cpu).qs_pending)
-		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
+	    per_cpu(rcu_preempt_data, cpu).qs_pending &&
+	    !per_cpu(rcu_preempt_data, cpu).passed_quiesce)
+		t->rcu_read_unlock_special.b.need_qs = true;
 }
 
 #ifdef CONFIG_RCU_BOOST
@@ -915,7 +916,7 @@ void exit_rcu(void)
 		return;
 	t->rcu_read_lock_nesting = 1;
 	barrier();
-	t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED;
+	t->rcu_read_unlock_special.b.blocked = true;
 	__rcu_read_unlock();
 }
 
@@ -1816,7 +1817,7 @@ static int rcu_oom_notify(struct notifier_block *self,
 	get_online_cpus();
 	for_each_online_cpu(cpu) {
 		smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
-		cond_resched();
+		cond_resched_rcu_qs();
 	}
 	put_online_cpus();
 
@@ -3162,3 +3163,19 @@ static void rcu_bind_gp_kthread(void)
 	housekeeping_affine(current);
 #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 }
+
+/* Record the current task on dyntick-idle entry. */
+static void rcu_dynticks_task_enter(void)
+{
+#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
+	ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id();
+#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
+}
+
+/* Record no current task on dyntick-idle exit. */
+static void rcu_dynticks_task_exit(void)
+{
+#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
+	ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1;
+#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
+}
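
The tree_plugin.h hunks above replace open-coded bit operations on ->rcu_read_unlock_special with named fields of a union rcu_special: .b.blocked and .b.need_qs for individual bits, and .s for whole-word tests such as the one in __rcu_read_unlock(). The union itself is defined in include/linux/sched.h, outside this diff; a sketch of the shape this code assumes is:

union rcu_special {
	struct {
		bool blocked;
		bool need_qs;
	} b;		/* Individual bits. */
	short s;	/* Whole word, for one-shot test or clear. */
};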
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index ea8ea7b16e11..3ef8ba58694e 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -47,6 +47,8 @@
 #include <linux/hardirq.h>
 #include <linux/delay.h>
 #include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/tick.h>
 
 #define CREATE_TRACE_POINTS
 
@@ -91,7 +93,7 @@ void __rcu_read_unlock(void)
 		barrier();  /* critical section before exit code. */
 		t->rcu_read_lock_nesting = INT_MIN;
 		barrier();  /* assign before ->rcu_read_unlock_special load */
-		if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
+		if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special.s)))
 			rcu_read_unlock_special(t);
 		barrier();  /* ->rcu_read_unlock_special load before assign */
 		t->rcu_read_lock_nesting = 0;
@@ -379,3 +381,312 @@ static int __init check_cpu_stall_init(void)
 early_initcall(check_cpu_stall_init);
 
 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
+
+#ifdef CONFIG_TASKS_RCU
+
+/*
+ * Simple variant of RCU whose quiescent states are voluntary context switch,
+ * user-space execution, and idle.  As such, grace periods can take one good
+ * long time.  There are no read-side primitives similar to rcu_read_lock()
+ * and rcu_read_unlock() because this implementation is intended to get
+ * the system into a safe state for some of the manipulations involved in
+ * tracing and the like.  Finally, this implementation does not support
+ * high call_rcu_tasks() rates from multiple CPUs.  If this is required,
+ * per-CPU callback lists will be needed.
+ */
+
+/* Global list of callbacks and associated lock. */
+static struct rcu_head *rcu_tasks_cbs_head;
+static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
+static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq);
+static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
+
+/* Track exiting tasks in order to allow them to be waited for. */
+DEFINE_SRCU(tasks_rcu_exit_srcu);
+
+/* Control stall timeouts.  Disable with <= 0, otherwise jiffies till stall. */
+static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 10;
+module_param(rcu_task_stall_timeout, int, 0644);
+
+static void rcu_spawn_tasks_kthread(void);
+
+/*
+ * Post an RCU-tasks callback.  First call must be from process context
+ * after the scheduler is fully operational.
+ */
+void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
+{
+	unsigned long flags;
+	bool needwake;
+
+	rhp->next = NULL;
+	rhp->func = func;
+	raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
+	needwake = !rcu_tasks_cbs_head;
+	*rcu_tasks_cbs_tail = rhp;
+	rcu_tasks_cbs_tail = &rhp->next;
+	raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
+	if (needwake) {
+		rcu_spawn_tasks_kthread();
+		wake_up(&rcu_tasks_cbs_wq);
+	}
+}
+EXPORT_SYMBOL_GPL(call_rcu_tasks);
+
+/**
+ * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full rcu-tasks
+ * grace period has elapsed, in other words after all currently
+ * executing rcu-tasks read-side critical sections have elapsed.  These
+ * read-side critical sections are delimited by calls to schedule(),
+ * cond_resched_rcu_qs(), idle execution, userspace execution, calls
+ * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
+ *
+ * This is a very specialized primitive, intended only for a few uses in
+ * tracing and other situations requiring manipulation of function
+ * preambles and profiling hooks.  The synchronize_rcu_tasks() function
+ * is not (yet) intended for heavy use from multiple CPUs.
+ *
+ * Note that this guarantee implies further memory-ordering guarantees.
+ * On systems with more than one CPU, when synchronize_rcu_tasks() returns,
+ * each CPU is guaranteed to have executed a full memory barrier since the
+ * end of its last RCU-tasks read-side critical section whose beginning
+ * preceded the call to synchronize_rcu_tasks().  In addition, each CPU
+ * having an RCU-tasks read-side critical section that extends beyond
+ * the return from synchronize_rcu_tasks() is guaranteed to have executed
+ * a full memory barrier after the beginning of synchronize_rcu_tasks()
+ * and before the beginning of that RCU-tasks read-side critical section.
+ * Note that these guarantees include CPUs that are offline, idle, or
+ * executing in user mode, as well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked synchronize_rcu_tasks(), which returned
+ * to its caller on CPU B, then both CPU A and CPU B are guaranteed
+ * to have executed a full memory barrier during the execution of
+ * synchronize_rcu_tasks() -- even if CPU A and CPU B are the same CPU
+ * (but again only if the system has more than one CPU).
+ */
+void synchronize_rcu_tasks(void)
+{
+	/* Complain if the scheduler has not started. */
+	rcu_lockdep_assert(!rcu_scheduler_active,
+			   "synchronize_rcu_tasks called too soon");
+
+	/* Wait for the grace period. */
+	wait_rcu_gp(call_rcu_tasks);
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);
+
+/**
+ * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
+ *
+ * Although the current implementation is guaranteed to wait, it is not
+ * obligated to, for example, if there are no pending callbacks.
+ */
+void rcu_barrier_tasks(void)
+{
+	/* There is only one callback queue, so this is easy.  ;-) */
+	synchronize_rcu_tasks();
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
+
+/* See if tasks are still holding out, complain if so. */
+static void check_holdout_task(struct task_struct *t,
+			       bool needreport, bool *firstreport)
+{
+	int cpu;
+
+	if (!ACCESS_ONCE(t->rcu_tasks_holdout) ||
+	    t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) ||
+	    !ACCESS_ONCE(t->on_rq) ||
+	    (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
+	     !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
+		ACCESS_ONCE(t->rcu_tasks_holdout) = false;
+		list_del_init(&t->rcu_tasks_holdout_list);
+		put_task_struct(t);
+		return;
+	}
+	if (!needreport)
+		return;
+	if (*firstreport) {
+		pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
+		*firstreport = false;
+	}
+	cpu = task_cpu(t);
+	pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
+		 t, ".I"[is_idle_task(t)],
+		 "N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
+		 t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
+		 t->rcu_tasks_idle_cpu, cpu);
+	sched_show_task(t);
+}
+
+/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
+static int __noreturn rcu_tasks_kthread(void *arg)
+{
+	unsigned long flags;
+	struct task_struct *g, *t;
+	unsigned long lastreport;
+	struct rcu_head *list;
+	struct rcu_head *next;
+	LIST_HEAD(rcu_tasks_holdouts);
+
+	/* FIXME: Add housekeeping affinity. */
+
+	/*
+	 * Each pass through the following loop makes one check for
+	 * newly arrived callbacks, and, if there are some, waits for
+	 * one RCU-tasks grace period and then invokes the callbacks.
+	 * This loop is terminated by the system going down.  ;-)
+	 */
+	for (;;) {
+
+		/* Pick up any new callbacks. */
+		raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
+		list = rcu_tasks_cbs_head;
+		rcu_tasks_cbs_head = NULL;
+		rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
+		raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
+
+		/* If there were none, wait a bit and start over. */
+		if (!list) {
+			wait_event_interruptible(rcu_tasks_cbs_wq,
+						 rcu_tasks_cbs_head);
+			if (!rcu_tasks_cbs_head) {
+				WARN_ON(signal_pending(current));
+				schedule_timeout_interruptible(HZ/10);
+			}
+			continue;
+		}
+
+		/*
+		 * Wait for all pre-existing t->on_rq and t->nvcsw
+		 * transitions to complete.  Invoking synchronize_sched()
+		 * suffices because all these transitions occur with
+		 * interrupts disabled.  Without this synchronize_sched(),
+		 * a read-side critical section that started before the
+		 * grace period might be incorrectly seen as having started
+		 * after the grace period.
+		 *
+		 * This synchronize_sched() also dispenses with the
+		 * need for a memory barrier on the first store to
+		 * ->rcu_tasks_holdout, as it forces the store to happen
+		 * after the beginning of the grace period.
+		 */
+		synchronize_sched();
+
+		/*
+		 * There were callbacks, so we need to wait for an
+		 * RCU-tasks grace period.  Start off by scanning
+		 * the task list for tasks that are not already
+		 * voluntarily blocked.  Mark these tasks and make
+		 * a list of them in rcu_tasks_holdouts.
+		 */
+		rcu_read_lock();
+		for_each_process_thread(g, t) {
+			if (t != current && ACCESS_ONCE(t->on_rq) &&
+			    !is_idle_task(t)) {
+				get_task_struct(t);
+				t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw);
+				ACCESS_ONCE(t->rcu_tasks_holdout) = true;
+				list_add(&t->rcu_tasks_holdout_list,
+					 &rcu_tasks_holdouts);
+			}
+		}
+		rcu_read_unlock();
+
+		/*
+		 * Wait for tasks that are in the process of exiting.
+		 * This does only part of the job, ensuring that all
+		 * tasks that were previously exiting reach the point
+		 * where they have disabled preemption, allowing the
+		 * later synchronize_sched() to finish the job.
+		 */
+		synchronize_srcu(&tasks_rcu_exit_srcu);
+
+		/*
+		 * Each pass through the following loop scans the list
+		 * of holdout tasks, removing any that are no longer
+		 * holdouts.  When the list is empty, we are done.
+		 */
+		lastreport = jiffies;
+		while (!list_empty(&rcu_tasks_holdouts)) {
+			bool firstreport;
+			bool needreport;
+			int rtst;
+			struct task_struct *t1;
+
+			schedule_timeout_interruptible(HZ);
+			rtst = ACCESS_ONCE(rcu_task_stall_timeout);
+			needreport = rtst > 0 &&
+				     time_after(jiffies, lastreport + rtst);
+			if (needreport)
+				lastreport = jiffies;
+			firstreport = true;
+			WARN_ON(signal_pending(current));
+			list_for_each_entry_safe(t, t1, &rcu_tasks_holdouts,
+						 rcu_tasks_holdout_list) {
+				check_holdout_task(t, needreport, &firstreport);
+				cond_resched();
+			}
+		}
+
+		/*
+		 * Because ->on_rq and ->nvcsw are not guaranteed
+		 * to have full memory barriers prior to them in the
+		 * schedule() path, memory reordering on other CPUs could
+		 * cause their RCU-tasks read-side critical sections to
+		 * extend past the end of the grace period.  However,
+		 * because these ->nvcsw updates are carried out with
+		 * interrupts disabled, we can use synchronize_sched()
+		 * to force the needed ordering on all such CPUs.
+		 *
+		 * This synchronize_sched() also confines all
+		 * ->rcu_tasks_holdout accesses to be within the grace
+		 * period, avoiding the need for memory barriers for
+		 * ->rcu_tasks_holdout accesses.
+		 *
+		 * In addition, this synchronize_sched() waits for exiting
+		 * tasks to complete their final preempt_disable() region
+		 * of execution, cleaning up after the synchronize_srcu()
+		 * above.
+		 */
+		synchronize_sched();
+
+		/* Invoke the callbacks. */
+		while (list) {
+			next = list->next;
+			local_bh_disable();
+			list->func(list);
+			local_bh_enable();
+			list = next;
+			cond_resched();
+		}
+		schedule_timeout_uninterruptible(HZ/10);
+	}
+}
+
+/* Spawn rcu_tasks_kthread() at first call to call_rcu_tasks(). */
+static void rcu_spawn_tasks_kthread(void)
+{
+	static DEFINE_MUTEX(rcu_tasks_kthread_mutex);
+	static struct task_struct *rcu_tasks_kthread_ptr;
+	struct task_struct *t;
+
+	if (ACCESS_ONCE(rcu_tasks_kthread_ptr)) {
+		smp_mb(); /* Ensure caller sees full kthread. */
+		return;
+	}
+	mutex_lock(&rcu_tasks_kthread_mutex);
+	if (rcu_tasks_kthread_ptr) {
+		mutex_unlock(&rcu_tasks_kthread_mutex);
+		return;
+	}
+	t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
+	BUG_ON(IS_ERR(t));
+	smp_mb(); /* Ensure others see full kthread. */
+	ACCESS_ONCE(rcu_tasks_kthread_ptr) = t;
+	mutex_unlock(&rcu_tasks_kthread_mutex);
+}
+
+#endif /* #ifdef CONFIG_TASKS_RCU */
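
Taken together, update.c now provides the full asynchronous (call_rcu_tasks()), synchronous (synchronize_rcu_tasks()), and barrier (rcu_barrier_tasks()) interfaces. A hedged teardown sketch for a hypothetical client, with unregister_my_hook() and my_hook_text standing in for the caller's own unhooking code and storage:

#include <linux/rcupdate.h>
#include <linux/slab.h>

static void *my_hook_text;			/* hypothetical storage */
static void unregister_my_hook(void) { }	/* hypothetical stub */

static void my_subsystem_exit(void)
{
	unregister_my_hook();		/* Stop new entries into the hook. */
	synchronize_rcu_tasks();	/* Tasks already inside have left. */
	rcu_barrier_tasks();		/* Queued call_rcu_tasks() CBs have run. */
	kfree(my_hook_text);		/* Now safe to free the hook's code. */
}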
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 5918d227730f..348ec763b104 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -278,7 +278,7 @@ restart:
 		pending >>= softirq_bit;
 	}
 
-	rcu_bh_qs(smp_processor_id());
+	rcu_bh_qs();
 	local_irq_disable();
 
 	pending = local_softirq_pending();