aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul E. McKenney <paulmck@linux.vnet.ibm.com>2014-12-13 23:32:04 -0500
committerPaul E. McKenney <paulmck@linux.vnet.ibm.com>2015-01-16 02:33:14 -0500
commit5cd37193ce8539be1e6ef76be226f4bcc984e0f5 (patch)
tree7bba4e3fd57b669d60e37fd32011db4860016505
parenta94844b22a2e2b9155bbc0878c507850477221c2 (diff)
rcu: Make cond_resched_rcu_qs() apply to normal RCU flavors
Although cond_resched_rcu_qs() only applies to TASKS_RCU, it is used in places where it would be useful for it to apply to the normal RCU flavors, rcu_preempt, rcu_sched, and rcu_bh. This is especially the case for workloads that aggressively overload the system, particularly those that generate large numbers of RCU updates on systems running NO_HZ_FULL CPUs. This commit therefore communicates quiescent states from cond_resched_rcu_qs() to the normal RCU flavors. Note that it is unfortunately necessary to leave the old ->passed_quiesce mechanism in place to allow quiescent states that apply to only one flavor to be recorded. (Yes, we could decrement ->rcu_qs_ctr_snap in that case, but that is not so good for debugging of RCU internals.) In addition, if one of the RCU flavor's grace period has stalled, this will invoke rcu_momentary_dyntick_idle(), resulting in a heavy-weight quiescent state visible from other CPUs. Reported-by: Sasha Levin <sasha.levin@oracle.com> Reported-by: Dave Jones <davej@redhat.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> [ paulmck: Merge commit from Sasha Levin fixing a bug where __this_cpu() was used in preemptible code. ]
-rw-r--r--Documentation/RCU/trace.txt32
-rw-r--r--include/linux/rcupdate.h3
-rw-r--r--include/linux/rcutiny.h5
-rw-r--r--include/linux/rcutree.h2
-rw-r--r--kernel/rcu/tree.c38
-rw-r--r--kernel/rcu/tree.h2
-rw-r--r--kernel/rcu/tree_trace.c8
7 files changed, 65 insertions, 25 deletions
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index b63b9bb3bc0c..08651da15448 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -56,14 +56,14 @@ rcuboost:
56 56
57The output of "cat rcu/rcu_preempt/rcudata" looks as follows: 57The output of "cat rcu/rcu_preempt/rcudata" looks as follows:
58 58
59 0!c=30455 g=30456 pq=1 qp=1 dt=126535/140000000000000/0 df=2002 of=4 ql=0/0 qs=N... b=10 ci=74572 nci=0 co=1131 ca=716 59 0!c=30455 g=30456 pq=1/0 qp=1 dt=126535/140000000000000/0 df=2002 of=4 ql=0/0 qs=N... b=10 ci=74572 nci=0 co=1131 ca=716
60 1!c=30719 g=30720 pq=1 qp=0 dt=132007/140000000000000/0 df=1874 of=10 ql=0/0 qs=N... b=10 ci=123209 nci=0 co=685 ca=982 60 1!c=30719 g=30720 pq=1/0 qp=0 dt=132007/140000000000000/0 df=1874 of=10 ql=0/0 qs=N... b=10 ci=123209 nci=0 co=685 ca=982
61 2!c=30150 g=30151 pq=1 qp=1 dt=138537/140000000000000/0 df=1707 of=8 ql=0/0 qs=N... b=10 ci=80132 nci=0 co=1328 ca=1458 61 2!c=30150 g=30151 pq=1/1 qp=1 dt=138537/140000000000000/0 df=1707 of=8 ql=0/0 qs=N... b=10 ci=80132 nci=0 co=1328 ca=1458
62 3 c=31249 g=31250 pq=1 qp=0 dt=107255/140000000000000/0 df=1749 of=6 ql=0/450 qs=NRW. b=10 ci=151700 nci=0 co=509 ca=622 62 3 c=31249 g=31250 pq=1/1 qp=0 dt=107255/140000000000000/0 df=1749 of=6 ql=0/450 qs=NRW. b=10 ci=151700 nci=0 co=509 ca=622
63 4!c=29502 g=29503 pq=1 qp=1 dt=83647/140000000000000/0 df=965 of=5 ql=0/0 qs=N... b=10 ci=65643 nci=0 co=1373 ca=1521 63 4!c=29502 g=29503 pq=1/0 qp=1 dt=83647/140000000000000/0 df=965 of=5 ql=0/0 qs=N... b=10 ci=65643 nci=0 co=1373 ca=1521
64 5 c=31201 g=31202 pq=1 qp=1 dt=70422/0/0 df=535 of=7 ql=0/0 qs=.... b=10 ci=58500 nci=0 co=764 ca=698 64 5 c=31201 g=31202 pq=1/0 qp=1 dt=70422/0/0 df=535 of=7 ql=0/0 qs=.... b=10 ci=58500 nci=0 co=764 ca=698
65 6!c=30253 g=30254 pq=1 qp=1 dt=95363/140000000000000/0 df=780 of=5 ql=0/0 qs=N... b=10 ci=100607 nci=0 co=1414 ca=1353 65 6!c=30253 g=30254 pq=1/0 qp=1 dt=95363/140000000000000/0 df=780 of=5 ql=0/0 qs=N... b=10 ci=100607 nci=0 co=1414 ca=1353
66 7 c=31178 g=31178 pq=1 qp=0 dt=91536/0/0 df=547 of=4 ql=0/0 qs=.... b=10 ci=109819 nci=0 co=1115 ca=969 66 7 c=31178 g=31178 pq=1/0 qp=0 dt=91536/0/0 df=547 of=4 ql=0/0 qs=.... b=10 ci=109819 nci=0 co=1115 ca=969
67 67
68This file has one line per CPU, or eight for this 8-CPU system. 68This file has one line per CPU, or eight for this 8-CPU system.
69The fields are as follows: 69The fields are as follows:
@@ -188,14 +188,14 @@ o "ca" is the number of RCU callbacks that have been adopted by this
188Kernels compiled with CONFIG_RCU_BOOST=y display the following from 188Kernels compiled with CONFIG_RCU_BOOST=y display the following from
189/debug/rcu/rcu_preempt/rcudata: 189/debug/rcu/rcu_preempt/rcudata:
190 190
191 0!c=12865 g=12866 pq=1 qp=1 dt=83113/140000000000000/0 df=288 of=11 ql=0/0 qs=N... kt=0/O ktl=944 b=10 ci=60709 nci=0 co=748 ca=871 191 0!c=12865 g=12866 pq=1/0 qp=1 dt=83113/140000000000000/0 df=288 of=11 ql=0/0 qs=N... kt=0/O ktl=944 b=10 ci=60709 nci=0 co=748 ca=871
192 1 c=14407 g=14408 pq=1 qp=0 dt=100679/140000000000000/0 df=378 of=7 ql=0/119 qs=NRW. kt=0/W ktl=9b6 b=10 ci=109740 nci=0 co=589 ca=485 192 1 c=14407 g=14408 pq=1/0 qp=0 dt=100679/140000000000000/0 df=378 of=7 ql=0/119 qs=NRW. kt=0/W ktl=9b6 b=10 ci=109740 nci=0 co=589 ca=485
193 2 c=14407 g=14408 pq=1 qp=0 dt=105486/0/0 df=90 of=9 ql=0/89 qs=NRW. kt=0/W ktl=c0c b=10 ci=83113 nci=0 co=533 ca=490 193 2 c=14407 g=14408 pq=1/0 qp=0 dt=105486/0/0 df=90 of=9 ql=0/89 qs=NRW. kt=0/W ktl=c0c b=10 ci=83113 nci=0 co=533 ca=490
194 3 c=14407 g=14408 pq=1 qp=0 dt=107138/0/0 df=142 of=8 ql=0/188 qs=NRW. kt=0/W ktl=b96 b=10 ci=121114 nci=0 co=426 ca=290 194 3 c=14407 g=14408 pq=1/0 qp=0 dt=107138/0/0 df=142 of=8 ql=0/188 qs=NRW. kt=0/W ktl=b96 b=10 ci=121114 nci=0 co=426 ca=290
195 4 c=14405 g=14406 pq=1 qp=1 dt=50238/0/0 df=706 of=7 ql=0/0 qs=.... kt=0/W ktl=812 b=10 ci=34929 nci=0 co=643 ca=114 195 4 c=14405 g=14406 pq=1/0 qp=1 dt=50238/0/0 df=706 of=7 ql=0/0 qs=.... kt=0/W ktl=812 b=10 ci=34929 nci=0 co=643 ca=114
196 5!c=14168 g=14169 pq=1 qp=0 dt=45465/140000000000000/0 df=161 of=11 ql=0/0 qs=N... kt=0/O ktl=b4d b=10 ci=47712 nci=0 co=677 ca=722 196 5!c=14168 g=14169 pq=1/0 qp=0 dt=45465/140000000000000/0 df=161 of=11 ql=0/0 qs=N... kt=0/O ktl=b4d b=10 ci=47712 nci=0 co=677 ca=722
197 6 c=14404 g=14405 pq=1 qp=0 dt=59454/0/0 df=94 of=6 ql=0/0 qs=.... kt=0/W ktl=e57 b=10 ci=55597 nci=0 co=701 ca=811 197 6 c=14404 g=14405 pq=1/0 qp=0 dt=59454/0/0 df=94 of=6 ql=0/0 qs=.... kt=0/W ktl=e57 b=10 ci=55597 nci=0 co=701 ca=811
198 7 c=14407 g=14408 pq=1 qp=1 dt=68850/0/0 df=31 of=8 ql=0/0 qs=.... kt=0/W ktl=14bd b=10 ci=77475 nci=0 co=508 ca=1042 198 7 c=14407 g=14408 pq=1/0 qp=1 dt=68850/0/0 df=31 of=8 ql=0/0 qs=.... kt=0/W ktl=14bd b=10 ci=77475 nci=0 co=508 ca=1042
199 199
200This is similar to the output discussed above, but contains the following 200This is similar to the output discussed above, but contains the following
201additional fields: 201additional fields:
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index ed4f5939a452..468228750299 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -331,12 +331,13 @@ static inline void rcu_init_nohz(void)
331extern struct srcu_struct tasks_rcu_exit_srcu; 331extern struct srcu_struct tasks_rcu_exit_srcu;
332#define rcu_note_voluntary_context_switch(t) \ 332#define rcu_note_voluntary_context_switch(t) \
333 do { \ 333 do { \
334 rcu_all_qs(); \
334 if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \ 335 if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \
335 ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \ 336 ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \
336 } while (0) 337 } while (0)
337#else /* #ifdef CONFIG_TASKS_RCU */ 338#else /* #ifdef CONFIG_TASKS_RCU */
338#define TASKS_RCU(x) do { } while (0) 339#define TASKS_RCU(x) do { } while (0)
339#define rcu_note_voluntary_context_switch(t) do { } while (0) 340#define rcu_note_voluntary_context_switch(t) rcu_all_qs()
340#endif /* #else #ifdef CONFIG_TASKS_RCU */ 341#endif /* #else #ifdef CONFIG_TASKS_RCU */
341 342
342/** 343/**
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 0e5366200154..fabd3fad8516 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -154,7 +154,10 @@ static inline bool rcu_is_watching(void)
154 return true; 154 return true;
155} 155}
156 156
157
158#endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ 157#endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
159 158
159static inline void rcu_all_qs(void)
160{
161}
162
160#endif /* __LINUX_RCUTINY_H */ 163#endif /* __LINUX_RCUTINY_H */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 52953790dcca..ddba927f7316 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -97,4 +97,6 @@ extern int rcu_scheduler_active __read_mostly;
97 97
98bool rcu_is_watching(void); 98bool rcu_is_watching(void);
99 99
100void rcu_all_qs(void);
101
100#endif /* __LINUX_RCUTREE_H */ 102#endif /* __LINUX_RCUTREE_H */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 75ce12316b4c..cb00e038c2f2 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -219,6 +219,9 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
219#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ 219#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
220}; 220};
221 221
222DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
223EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr);
224
222/* 225/*
223 * Let the RCU core know that this CPU has gone through the scheduler, 226 * Let the RCU core know that this CPU has gone through the scheduler,
224 * which is a quiescent state. This is called when the need for a 227 * which is a quiescent state. This is called when the need for a
@@ -288,6 +291,22 @@ void rcu_note_context_switch(void)
288} 291}
289EXPORT_SYMBOL_GPL(rcu_note_context_switch); 292EXPORT_SYMBOL_GPL(rcu_note_context_switch);
290 293
294/*
295 * Register a quiesecent state for all RCU flavors. If there is an
296 * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight
297 * dyntick-idle quiescent state visible to other CPUs (but only for those
298 * RCU flavors in desparate need of a quiescent state, which will normally
299 * be none of them). Either way, do a lightweight quiescent state for
300 * all RCU flavors.
301 */
302void rcu_all_qs(void)
303{
304 if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
305 rcu_momentary_dyntick_idle();
306 this_cpu_inc(rcu_qs_ctr);
307}
308EXPORT_SYMBOL_GPL(rcu_all_qs);
309
291static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */ 310static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */
292static long qhimark = 10000; /* If this many pending, ignore blimit. */ 311static long qhimark = 10000; /* If this many pending, ignore blimit. */
293static long qlowmark = 100; /* Once only this many pending, use blimit. */ 312static long qlowmark = 100; /* Once only this many pending, use blimit. */
@@ -1609,6 +1628,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
1609 rdp->gpnum = rnp->gpnum; 1628 rdp->gpnum = rnp->gpnum;
1610 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart")); 1629 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
1611 rdp->passed_quiesce = 0; 1630 rdp->passed_quiesce = 0;
1631 rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
1612 rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); 1632 rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
1613 zero_cpu_stall_ticks(rdp); 1633 zero_cpu_stall_ticks(rdp);
1614 ACCESS_ONCE(rdp->gpwrap) = false; 1634 ACCESS_ONCE(rdp->gpwrap) = false;
@@ -2075,8 +2095,10 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
2075 rnp = rdp->mynode; 2095 rnp = rdp->mynode;
2076 raw_spin_lock_irqsave(&rnp->lock, flags); 2096 raw_spin_lock_irqsave(&rnp->lock, flags);
2077 smp_mb__after_unlock_lock(); 2097 smp_mb__after_unlock_lock();
2078 if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum || 2098 if ((rdp->passed_quiesce == 0 &&
2079 rnp->completed == rnp->gpnum || rdp->gpwrap) { 2099 rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) ||
2100 rdp->gpnum != rnp->gpnum || rnp->completed == rnp->gpnum ||
2101 rdp->gpwrap) {
2080 2102
2081 /* 2103 /*
2082 * The grace period in which this quiescent state was 2104 * The grace period in which this quiescent state was
@@ -2085,6 +2107,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
2085 * within the current grace period. 2107 * within the current grace period.
2086 */ 2108 */
2087 rdp->passed_quiesce = 0; /* need qs for new gp. */ 2109 rdp->passed_quiesce = 0; /* need qs for new gp. */
2110 rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
2088 raw_spin_unlock_irqrestore(&rnp->lock, flags); 2111 raw_spin_unlock_irqrestore(&rnp->lock, flags);
2089 return; 2112 return;
2090 } 2113 }
@@ -2129,7 +2152,8 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
2129 * Was there a quiescent state since the beginning of the grace 2152 * Was there a quiescent state since the beginning of the grace
2130 * period? If no, then exit and wait for the next call. 2153 * period? If no, then exit and wait for the next call.
2131 */ 2154 */
2132 if (!rdp->passed_quiesce) 2155 if (!rdp->passed_quiesce &&
2156 rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr))
2133 return; 2157 return;
2134 2158
2135 /* 2159 /*
@@ -3174,9 +3198,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
3174 3198
3175 /* Is the RCU core waiting for a quiescent state from this CPU? */ 3199 /* Is the RCU core waiting for a quiescent state from this CPU? */
3176 if (rcu_scheduler_fully_active && 3200 if (rcu_scheduler_fully_active &&
3177 rdp->qs_pending && !rdp->passed_quiesce) { 3201 rdp->qs_pending && !rdp->passed_quiesce &&
3202 rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) {
3178 rdp->n_rp_qs_pending++; 3203 rdp->n_rp_qs_pending++;
3179 } else if (rdp->qs_pending && rdp->passed_quiesce) { 3204 } else if (rdp->qs_pending &&
3205 (rdp->passed_quiesce ||
3206 rdp->rcu_qs_ctr_snap != __this_cpu_read(rcu_qs_ctr))) {
3180 rdp->n_rp_report_qs++; 3207 rdp->n_rp_report_qs++;
3181 return 1; 3208 return 1;
3182 } 3209 }
@@ -3510,6 +3537,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
3510 rdp->gpnum = rnp->completed; 3537 rdp->gpnum = rnp->completed;
3511 rdp->completed = rnp->completed; 3538 rdp->completed = rnp->completed;
3512 rdp->passed_quiesce = 0; 3539 rdp->passed_quiesce = 0;
3540 rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
3513 rdp->qs_pending = 0; 3541 rdp->qs_pending = 0;
3514 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); 3542 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
3515 } 3543 }
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 7472ff388d55..1e7f8b05714e 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -257,6 +257,8 @@ struct rcu_data {
257 /* in order to detect GP end. */ 257 /* in order to detect GP end. */
258 unsigned long gpnum; /* Highest gp number that this CPU */ 258 unsigned long gpnum; /* Highest gp number that this CPU */
259 /* is aware of having started. */ 259 /* is aware of having started. */
260 unsigned long rcu_qs_ctr_snap;/* Snapshot of rcu_qs_ctr to check */
261 /* for rcu_all_qs() invocations. */
260 bool passed_quiesce; /* User-mode/idle loop etc. */ 262 bool passed_quiesce; /* User-mode/idle loop etc. */
261 bool qs_pending; /* Core waits for quiesc state. */ 263 bool qs_pending; /* Core waits for quiesc state. */
262 bool beenonline; /* CPU online at least once. */ 264 bool beenonline; /* CPU online at least once. */
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 5cdc62e1beeb..fbb6240509ea 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -46,6 +46,8 @@
46#define RCU_TREE_NONCORE 46#define RCU_TREE_NONCORE
47#include "tree.h" 47#include "tree.h"
48 48
49DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
50
49static int r_open(struct inode *inode, struct file *file, 51static int r_open(struct inode *inode, struct file *file,
50 const struct seq_operations *op) 52 const struct seq_operations *op)
51{ 53{
@@ -115,11 +117,13 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
115 117
116 if (!rdp->beenonline) 118 if (!rdp->beenonline)
117 return; 119 return;
118 seq_printf(m, "%3d%cc=%ld g=%ld pq=%d qp=%d", 120 seq_printf(m, "%3d%cc=%ld g=%ld pq=%d/%d qp=%d",
119 rdp->cpu, 121 rdp->cpu,
120 cpu_is_offline(rdp->cpu) ? '!' : ' ', 122 cpu_is_offline(rdp->cpu) ? '!' : ' ',
121 ulong2long(rdp->completed), ulong2long(rdp->gpnum), 123 ulong2long(rdp->completed), ulong2long(rdp->gpnum),
122 rdp->passed_quiesce, rdp->qs_pending); 124 rdp->passed_quiesce,
125 rdp->rcu_qs_ctr_snap == per_cpu(rcu_qs_ctr, rdp->cpu),
126 rdp->qs_pending);
123 seq_printf(m, " dt=%d/%llx/%d df=%lu", 127 seq_printf(m, " dt=%d/%llx/%d df=%lu",
124 atomic_read(&rdp->dynticks->dynticks), 128 atomic_read(&rdp->dynticks->dynticks),
125 rdp->dynticks->dynticks_nesting, 129 rdp->dynticks->dynticks_nesting,