author     Mark Rutland <mark.rutland@arm.com>                2016-06-03 10:20:04 -0400
committer  Paul E. McKenney <paulmck@linux.vnet.ibm.com>      2016-06-15 19:00:05 -0400
commit     bc75e99983df1efd977a5cd468893d55d52b8d70 (patch)
tree       30e9db69b96bda56040fa09b66491b98b135649d
parent     088e9d253d3a4ab7e058dd84bb532c32dadf1882 (diff)
rcu: Correctly handle sparse possible cpus
In many cases in the RCU tree code, we iterate over the set of cpus for a
leaf node described by rcu_node::grplo and rcu_node::grphi, checking per-cpu
data for each cpu in this range. However, if the set of possible cpus is
sparse, some cpus described in this range are not possible, and thus no
per-cpu region will have been allocated (or initialised) for them by the
generic percpu code.

Erroneous accesses to a per-cpu area for these !possible cpus may fault or
may hit other data depending on the address generated when the erroneous
per-cpu offset is applied. In practice, both cases have been observed on
arm64 hardware (the former being silent, but detectable with additional
patches).

To avoid issues resulting from this, we must iterate over the set of
*possible* cpus for a given leaf node. This patch adds a new helper,
for_each_leaf_node_possible_cpu, to enable this. As iteration is often
intertwined with rcu_node local bitmask manipulation, a new
leaf_node_cpu_bit helper is added to make this simpler and more consistent.
The RCU tree code is made to use both of these where appropriate.

Without this patch, running reboot at a shell can result in an oops like:

[ 3369.075979] Unable to handle kernel paging request at virtual address ffffff8008b21b4c
[ 3369.083881] pgd = ffffffc3ecdda000
[ 3369.087270] [ffffff8008b21b4c] *pgd=00000083eca48003, *pud=00000083eca48003, *pmd=0000000000000000
[ 3369.096222] Internal error: Oops: 96000007 [#1] PREEMPT SMP
[ 3369.101781] Modules linked in:
[ 3369.104825] CPU: 2 PID: 1817 Comm: NetworkManager Tainted: G W 4.6.0+ #3
[ 3369.121239] task: ffffffc0fa13e000 ti: ffffffc3eb940000 task.ti: ffffffc3eb940000
[ 3369.128708] PC is at sync_rcu_exp_select_cpus+0x188/0x510
[ 3369.134094] LR is at sync_rcu_exp_select_cpus+0x104/0x510
[ 3369.139479] pc : [<ffffff80081109a8>] lr : [<ffffff8008110924>] pstate: 200001c5
[ 3369.146860] sp : ffffffc3eb9435a0
[ 3369.150162] x29: ffffffc3eb9435a0 x28: ffffff8008be4f88
[ 3369.155465] x27: ffffff8008b66c80 x26: ffffffc3eceb2600
[ 3369.160767] x25: 0000000000000001 x24: ffffff8008be4f88
[ 3369.166070] x23: ffffff8008b51c3c x22: ffffff8008b66c80
[ 3369.171371] x21: 0000000000000001 x20: ffffff8008b21b40
[ 3369.176673] x19: ffffff8008b66c80 x18: 0000000000000000
[ 3369.181975] x17: 0000007fa951a010 x16: ffffff80086a30f0
[ 3369.187278] x15: 0000007fa9505590 x14: 0000000000000000
[ 3369.192580] x13: ffffff8008b51000 x12: ffffffc3eb940000
[ 3369.197882] x11: 0000000000000006 x10: ffffff8008b51b78
[ 3369.203184] x9 : 0000000000000001 x8 : ffffff8008be4000
[ 3369.208486] x7 : ffffff8008b21b40 x6 : 0000000000001003
[ 3369.213788] x5 : 0000000000000000 x4 : ffffff8008b27280
[ 3369.219090] x3 : ffffff8008b21b4c x2 : 0000000000000001
[ 3369.224406] x1 : 0000000000000001 x0 : 0000000000000140
...
[ 3369.972257] [<ffffff80081109a8>] sync_rcu_exp_select_cpus+0x188/0x510
[ 3369.978685] [<ffffff80081128b4>] synchronize_rcu_expedited+0x64/0xa8
[ 3369.985026] [<ffffff80086b987c>] synchronize_net+0x24/0x30
[ 3369.990499] [<ffffff80086ddb54>] dev_deactivate_many+0x28c/0x298
[ 3369.996493] [<ffffff80086b6bb8>] __dev_close_many+0x60/0xd0
[ 3370.002052] [<ffffff80086b6d48>] __dev_close+0x28/0x40
[ 3370.007178] [<ffffff80086bf62c>] __dev_change_flags+0x8c/0x158
[ 3370.012999] [<ffffff80086bf718>] dev_change_flags+0x20/0x60
[ 3370.018558] [<ffffff80086cf7f0>] do_setlink+0x288/0x918
[ 3370.023771] [<ffffff80086d0798>] rtnl_newlink+0x398/0x6a8
[ 3370.029158] [<ffffff80086cee84>] rtnetlink_rcv_msg+0xe4/0x220
[ 3370.034891] [<ffffff80086e274c>] netlink_rcv_skb+0xc4/0xf8
[ 3370.040364] [<ffffff80086ced8c>] rtnetlink_rcv+0x2c/0x40
[ 3370.045663] [<ffffff80086e1fe8>] netlink_unicast+0x160/0x238
[ 3370.051309] [<ffffff80086e24b8>] netlink_sendmsg+0x2f0/0x358
[ 3370.056956] [<ffffff80086a0070>] sock_sendmsg+0x18/0x30
[ 3370.062168] [<ffffff80086a21cc>] ___sys_sendmsg+0x26c/0x280
[ 3370.067728] [<ffffff80086a30ac>] __sys_sendmsg+0x44/0x88
[ 3370.073027] [<ffffff80086a3100>] SyS_sendmsg+0x10/0x20
[ 3370.078153] [<ffffff8008085e70>] el0_svc_naked+0x24/0x28

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reported-by: Dennis Chen <dennis.chen@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
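To see the failure mode concretely, here is a self-contained userspace
sketch (illustrative only, not part of this commit; the CPU numbering and
helper names are hypothetical): with a sparse possible set, the dense
[grplo, grphi] walk visits CPU IDs that are not possible, while a walk
driven by the possible mask skips the holes.

/* Illustrative userspace model only -- not kernel code. */
#include <stdio.h>

#define NR_CPUS 8
static const unsigned long possible_mask = 0x33UL; /* CPUs 0, 1, 4, 5 */

static int cpu_possible(int cpu)
{
        return (possible_mask >> cpu) & 1UL;
}

/* Analogue of cpumask_next(): first possible CPU strictly after @cpu. */
static int next_possible(int cpu)
{
        for (cpu++; cpu < NR_CPUS; cpu++)
                if (cpu_possible(cpu))
                        return cpu;
        return NR_CPUS;
}

int main(void)
{
        int grplo = 0, grphi = 5; /* a leaf node spanning CPUs 0..5 */
        int cpu;

        /* Buggy pattern: also visits !possible CPUs 2 and 3, whose
         * per-cpu regions would never have been allocated. */
        printf("dense walk:   ");
        for (cpu = grplo; cpu <= grphi; cpu++)
                printf("%d%s ", cpu, cpu_possible(cpu) ? "" : "(!)");

        /* Fixed pattern, mirroring for_each_leaf_node_possible_cpu(). */
        printf("\npossible walk: ");
        for (cpu = next_possible(grplo - 1); cpu <= grphi;
             cpu = next_possible(cpu))
                printf("%d ", cpu);
        printf("\n");
        return 0;
}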
-rw-r--r--    kernel/rcu/tree.c           21
-rw-r--r--    kernel/rcu/tree.h           15
-rw-r--r--    kernel/rcu/tree_exp.h       16
-rw-r--r--    kernel/rcu/tree_plugin.h     5
4 files changed, 34 insertions, 23 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index e5ca15a461b9..f433959e9322 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1287,9 +1287,9 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
         rcu_for_each_leaf_node(rsp, rnp) {
                 raw_spin_lock_irqsave_rcu_node(rnp, flags);
                 if (rnp->qsmask != 0) {
-                        for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
-                                if (rnp->qsmask & (1UL << cpu))
-                                        dump_cpu_task(rnp->grplo + cpu);
+                        for_each_leaf_node_possible_cpu(rnp, cpu)
+                                if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu))
+                                        dump_cpu_task(cpu);
                 }
                 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
         }
@@ -1360,10 +1360,9 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
                 raw_spin_lock_irqsave_rcu_node(rnp, flags);
                 ndetected += rcu_print_task_stall(rnp);
                 if (rnp->qsmask != 0) {
-                        for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
-                                if (rnp->qsmask & (1UL << cpu)) {
-                                        print_cpu_stall_info(rsp,
-                                                             rnp->grplo + cpu);
+                        for_each_leaf_node_possible_cpu(rnp, cpu)
+                                if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
+                                        print_cpu_stall_info(rsp, cpu);
                                         ndetected++;
                                 }
                 }
@@ -2884,7 +2883,6 @@ static void force_qs_rnp(struct rcu_state *rsp,
                           unsigned long *maxj),
                  bool *isidle, unsigned long *maxj)
 {
-        unsigned long bit;
         int cpu;
         unsigned long flags;
         unsigned long mask;
@@ -2919,9 +2917,8 @@ static void force_qs_rnp(struct rcu_state *rsp,
                                 continue;
                         }
                 }
-                cpu = rnp->grplo;
-                bit = 1;
-                for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
+                for_each_leaf_node_possible_cpu(rnp, cpu) {
+                        unsigned long bit = leaf_node_cpu_bit(rnp, cpu);
                         if ((rnp->qsmask & bit) != 0) {
                                 if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
                                         mask |= bit;
@@ -3750,7 +3747,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 
         /* Set up local state, ensuring consistent view of global state. */
         raw_spin_lock_irqsave_rcu_node(rnp, flags);
-        rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
+        rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
         rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
         WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
         WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index e3959f5e6ddf..f714f873bf9d 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -254,6 +254,13 @@ struct rcu_node {
 } ____cacheline_internodealigned_in_smp;
 
 /*
+ * Bitmasks in an rcu_node cover the interval [grplo, grphi] of CPU IDs, and
+ * are indexed relative to this interval rather than the global CPU ID space.
+ * This generates the bit for a CPU in node-local masks.
+ */
+#define leaf_node_cpu_bit(rnp, cpu)     (1UL << ((cpu) - (rnp)->grplo))
+
+/*
  * Do a full breadth-first scan of the rcu_node structures for the
  * specified rcu_state structure.
  */
@@ -281,6 +288,14 @@ struct rcu_node {
              (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
 
 /*
+ * Iterate over all possible CPUs in a leaf RCU node.
+ */
+#define for_each_leaf_node_possible_cpu(rnp, cpu) \
+        for ((cpu) = cpumask_next(rnp->grplo - 1, cpu_possible_mask); \
+             cpu <= rnp->grphi; \
+             cpu = cpumask_next((cpu), cpu_possible_mask))
+
+/*
  * Union to allow "aggregate OR" operation on the need for a quiescent
  * state by the normal and expedited grace periods.
  */
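As a usage sketch of the two helpers above (a toy userspace re-creation,
illustrative only; cpumask_next_toy and the possible mask are stand-ins
for the kernel's cpumask machinery, not its real API):

#include <stdio.h>

struct rcu_node {
        unsigned long qsmask; /* node-local bits; bit 0 is grplo */
        int grplo, grphi;
};

static const unsigned long possible_mask = 0x33UL; /* CPUs 0, 1, 4, 5 */

/* Simplified stand-in for the kernel's cpumask_next(). */
static int cpumask_next_toy(int cpu, unsigned long mask)
{
        for (cpu++; cpu < (int)(8 * sizeof(mask)); cpu++)
                if (mask & (1UL << cpu))
                        return cpu;
        return (int)(8 * sizeof(mask));
}

#define leaf_node_cpu_bit(rnp, cpu) (1UL << ((cpu) - (rnp)->grplo))

#define for_each_leaf_node_possible_cpu(rnp, cpu) \
        for ((cpu) = cpumask_next_toy((rnp)->grplo - 1, possible_mask); \
             (cpu) <= (rnp)->grphi; \
             (cpu) = cpumask_next_toy((cpu), possible_mask))

int main(void)
{
        /* Hypothetical leaf node covering CPUs 0..5, with CPUs 0 and 4
         * still owing a quiescent state (bits 0 and 4 of qsmask). */
        struct rcu_node rnp = { .qsmask = 0x11UL, .grplo = 0, .grphi = 5 };
        int cpu;

        for_each_leaf_node_possible_cpu(&rnp, cpu)
                if (rnp.qsmask & leaf_node_cpu_bit(&rnp, cpu))
                        printf("CPU %d has not yet reported\n", cpu);
        return 0;
}

Note how the iterator never touches the impossible CPUs 2 and 3, and
leaf_node_cpu_bit() maps each visited CPU back to its node-local bit.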
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 00a02a231ada..d400434af6b2 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -344,7 +344,6 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
 {
         int cpu;
         unsigned long flags;
-        unsigned long mask;
         unsigned long mask_ofl_test;
         unsigned long mask_ofl_ipi;
         int ret;
@@ -356,7 +355,7 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
 
         /* Each pass checks a CPU for identity, offline, and idle. */
         mask_ofl_test = 0;
-        for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) {
+        for_each_leaf_node_possible_cpu(rnp, cpu) {
                 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
                 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
 
@@ -376,8 +375,8 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
         raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 
         /* IPI the remaining CPUs for expedited quiescent state. */
-        mask = 1;
-        for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
+        for_each_leaf_node_possible_cpu(rnp, cpu) {
+                unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
                 if (!(mask_ofl_ipi & mask))
                         continue;
 retry_ipi:
@@ -440,10 +439,10 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
         ndetected = 0;
         rcu_for_each_leaf_node(rsp, rnp) {
                 ndetected += rcu_print_task_exp_stall(rnp);
-                mask = 1;
-                for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
+                for_each_leaf_node_possible_cpu(rnp, cpu) {
                         struct rcu_data *rdp;
 
+                        mask = leaf_node_cpu_bit(rnp, cpu);
                         if (!(rnp->expmask & mask))
                                 continue;
                         ndetected++;
@@ -453,7 +452,6 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
453 "o."[!!(rdp->grpmask & rnp->expmaskinit)], 452 "o."[!!(rdp->grpmask & rnp->expmaskinit)],
454 "N."[!!(rdp->grpmask & rnp->expmaskinitnext)]); 453 "N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
455 } 454 }
456 mask <<= 1;
457 } 455 }
458 pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n", 456 pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
459 jiffies - jiffies_start, rsp->expedited_sequence, 457 jiffies - jiffies_start, rsp->expedited_sequence,
@@ -473,8 +471,8 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
                 pr_cont("\n");
         }
         rcu_for_each_leaf_node(rsp, rnp) {
-                mask = 1;
-                for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
+                for_each_leaf_node_possible_cpu(rnp, cpu) {
+                        mask = leaf_node_cpu_bit(rnp, cpu);
                         if (!(rnp->expmask & mask))
                                 continue;
                         dump_cpu_task(cpu);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 695071dd1e9c..534c590e8852 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1166,8 +1166,9 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
                 return;
         if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
                 return;
-        for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
-                if ((mask & 0x1) && cpu != outgoingcpu)
+        for_each_leaf_node_possible_cpu(rnp, cpu)
+                if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
+                    cpu != outgoingcpu)
                         cpumask_set_cpu(cpu, cm);
         if (cpumask_weight(cm) == 0)
                 cpumask_setall(cm);
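One design note on the tree_plugin.h hunk above: once the iteration can
skip !possible CPUs, a mask shifted once per loop iteration no longer
tracks the CPU actually being visited, whereas leaf_node_cpu_bit()
recomputes the bit from the CPU ID and cannot drift. A toy demonstration
(illustrative only, hypothetical sparse numbering):

#include <stdio.h>

int main(void)
{
        int grplo = 0;
        int possible[] = { 0, 1, 4, 5 }; /* CPUs 2 and 3 are holes */
        unsigned long mask = 1;
        unsigned int i;

        for (i = 0; i < sizeof(possible) / sizeof(possible[0]);
             i++, mask <<= 1) {
                int cpu = possible[i];
                /* What leaf_node_cpu_bit(rnp, cpu) would compute. */
                unsigned long bit = 1UL << (cpu - grplo);

                printf("cpu %d: shifted mask %#lx, recomputed bit %#lx%s\n",
                       cpu, mask, bit, mask == bit ? "" : " <-- drift");
        }
        return 0;
}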