Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r-- | kernel/rcu/tree.c | 267 |
1 file changed, 162 insertions, 105 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 9180158756d2..3b084dbfb4bc 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1,27 +1,14 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0+ | ||
1 | /* | 2 | /* |
2 | * Read-Copy Update mechanism for mutual exclusion | 3 | * Read-Copy Update mechanism for mutual exclusion |
3 | * | 4 | * |
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, you can access it online at | ||
16 | * http://www.gnu.org/licenses/gpl-2.0.html. | ||
17 | * | ||
18 | * Copyright IBM Corporation, 2008 | 5 | * Copyright IBM Corporation, 2008 |
19 | * | 6 | * |
20 | * Authors: Dipankar Sarma <dipankar@in.ibm.com> | 7 | * Authors: Dipankar Sarma <dipankar@in.ibm.com> |
21 | * Manfred Spraul <manfred@colorfullife.com> | 8 | * Manfred Spraul <manfred@colorfullife.com> |
22 | * Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version | 9 | * Paul E. McKenney <paulmck@linux.ibm.com> Hierarchical version |
23 | * | 10 | * |
24 | * Based on the original work by Paul McKenney <paulmck@us.ibm.com> | 11 | * Based on the original work by Paul McKenney <paulmck@linux.ibm.com> |
25 | * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. | 12 | * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. |
26 | * | 13 | * |
27 | * For detailed explanation of Read-Copy Update mechanism see - | 14 | * For detailed explanation of Read-Copy Update mechanism see - |
@@ -62,6 +49,7 @@ | |||
62 | #include <linux/suspend.h> | 49 | #include <linux/suspend.h> |
63 | #include <linux/ftrace.h> | 50 | #include <linux/ftrace.h> |
64 | #include <linux/tick.h> | 51 | #include <linux/tick.h> |
52 | #include <linux/sysrq.h> | ||
65 | 53 | ||
66 | #include "tree.h" | 54 | #include "tree.h" |
67 | #include "rcu.h" | 55 | #include "rcu.h" |
@@ -115,6 +103,9 @@ int num_rcu_lvl[] = NUM_RCU_LVL_INIT; | |||
115 | int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ | 103 | int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ |
116 | /* panic() on RCU Stall sysctl. */ | 104 | /* panic() on RCU Stall sysctl. */ |
117 | int sysctl_panic_on_rcu_stall __read_mostly; | 105 | int sysctl_panic_on_rcu_stall __read_mostly; |
106 | /* Commandeer a sysrq key to dump RCU's tree. */ | ||
107 | static bool sysrq_rcu; | ||
108 | module_param(sysrq_rcu, bool, 0444); | ||
118 | 109 | ||
119 | /* | 110 | /* |
120 | * The rcu_scheduler_active variable is initialized to the value | 111 | * The rcu_scheduler_active variable is initialized to the value |
@@ -479,7 +470,6 @@ module_param_cb(jiffies_till_next_fqs, &next_fqs_jiffies_ops, &jiffies_till_next | |||
479 | module_param(rcu_kick_kthreads, bool, 0644); | 470 | module_param(rcu_kick_kthreads, bool, 0644); |
480 | 471 | ||
481 | static void force_qs_rnp(int (*f)(struct rcu_data *rdp)); | 472 | static void force_qs_rnp(int (*f)(struct rcu_data *rdp)); |
482 | static void force_quiescent_state(void); | ||
483 | static int rcu_pending(void); | 473 | static int rcu_pending(void); |
484 | 474 | ||
485 | /* | 475 | /* |
@@ -504,13 +494,12 @@ unsigned long rcu_exp_batches_completed(void) | |||
504 | EXPORT_SYMBOL_GPL(rcu_exp_batches_completed); | 494 | EXPORT_SYMBOL_GPL(rcu_exp_batches_completed); |
505 | 495 | ||
506 | /* | 496 | /* |
507 | * Force a quiescent state. | 497 | * Return the root node of the rcu_state structure. |
508 | */ | 498 | */ |
509 | void rcu_force_quiescent_state(void) | 499 | static struct rcu_node *rcu_get_root(void) |
510 | { | 500 | { |
511 | force_quiescent_state(); | 501 | return &rcu_state.node[0]; |
512 | } | 502 | } |
513 | EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); | ||
514 | 503 | ||
515 | /* | 504 | /* |
516 | * Convert a ->gp_state value to a character string. | 505 | * Convert a ->gp_state value to a character string. |
@@ -529,19 +518,30 @@ void show_rcu_gp_kthreads(void) | |||
529 | { | 518 | { |
530 | int cpu; | 519 | int cpu; |
531 | unsigned long j; | 520 | unsigned long j; |
521 | unsigned long ja; | ||
522 | unsigned long jr; | ||
523 | unsigned long jw; | ||
532 | struct rcu_data *rdp; | 524 | struct rcu_data *rdp; |
533 | struct rcu_node *rnp; | 525 | struct rcu_node *rnp; |
534 | 526 | ||
535 | j = jiffies - READ_ONCE(rcu_state.gp_activity); | 527 | j = jiffies; |
536 | pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %ld\n", | 528 | ja = j - READ_ONCE(rcu_state.gp_activity); |
529 | jr = j - READ_ONCE(rcu_state.gp_req_activity); | ||
530 | jw = j - READ_ONCE(rcu_state.gp_wake_time); | ||
531 | pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n", | ||
537 | rcu_state.name, gp_state_getname(rcu_state.gp_state), | 532 | rcu_state.name, gp_state_getname(rcu_state.gp_state), |
538 | rcu_state.gp_state, rcu_state.gp_kthread->state, j); | 533 | rcu_state.gp_state, |
534 | rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL, | ||
535 | ja, jr, jw, (long)READ_ONCE(rcu_state.gp_wake_seq), | ||
536 | (long)READ_ONCE(rcu_state.gp_seq), | ||
537 | (long)READ_ONCE(rcu_get_root()->gp_seq_needed), | ||
538 | READ_ONCE(rcu_state.gp_flags)); | ||
539 | rcu_for_each_node_breadth_first(rnp) { | 539 | rcu_for_each_node_breadth_first(rnp) { |
540 | if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed)) | 540 | if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed)) |
541 | continue; | 541 | continue; |
542 | pr_info("\trcu_node %d:%d ->gp_seq %lu ->gp_seq_needed %lu\n", | 542 | pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld\n", |
543 | rnp->grplo, rnp->grphi, rnp->gp_seq, | 543 | rnp->grplo, rnp->grphi, (long)rnp->gp_seq, |
544 | rnp->gp_seq_needed); | 544 | (long)rnp->gp_seq_needed); |
545 | if (!rcu_is_leaf_node(rnp)) | 545 | if (!rcu_is_leaf_node(rnp)) |
546 | continue; | 546 | continue; |
547 | for_each_leaf_node_possible_cpu(rnp, cpu) { | 547 | for_each_leaf_node_possible_cpu(rnp, cpu) { |
@@ -550,14 +550,35 @@ void show_rcu_gp_kthreads(void) | |||
550 | ULONG_CMP_GE(rcu_state.gp_seq, | 550 | ULONG_CMP_GE(rcu_state.gp_seq, |
551 | rdp->gp_seq_needed)) | 551 | rdp->gp_seq_needed)) |
552 | continue; | 552 | continue; |
553 | pr_info("\tcpu %d ->gp_seq_needed %lu\n", | 553 | pr_info("\tcpu %d ->gp_seq_needed %ld\n", |
554 | cpu, rdp->gp_seq_needed); | 554 | cpu, (long)rdp->gp_seq_needed); |
555 | } | 555 | } |
556 | } | 556 | } |
557 | /* sched_show_task(rcu_state.gp_kthread); */ | 557 | /* sched_show_task(rcu_state.gp_kthread); */ |
558 | } | 558 | } |
559 | EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads); | 559 | EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads); |
560 | 560 | ||
561 | /* Dump grace-period-request information due to commandeered sysrq. */ | ||
562 | static void sysrq_show_rcu(int key) | ||
563 | { | ||
564 | show_rcu_gp_kthreads(); | ||
565 | } | ||
566 | |||
567 | static struct sysrq_key_op sysrq_rcudump_op = { | ||
568 | .handler = sysrq_show_rcu, | ||
569 | .help_msg = "show-rcu(y)", | ||
570 | .action_msg = "Show RCU tree", | ||
571 | .enable_mask = SYSRQ_ENABLE_DUMP, | ||
572 | }; | ||
573 | |||
574 | static int __init rcu_sysrq_init(void) | ||
575 | { | ||
576 | if (sysrq_rcu) | ||
577 | return register_sysrq_key('y', &sysrq_rcudump_op); | ||
578 | return 0; | ||
579 | } | ||
580 | early_initcall(rcu_sysrq_init); | ||
581 | |||
561 | /* | 582 | /* |
562 | * Send along grace-period-related data for rcutorture diagnostics. | 583 | * Send along grace-period-related data for rcutorture diagnostics. |
563 | */ | 584 | */ |
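With this hunk in place, the show_rcu_gp_kthreads() dump can be requested at runtime: boot with rcutree.sysrq_rcu=1 so that rcu_sysrq_init() registers the handler, then press Alt-SysRq-y or write 'y' to /proc/sysrq-trigger (the key is gated by SYSRQ_ENABLE_DUMP in the sysrq mask). For reference, a hypothetical driver wanting its own dump key would follow the same pattern; in the sketch below only register_sysrq_key(), early_initcall(), and struct sysrq_key_op come from the kernel, everything named "foo" is made up, and the key 'x' is purely illustrative.

#include <linux/init.h>
#include <linux/printk.h>
#include <linux/sysrq.h>

/* Hypothetical handler: dump some subsystem state when the key is pressed. */
static void sysrq_show_foo(int key)
{
	pr_info("foo: state dump requested via sysrq '%c'\n", key);
}

static struct sysrq_key_op sysrq_foo_op = {
	.handler	= sysrq_show_foo,
	.help_msg	= "show-foo(x)",
	.action_msg	= "Show foo state",
	.enable_mask	= SYSRQ_ENABLE_DUMP,
};

static int __init foo_sysrq_init(void)
{
	/* 'x' is illustrative only; a real patch must pick an unclaimed key. */
	return register_sysrq_key('x', &sysrq_foo_op);
}
early_initcall(foo_sysrq_init);
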
@@ -566,8 +587,6 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, | |||
566 | { | 587 | { |
567 | switch (test_type) { | 588 | switch (test_type) { |
568 | case RCU_FLAVOR: | 589 | case RCU_FLAVOR: |
569 | case RCU_BH_FLAVOR: | ||
570 | case RCU_SCHED_FLAVOR: | ||
571 | *flags = READ_ONCE(rcu_state.gp_flags); | 590 | *flags = READ_ONCE(rcu_state.gp_flags); |
572 | *gp_seq = rcu_seq_current(&rcu_state.gp_seq); | 591 | *gp_seq = rcu_seq_current(&rcu_state.gp_seq); |
573 | break; | 592 | break; |
@@ -578,14 +597,6 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, | |||
578 | EXPORT_SYMBOL_GPL(rcutorture_get_gp_data); | 597 | EXPORT_SYMBOL_GPL(rcutorture_get_gp_data); |
579 | 598 | ||
580 | /* | 599 | /* |
581 | * Return the root node of the rcu_state structure. | ||
582 | */ | ||
583 | static struct rcu_node *rcu_get_root(void) | ||
584 | { | ||
585 | return &rcu_state.node[0]; | ||
586 | } | ||
587 | |||
588 | /* | ||
589 | * Enter an RCU extended quiescent state, which can be either the | 600 | * Enter an RCU extended quiescent state, which can be either the |
590 | * idle loop or adaptive-tickless usermode execution. | 601 | * idle loop or adaptive-tickless usermode execution. |
591 | * | 602 | * |
@@ -701,7 +712,6 @@ static __always_inline void rcu_nmi_exit_common(bool irq) | |||
701 | 712 | ||
702 | /** | 713 | /** |
703 | * rcu_nmi_exit - inform RCU of exit from NMI context | 714 | * rcu_nmi_exit - inform RCU of exit from NMI context |
704 | * @irq: Is this call from rcu_irq_exit? | ||
705 | * | 715 | * |
706 | * If you add or remove a call to rcu_nmi_exit(), be sure to test | 716 | * If you add or remove a call to rcu_nmi_exit(), be sure to test |
707 | * with CONFIG_RCU_EQS_DEBUG=y. | 717 | * with CONFIG_RCU_EQS_DEBUG=y. |
@@ -1115,7 +1125,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | |||
1115 | } | 1125 | } |
1116 | 1126 | ||
1117 | /* | 1127 | /* |
1118 | * NO_HZ_FULL CPUs can run in-kernel without rcu_check_callbacks! | 1128 | * NO_HZ_FULL CPUs can run in-kernel without rcu_sched_clock_irq! |
1119 | * The above code handles this, but only for straight cond_resched(). | 1129 | * The above code handles this, but only for straight cond_resched(). |
1120 | * And some in-kernel loops check need_resched() before calling | 1130 | * And some in-kernel loops check need_resched() before calling |
1121 | * cond_resched(), which defeats the above code for CPUs that are | 1131 | * cond_resched(), which defeats the above code for CPUs that are |
@@ -1181,7 +1191,7 @@ static void rcu_check_gp_kthread_starvation(void) | |||
1181 | pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n", | 1191 | pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n", |
1182 | rcu_state.name, j, | 1192 | rcu_state.name, j, |
1183 | (long)rcu_seq_current(&rcu_state.gp_seq), | 1193 | (long)rcu_seq_current(&rcu_state.gp_seq), |
1184 | rcu_state.gp_flags, | 1194 | READ_ONCE(rcu_state.gp_flags), |
1185 | gp_state_getname(rcu_state.gp_state), rcu_state.gp_state, | 1195 | gp_state_getname(rcu_state.gp_state), rcu_state.gp_state, |
1186 | gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1); | 1196 | gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1); |
1187 | if (gpk) { | 1197 | if (gpk) { |
@@ -1310,7 +1320,7 @@ static void print_other_cpu_stall(unsigned long gp_seq) | |||
1310 | 1320 | ||
1311 | panic_on_rcu_stall(); | 1321 | panic_on_rcu_stall(); |
1312 | 1322 | ||
1313 | force_quiescent_state(); /* Kick them all. */ | 1323 | rcu_force_quiescent_state(); /* Kick them all. */ |
1314 | } | 1324 | } |
1315 | 1325 | ||
1316 | static void print_cpu_stall(void) | 1326 | static void print_cpu_stall(void) |
@@ -1557,17 +1567,28 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp) | |||
1557 | } | 1567 | } |
1558 | 1568 | ||
1559 | /* | 1569 | /* |
1560 | * Awaken the grace-period kthread. Don't do a self-awaken, and don't | 1570 | * Awaken the grace-period kthread. Don't do a self-awaken (unless in |
1561 | * bother awakening when there is nothing for the grace-period kthread | 1571 | * an interrupt or softirq handler), and don't bother awakening when there |
1562 | * to do (as in several CPUs raced to awaken, and we lost), and finally | 1572 | * is nothing for the grace-period kthread to do (as in several CPUs raced |
1563 | * don't try to awaken a kthread that has not yet been created. | 1573 | * to awaken, and we lost), and finally don't try to awaken a kthread that |
1574 | * has not yet been created. If all those checks are passed, track some | ||
1575 | * debug information and awaken. | ||
1576 | * | ||
1577 | * So why do the self-wakeup when in an interrupt or softirq handler | ||
1578 | * in the grace-period kthread's context? Because the kthread might have | ||
1579 | * been interrupted just as it was going to sleep, and just after the final | ||
1580 | * pre-sleep check of the awaken condition. In this case, a wakeup really | ||
1581 | * is required, and is therefore supplied. | ||
1564 | */ | 1582 | */ |
1565 | static void rcu_gp_kthread_wake(void) | 1583 | static void rcu_gp_kthread_wake(void) |
1566 | { | 1584 | { |
1567 | if (current == rcu_state.gp_kthread || | 1585 | if ((current == rcu_state.gp_kthread && |
1586 | !in_interrupt() && !in_serving_softirq()) || | ||
1568 | !READ_ONCE(rcu_state.gp_flags) || | 1587 | !READ_ONCE(rcu_state.gp_flags) || |
1569 | !rcu_state.gp_kthread) | 1588 | !rcu_state.gp_kthread) |
1570 | return; | 1589 | return; |
1590 | WRITE_ONCE(rcu_state.gp_wake_time, jiffies); | ||
1591 | WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq)); | ||
1571 | swake_up_one(&rcu_state.gp_wq); | 1592 | swake_up_one(&rcu_state.gp_wq); |
1572 | } | 1593 | } |
1573 | 1594 | ||
@@ -1711,7 +1732,7 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp) | |||
1711 | zero_cpu_stall_ticks(rdp); | 1732 | zero_cpu_stall_ticks(rdp); |
1712 | } | 1733 | } |
1713 | rdp->gp_seq = rnp->gp_seq; /* Remember new grace-period state. */ | 1734 | rdp->gp_seq = rnp->gp_seq; /* Remember new grace-period state. */ |
1714 | if (ULONG_CMP_GE(rnp->gp_seq_needed, rdp->gp_seq_needed) || rdp->gpwrap) | 1735 | if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap) |
1715 | rdp->gp_seq_needed = rnp->gp_seq_needed; | 1736 | rdp->gp_seq_needed = rnp->gp_seq_needed; |
1716 | WRITE_ONCE(rdp->gpwrap, false); | 1737 | WRITE_ONCE(rdp->gpwrap, false); |
1717 | rcu_gpnum_ovf(rnp, rdp); | 1738 | rcu_gpnum_ovf(rnp, rdp); |
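The one-line change to __note_gp_changes() above rewrites the update condition from ULONG_CMP_GE(rnp->gp_seq_needed, rdp->gp_seq_needed) to ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed), that is, "copy the rcu_node's value only when this CPU's value is strictly behind it"; the two forms differ only when the values are equal, so the new form also avoids a pointless store in that case. Both macros compare grace-period sequence numbers in a wraparound-safe way. A short sketch, with the definitions paraphrased from kernel/rcu/rcu.h and a worked example in the comment:

#include <linux/kernel.h>	/* ULONG_MAX */

/* Wraparound-safe sequence comparisons, paraphrased from kernel/rcu/rcu.h. */
#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))	/* "a is at or after b" */
#define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))	/* "a is strictly behind b" */

/*
 * Worked example: suppose the counter has wrapped, so b == 2 while
 * a == ULONG_MAX - 1.  Then (a) - (b) == ULONG_MAX - 3, which exceeds
 * ULONG_MAX / 2, so ULONG_CMP_LT(a, b) is true: a is treated as older
 * than b even though it is numerically larger.
 */
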
@@ -1939,7 +1960,7 @@ static void rcu_gp_fqs_loop(void) | |||
1939 | if (!ret) { | 1960 | if (!ret) { |
1940 | rcu_state.jiffies_force_qs = jiffies + j; | 1961 | rcu_state.jiffies_force_qs = jiffies + j; |
1941 | WRITE_ONCE(rcu_state.jiffies_kick_kthreads, | 1962 | WRITE_ONCE(rcu_state.jiffies_kick_kthreads, |
1942 | jiffies + 3 * j); | 1963 | jiffies + (j ? 3 * j : 2)); |
1943 | } | 1964 | } |
1944 | trace_rcu_grace_period(rcu_state.name, | 1965 | trace_rcu_grace_period(rcu_state.name, |
1945 | READ_ONCE(rcu_state.gp_seq), | 1966 | READ_ONCE(rcu_state.gp_seq), |
@@ -2497,14 +2518,14 @@ static void rcu_do_batch(struct rcu_data *rdp) | |||
2497 | } | 2518 | } |
2498 | 2519 | ||
2499 | /* | 2520 | /* |
2500 | * Check to see if this CPU is in a non-context-switch quiescent state | 2521 | * This function is invoked from each scheduling-clock interrupt, |
2501 | * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). | 2522 | * and checks to see if this CPU is in a non-context-switch quiescent |
2502 | * Also schedule RCU core processing. | 2523 | * state, for example, user mode or idle loop. It also schedules RCU |
2503 | * | 2524 | * core processing. If the current grace period has gone on too long, |
2504 | * This function must be called from hardirq context. It is normally | 2525 | * it will ask the scheduler to manufacture a context switch for the sole |
2505 | * invoked from the scheduling-clock interrupt. | 2526 | purpose of providing the needed quiescent state. |
2506 | */ | 2527 | */ |
2507 | void rcu_check_callbacks(int user) | 2528 | void rcu_sched_clock_irq(int user) |
2508 | { | 2529 | { |
2509 | trace_rcu_utilization(TPS("Start scheduler-tick")); | 2530 | trace_rcu_utilization(TPS("Start scheduler-tick")); |
2510 | raw_cpu_inc(rcu_data.ticks_this_gp); | 2531 | raw_cpu_inc(rcu_data.ticks_this_gp); |
@@ -2517,7 +2538,7 @@ void rcu_check_callbacks(int user) | |||
2517 | } | 2538 | } |
2518 | __this_cpu_write(rcu_data.rcu_urgent_qs, false); | 2539 | __this_cpu_write(rcu_data.rcu_urgent_qs, false); |
2519 | } | 2540 | } |
2520 | rcu_flavor_check_callbacks(user); | 2541 | rcu_flavor_sched_clock_irq(user); |
2521 | if (rcu_pending()) | 2542 | if (rcu_pending()) |
2522 | invoke_rcu_core(); | 2543 | invoke_rcu_core(); |
2523 | 2544 | ||
@@ -2578,7 +2599,7 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp)) | |||
2578 | * Force quiescent states on reluctant CPUs, and also detect which | 2599 | * Force quiescent states on reluctant CPUs, and also detect which |
2579 | * CPUs are in dyntick-idle mode. | 2600 | * CPUs are in dyntick-idle mode. |
2580 | */ | 2601 | */ |
2581 | static void force_quiescent_state(void) | 2602 | void rcu_force_quiescent_state(void) |
2582 | { | 2603 | { |
2583 | unsigned long flags; | 2604 | unsigned long flags; |
2584 | bool ret; | 2605 | bool ret; |
@@ -2610,6 +2631,7 @@ static void force_quiescent_state(void) | |||
2610 | raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags); | 2631 | raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags); |
2611 | rcu_gp_kthread_wake(); | 2632 | rcu_gp_kthread_wake(); |
2612 | } | 2633 | } |
2634 | EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); | ||
2613 | 2635 | ||
2614 | /* | 2636 | /* |
2615 | * This function checks for grace-period requests that fail to motivate | 2637 | * This function checks for grace-period requests that fail to motivate |
@@ -2657,16 +2679,11 @@ rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp, | |||
2657 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | 2679 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
2658 | return; | 2680 | return; |
2659 | } | 2681 | } |
2660 | pr_alert("%s: g%ld->%ld gar:%lu ga:%lu f%#x gs:%d %s->state:%#lx\n", | ||
2661 | __func__, (long)READ_ONCE(rcu_state.gp_seq), | ||
2662 | (long)READ_ONCE(rnp_root->gp_seq_needed), | ||
2663 | j - rcu_state.gp_req_activity, j - rcu_state.gp_activity, | ||
2664 | rcu_state.gp_flags, rcu_state.gp_state, rcu_state.name, | ||
2665 | rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL); | ||
2666 | WARN_ON(1); | 2682 | WARN_ON(1); |
2667 | if (rnp_root != rnp) | 2683 | if (rnp_root != rnp) |
2668 | raw_spin_unlock_rcu_node(rnp_root); | 2684 | raw_spin_unlock_rcu_node(rnp_root); |
2669 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | 2685 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
2686 | show_rcu_gp_kthreads(); | ||
2670 | } | 2687 | } |
2671 | 2688 | ||
2672 | /* | 2689 | /* |
@@ -2711,12 +2728,8 @@ void rcu_fwd_progress_check(unsigned long j) | |||
2711 | } | 2728 | } |
2712 | EXPORT_SYMBOL_GPL(rcu_fwd_progress_check); | 2729 | EXPORT_SYMBOL_GPL(rcu_fwd_progress_check); |
2713 | 2730 | ||
2714 | /* | 2731 | /* Perform RCU core processing work for the current CPU. */ |
2715 | * This does the RCU core processing work for the specified rcu_data | 2732 | static __latent_entropy void rcu_core(struct softirq_action *unused) |
2716 | * structures. This may be called only from the CPU to whom the rdp | ||
2717 | * belongs. | ||
2718 | */ | ||
2719 | static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused) | ||
2720 | { | 2733 | { |
2721 | unsigned long flags; | 2734 | unsigned long flags; |
2722 | struct rcu_data *rdp = raw_cpu_ptr(&rcu_data); | 2735 | struct rcu_data *rdp = raw_cpu_ptr(&rcu_data); |
@@ -2801,9 +2814,9 @@ static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head, | |||
2801 | 2814 | ||
2802 | /* | 2815 | /* |
2803 | * Force the grace period if too many callbacks or too long waiting. | 2816 | * Force the grace period if too many callbacks or too long waiting. |
2804 | * Enforce hysteresis, and don't invoke force_quiescent_state() | 2817 | * Enforce hysteresis, and don't invoke rcu_force_quiescent_state() |
2805 | * if some other CPU has recently done so. Also, don't bother | 2818 | * if some other CPU has recently done so. Also, don't bother |
2806 | * invoking force_quiescent_state() if the newly enqueued callback | 2819 | * invoking rcu_force_quiescent_state() if the newly enqueued callback |
2807 | * is the only one waiting for a grace period to complete. | 2820 | * is the only one waiting for a grace period to complete. |
2808 | */ | 2821 | */ |
2809 | if (unlikely(rcu_segcblist_n_cbs(&rdp->cblist) > | 2822 | if (unlikely(rcu_segcblist_n_cbs(&rdp->cblist) > |
@@ -2820,7 +2833,7 @@ static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head, | |||
2820 | rdp->blimit = LONG_MAX; | 2833 | rdp->blimit = LONG_MAX; |
2821 | if (rcu_state.n_force_qs == rdp->n_force_qs_snap && | 2834 | if (rcu_state.n_force_qs == rdp->n_force_qs_snap && |
2822 | rcu_segcblist_first_pend_cb(&rdp->cblist) != head) | 2835 | rcu_segcblist_first_pend_cb(&rdp->cblist) != head) |
2823 | force_quiescent_state(); | 2836 | rcu_force_quiescent_state(); |
2824 | rdp->n_force_qs_snap = rcu_state.n_force_qs; | 2837 | rdp->n_force_qs_snap = rcu_state.n_force_qs; |
2825 | rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist); | 2838 | rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist); |
2826 | } | 2839 | } |
@@ -2889,9 +2902,6 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, int cpu, bool lazy) | |||
2889 | rcu_segcblist_init(&rdp->cblist); | 2902 | rcu_segcblist_init(&rdp->cblist); |
2890 | } | 2903 | } |
2891 | rcu_segcblist_enqueue(&rdp->cblist, head, lazy); | 2904 | rcu_segcblist_enqueue(&rdp->cblist, head, lazy); |
2892 | if (!lazy) | ||
2893 | rcu_idle_count_callbacks_posted(); | ||
2894 | |||
2895 | if (__is_kfree_rcu_offset((unsigned long)func)) | 2905 | if (__is_kfree_rcu_offset((unsigned long)func)) |
2896 | trace_rcu_kfree_callback(rcu_state.name, head, | 2906 | trace_rcu_kfree_callback(rcu_state.name, head, |
2897 | (unsigned long)func, | 2907 | (unsigned long)func, |
@@ -2961,6 +2971,79 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func) | |||
2961 | } | 2971 | } |
2962 | EXPORT_SYMBOL_GPL(kfree_call_rcu); | 2972 | EXPORT_SYMBOL_GPL(kfree_call_rcu); |
2963 | 2973 | ||
2974 | /* | ||
2975 | * During early boot, any blocking grace-period wait automatically | ||
2976 | * implies a grace period. Later on, this is never the case for PREEMPT. | ||
2977 | * | ||
2978 | * However, because a context switch is a grace period for !PREEMPT, any | ||
2979 | * blocking grace-period wait automatically implies a grace period if | ||
2980 | * there is only one CPU online at any point in time during execution of | ||
2981 | * either synchronize_rcu() or synchronize_rcu_expedited(). It is OK to | ||
2982 | * occasionally incorrectly indicate that there are multiple CPUs online | ||
2983 | * when there was in fact only one the whole time, as this just adds some | ||
2984 | * overhead: RCU still operates correctly. | ||
2985 | */ | ||
2986 | static int rcu_blocking_is_gp(void) | ||
2987 | { | ||
2988 | int ret; | ||
2989 | |||
2990 | if (IS_ENABLED(CONFIG_PREEMPT)) | ||
2991 | return rcu_scheduler_active == RCU_SCHEDULER_INACTIVE; | ||
2992 | might_sleep(); /* Check for RCU read-side critical section. */ | ||
2993 | preempt_disable(); | ||
2994 | ret = num_online_cpus() <= 1; | ||
2995 | preempt_enable(); | ||
2996 | return ret; | ||
2997 | } | ||
2998 | |||
2999 | /** | ||
3000 | * synchronize_rcu - wait until a grace period has elapsed. | ||
3001 | * | ||
3002 | * Control will return to the caller some time after a full grace | ||
3003 | * period has elapsed, in other words after all currently executing RCU | ||
3004 | * read-side critical sections have completed. Note, however, that | ||
3005 | * upon return from synchronize_rcu(), the caller might well be executing | ||
3006 | * concurrently with new RCU read-side critical sections that began while | ||
3007 | * synchronize_rcu() was waiting. RCU read-side critical sections are | ||
3008 | * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. | ||
3009 | * In addition, regions of code across which interrupts, preemption, or | ||
3010 | * softirqs have been disabled also serve as RCU read-side critical | ||
3011 | * sections. This includes hardware interrupt handlers, softirq handlers, | ||
3012 | * and NMI handlers. | ||
3013 | * | ||
3014 | * Note that this guarantee implies further memory-ordering guarantees. | ||
3015 | * On systems with more than one CPU, when synchronize_rcu() returns, | ||
3016 | * each CPU is guaranteed to have executed a full memory barrier since | ||
3017 | * the end of its last RCU read-side critical section whose beginning | ||
3018 | * preceded the call to synchronize_rcu(). In addition, each CPU having | ||
3019 | * an RCU read-side critical section that extends beyond the return from | ||
3020 | * synchronize_rcu() is guaranteed to have executed a full memory barrier | ||
3021 | * after the beginning of synchronize_rcu() and before the beginning of | ||
3022 | * that RCU read-side critical section. Note that these guarantees include | ||
3023 | * CPUs that are offline, idle, or executing in user mode, as well as CPUs | ||
3024 | * that are executing in the kernel. | ||
3025 | * | ||
3026 | * Furthermore, if CPU A invoked synchronize_rcu(), which returned | ||
3027 | * to its caller on CPU B, then both CPU A and CPU B are guaranteed | ||
3028 | * to have executed a full memory barrier during the execution of | ||
3029 | * synchronize_rcu() -- even if CPU A and CPU B are the same CPU (but | ||
3030 | * again only if the system has more than one CPU). | ||
3031 | */ | ||
3032 | void synchronize_rcu(void) | ||
3033 | { | ||
3034 | RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) || | ||
3035 | lock_is_held(&rcu_lock_map) || | ||
3036 | lock_is_held(&rcu_sched_lock_map), | ||
3037 | "Illegal synchronize_rcu() in RCU read-side critical section"); | ||
3038 | if (rcu_blocking_is_gp()) | ||
3039 | return; | ||
3040 | if (rcu_gp_is_expedited()) | ||
3041 | synchronize_rcu_expedited(); | ||
3042 | else | ||
3043 | wait_rcu_gp(call_rcu); | ||
3044 | } | ||
3045 | EXPORT_SYMBOL_GPL(synchronize_rcu); | ||
3046 | |||
2964 | /** | 3047 | /** |
2965 | * get_state_synchronize_rcu - Snapshot current RCU state | 3048 | * get_state_synchronize_rcu - Snapshot current RCU state |
2966 | * | 3049 | * |
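The consolidated synchronize_rcu() added above is the updater-side primitive whose ordering guarantees the kernel-doc spells out. As a usage reminder, a minimal publish/retire pattern built on it might look like the sketch below; struct foo, global_foo, and the function names are hypothetical, and it assumes updaters are serialized by the caller (for example under a mutex that is not shown).

#include <linux/errno.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int a;
};

static struct foo __rcu *global_foo;	/* hypothetical RCU-protected pointer */

/* Reader: may run concurrently with an update; never blocks the updater. */
static int read_foo_a(void)
{
	struct foo *p;
	int a = -1;

	rcu_read_lock();
	p = rcu_dereference(global_foo);
	if (p)
		a = p->a;
	rcu_read_unlock();
	return a;
}

/* Updater: publish a new version, wait one grace period, then free the old. */
static int update_foo_a(int a)
{
	struct foo *newp, *oldp;

	newp = kmalloc(sizeof(*newp), GFP_KERNEL);
	if (!newp)
		return -ENOMEM;
	newp->a = a;

	oldp = rcu_dereference_protected(global_foo, 1);	/* updates serialized by caller */
	rcu_assign_pointer(global_foo, newp);
	synchronize_rcu();	/* all readers that might still see oldp have finished */
	kfree(oldp);
	return 0;
}

Readers that started before the rcu_assign_pointer() may still be using oldp, which is exactly what the synchronize_rcu() call waits out; readers that start afterward can only observe newp, so the kfree() is safe.
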
@@ -3049,28 +3132,6 @@ static int rcu_pending(void) | |||
3049 | } | 3132 | } |
3050 | 3133 | ||
3051 | /* | 3134 | /* |
3052 | * Return true if the specified CPU has any callback. If all_lazy is | ||
3053 | * non-NULL, store an indication of whether all callbacks are lazy. | ||
3054 | * (If there are no callbacks, all of them are deemed to be lazy.) | ||
3055 | */ | ||
3056 | static bool rcu_cpu_has_callbacks(bool *all_lazy) | ||
3057 | { | ||
3058 | bool al = true; | ||
3059 | bool hc = false; | ||
3060 | struct rcu_data *rdp; | ||
3061 | |||
3062 | rdp = this_cpu_ptr(&rcu_data); | ||
3063 | if (!rcu_segcblist_empty(&rdp->cblist)) { | ||
3064 | hc = true; | ||
3065 | if (rcu_segcblist_n_nonlazy_cbs(&rdp->cblist)) | ||
3066 | al = false; | ||
3067 | } | ||
3068 | if (all_lazy) | ||
3069 | *all_lazy = al; | ||
3070 | return hc; | ||
3071 | } | ||
3072 | |||
3073 | /* | ||
3074 | * Helper function for rcu_barrier() tracing. If tracing is disabled, | 3135 | * Helper function for rcu_barrier() tracing. If tracing is disabled, |
3075 | * the compiler is expected to optimize this away. | 3136 | * the compiler is expected to optimize this away. |
3076 | */ | 3137 | */ |
@@ -3299,7 +3360,7 @@ int rcutree_prepare_cpu(unsigned int cpu) | |||
3299 | trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl")); | 3360 | trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl")); |
3300 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | 3361 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
3301 | rcu_prepare_kthreads(cpu); | 3362 | rcu_prepare_kthreads(cpu); |
3302 | rcu_spawn_all_nocb_kthreads(cpu); | 3363 | rcu_spawn_cpu_nocb_kthread(cpu); |
3303 | 3364 | ||
3304 | return 0; | 3365 | return 0; |
3305 | } | 3366 | } |
@@ -3329,8 +3390,6 @@ int rcutree_online_cpu(unsigned int cpu) | |||
3329 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | 3390 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
3330 | rnp->ffmask |= rdp->grpmask; | 3391 | rnp->ffmask |= rdp->grpmask; |
3331 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | 3392 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
3332 | if (IS_ENABLED(CONFIG_TREE_SRCU)) | ||
3333 | srcu_online_cpu(cpu); | ||
3334 | if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) | 3393 | if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) |
3335 | return 0; /* Too early in boot for scheduler work. */ | 3394 | return 0; /* Too early in boot for scheduler work. */ |
3336 | sync_sched_exp_online_cleanup(cpu); | 3395 | sync_sched_exp_online_cleanup(cpu); |
@@ -3355,8 +3414,6 @@ int rcutree_offline_cpu(unsigned int cpu) | |||
3355 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | 3414 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
3356 | 3415 | ||
3357 | rcutree_affinity_setting(cpu, cpu); | 3416 | rcutree_affinity_setting(cpu, cpu); |
3358 | if (IS_ENABLED(CONFIG_TREE_SRCU)) | ||
3359 | srcu_offline_cpu(cpu); | ||
3360 | return 0; | 3417 | return 0; |
3361 | } | 3418 | } |
3362 | 3419 | ||
@@ -3777,7 +3834,7 @@ void __init rcu_init(void) | |||
3777 | rcu_init_one(); | 3834 | rcu_init_one(); |
3778 | if (dump_tree) | 3835 | if (dump_tree) |
3779 | rcu_dump_rcu_node_tree(); | 3836 | rcu_dump_rcu_node_tree(); |
3780 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | 3837 | open_softirq(RCU_SOFTIRQ, rcu_core); |
3781 | 3838 | ||
3782 | /* | 3839 | /* |
3783 | * We don't need protection against CPU-hotplug here because | 3840 | * We don't need protection against CPU-hotplug here because |