Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r--	kernel/rcu/tree.c	267
1 file changed, 162 insertions(+), 105 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 9180158756d2..3b084dbfb4bc 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1,27 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Read-Copy Update mechanism for mutual exclusion
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
  * Copyright IBM Corporation, 2008
  *
  * Authors: Dipankar Sarma <dipankar@in.ibm.com>
  *	    Manfred Spraul <manfred@colorfullife.com>
- *	    Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version
+ *	    Paul E. McKenney <paulmck@linux.ibm.com> Hierarchical version
  *
- * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
+ * Based on the original work by Paul McKenney <paulmck@linux.ibm.com>
  * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
  *
  * For detailed explanation of Read-Copy Update mechanism see -
@@ -62,6 +49,7 @@
 #include <linux/suspend.h>
 #include <linux/ftrace.h>
 #include <linux/tick.h>
+#include <linux/sysrq.h>
 
 #include "tree.h"
 #include "rcu.h"
@@ -115,6 +103,9 @@ int num_rcu_lvl[] = NUM_RCU_LVL_INIT;
 int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
 /* panic() on RCU Stall sysctl. */
 int sysctl_panic_on_rcu_stall __read_mostly;
+/* Commandeer a sysrq key to dump RCU's tree. */
+static bool sysrq_rcu;
+module_param(sysrq_rcu, bool, 0444);
 
 /*
  * The rcu_scheduler_active variable is initialized to the value
@@ -479,7 +470,6 @@ module_param_cb(jiffies_till_next_fqs, &next_fqs_jiffies_ops, &jiffies_till_next
 module_param(rcu_kick_kthreads, bool, 0644);
 
 static void force_qs_rnp(int (*f)(struct rcu_data *rdp));
-static void force_quiescent_state(void);
 static int rcu_pending(void);
 
 /*
@@ -504,13 +494,12 @@ unsigned long rcu_exp_batches_completed(void)
 EXPORT_SYMBOL_GPL(rcu_exp_batches_completed);
 
 /*
- * Force a quiescent state.
+ * Return the root node of the rcu_state structure.
  */
-void rcu_force_quiescent_state(void)
+static struct rcu_node *rcu_get_root(void)
 {
-	force_quiescent_state();
+	return &rcu_state.node[0];
 }
-EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 
 /*
  * Convert a ->gp_state value to a character string.
@@ -529,19 +518,30 @@ void show_rcu_gp_kthreads(void)
 {
 	int cpu;
 	unsigned long j;
+	unsigned long ja;
+	unsigned long jr;
+	unsigned long jw;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 
-	j = jiffies - READ_ONCE(rcu_state.gp_activity);
-	pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %ld\n",
+	j = jiffies;
+	ja = j - READ_ONCE(rcu_state.gp_activity);
+	jr = j - READ_ONCE(rcu_state.gp_req_activity);
+	jw = j - READ_ONCE(rcu_state.gp_wake_time);
+	pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n",
 		rcu_state.name, gp_state_getname(rcu_state.gp_state),
-		rcu_state.gp_state, rcu_state.gp_kthread->state, j);
+		rcu_state.gp_state,
+		rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL,
+		ja, jr, jw, (long)READ_ONCE(rcu_state.gp_wake_seq),
+		(long)READ_ONCE(rcu_state.gp_seq),
+		(long)READ_ONCE(rcu_get_root()->gp_seq_needed),
+		READ_ONCE(rcu_state.gp_flags));
 	rcu_for_each_node_breadth_first(rnp) {
 		if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed))
 			continue;
-		pr_info("\trcu_node %d:%d ->gp_seq %lu ->gp_seq_needed %lu\n",
-			rnp->grplo, rnp->grphi, rnp->gp_seq,
-			rnp->gp_seq_needed);
+		pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld\n",
+			rnp->grplo, rnp->grphi, (long)rnp->gp_seq,
+			(long)rnp->gp_seq_needed);
 		if (!rcu_is_leaf_node(rnp))
 			continue;
 		for_each_leaf_node_possible_cpu(rnp, cpu) {
@@ -550,14 +550,35 @@ void show_rcu_gp_kthreads(void)
 			    ULONG_CMP_GE(rcu_state.gp_seq,
 					 rdp->gp_seq_needed))
 				continue;
-			pr_info("\tcpu %d ->gp_seq_needed %lu\n",
-				cpu, rdp->gp_seq_needed);
+			pr_info("\tcpu %d ->gp_seq_needed %ld\n",
+				cpu, (long)rdp->gp_seq_needed);
 		}
 	}
 	/* sched_show_task(rcu_state.gp_kthread); */
 }
 EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
 
+/* Dump grace-period-request information due to commandeered sysrq. */
+static void sysrq_show_rcu(int key)
+{
+	show_rcu_gp_kthreads();
+}
+
+static struct sysrq_key_op sysrq_rcudump_op = {
+	.handler = sysrq_show_rcu,
+	.help_msg = "show-rcu(y)",
+	.action_msg = "Show RCU tree",
+	.enable_mask = SYSRQ_ENABLE_DUMP,
+};
+
+static int __init rcu_sysrq_init(void)
+{
+	if (sysrq_rcu)
+		return register_sysrq_key('y', &sysrq_rcudump_op);
+	return 0;
+}
+early_initcall(rcu_sysrq_init);
+
 /*
  * Send along grace-period-related data for rcutorture diagnostics.
  */
@@ -566,8 +587,6 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
 {
 	switch (test_type) {
 	case RCU_FLAVOR:
-	case RCU_BH_FLAVOR:
-	case RCU_SCHED_FLAVOR:
 		*flags = READ_ONCE(rcu_state.gp_flags);
 		*gp_seq = rcu_seq_current(&rcu_state.gp_seq);
 		break;
@@ -578,14 +597,6 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
 EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
 
 /*
- * Return the root node of the rcu_state structure.
- */
-static struct rcu_node *rcu_get_root(void)
-{
-	return &rcu_state.node[0];
-}
-
-/*
  * Enter an RCU extended quiescent state, which can be either the
  * idle loop or adaptive-tickless usermode execution.
  *
@@ -701,7 +712,6 @@ static __always_inline void rcu_nmi_exit_common(bool irq)
 
 /**
  * rcu_nmi_exit - inform RCU of exit from NMI context
- * @irq: Is this call from rcu_irq_exit?
  *
  * If you add or remove a call to rcu_nmi_exit(), be sure to test
  * with CONFIG_RCU_EQS_DEBUG=y.
@@ -1115,7 +1125,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	}
 
 	/*
-	 * NO_HZ_FULL CPUs can run in-kernel without rcu_check_callbacks!
+	 * NO_HZ_FULL CPUs can run in-kernel without rcu_sched_clock_irq!
 	 * The above code handles this, but only for straight cond_resched().
 	 * And some in-kernel loops check need_resched() before calling
 	 * cond_resched(), which defeats the above code for CPUs that are
@@ -1181,7 +1191,7 @@ static void rcu_check_gp_kthread_starvation(void)
 	pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
 	       rcu_state.name, j,
 	       (long)rcu_seq_current(&rcu_state.gp_seq),
-	       rcu_state.gp_flags,
+	       READ_ONCE(rcu_state.gp_flags),
 	       gp_state_getname(rcu_state.gp_state), rcu_state.gp_state,
 	       gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1);
 	if (gpk) {
@@ -1310,7 +1320,7 @@ static void print_other_cpu_stall(unsigned long gp_seq)
 
 	panic_on_rcu_stall();
 
-	force_quiescent_state(); /* Kick them all. */
+	rcu_force_quiescent_state(); /* Kick them all. */
 }
 
 static void print_cpu_stall(void)
@@ -1557,17 +1567,28 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp)
 }
 
 /*
- * Awaken the grace-period kthread. Don't do a self-awaken, and don't
- * bother awakening when there is nothing for the grace-period kthread
- * to do (as in several CPUs raced to awaken, and we lost), and finally
- * don't try to awaken a kthread that has not yet been created.
+ * Awaken the grace-period kthread. Don't do a self-awaken (unless in
+ * an interrupt or softirq handler), and don't bother awakening when there
+ * is nothing for the grace-period kthread to do (as in several CPUs raced
+ * to awaken, and we lost), and finally don't try to awaken a kthread that
+ * has not yet been created. If all those checks are passed, track some
+ * debug information and awaken.
+ *
+ * So why do the self-wakeup when in an interrupt or softirq handler
+ * in the grace-period kthread's context? Because the kthread might have
+ * been interrupted just as it was going to sleep, and just after the final
+ * pre-sleep check of the awaken condition. In this case, a wakeup really
+ * is required, and is therefore supplied.
  */
 static void rcu_gp_kthread_wake(void)
 {
-	if (current == rcu_state.gp_kthread ||
+	if ((current == rcu_state.gp_kthread &&
+	     !in_interrupt() && !in_serving_softirq()) ||
 	    !READ_ONCE(rcu_state.gp_flags) ||
 	    !rcu_state.gp_kthread)
 		return;
+	WRITE_ONCE(rcu_state.gp_wake_time, jiffies);
+	WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq));
 	swake_up_one(&rcu_state.gp_wq);
 }
 
@@ -1711,7 +1732,7 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
 		zero_cpu_stall_ticks(rdp);
 	}
 	rdp->gp_seq = rnp->gp_seq; /* Remember new grace-period state. */
-	if (ULONG_CMP_GE(rnp->gp_seq_needed, rdp->gp_seq_needed) || rdp->gpwrap)
+	if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap)
 		rdp->gp_seq_needed = rnp->gp_seq_needed;
 	WRITE_ONCE(rdp->gpwrap, false);
 	rcu_gpnum_ovf(rnp, rdp);
@@ -1939,7 +1960,7 @@ static void rcu_gp_fqs_loop(void)
 		if (!ret) {
 			rcu_state.jiffies_force_qs = jiffies + j;
 			WRITE_ONCE(rcu_state.jiffies_kick_kthreads,
-				   jiffies + 3 * j);
+				   jiffies + (j ? 3 * j : 2));
 		}
 		trace_rcu_grace_period(rcu_state.name,
 				       READ_ONCE(rcu_state.gp_seq),
@@ -2497,14 +2518,14 @@ static void rcu_do_batch(struct rcu_data *rdp)
 }
 
 /*
- * Check to see if this CPU is in a non-context-switch quiescent state
- * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
- * Also schedule RCU core processing.
- *
- * This function must be called from hardirq context. It is normally
- * invoked from the scheduling-clock interrupt.
+ * This function is invoked from each scheduling-clock interrupt,
+ * and checks to see if this CPU is in a non-context-switch quiescent
+ * state, for example, user mode or idle loop. It also schedules RCU
+ * core processing. If the current grace period has gone on too long,
+ * it will ask the scheduler to manufacture a context switch for the sole
+ * purpose of providing the needed quiescent state.
  */
-void rcu_check_callbacks(int user)
+void rcu_sched_clock_irq(int user)
 {
 	trace_rcu_utilization(TPS("Start scheduler-tick"));
 	raw_cpu_inc(rcu_data.ticks_this_gp);
@@ -2517,7 +2538,7 @@ void rcu_check_callbacks(int user)
 		}
 		__this_cpu_write(rcu_data.rcu_urgent_qs, false);
 	}
-	rcu_flavor_check_callbacks(user);
+	rcu_flavor_sched_clock_irq(user);
 	if (rcu_pending())
 		invoke_rcu_core();
 
@@ -2578,7 +2599,7 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
  * Force quiescent states on reluctant CPUs, and also detect which
  * CPUs are in dyntick-idle mode.
  */
-static void force_quiescent_state(void)
+void rcu_force_quiescent_state(void)
 {
 	unsigned long flags;
 	bool ret;
@@ -2610,6 +2631,7 @@ static void force_quiescent_state(void)
 	raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
 	rcu_gp_kthread_wake();
 }
+EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 
 /*
  * This function checks for grace-period requests that fail to motivate
@@ -2657,16 +2679,11 @@ rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp,
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
-	pr_alert("%s: g%ld->%ld gar:%lu ga:%lu f%#x gs:%d %s->state:%#lx\n",
-		 __func__, (long)READ_ONCE(rcu_state.gp_seq),
-		 (long)READ_ONCE(rnp_root->gp_seq_needed),
-		 j - rcu_state.gp_req_activity, j - rcu_state.gp_activity,
-		 rcu_state.gp_flags, rcu_state.gp_state, rcu_state.name,
-		 rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL);
 	WARN_ON(1);
 	if (rnp_root != rnp)
 		raw_spin_unlock_rcu_node(rnp_root);
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+	show_rcu_gp_kthreads();
 }
 
 /*
@@ -2711,12 +2728,8 @@ void rcu_fwd_progress_check(unsigned long j)
 }
 EXPORT_SYMBOL_GPL(rcu_fwd_progress_check);
 
-/*
- * This does the RCU core processing work for the specified rcu_data
- * structures. This may be called only from the CPU to whom the rdp
- * belongs.
- */
-static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
+/* Perform RCU core processing work for the current CPU. */
+static __latent_entropy void rcu_core(struct softirq_action *unused)
 {
 	unsigned long flags;
 	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
@@ -2801,9 +2814,9 @@ static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head,
 
 	/*
 	 * Force the grace period if too many callbacks or too long waiting.
-	 * Enforce hysteresis, and don't invoke force_quiescent_state()
+	 * Enforce hysteresis, and don't invoke rcu_force_quiescent_state()
 	 * if some other CPU has recently done so. Also, don't bother
-	 * invoking force_quiescent_state() if the newly enqueued callback
+	 * invoking rcu_force_quiescent_state() if the newly enqueued callback
 	 * is the only one waiting for a grace period to complete.
 	 */
 	if (unlikely(rcu_segcblist_n_cbs(&rdp->cblist) >
@@ -2820,7 +2833,7 @@ static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head,
 		rdp->blimit = LONG_MAX;
 		if (rcu_state.n_force_qs == rdp->n_force_qs_snap &&
 		    rcu_segcblist_first_pend_cb(&rdp->cblist) != head)
-			force_quiescent_state();
+			rcu_force_quiescent_state();
 		rdp->n_force_qs_snap = rcu_state.n_force_qs;
 		rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);
 	}
@@ -2889,9 +2902,6 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, int cpu, bool lazy)
 		rcu_segcblist_init(&rdp->cblist);
 	}
 	rcu_segcblist_enqueue(&rdp->cblist, head, lazy);
-	if (!lazy)
-		rcu_idle_count_callbacks_posted();
-
 	if (__is_kfree_rcu_offset((unsigned long)func))
 		trace_rcu_kfree_callback(rcu_state.name, head,
 					 (unsigned long)func,
@@ -2961,6 +2971,79 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 }
 EXPORT_SYMBOL_GPL(kfree_call_rcu);
 
+/*
+ * During early boot, any blocking grace-period wait automatically
+ * implies a grace period. Later on, this is never the case for PREEMPT.
+ *
+ * However, because a context switch is a grace period for !PREEMPT, any
+ * blocking grace-period wait automatically implies a grace period if
+ * there is only one CPU online at any point in time during execution of
+ * either synchronize_rcu() or synchronize_rcu_expedited(). It is OK to
+ * occasionally incorrectly indicate that there are multiple CPUs online
+ * when there was in fact only one the whole time, as this just adds some
+ * overhead: RCU still operates correctly.
+ */
+static int rcu_blocking_is_gp(void)
+{
+	int ret;
+
+	if (IS_ENABLED(CONFIG_PREEMPT))
+		return rcu_scheduler_active == RCU_SCHEDULER_INACTIVE;
+	might_sleep();  /* Check for RCU read-side critical section. */
+	preempt_disable();
+	ret = num_online_cpus() <= 1;
+	preempt_enable();
+	return ret;
+}
+
+/**
+ * synchronize_rcu - wait until a grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full grace
+ * period has elapsed, in other words after all currently executing RCU
+ * read-side critical sections have completed. Note, however, that
+ * upon return from synchronize_rcu(), the caller might well be executing
+ * concurrently with new RCU read-side critical sections that began while
+ * synchronize_rcu() was waiting. RCU read-side critical sections are
+ * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
+ * In addition, regions of code across which interrupts, preemption, or
+ * softirqs have been disabled also serve as RCU read-side critical
+ * sections. This includes hardware interrupt handlers, softirq handlers,
+ * and NMI handlers.
+ *
+ * Note that this guarantee implies further memory-ordering guarantees.
+ * On systems with more than one CPU, when synchronize_rcu() returns,
+ * each CPU is guaranteed to have executed a full memory barrier since
+ * the end of its last RCU read-side critical section whose beginning
+ * preceded the call to synchronize_rcu(). In addition, each CPU having
+ * an RCU read-side critical section that extends beyond the return from
+ * synchronize_rcu() is guaranteed to have executed a full memory barrier
+ * after the beginning of synchronize_rcu() and before the beginning of
+ * that RCU read-side critical section. Note that these guarantees include
+ * CPUs that are offline, idle, or executing in user mode, as well as CPUs
+ * that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked synchronize_rcu(), which returned
+ * to its caller on CPU B, then both CPU A and CPU B are guaranteed
+ * to have executed a full memory barrier during the execution of
+ * synchronize_rcu() -- even if CPU A and CPU B are the same CPU (but
+ * again only if the system has more than one CPU).
+ */
+void synchronize_rcu(void)
+{
+	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
+			 lock_is_held(&rcu_lock_map) ||
+			 lock_is_held(&rcu_sched_lock_map),
+			 "Illegal synchronize_rcu() in RCU read-side critical section");
+	if (rcu_blocking_is_gp())
+		return;
+	if (rcu_gp_is_expedited())
+		synchronize_rcu_expedited();
+	else
+		wait_rcu_gp(call_rcu);
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu);
+
 /**
  * get_state_synchronize_rcu - Snapshot current RCU state
  *
@@ -3049,28 +3132,6 @@ static int rcu_pending(void)
 }
 
 /*
- * Return true if the specified CPU has any callback. If all_lazy is
- * non-NULL, store an indication of whether all callbacks are lazy.
- * (If there are no callbacks, all of them are deemed to be lazy.)
- */
-static bool rcu_cpu_has_callbacks(bool *all_lazy)
-{
-	bool al = true;
-	bool hc = false;
-	struct rcu_data *rdp;
-
-	rdp = this_cpu_ptr(&rcu_data);
-	if (!rcu_segcblist_empty(&rdp->cblist)) {
-		hc = true;
-		if (rcu_segcblist_n_nonlazy_cbs(&rdp->cblist))
-			al = false;
-	}
-	if (all_lazy)
-		*all_lazy = al;
-	return hc;
-}
-
-/*
  * Helper function for rcu_barrier() tracing. If tracing is disabled,
  * the compiler is expected to optimize this away.
  */
@@ -3299,7 +3360,7 @@ int rcutree_prepare_cpu(unsigned int cpu)
 	trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl"));
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	rcu_prepare_kthreads(cpu);
-	rcu_spawn_all_nocb_kthreads(cpu);
+	rcu_spawn_cpu_nocb_kthread(cpu);
 
 	return 0;
 }
@@ -3329,8 +3390,6 @@ int rcutree_online_cpu(unsigned int cpu)
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	rnp->ffmask |= rdp->grpmask;
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-	if (IS_ENABLED(CONFIG_TREE_SRCU))
-		srcu_online_cpu(cpu);
 	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
 		return 0; /* Too early in boot for scheduler work. */
 	sync_sched_exp_online_cleanup(cpu);
@@ -3355,8 +3414,6 @@ int rcutree_offline_cpu(unsigned int cpu)
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 
 	rcutree_affinity_setting(cpu, cpu);
-	if (IS_ENABLED(CONFIG_TREE_SRCU))
-		srcu_offline_cpu(cpu);
 	return 0;
 }
 
@@ -3777,7 +3834,7 @@ void __init rcu_init(void)
 	rcu_init_one();
 	if (dump_tree)
 		rcu_dump_rcu_node_tree();
-	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+	open_softirq(RCU_SOFTIRQ, rcu_core);
 
 	/*
 	 * We don't need protection against CPU-hotplug here because