Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r--  kernel/rcu/tree.c | 355
1 file changed, 258 insertions(+), 97 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 7680fc275036..48d640ca1a05 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -156,6 +156,10 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 static void invoke_rcu_core(void);
 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
 
+/* rcuc/rcub kthread realtime priority */
+static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO;
+module_param(kthread_prio, int, 0644);
+
 /*
  * Track the rcutorture test sequence number and the update version
  * number within a given test. The rcutorture_testseq is incremented
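The hunk above exposes the rcuc/rcub kthread priority as a writable module parameter. For reference only, and assuming the usual rcutree. module-parameter prefix used by kernel/rcu/tree.c (the prefix itself is not shown in this diff), the priority would typically be set on the kernel command line, for example:

	rcutree.kthread_prio=50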
@@ -215,6 +219,9 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 #endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 };
 
+DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
+EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr);
+
 /*
  * Let the RCU core know that this CPU has gone through the scheduler,
  * which is a quiescent state. This is called when the need for a
@@ -284,6 +291,22 @@ void rcu_note_context_switch(void)
 }
 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
+/*
+ * Register a quiescent state for all RCU flavors. If there is an
+ * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight
+ * dyntick-idle quiescent state visible to other CPUs (but only for those
+ * RCU flavors in desperate need of a quiescent state, which will normally
+ * be none of them). Either way, do a lightweight quiescent state for
+ * all RCU flavors.
+ */
+void rcu_all_qs(void)
+{
+	if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
+		rcu_momentary_dyntick_idle();
+	this_cpu_inc(rcu_qs_ctr);
+}
+EXPORT_SYMBOL_GPL(rcu_all_qs);
+
 static long blimit = 10;	/* Maximum callbacks per rcu_do_batch. */
 static long qhimark = 10000;	/* If this many pending, ignore blimit. */
 static long qlowmark = 100;	/* Once only this many pending, use blimit. */
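rcu_all_qs() above is meant to be cheap enough to call from frequently executed voluntary-reschedule points. A minimal sketch of a hypothetical caller follows; the loop, NR_ITEMS, and do_unit_of_work() are illustrative assumptions and not part of this patch:

	static void example_scan(void)
	{
		int i;

		for (i = 0; i < NR_ITEMS; i++) {
			do_unit_of_work(i);	/* hypothetical per-item processing */
			rcu_all_qs();		/* lightweight quiescent state for all RCU flavors */
		}
	}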
@@ -315,18 +338,54 @@ static void force_quiescent_state(struct rcu_state *rsp);
 static int rcu_pending(void);
 
 /*
- * Return the number of RCU-sched batches processed thus far for debug & stats.
+ * Return the number of RCU batches started thus far for debug & stats.
+ */
+unsigned long rcu_batches_started(void)
+{
+	return rcu_state_p->gpnum;
+}
+EXPORT_SYMBOL_GPL(rcu_batches_started);
+
+/*
+ * Return the number of RCU-sched batches started thus far for debug & stats.
+ */
+unsigned long rcu_batches_started_sched(void)
+{
+	return rcu_sched_state.gpnum;
+}
+EXPORT_SYMBOL_GPL(rcu_batches_started_sched);
+
+/*
+ * Return the number of RCU BH batches started thus far for debug & stats.
  */
-long rcu_batches_completed_sched(void)
+unsigned long rcu_batches_started_bh(void)
+{
+	return rcu_bh_state.gpnum;
+}
+EXPORT_SYMBOL_GPL(rcu_batches_started_bh);
+
+/*
+ * Return the number of RCU batches completed thus far for debug & stats.
+ */
+unsigned long rcu_batches_completed(void)
+{
+	return rcu_state_p->completed;
+}
+EXPORT_SYMBOL_GPL(rcu_batches_completed);
+
+/*
+ * Return the number of RCU-sched batches completed thus far for debug & stats.
+ */
+unsigned long rcu_batches_completed_sched(void)
 {
 	return rcu_sched_state.completed;
 }
 EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
 
 /*
- * Return the number of RCU BH batches processed thus far for debug & stats.
+ * Return the number of RCU BH batches completed thus far for debug & stats.
  */
-long rcu_batches_completed_bh(void)
+unsigned long rcu_batches_completed_bh(void)
 {
 	return rcu_bh_state.completed;
 }
@@ -759,39 +818,71 @@ void rcu_irq_enter(void)
 /**
  * rcu_nmi_enter - inform RCU of entry to NMI context
  *
- * If the CPU was idle with dynamic ticks active, and there is no
- * irq handler running, this updates rdtp->dynticks_nmi to let the
- * RCU grace-period handling know that the CPU is active.
+ * If the CPU was idle from RCU's viewpoint, update rdtp->dynticks and
+ * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling know
+ * that the CPU is active. This implementation permits nested NMIs, as
+ * long as the nesting level does not overflow an int. (You will probably
+ * run out of stack space first.)
 */
 void rcu_nmi_enter(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
+	int incby = 2;
 
-	if (rdtp->dynticks_nmi_nesting == 0 &&
-	    (atomic_read(&rdtp->dynticks) & 0x1))
-		return;
-	rdtp->dynticks_nmi_nesting++;
-	smp_mb__before_atomic(); /* Force delay from prior write. */
-	atomic_inc(&rdtp->dynticks);
-	/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
-	smp_mb__after_atomic(); /* See above. */
-	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
+	/* Complain about underflow. */
+	WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0);
+
+	/*
+	 * If idle from RCU viewpoint, atomically increment ->dynticks
+	 * to mark non-idle and increment ->dynticks_nmi_nesting by one.
+	 * Otherwise, increment ->dynticks_nmi_nesting by two. This means
+	 * if ->dynticks_nmi_nesting is equal to one, we are guaranteed
+	 * to be in the outermost NMI handler that interrupted an RCU-idle
+	 * period (observation due to Andy Lutomirski).
+	 */
+	if (!(atomic_read(&rdtp->dynticks) & 0x1)) {
+		smp_mb__before_atomic(); /* Force delay from prior write. */
+		atomic_inc(&rdtp->dynticks);
+		/* atomic_inc() before later RCU read-side crit sects */
+		smp_mb__after_atomic(); /* See above. */
+		WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
+		incby = 1;
+	}
+	rdtp->dynticks_nmi_nesting += incby;
+	barrier();
 }
 
 /**
  * rcu_nmi_exit - inform RCU of exit from NMI context
  *
- * If the CPU was idle with dynamic ticks active, and there is no
- * irq handler running, this updates rdtp->dynticks_nmi to let the
- * RCU grace-period handling know that the CPU is no longer active.
+ * If we are returning from the outermost NMI handler that interrupted an
+ * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting
+ * to let the RCU grace-period handling know that the CPU is back to
+ * being RCU-idle.
 */
 void rcu_nmi_exit(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
 
-	if (rdtp->dynticks_nmi_nesting == 0 ||
-	    --rdtp->dynticks_nmi_nesting != 0)
+	/*
+	 * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
+	 * (We are exiting an NMI handler, so RCU better be paying attention
+	 * to us!)
+	 */
+	WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0);
+	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
+
+	/*
+	 * If the nesting level is not 1, the CPU wasn't RCU-idle, so
+	 * leave it in non-RCU-idle state.
+	 */
+	if (rdtp->dynticks_nmi_nesting != 1) {
+		rdtp->dynticks_nmi_nesting -= 2;
 		return;
+	}
+
+	/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
+	rdtp->dynticks_nmi_nesting = 0;
 	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
 	smp_mb__before_atomic(); /* See above. */
 	atomic_inc(&rdtp->dynticks);
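A worked trace of the nesting arithmetic above (illustration only, not additional code from the patch): an NMI arriving while the CPU is RCU-idle finds ->dynticks even, increments it to odd, and uses incby = 1, so ->dynticks_nmi_nesting becomes 1. A nested NMI finds ->dynticks already odd and adds 2, giving 3. The nested handler's rcu_nmi_exit() sees a nesting level other than 1 and subtracts 2, returning to 1, while the outermost exit sees exactly 1, resets the count to 0, and increments ->dynticks back to an even (RCU-idle) value.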
@@ -898,17 +989,14 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
 		return 1;
 	} else {
+		if (ULONG_CMP_LT(ACCESS_ONCE(rdp->gpnum) + ULONG_MAX / 4,
+				 rdp->mynode->gpnum))
+			ACCESS_ONCE(rdp->gpwrap) = true;
 		return 0;
 	}
 }
 
 /*
- * This function really isn't for public consumption, but RCU is special in
- * that context switches can allow the state machine to make progress.
- */
-extern void resched_cpu(int cpu);
-
-/*
  * Return true if the specified CPU has passed through a quiescent
  * state by virtue of being in or having passed through an dynticks
  * idle state since the last call to dyntick_save_progress_counter()
@@ -1011,6 +1099,22 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
 	j1 = rcu_jiffies_till_stall_check();
 	ACCESS_ONCE(rsp->jiffies_stall) = j + j1;
 	rsp->jiffies_resched = j + j1 / 2;
+	rsp->n_force_qs_gpstart = ACCESS_ONCE(rsp->n_force_qs);
+}
+
+/*
+ * Complain about starvation of grace-period kthread.
+ */
+static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
+{
+	unsigned long gpa;
+	unsigned long j;
+
+	j = jiffies;
+	gpa = ACCESS_ONCE(rsp->gp_activity);
+	if (j - gpa > 2 * HZ)
+		pr_err("%s kthread starved for %ld jiffies!\n",
+		       rsp->name, j - gpa);
 }
 
 /*
@@ -1033,11 +1137,13 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
 	}
 }
 
-static void print_other_cpu_stall(struct rcu_state *rsp)
+static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
 {
 	int cpu;
 	long delta;
 	unsigned long flags;
+	unsigned long gpa;
+	unsigned long j;
 	int ndetected = 0;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 	long totqlen = 0;
@@ -1075,30 +1181,34 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	}
 
-	/*
-	 * Now rat on any tasks that got kicked up to the root rcu_node
-	 * due to CPU offlining.
-	 */
-	rnp = rcu_get_root(rsp);
-	raw_spin_lock_irqsave(&rnp->lock, flags);
-	ndetected += rcu_print_task_stall(rnp);
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
-
 	print_cpu_stall_info_end();
 	for_each_possible_cpu(cpu)
 		totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
 	pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n",
 	       smp_processor_id(), (long)(jiffies - rsp->gp_start),
 	       (long)rsp->gpnum, (long)rsp->completed, totqlen);
-	if (ndetected == 0)
-		pr_err("INFO: Stall ended before state dump start\n");
-	else
+	if (ndetected) {
 		rcu_dump_cpu_stacks(rsp);
+	} else {
+		if (ACCESS_ONCE(rsp->gpnum) != gpnum ||
+		    ACCESS_ONCE(rsp->completed) == gpnum) {
+			pr_err("INFO: Stall ended before state dump start\n");
+		} else {
+			j = jiffies;
+			gpa = ACCESS_ONCE(rsp->gp_activity);
+			pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld\n",
+			       rsp->name, j - gpa, j, gpa,
+			       jiffies_till_next_fqs);
+			/* In this case, the current CPU might be at fault. */
+			sched_show_task(current);
+		}
+	}
 
 	/* Complain about tasks blocking the grace period. */
-
 	rcu_print_detail_task_stall(rsp);
 
+	rcu_check_gp_kthread_starvation(rsp);
+
 	force_quiescent_state(rsp);  /* Kick them all. */
 }
 
@@ -1123,6 +1233,9 @@ static void print_cpu_stall(struct rcu_state *rsp)
 	pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n",
 		jiffies - rsp->gp_start,
 		(long)rsp->gpnum, (long)rsp->completed, totqlen);
+
+	rcu_check_gp_kthread_starvation(rsp);
+
 	rcu_dump_cpu_stacks(rsp);
 
 	raw_spin_lock_irqsave(&rnp->lock, flags);
@@ -1193,7 +1306,7 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
 	    ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) {
 
 		/* They had a few time units to dump stack, so complain. */
-		print_other_cpu_stall(rsp);
+		print_other_cpu_stall(rsp, gpnum);
 	}
 }
 
@@ -1530,7 +1643,8 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
 	bool ret;
 
 	/* Handle the ends of any preceding grace periods first. */
-	if (rdp->completed == rnp->completed) {
+	if (rdp->completed == rnp->completed &&
+	    !unlikely(ACCESS_ONCE(rdp->gpwrap))) {
 
 		/* No grace period end, so just accelerate recent callbacks. */
 		ret = rcu_accelerate_cbs(rsp, rnp, rdp);
@@ -1545,7 +1659,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
 		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend"));
 	}
 
-	if (rdp->gpnum != rnp->gpnum) {
+	if (rdp->gpnum != rnp->gpnum || unlikely(ACCESS_ONCE(rdp->gpwrap))) {
 		/*
 		 * If the current grace period is waiting for this CPU,
 		 * set up to detect a quiescent state, otherwise don't
@@ -1554,8 +1668,10 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
 		rdp->gpnum = rnp->gpnum;
 		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
 		rdp->passed_quiesce = 0;
+		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
 		rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
 		zero_cpu_stall_ticks(rdp);
+		ACCESS_ONCE(rdp->gpwrap) = false;
 	}
 	return ret;
 }
@@ -1569,7 +1685,8 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
 	local_irq_save(flags);
 	rnp = rdp->mynode;
 	if ((rdp->gpnum == ACCESS_ONCE(rnp->gpnum) &&
-	     rdp->completed == ACCESS_ONCE(rnp->completed)) || /* w/out lock. */
+	     rdp->completed == ACCESS_ONCE(rnp->completed) &&
+	     !unlikely(ACCESS_ONCE(rdp->gpwrap))) || /* w/out lock. */
 	    !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
 		local_irq_restore(flags);
 		return;
@@ -1589,6 +1706,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
 	struct rcu_data *rdp;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
+	ACCESS_ONCE(rsp->gp_activity) = jiffies;
 	rcu_bind_gp_kthread();
 	raw_spin_lock_irq(&rnp->lock);
 	smp_mb__after_unlock_lock();
@@ -1649,6 +1767,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
 					    rnp->grphi, rnp->qsmask);
 		raw_spin_unlock_irq(&rnp->lock);
 		cond_resched_rcu_qs();
+		ACCESS_ONCE(rsp->gp_activity) = jiffies;
 	}
 
 	mutex_unlock(&rsp->onoff_mutex);
@@ -1665,6 +1784,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
 	unsigned long maxj;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
+	ACCESS_ONCE(rsp->gp_activity) = jiffies;
 	rsp->n_force_qs++;
 	if (fqs_state == RCU_SAVE_DYNTICK) {
 		/* Collect dyntick-idle snapshots. */
@@ -1703,6 +1823,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 	struct rcu_data *rdp;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
+	ACCESS_ONCE(rsp->gp_activity) = jiffies;
 	raw_spin_lock_irq(&rnp->lock);
 	smp_mb__after_unlock_lock();
 	gp_duration = jiffies - rsp->gp_start;
@@ -1739,6 +1860,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 		nocb += rcu_future_gp_cleanup(rsp, rnp);
 		raw_spin_unlock_irq(&rnp->lock);
 		cond_resched_rcu_qs();
+		ACCESS_ONCE(rsp->gp_activity) = jiffies;
 	}
 	rnp = rcu_get_root(rsp);
 	raw_spin_lock_irq(&rnp->lock);
@@ -1788,6 +1910,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 			if (rcu_gp_init(rsp))
 				break;
 			cond_resched_rcu_qs();
+			ACCESS_ONCE(rsp->gp_activity) = jiffies;
 			WARN_ON(signal_pending(current));
 			trace_rcu_grace_period(rsp->name,
 					       ACCESS_ONCE(rsp->gpnum),
@@ -1831,9 +1954,11 @@ static int __noreturn rcu_gp_kthread(void *arg)
 					       ACCESS_ONCE(rsp->gpnum),
 					       TPS("fqsend"));
 				cond_resched_rcu_qs();
+				ACCESS_ONCE(rsp->gp_activity) = jiffies;
 			} else {
 				/* Deal with stray signal. */
 				cond_resched_rcu_qs();
+				ACCESS_ONCE(rsp->gp_activity) = jiffies;
 				WARN_ON(signal_pending(current));
 				trace_rcu_grace_period(rsp->name,
 						       ACCESS_ONCE(rsp->gpnum),
@@ -2010,8 +2135,10 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
 	rnp = rdp->mynode;
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	smp_mb__after_unlock_lock();
-	if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum ||
-	    rnp->completed == rnp->gpnum) {
+	if ((rdp->passed_quiesce == 0 &&
+	     rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) ||
+	    rdp->gpnum != rnp->gpnum || rnp->completed == rnp->gpnum ||
+	    rdp->gpwrap) {
 
 		/*
 		 * The grace period in which this quiescent state was
@@ -2020,6 +2147,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
 		 * within the current grace period.
 		 */
 		rdp->passed_quiesce = 0;	/* need qs for new gp. */
+		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		return;
 	}
@@ -2064,7 +2192,8 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
 	 * Was there a quiescent state since the beginning of the grace
 	 * period? If no, then exit and wait for the next call.
 	 */
-	if (!rdp->passed_quiesce)
+	if (!rdp->passed_quiesce &&
+	    rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr))
 		return;
 
 	/*
@@ -2195,6 +2324,46 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 }
 
 /*
+ * All CPUs for the specified rcu_node structure have gone offline,
+ * and all tasks that were preempted within an RCU read-side critical
+ * section while running on one of those CPUs have since exited their RCU
+ * read-side critical section. Some other CPU is reporting this fact with
+ * the specified rcu_node structure's ->lock held and interrupts disabled.
+ * This function therefore goes up the tree of rcu_node structures,
+ * clearing the corresponding bits in the ->qsmaskinit fields. Note that
+ * the leaf rcu_node structure's ->qsmaskinit field has already been
+ * updated.
+ *
+ * This function does check that the specified rcu_node structure has
+ * all CPUs offline and no blocked tasks, so it is OK to invoke it
+ * prematurely. That said, invoking it after the fact will cost you
+ * a needless lock acquisition. So once it has done its work, don't
+ * invoke it again.
+ */
+static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
+{
+	long mask;
+	struct rcu_node *rnp = rnp_leaf;
+
+	if (rnp->qsmaskinit || rcu_preempt_has_tasks(rnp))
+		return;
+	for (;;) {
+		mask = rnp->grpmask;
+		rnp = rnp->parent;
+		if (!rnp)
+			break;
+		raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+		smp_mb__after_unlock_lock(); /* GP memory ordering. */
+		rnp->qsmaskinit &= ~mask;
+		if (rnp->qsmaskinit) {
+			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+			return;
+		}
+		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+	}
+}
+
+/*
  * The CPU has been completely removed, and some other CPU is reporting
  * this fact from process context. Do the remainder of the cleanup,
  * including orphaning the outgoing CPU's RCU callbacks, and also
@@ -2204,8 +2373,6 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 {
 	unsigned long flags;
-	unsigned long mask;
-	int need_report = 0;
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
 
@@ -2219,40 +2386,15 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 	/* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
 	rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
 	rcu_adopt_orphan_cbs(rsp, flags);
+	raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags);
 
-	/* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
-	mask = rdp->grpmask;	/* rnp->grplo is constant. */
-	do {
-		raw_spin_lock(&rnp->lock);	/* irqs already disabled. */
-		smp_mb__after_unlock_lock();
-		rnp->qsmaskinit &= ~mask;
-		if (rnp->qsmaskinit != 0) {
-			if (rnp != rdp->mynode)
-				raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
-			break;
-		}
-		if (rnp == rdp->mynode)
-			need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
-		else
-			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
-		mask = rnp->grpmask;
-		rnp = rnp->parent;
-	} while (rnp != NULL);
-
-	/*
-	 * We still hold the leaf rcu_node structure lock here, and
-	 * irqs are still disabled. The reason for this subterfuge is
-	 * because invoking rcu_report_unblock_qs_rnp() with ->orphan_lock
-	 * held leads to deadlock.
-	 */
-	raw_spin_unlock(&rsp->orphan_lock); /* irqs remain disabled. */
-	rnp = rdp->mynode;
-	if (need_report & RCU_OFL_TASKS_NORM_GP)
-		rcu_report_unblock_qs_rnp(rnp, flags);
-	else
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-	if (need_report & RCU_OFL_TASKS_EXP_GP)
-		rcu_report_exp_rnp(rsp, rnp, true);
+	/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	smp_mb__after_unlock_lock();	/* Enforce GP memory-order guarantee. */
+	rnp->qsmaskinit &= ~rdp->grpmask;
+	if (rnp->qsmaskinit == 0 && !rcu_preempt_has_tasks(rnp))
+		rcu_cleanup_dead_rnp(rnp);
+	rcu_report_qs_rnp(rdp->grpmask, rsp, rnp, flags); /* Rlses rnp->lock. */
 	WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
 		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
 		  cpu, rdp->qlen, rdp->nxtlist);
@@ -2268,6 +2410,10 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 {
 }
 
+static void __maybe_unused rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
+{
+}
+
 static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 {
 }
@@ -2464,12 +2610,6 @@ static void force_qs_rnp(struct rcu_state *rsp,
 		}
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	}
-	rnp = rcu_get_root(rsp);
-	if (rnp->qsmask == 0) {
-		raw_spin_lock_irqsave(&rnp->lock, flags);
-		smp_mb__after_unlock_lock();
-		rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
-	}
 }
 
 /*
@@ -2569,7 +2709,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
  * Schedule RCU callback invocation. If the specified type of RCU
  * does not support RCU priority boosting, just do a direct call,
  * otherwise wake up the per-CPU kernel kthread. Note that because we
- * are running on the current CPU with interrupts disabled, the
+ * are running on the current CPU with softirqs disabled, the
 * rcu_cpu_kthread_task cannot disappear out from under us.
  */
 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
@@ -3109,9 +3249,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 
 	/* Is the RCU core waiting for a quiescent state from this CPU? */
 	if (rcu_scheduler_fully_active &&
-	    rdp->qs_pending && !rdp->passed_quiesce) {
+	    rdp->qs_pending && !rdp->passed_quiesce &&
+	    rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) {
 		rdp->n_rp_qs_pending++;
-	} else if (rdp->qs_pending && rdp->passed_quiesce) {
+	} else if (rdp->qs_pending &&
+		   (rdp->passed_quiesce ||
+		    rdp->rcu_qs_ctr_snap != __this_cpu_read(rcu_qs_ctr))) {
 		rdp->n_rp_report_qs++;
 		return 1;
 	}
@@ -3135,7 +3278,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 	}
 
 	/* Has a new RCU grace period started? */
-	if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */
+	if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum ||
+	    unlikely(ACCESS_ONCE(rdp->gpwrap))) { /* outside lock */
 		rdp->n_rp_gp_started++;
 		return 1;
 	}
@@ -3318,6 +3462,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 		} else {
 			_rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
 					   rsp->n_barrier_done);
+			smp_mb__before_atomic();
 			atomic_inc(&rsp->barrier_cpu_count);
 			__call_rcu(&rdp->barrier_head,
 				   rcu_barrier_callback, rsp, cpu, 0);
@@ -3385,9 +3530,6 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 	/* Set up local state, ensuring consistent view of global state. */
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
-	init_callback_list(rdp);
-	rdp->qlen_lazy = 0;
-	ACCESS_ONCE(rdp->qlen) = 0;
 	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
 	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
 	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
@@ -3444,6 +3586,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 		rdp->gpnum = rnp->completed;
 		rdp->completed = rnp->completed;
 		rdp->passed_quiesce = 0;
+		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
 		rdp->qs_pending = 0;
 		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
 	}
@@ -3535,17 +3678,35 @@ static int rcu_pm_notify(struct notifier_block *self,
 static int __init rcu_spawn_gp_kthread(void)
 {
 	unsigned long flags;
+	int kthread_prio_in = kthread_prio;
 	struct rcu_node *rnp;
 	struct rcu_state *rsp;
+	struct sched_param sp;
 	struct task_struct *t;
 
+	/* Force priority into range. */
+	if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1)
+		kthread_prio = 1;
+	else if (kthread_prio < 0)
+		kthread_prio = 0;
+	else if (kthread_prio > 99)
+		kthread_prio = 99;
+	if (kthread_prio != kthread_prio_in)
+		pr_alert("rcu_spawn_gp_kthread(): Limited prio to %d from %d\n",
+			 kthread_prio, kthread_prio_in);
+
 	rcu_scheduler_fully_active = 1;
 	for_each_rcu_flavor(rsp) {
-		t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name);
+		t = kthread_create(rcu_gp_kthread, rsp, "%s", rsp->name);
 		BUG_ON(IS_ERR(t));
 		rnp = rcu_get_root(rsp);
 		raw_spin_lock_irqsave(&rnp->lock, flags);
 		rsp->gp_kthread = t;
+		if (kthread_prio) {
+			sp.sched_priority = kthread_prio;
+			sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+		}
+		wake_up_process(t);
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	}
 	rcu_spawn_nocb_kthreads();