author	Linus Torvalds <torvalds@linux-foundation.org>	2011-05-19 21:14:34 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-05-19 21:14:34 -0400
commit	eb04f2f04ed1227c266b3219c0aaeda525639718 (patch)
tree	7f224483a3cd0e439cd64a8666ec9dc5ed178a3d
parent	5765040ebfc9a28d9dcfaaaaf3d25840d922de96 (diff)
parent	80d02085d99039b3b7f3a73c8896226b0cb1ba07 (diff)
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (78 commits)
  Revert "rcu: Decrease memory-barrier usage based on semi-formal proof"
  net,rcu: convert call_rcu(prl_entry_destroy_rcu) to kfree
  batman,rcu: convert call_rcu(softif_neigh_free_rcu) to kfree_rcu
  batman,rcu: convert call_rcu(neigh_node_free_rcu) to kfree()
  batman,rcu: convert call_rcu(gw_node_free_rcu) to kfree_rcu
  net,rcu: convert call_rcu(kfree_tid_tx) to kfree_rcu()
  net,rcu: convert call_rcu(xt_osf_finger_free_rcu) to kfree_rcu()
  net/mac80211,rcu: convert call_rcu(work_free_rcu) to kfree_rcu()
  net,rcu: convert call_rcu(wq_free_rcu) to kfree_rcu()
  net,rcu: convert call_rcu(phonet_device_rcu_free) to kfree_rcu()
  perf,rcu: convert call_rcu(swevent_hlist_release_rcu) to kfree_rcu()
  perf,rcu: convert call_rcu(free_ctx) to kfree_rcu()
  net,rcu: convert call_rcu(__nf_ct_ext_free_rcu) to kfree_rcu()
  net,rcu: convert call_rcu(net_generic_release) to kfree_rcu()
  net,rcu: convert call_rcu(netlbl_unlhsh_free_addr6) to kfree_rcu()
  net,rcu: convert call_rcu(netlbl_unlhsh_free_addr4) to kfree_rcu()
  security,rcu: convert call_rcu(sel_netif_free) to kfree_rcu()
  net,rcu: convert call_rcu(xps_dev_maps_release) to kfree_rcu()
  net,rcu: convert call_rcu(xps_map_release) to kfree_rcu()
  net,rcu: convert call_rcu(rps_map_release) to kfree_rcu()
  ...
-rw-r--r--	Documentation/RCU/00-INDEX	2
-rw-r--r--	Documentation/RCU/stallwarn.txt	23
-rw-r--r--	Documentation/RCU/trace.txt	278
-rw-r--r--	Documentation/filesystems/proc.txt	1
-rw-r--r--	drivers/net/ixgbe/ixgbe_main.c	7
-rw-r--r--	drivers/net/macvlan.c	10
-rw-r--r--	include/linux/interrupt.h	1
-rw-r--r--	include/linux/rcupdate.h	70
-rw-r--r--	include/linux/rcutiny.h	8
-rw-r--r--	include/linux/rcutree.h	13
-rw-r--r--	include/net/sctp/sctp.h	1
-rw-r--r--	include/trace/events/irq.h	3
-rw-r--r--	init/Kconfig	2
-rw-r--r--	kernel/cgroup.c	27
-rw-r--r--	kernel/events/core.c	20
-rw-r--r--	kernel/rcupdate.c	32
-rw-r--r--	kernel/rcutiny.c	45
-rw-r--r--	kernel/rcutiny_plugin.h	203
-rw-r--r--	kernel/rcutorture.c	26
-rw-r--r--	kernel/rcutree.c	526
-rw-r--r--	kernel/rcutree.h	104
-rw-r--r--	kernel/rcutree_plugin.h	568
-rw-r--r--	kernel/rcutree_trace.c	180
-rw-r--r--	kernel/softirq.c	2
-rw-r--r--	lib/Kconfig.debug	32
-rw-r--r--	net/batman-adv/gateway_client.c	10
-rw-r--r--	net/batman-adv/originator.c	10
-rw-r--r--	net/batman-adv/soft-interface.c	10
-rw-r--r--	net/core/dev_addr_lists.c	12
-rw-r--r--	net/core/drop_monitor.c	12
-rw-r--r--	net/core/gen_estimator.c	9
-rw-r--r--	net/core/net-sysfs.c	34
-rw-r--r--	net/core/net_namespace.c	10
-rw-r--r--	net/decnet/dn_dev.c	7
-rw-r--r--	net/ipv4/fib_semantics.c	12
-rw-r--r--	net/ipv4/fib_trie.c	7
-rw-r--r--	net/ipv4/igmp.c	32
-rw-r--r--	net/ipv6/addrconf.c	16
-rw-r--r--	net/ipv6/mcast.c	8
-rw-r--r--	net/ipv6/sit.c	7
-rw-r--r--	net/mac80211/agg-tx.c	14
-rw-r--r--	net/mac80211/work.c	10
-rw-r--r--	net/netfilter/nf_conntrack_extend.c	8
-rw-r--r--	net/netfilter/xt_osf.c	11
-rw-r--r--	net/netlabel/netlabel_unlabeled.c	42
-rw-r--r--	net/netlink/af_netlink.c	8
-rw-r--r--	net/phonet/pn_dev.c	10
-rw-r--r--	net/sched/act_api.c	7
-rw-r--r--	net/sched/act_police.c	8
-rw-r--r--	net/sctp/bind_addr.c	2
-rw-r--r--	net/sctp/ipv6.c	2
-rw-r--r--	net/sctp/protocol.c	9
-rw-r--r--	net/socket.c	11
-rw-r--r--	security/keys/user_defined.c	16
-rw-r--r--	security/selinux/netif.c	18
-rw-r--r--	tools/perf/util/trace-event-parse.c	1
56 files changed, 1724 insertions, 833 deletions
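
Most of the non-RCU-core changes in this series apply one mechanical transformation: a trivial RCU callback that only kfree()s its enclosing structure is deleted, and the call_rcu() invocation becomes kfree_rcu(). A minimal sketch of that before-and-after pattern, using a hypothetical struct foo with an embedded rcu_head (not a structure from this series):

/* Before: a single-purpose callback exists only to kfree() the object. */
static void foo_free_rcu(struct rcu_head *head)
{
	struct foo *fp = container_of(head, struct foo, rcu);

	kfree(fp);
}

static void foo_release(struct foo *fp)
{
	call_rcu(&fp->rcu, foo_free_rcu);
}

/* After: the callback is gone; only the rcu_head field name is needed. */
static void foo_release(struct foo *fp)
{
	kfree_rcu(fp, rcu);
}

Besides shrinking each caller, this removes the need for modules to invoke the high-latency rcu_barrier() at unload time just to wait for such trivial callbacks, as the kfree_rcu() kernel-doc added to include/linux/rcupdate.h below explains.
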
diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX
index 71b6f500ddb9..1d7a885761f5 100644
--- a/Documentation/RCU/00-INDEX
+++ b/Documentation/RCU/00-INDEX
@@ -21,7 +21,7 @@ rcu.txt
 RTFP.txt
 	- List of RCU papers (bibliography) going back to 1980.
 stallwarn.txt
-	- RCU CPU stall warnings (CONFIG_RCU_CPU_STALL_DETECTOR)
+	- RCU CPU stall warnings (module parameter rcu_cpu_stall_suppress)
 torture.txt
 	- RCU Torture Test Operation (CONFIG_RCU_TORTURE_TEST)
 trace.txt
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index 862c08ef1fde..4e959208f736 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -1,22 +1,25 @@
 Using RCU's CPU Stall Detector
 
-The CONFIG_RCU_CPU_STALL_DETECTOR kernel config parameter enables
-RCU's CPU stall detector, which detects conditions that unduly delay
-RCU grace periods.  The stall detector's idea of what constitutes
-"unduly delayed" is controlled by a set of C preprocessor macros:
+The rcu_cpu_stall_suppress module parameter enables RCU's CPU stall
+detector, which detects conditions that unduly delay RCU grace periods.
+This module parameter enables CPU stall detection by default, but
+may be overridden via boot-time parameter or at runtime via sysfs.
+The stall detector's idea of what constitutes "unduly delayed" is
+controlled by a set of kernel configuration variables and cpp macros:
 
-RCU_SECONDS_TILL_STALL_CHECK
+CONFIG_RCU_CPU_STALL_TIMEOUT
 
-	This macro defines the period of time that RCU will wait from
-	the beginning of a grace period until it issues an RCU CPU
-	stall warning.  This time period is normally ten seconds.
+	This kernel configuration parameter defines the period of time
+	that RCU will wait from the beginning of a grace period until it
+	issues an RCU CPU stall warning.  This time period is normally
+	ten seconds.
 
 RCU_SECONDS_TILL_STALL_RECHECK
 
 	This macro defines the period of time that RCU will wait after
 	issuing a stall warning until it issues another stall warning
-	for the same stall.  This time period is normally set to thirty
-	seconds.
+	for the same stall.  This time period is normally set to three
+	times the check interval plus thirty seconds.
 
 RCU_STALL_RAT_DELAY
 
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index 6a8c73f55b80..c078ad48f7a1 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -10,34 +10,46 @@ for rcutree and next for rcutiny.
 
 CONFIG_TREE_RCU and CONFIG_TREE_PREEMPT_RCU debugfs Files and Formats
 
-These implementations of RCU provides five debugfs files under the
-top-level directory RCU: rcu/rcudata (which displays fields in struct
-rcu_data), rcu/rcudata.csv (which is a .csv spreadsheet version of
-rcu/rcudata), rcu/rcugp (which displays grace-period counters),
-rcu/rcuhier (which displays the struct rcu_node hierarchy), and
-rcu/rcu_pending (which displays counts of the reasons that the
-rcu_pending() function decided that there was core RCU work to do).
+These implementations of RCU provides several debugfs files under the
+top-level directory "rcu":
+
+rcu/rcudata:
+	Displays fields in struct rcu_data.
+rcu/rcudata.csv:
+	Comma-separated values spreadsheet version of rcudata.
+rcu/rcugp:
+	Displays grace-period counters.
+rcu/rcuhier:
+	Displays the struct rcu_node hierarchy.
+rcu/rcu_pending:
+	Displays counts of the reasons rcu_pending() decided that RCU had
+	work to do.
+rcu/rcutorture:
+	Displays rcutorture test progress.
+rcu/rcuboost:
+	Displays RCU boosting statistics.  Only present if
+	CONFIG_RCU_BOOST=y.
 
 The output of "cat rcu/rcudata" looks as follows:
 
 rcu_sched:
-  0 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=10951/1 dn=0 df=1101 of=0 ri=36 ql=0 b=10
-  1 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=16117/1 dn=0 df=1015 of=0 ri=0 ql=0 b=10
-  2 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1445/1 dn=0 df=1839 of=0 ri=0 ql=0 b=10
-  3 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=6681/1 dn=0 df=1545 of=0 ri=0 ql=0 b=10
-  4 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1003/1 dn=0 df=1992 of=0 ri=0 ql=0 b=10
-  5 c=17829 g=17830 pq=1 pqc=17829 qp=1 dt=3887/1 dn=0 df=3331 of=0 ri=4 ql=2 b=10
-  6 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=859/1 dn=0 df=3224 of=0 ri=0 ql=0 b=10
-  7 c=17829 g=17830 pq=0 pqc=17829 qp=1 dt=3761/1 dn=0 df=1818 of=0 ri=0 ql=2 b=10
+  0 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=545/1/0 df=50 of=0 ri=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0
+  1 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=967/1/0 df=58 of=0 ri=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0
+  2 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1081/1/0 df=175 of=0 ri=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0
+  3 c=20942 g=20943 pq=1 pqc=20942 qp=1 dt=1846/0/0 df=404 of=0 ri=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0
+  4 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=369/1/0 df=83 of=0 ri=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0
+  5 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=381/1/0 df=64 of=0 ri=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0
+  6 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1037/1/0 df=183 of=0 ri=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0
+  7 c=20897 g=20897 pq=1 pqc=20896 qp=0 dt=1572/0/0 df=382 of=0 ri=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0
 rcu_bh:
-  0 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=10951/1 dn=0 df=0 of=0 ri=0 ql=0 b=10
-  1 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=16117/1 dn=0 df=13 of=0 ri=0 ql=0 b=10
-  2 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1445/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
-  3 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=6681/1 dn=0 df=9 of=0 ri=0 ql=0 b=10
-  4 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1003/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
-  5 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3887/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
-  6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
-  7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  0 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=545/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0
+  1 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=967/1/0 df=3 of=0 ri=1 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0
+  2 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1081/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0
+  3 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1846/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0
+  4 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=369/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0
+  5 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=381/1/0 df=4 of=0 ri=1 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0
+  6 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1037/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0
+  7 c=1474 g=1474 pq=1 pqc=1473 qp=0 dt=1572/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0
 
 The first section lists the rcu_data structures for rcu_sched, the second
 for rcu_bh.  Note that CONFIG_TREE_PREEMPT_RCU kernels will have an
@@ -52,17 +64,18 @@ o The number at the beginning of each line is the CPU number.
 	substantially larger than the number of actual CPUs.
 
 o	"c" is the count of grace periods that this CPU believes have
-	completed.  CPUs in dynticks idle mode may lag quite a ways
-	behind, for example, CPU 4 under "rcu_sched" above, which has
-	slept through the past 25 RCU grace periods.  It is not unusual
-	to see CPUs lagging by thousands of grace periods.
+	completed.  Offlined CPUs and CPUs in dynticks idle mode may
+	lag quite a ways behind, for example, CPU 6 under "rcu_sched"
+	above, which has been offline through not quite 40,000 RCU grace
+	periods.  It is not unusual to see CPUs lagging by thousands of
+	grace periods.
 
 o	"g" is the count of grace periods that this CPU believes have
-	started.  Again, CPUs in dynticks idle mode may lag behind.
-	If the "c" and "g" values are equal, this CPU has already
-	reported a quiescent state for the last RCU grace period that
-	it is aware of, otherwise, the CPU believes that it owes RCU a
-	quiescent state.
+	started.  Again, offlined CPUs and CPUs in dynticks idle mode
+	may lag behind.  If the "c" and "g" values are equal, this CPU
+	has already reported a quiescent state for the last RCU grace
+	period that it is aware of, otherwise, the CPU believes that it
+	owes RCU a quiescent state.
 
 o	"pq" indicates that this CPU has passed through a quiescent state
 	for the current grace period.  It is possible for "pq" to be
@@ -81,7 +94,8 @@ o "pqc" indicates which grace period the last-observed quiescent
 	the next grace period!
 
 o	"qp" indicates that RCU still expects a quiescent state from
-	this CPU.
+	this CPU.  Offlined CPUs and CPUs in dyntick idle mode might
+	well have qp=1, which is OK: RCU is still ignoring them.
 
 o	"dt" is the current value of the dyntick counter that is incremented
 	when entering or leaving dynticks idle state, either by the
@@ -108,7 +122,7 @@ o "df" is the number of times that some other CPU has forced a
 
 o	"of" is the number of times that some other CPU has forced a
 	quiescent state on behalf of this CPU due to this CPU being
-	offline.  In a perfect world, this might neve happen, but it
+	offline.  In a perfect world, this might never happen, but it
 	turns out that offlining and onlining a CPU can take several grace
 	periods, and so there is likely to be an extended period of time
 	when RCU believes that the CPU is online when it really is not.
@@ -125,6 +139,62 @@ o "ql" is the number of RCU callbacks currently residing on
 	of what state they are in (new, waiting for grace period to
 	start, waiting for grace period to end, ready to invoke).
 
+o	"qs" gives an indication of the state of the callback queue
+	with four characters:
+
+	"N"	Indicates that there are callbacks queued that are not
+		ready to be handled by the next grace period, and thus
+		will be handled by the grace period following the next
+		one.
+
+	"R"	Indicates that there are callbacks queued that are
+		ready to be handled by the next grace period.
+
+	"W"	Indicates that there are callbacks queued that are
+		waiting on the current grace period.
+
+	"D"	Indicates that there are callbacks queued that have
+		already been handled by a prior grace period, and are
+		thus waiting to be invoked.  Note that callbacks in
+		the process of being invoked are not counted here.
+		Callbacks in the process of being invoked are those
+		that have been removed from the rcu_data structures
+		queues by rcu_do_batch(), but which have not yet been
+		invoked.
+
+	If there are no callbacks in a given one of the above states,
+	the corresponding character is replaced by ".".
+
+o	"kt" is the per-CPU kernel-thread state.  The digit preceding
+	the first slash is zero if there is no work pending and 1
+	otherwise.  The character between the first pair of slashes is
+	as follows:
+
+	"S"	The kernel thread is stopped, in other words, all
+		CPUs corresponding to this rcu_node structure are
+		offline.
+
+	"R"	The kernel thread is running.
+
+	"W"	The kernel thread is waiting because there is no work
+		for it to do.
+
+	"O"	The kernel thread is waiting because it has been
+		forced off of its designated CPU or because its
+		->cpus_allowed mask permits it to run on other than
+		its designated CPU.
+
+	"Y"	The kernel thread is yielding to avoid hogging CPU.
+
+	"?"	Unknown value, indicates a bug.
+
+	The number after the final slash is the CPU that the kthread
+	is actually running on.
+
+o	"ktl" is the low-order 16 bits (in hexadecimal) of the count of
+	the number of times that this CPU's per-CPU kthread has gone
+	through its loop servicing invoke_rcu_cpu_kthread() requests.
+
 o	"b" is the batch limit for this CPU.  If more than this number
 	of RCU callbacks is ready to invoke, then the remainder will
 	be deferred.
@@ -174,14 +244,14 @@ o "gpnum" is the number of grace periods that have started. It is
 The output of "cat rcu/rcuhier" looks as follows, with very long lines:
 
 c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6
-1/1 .>. 0:127 ^0
-3/3 .>. 0:35 ^0  0/0 .>. 36:71 ^1  0/0 .>. 72:107 ^2  0/0 .>. 108:127 ^3
-3/3f .>. 0:5 ^0  2/3 .>. 6:11 ^1  0/0 .>. 12:17 ^2  0/0 .>. 18:23 ^3  0/0 .>. 24:29 ^4  0/0 .>. 30:35 ^5  0/0 .>. 36:41 ^0  0/0 .>. 42:47 ^1  0/0 .>. 48:53 ^2  0/0 .>. 54:59 ^3  0/0 .>. 60:65 ^4  0/0 .>. 66:71 ^5  0/0 .>. 72:77 ^0  0/0 .>. 78:83 ^1  0/0 .>. 84:89 ^2  0/0 .>. 90:95 ^3  0/0 .>. 96:101 ^4  0/0 .>. 102:107 ^5  0/0 .>. 108:113 ^0  0/0 .>. 114:119 ^1  0/0 .>. 120:125 ^2  0/0 .>. 126:127 ^3
+1/1 ..>. 0:127 ^0
+3/3 ..>. 0:35 ^0  0/0 ..>. 36:71 ^1  0/0 ..>. 72:107 ^2  0/0 ..>. 108:127 ^3
+3/3f ..>. 0:5 ^0  2/3 ..>. 6:11 ^1  0/0 ..>. 12:17 ^2  0/0 ..>. 18:23 ^3  0/0 ..>. 24:29 ^4  0/0 ..>. 30:35 ^5  0/0 ..>. 36:41 ^0  0/0 ..>. 42:47 ^1  0/0 ..>. 48:53 ^2  0/0 ..>. 54:59 ^3  0/0 ..>. 60:65 ^4  0/0 ..>. 66:71 ^5  0/0 ..>. 72:77 ^0  0/0 ..>. 78:83 ^1  0/0 ..>. 84:89 ^2  0/0 ..>. 90:95 ^3  0/0 ..>. 96:101 ^4  0/0 ..>. 102:107 ^5  0/0 ..>. 108:113 ^0  0/0 ..>. 114:119 ^1  0/0 ..>. 120:125 ^2  0/0 ..>. 126:127 ^3
 rcu_bh:
 c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0
-0/1 .>. 0:127 ^0
-0/3 .>. 0:35 ^0  0/0 .>. 36:71 ^1  0/0 .>. 72:107 ^2  0/0 .>. 108:127 ^3
-0/3f .>. 0:5 ^0  0/3 .>. 6:11 ^1  0/0 .>. 12:17 ^2  0/0 .>. 18:23 ^3  0/0 .>. 24:29 ^4  0/0 .>. 30:35 ^5  0/0 .>. 36:41 ^0  0/0 .>. 42:47 ^1  0/0 .>. 48:53 ^2  0/0 .>. 54:59 ^3  0/0 .>. 60:65 ^4  0/0 .>. 66:71 ^5  0/0 .>. 72:77 ^0  0/0 .>. 78:83 ^1  0/0 .>. 84:89 ^2  0/0 .>. 90:95 ^3  0/0 .>. 96:101 ^4  0/0 .>. 102:107 ^5  0/0 .>. 108:113 ^0  0/0 .>. 114:119 ^1  0/0 .>. 120:125 ^2  0/0 .>. 126:127 ^3
+0/1 ..>. 0:127 ^0
+0/3 ..>. 0:35 ^0  0/0 ..>. 36:71 ^1  0/0 ..>. 72:107 ^2  0/0 ..>. 108:127 ^3
+0/3f ..>. 0:5 ^0  0/3 ..>. 6:11 ^1  0/0 ..>. 12:17 ^2  0/0 ..>. 18:23 ^3  0/0 ..>. 24:29 ^4  0/0 ..>. 30:35 ^5  0/0 ..>. 36:41 ^0  0/0 ..>. 42:47 ^1  0/0 ..>. 48:53 ^2  0/0 ..>. 54:59 ^3  0/0 ..>. 60:65 ^4  0/0 ..>. 66:71 ^5  0/0 ..>. 72:77 ^0  0/0 ..>. 78:83 ^1  0/0 ..>. 84:89 ^2  0/0 ..>. 90:95 ^3  0/0 ..>. 96:101 ^4  0/0 ..>. 102:107 ^5  0/0 ..>. 108:113 ^0  0/0 ..>. 114:119 ^1  0/0 ..>. 120:125 ^2  0/0 ..>. 126:127 ^3
 
 This is once again split into "rcu_sched" and "rcu_bh" portions,
 and CONFIG_TREE_PREEMPT_RCU kernels will again have an additional
@@ -240,13 +310,20 @@ o Each element of the form "1/1 0:127 ^0" represents one struct
 		current grace period.
 
 	o	The characters separated by the ">" indicate the state
-		of the blocked-tasks lists.  A "T" preceding the ">"
+		of the blocked-tasks lists.  A "G" preceding the ">"
 		indicates that at least one task blocked in an RCU
 		read-side critical section blocks the current grace
-		period, while a "." preceding the ">" indicates otherwise.
-		The character following the ">" indicates similarly for
-		the next grace period.  A "T" should appear in this
-		field only for rcu-preempt.
+		period, while a "E" preceding the ">" indicates that
+		at least one task blocked in an RCU read-side critical
+		section blocks the current expedited grace period.
+		A "T" character following the ">" indicates that at
+		least one task is blocked within an RCU read-side
+		critical section, regardless of whether any current
+		grace period (expedited or normal) is inconvenienced.
+		A "." character appears if the corresponding condition
+		does not hold, so that "..>." indicates that no tasks
+		are blocked.  In contrast, "GE>T" indicates maximal
+		inconvenience from blocked tasks.
 
 	o	The numbers separated by the ":" are the range of CPUs
 		served by this struct rcu_node.  This can be helpful
@@ -328,6 +405,113 @@ o "nn" is the number of times that this CPU needed nothing. Alert
 	is due to short-circuit evaluation in rcu_pending().
 
 
+The output of "cat rcu/rcutorture" looks as follows:
+
+rcutorture test sequence: 0 (test in progress)
+rcutorture update version number: 615
+
+The first line shows the number of rcutorture tests that have completed
+since boot.  If a test is currently running, the "(test in progress)"
+string will appear as shown above.  The second line shows the number of
+update cycles that the current test has started, or zero if there is
+no test in progress.
+
+
+The output of "cat rcu/rcuboost" looks as follows:
+
+0:5 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=2f95 bt=300f
+    balk: nt=0 egt=989 bt=0 nb=0 ny=0 nos=16
+6:7 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=2f95 bt=300f
+    balk: nt=0 egt=225 bt=0 nb=0 ny=0 nos=6
+
+This information is output only for rcu_preempt.  Each two-line entry
+corresponds to a leaf rcu_node structure.  The fields are as follows:
+
+o	"n:m" is the CPU-number range for the corresponding two-line
+	entry.  In the sample output above, the first entry covers
+	CPUs zero through five and the second entry covers CPUs 6
+	and 7.
+
+o	"tasks=TNEB" gives the state of the various segments of the
+	rnp->blocked_tasks list:
+
+	"T"	This indicates that there are some tasks that blocked
+		while running on one of the corresponding CPUs while
+		in an RCU read-side critical section.
+
+	"N"	This indicates that some of the blocked tasks are preventing
+		the current normal (non-expedited) grace period from
+		completing.
+
+	"E"	This indicates that some of the blocked tasks are preventing
+		the current expedited grace period from completing.
+
+	"B"	This indicates that some of the blocked tasks are in
+		need of RCU priority boosting.
+
+	Each character is replaced with "." if the corresponding
+	condition does not hold.
+
+o	"kt" is the state of the RCU priority-boosting kernel
+	thread associated with the corresponding rcu_node structure.
+	The state can be one of the following:
+
+	"S"	The kernel thread is stopped, in other words, all
+		CPUs corresponding to this rcu_node structure are
+		offline.
+
+	"R"	The kernel thread is running.
+
+	"W"	The kernel thread is waiting because there is no work
+		for it to do.
+
+	"Y"	The kernel thread is yielding to avoid hogging CPU.
+
+	"?"	Unknown value, indicates a bug.
+
+o	"ntb" is the number of tasks boosted.
+
+o	"neb" is the number of tasks boosted in order to complete an
+	expedited grace period.
+
+o	"nnb" is the number of tasks boosted in order to complete a
+	normal (non-expedited) grace period.  When boosting a task
+	that was blocking both an expedited and a normal grace period,
+	it is counted against the expedited total above.
+
+o	"j" is the low-order 16 bits of the jiffies counter in
+	hexadecimal.
+
+o	"bt" is the low-order 16 bits of the value that the jiffies
+	counter will have when we next start boosting, assuming that
+	the current grace period does not end beforehand.  This is
+	also in hexadecimal.
+
+o	"balk: nt" counts the number of times we didn't boost (in
+	other words, we balked) even though it was time to boost because
+	there were no blocked tasks to boost.  This situation occurs
+	when there is one blocked task on one rcu_node structure and
+	none on some other rcu_node structure.
+
+o	"egt" counts the number of times we balked because although
+	there were blocked tasks, none of them were blocking the
+	current grace period, whether expedited or otherwise.
+
+o	"bt" counts the number of times we balked because boosting
+	had already been initiated for the current grace period.
+
+o	"nb" counts the number of times we balked because there
+	was at least one task blocking the current non-expedited grace
+	period that never had blocked.  If it is already running, it
+	just won't help to boost its priority!
+
+o	"ny" counts the number of times we balked because it was
+	not yet time to start boosting.
+
+o	"nos" counts the number of times we balked for other
+	reasons, e.g., the grace period ended first.
+
+
 CONFIG_TINY_RCU and CONFIG_TINY_PREEMPT_RCU debugfs Files and Formats
 
 These implementations of RCU provides a single debugfs file under the
@@ -394,9 +578,9 @@ o "neb" is the number of expedited grace periods that have had
394o "nnb" is the number of normal grace periods that have had 578o "nnb" is the number of normal grace periods that have had
395 to resort to RCU priority boosting since boot. 579 to resort to RCU priority boosting since boot.
396 580
397o "j" is the low-order 12 bits of the jiffies counter in hexadecimal. 581o "j" is the low-order 16 bits of the jiffies counter in hexadecimal.
398 582
399o "bt" is the low-order 12 bits of the value that the jiffies counter 583o "bt" is the low-order 16 bits of the value that the jiffies counter
400 will have at the next time that boosting is scheduled to begin. 584 will have at the next time that boosting is scheduled to begin.
401 585
402o In the line beginning with "normal balk", the fields are as follows: 586o In the line beginning with "normal balk", the fields are as follows:
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index b0b814d75ca1..60740e8ecb37 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -836,7 +836,6 @@ Provides counts of softirq handlers serviced since boot time, for each cpu.
     TASKLET:          0          0          0        290
       SCHED:      27035      26983      26971      26746
     HRTIMER:          0          0          0          0
-        RCU:       1678       1769       2178       2250
 
 
 1.3 IDE devices in /proc/ide
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 6f8adc7f5d7c..e145f2c455cb 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -5100,11 +5100,6 @@ err_set_interrupt:
 	return err;
 }
 
-static void ring_free_rcu(struct rcu_head *head)
-{
-	kfree(container_of(head, struct ixgbe_ring, rcu));
-}
-
 /**
  * ixgbe_clear_interrupt_scheme - Clear the current interrupt scheme settings
  * @adapter: board private structure to clear interrupt scheme on
@@ -5126,7 +5121,7 @@ void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter)
 		/* ixgbe_get_stats64() might access this ring, we must wait
 		 * a grace period before freeing it.
 		 */
-		call_rcu(&ring->rcu, ring_free_rcu);
+		kfree_rcu(ring, rcu);
 		adapter->rx_ring[i] = NULL;
 	}
 
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 78e34e9e4f00..d8e4e69ad0b9 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -603,21 +603,13 @@ static int macvlan_port_create(struct net_device *dev)
 	return err;
 }
 
-static void macvlan_port_rcu_free(struct rcu_head *head)
-{
-	struct macvlan_port *port;
-
-	port = container_of(head, struct macvlan_port, rcu);
-	kfree(port);
-}
-
 static void macvlan_port_destroy(struct net_device *dev)
 {
 	struct macvlan_port *port = macvlan_port_get(dev);
 
 	dev->priv_flags &= ~IFF_MACVLAN_PORT;
 	netdev_rx_handler_unregister(dev);
-	call_rcu(&port->rcu, macvlan_port_rcu_free);
+	kfree_rcu(port, rcu);
 }
 
 static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[])
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index bea0ac750712..6c12989839d9 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -414,7 +414,6 @@ enum
 	TASKLET_SOFTIRQ,
 	SCHED_SOFTIRQ,
 	HRTIMER_SOFTIRQ,
-	RCU_SOFTIRQ,	/* Preferable RCU should always be the last softirq */
 
 	NR_SOFTIRQS
 };
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index ff422d2b7f90..99f9aa7c2804 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -47,6 +47,18 @@
 extern int rcutorture_runnable; /* for sysctl */
 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
 
+#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
+extern void rcutorture_record_test_transition(void);
+extern void rcutorture_record_progress(unsigned long vernum);
+#else
+static inline void rcutorture_record_test_transition(void)
+{
+}
+static inline void rcutorture_record_progress(unsigned long vernum)
+{
+}
+#endif
+
 #define UINT_CMP_GE(a, b)	(UINT_MAX / 2 >= (a) - (b))
 #define UINT_CMP_LT(a, b)	(UINT_MAX / 2 < (a) - (b))
 #define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
@@ -68,7 +80,6 @@ extern void call_rcu_sched(struct rcu_head *head,
 extern void synchronize_sched(void);
 extern void rcu_barrier_bh(void);
 extern void rcu_barrier_sched(void);
-extern int sched_expedited_torture_stats(char *page);
 
 static inline void __rcu_read_lock_bh(void)
 {
@@ -774,6 +785,7 @@ extern struct debug_obj_descr rcuhead_debug_descr;
 
 static inline void debug_rcu_head_queue(struct rcu_head *head)
 {
+	WARN_ON_ONCE((unsigned long)head & 0x3);
 	debug_object_activate(head, &rcuhead_debug_descr);
 	debug_object_active_state(head, &rcuhead_debug_descr,
 				  STATE_RCU_HEAD_READY,
@@ -797,4 +809,60 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
 }
 #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
+static __always_inline bool __is_kfree_rcu_offset(unsigned long offset)
+{
+	return offset < 4096;
+}
+
+static __always_inline
+void __kfree_rcu(struct rcu_head *head, unsigned long offset)
+{
+	typedef void (*rcu_callback)(struct rcu_head *);
+
+	BUILD_BUG_ON(!__builtin_constant_p(offset));
+
+	/* See the kfree_rcu() header comment. */
+	BUILD_BUG_ON(!__is_kfree_rcu_offset(offset));
+
+	call_rcu(head, (rcu_callback)offset);
+}
+
+extern void kfree(const void *);
+
+static inline void __rcu_reclaim(struct rcu_head *head)
+{
+	unsigned long offset = (unsigned long)head->func;
+
+	if (__is_kfree_rcu_offset(offset))
+		kfree((void *)head - offset);
+	else
+		head->func(head);
+}
+
+/**
+ * kfree_rcu() - kfree an object after a grace period.
+ * @ptr:	pointer to kfree
+ * @rcu_head:	the name of the struct rcu_head within the type of @ptr.
+ *
+ * Many rcu callbacks functions just call kfree() on the base structure.
+ * These functions are trivial, but their size adds up, and furthermore
+ * when they are used in a kernel module, that module must invoke the
+ * high-latency rcu_barrier() function at module-unload time.
+ *
+ * The kfree_rcu() function handles this issue.  Rather than encoding a
+ * function address in the embedded rcu_head structure, kfree_rcu() instead
+ * encodes the offset of the rcu_head structure within the base structure.
+ * Because the functions are not allowed in the low-order 4096 bytes of
+ * kernel virtual memory, offsets up to 4095 bytes can be accommodated.
+ * If the offset is larger than 4095 bytes, a compile-time error will
+ * be generated in __kfree_rcu().  If this error is triggered, you can
+ * either fall back to use of call_rcu() or rearrange the structure to
+ * position the rcu_head structure into the first 4096 bytes.
+ *
+ * Note that the allowable offset might decrease in the future, for example,
+ * to allow something like kmem_cache_free_rcu().
+ */
+#define kfree_rcu(ptr, rcu_head)					\
+	__kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
+
 #endif /* __LINUX_RCUPDATE_H */
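
The offset-as-callback encoding that kfree_rcu() relies on can be demonstrated outside the kernel. The following standalone userspace C sketch (with a stripped-down rcu_head, a hypothetical struct foo, and reclaim() standing in for __rcu_reclaim(); the grace-period machinery is elided entirely) shows how a "function pointer" value below 4096 doubles as an offset:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct rcu_head {
	struct rcu_head *next;
	void (*func)(struct rcu_head *head);
};

struct foo {				/* hypothetical example structure */
	int a;
	struct rcu_head rcu;		/* must sit below offset 4096 */
};

/* Mimics __is_kfree_rcu_offset(): no real function lives in the
 * low-order 4096 bytes of the address space, so small "pointer"
 * values can safely encode an offset instead. */
static int is_kfree_offset(unsigned long offset)
{
	return offset < 4096;
}

/* Mimics __rcu_reclaim(): either decode the offset and free the
 * enclosing object, or invoke a genuine callback. */
static void reclaim(struct rcu_head *head)
{
	unsigned long offset = (unsigned long)head->func;

	if (is_kfree_offset(offset))
		free((char *)head - offset);
	else
		head->func(head);
}

int main(void)
{
	struct foo *fp = malloc(sizeof(*fp));

	/* This is what kfree_rcu(fp, rcu) stores in place of a callback. */
	fp->rcu.func = (void (*)(struct rcu_head *))offsetof(struct foo, rcu);
	reclaim(&fp->rcu);		/* frees fp via the encoded offset */
	printf("reclaimed via offset %zu\n", offsetof(struct foo, rcu));
	return 0;
}
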
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 30ebd7c8d874..52b3e0281fd0 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -100,6 +100,14 @@ static inline void rcu_note_context_switch(int cpu)
 }
 
 /*
+ * Take advantage of the fact that there is only one CPU, which
+ * allows us to ignore virtualization-based context switches.
+ */
+static inline void rcu_virt_note_context_switch(int cpu)
+{
+}
+
+/*
  * Return the number of grace periods.
  */
 static inline long rcu_batches_completed(void)
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 3a933482734a..e65d06634dd8 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -35,6 +35,16 @@ extern void rcu_note_context_switch(int cpu);
 extern int rcu_needs_cpu(int cpu);
 extern void rcu_cpu_stall_reset(void);
 
+/*
+ * Note a virtualization-based context switch.  This is simply a
+ * wrapper around rcu_note_context_switch(), which allows TINY_RCU
+ * to save a few bytes.
+ */
+static inline void rcu_virt_note_context_switch(int cpu)
+{
+	rcu_note_context_switch(cpu);
+}
+
 #ifdef CONFIG_TREE_PREEMPT_RCU
 
 extern void exit_rcu(void);
@@ -58,9 +68,12 @@ static inline void synchronize_rcu_bh_expedited(void)
 
 extern void rcu_barrier(void);
 
+extern unsigned long rcutorture_testseq;
+extern unsigned long rcutorture_vernum;
 extern long rcu_batches_completed(void);
 extern long rcu_batches_completed_bh(void);
 extern long rcu_batches_completed_sched(void);
+
 extern void rcu_force_quiescent_state(void);
 extern void rcu_bh_force_quiescent_state(void);
 extern void rcu_sched_force_quiescent_state(void);
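
The new rcu_virt_note_context_switch() above is meant for hypervisor guest-entry paths: entering a guest is a context switch as far as the host's RCU is concerned, so reporting it lets grace periods complete without disturbing the guest. A hypothetical call site (enter_guest_mode() is illustrative only, not an API introduced by this merge):

static void enter_guest_mode(void)
{
	/*
	 * Report the guest entry as a quiescent state before the
	 * world switch.  Under TINY_RCU this compiles to nothing;
	 * under TREE_RCU it forwards to rcu_note_context_switch().
	 */
	rcu_virt_note_context_switch(smp_processor_id());

	/* ... architecture-specific guest entry ... */
}
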
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 505845ddb0be..01e094c6d0ae 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -115,7 +115,6 @@
  * sctp/protocol.c
  */
 extern struct sock *sctp_get_ctl_sock(void);
-extern void sctp_local_addr_free(struct rcu_head *head);
 extern int sctp_copy_local_addr_list(struct sctp_bind_addr *,
 				     sctp_scope_t, gfp_t gfp,
 				     int flags);
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index 1c09820df585..ae045ca7d356 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -20,8 +20,7 @@ struct softirq_action;
 			 softirq_name(BLOCK_IOPOLL),	\
 			 softirq_name(TASKLET),		\
 			 softirq_name(SCHED),		\
-			 softirq_name(HRTIMER),		\
-			 softirq_name(RCU))
+			 softirq_name(HRTIMER))
 
 /**
  * irq_handler_entry - called immediately before the irq action handler
diff --git a/init/Kconfig b/init/Kconfig
index af958ad26d60..4986ecc49e65 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -485,7 +485,7 @@ config TREE_RCU_TRACE
 
 config RCU_BOOST
 	bool "Enable RCU priority boosting"
-	depends on RT_MUTEXES && TINY_PREEMPT_RCU
+	depends on RT_MUTEXES && PREEMPT_RCU
 	default n
 	help
 	  This option boosts the priority of preempted RCU readers that
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 25c7eb52de1a..909a35510af5 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -326,12 +326,6 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
 	return &css_set_table[index];
 }
 
-static void free_css_set_rcu(struct rcu_head *obj)
-{
-	struct css_set *cg = container_of(obj, struct css_set, rcu_head);
-	kfree(cg);
-}
-
 /* We don't maintain the lists running through each css_set to its
  * task until after the first call to cgroup_iter_start(). This
  * reduces the fork()/exit() overhead for people who have cgroups
@@ -375,7 +369,7 @@ static void __put_css_set(struct css_set *cg, int taskexit)
 	}
 
 	write_unlock(&css_set_lock);
-	call_rcu(&cg->rcu_head, free_css_set_rcu);
+	kfree_rcu(cg, rcu_head);
 }
 
 /*
@@ -812,13 +806,6 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp)
 	return ret;
 }
 
-static void free_cgroup_rcu(struct rcu_head *obj)
-{
-	struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head);
-
-	kfree(cgrp);
-}
-
 static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 {
 	/* is dentry a directory ? if so, kfree() associated cgroup */
@@ -856,7 +843,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 		 */
 		BUG_ON(!list_empty(&cgrp->pidlists));
 
-		call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
+		kfree_rcu(cgrp, rcu_head);
 	}
 	iput(inode);
 }
@@ -4623,14 +4610,6 @@ bool css_is_ancestor(struct cgroup_subsys_state *child,
 	return ret;
 }
 
-static void __free_css_id_cb(struct rcu_head *head)
-{
-	struct css_id *id;
-
-	id = container_of(head, struct css_id, rcu_head);
-	kfree(id);
-}
-
 void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
 {
 	struct css_id *id = css->id;
4636 struct css_id *id = css->id; 4615 struct css_id *id = css->id;
@@ -4645,7 +4624,7 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
 	spin_lock(&ss->id_lock);
 	idr_remove(&ss->idr, id->id);
 	spin_unlock(&ss->id_lock);
-	call_rcu(&id->rcu_head, __free_css_id_cb);
+	kfree_rcu(id, rcu_head);
 }
 EXPORT_SYMBOL_GPL(free_css_id);
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0fc34a370ba4..c09767f7db3e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -586,14 +586,6 @@ static void get_ctx(struct perf_event_context *ctx)
 	WARN_ON(!atomic_inc_not_zero(&ctx->refcount));
 }
 
-static void free_ctx(struct rcu_head *head)
-{
-	struct perf_event_context *ctx;
-
-	ctx = container_of(head, struct perf_event_context, rcu_head);
-	kfree(ctx);
-}
-
 static void put_ctx(struct perf_event_context *ctx)
 {
 	if (atomic_dec_and_test(&ctx->refcount)) {
@@ -601,7 +593,7 @@ static void put_ctx(struct perf_event_context *ctx)
 			put_ctx(ctx->parent_ctx);
 		if (ctx->task)
 			put_task_struct(ctx->task);
-		call_rcu(&ctx->rcu_head, free_ctx);
+		kfree_rcu(ctx, rcu_head);
 	}
 }
 
@@ -5331,14 +5323,6 @@ swevent_hlist_deref(struct swevent_htable *swhash)
 			  lockdep_is_held(&swhash->hlist_mutex));
 }
 
-static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
-{
-	struct swevent_hlist *hlist;
-
-	hlist = container_of(rcu_head, struct swevent_hlist, rcu_head);
-	kfree(hlist);
-}
-
 static void swevent_hlist_release(struct swevent_htable *swhash)
 {
 	struct swevent_hlist *hlist = swevent_hlist_deref(swhash);
@@ -5347,7 +5331,7 @@ static void swevent_hlist_release(struct swevent_htable *swhash)
 		return;
 
 	rcu_assign_pointer(swhash->swevent_hlist, NULL);
-	call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu);
+	kfree_rcu(hlist, rcu_head);
 }
 
 static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index f3240e987928..7784bd216b6a 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -142,10 +142,17 @@ static int rcuhead_fixup_init(void *addr, enum debug_obj_state state)
 		 * Ensure that queued callbacks are all executed.
 		 * If we detect that we are nested in a RCU read-side critical
 		 * section, we should simply fail, otherwise we would deadlock.
+		 * In !PREEMPT configurations, there is no way to tell if we are
+		 * in a RCU read-side critical section or not, so we never
+		 * attempt any fixup and just print a warning.
 		 */
+#ifndef CONFIG_PREEMPT
+		WARN_ON_ONCE(1);
+		return 0;
+#endif
 		if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
 		    irqs_disabled()) {
-			WARN_ON(1);
+			WARN_ON_ONCE(1);
 			return 0;
 		}
 		rcu_barrier();
@@ -184,10 +191,17 @@ static int rcuhead_fixup_activate(void *addr, enum debug_obj_state state)
 		 * Ensure that queued callbacks are all executed.
 		 * If we detect that we are nested in a RCU read-side critical
 		 * section, we should simply fail, otherwise we would deadlock.
+		 * In !PREEMPT configurations, there is no way to tell if we are
+		 * in a RCU read-side critical section or not, so we never
+		 * attempt any fixup and just print a warning.
 		 */
+#ifndef CONFIG_PREEMPT
+		WARN_ON_ONCE(1);
+		return 0;
+#endif
 		if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
 		    irqs_disabled()) {
-			WARN_ON(1);
+			WARN_ON_ONCE(1);
 			return 0;
 		}
 		rcu_barrier();
@@ -214,15 +228,17 @@ static int rcuhead_fixup_free(void *addr, enum debug_obj_state state)
 		 * Ensure that queued callbacks are all executed.
 		 * If we detect that we are nested in a RCU read-side critical
 		 * section, we should simply fail, otherwise we would deadlock.
-		 * Note that the machinery to reliably determine whether
-		 * or not we are in an RCU read-side critical section
-		 * exists only in the preemptible RCU implementations
-		 * (TINY_PREEMPT_RCU and TREE_PREEMPT_RCU), which is why
-		 * DEBUG_OBJECTS_RCU_HEAD is disallowed if !PREEMPT.
+		 * In !PREEMPT configurations, there is no way to tell if we are
+		 * in a RCU read-side critical section or not, so we never
+		 * attempt any fixup and just print a warning.
 		 */
+#ifndef CONFIG_PREEMPT
+		WARN_ON_ONCE(1);
+		return 0;
+#endif
 		if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
 		    irqs_disabled()) {
-			WARN_ON(1);
+			WARN_ON_ONCE(1);
 			return 0;
 		}
 		rcu_barrier();
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 0c343b9a46d5..421abfd3641d 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -40,10 +40,10 @@
 static struct task_struct *rcu_kthread_task;
 static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq);
 static unsigned long have_rcu_kthread_work;
-static void invoke_rcu_kthread(void);
 
 /* Forward declarations for rcutiny_plugin.h. */
 struct rcu_ctrlblk;
+static void invoke_rcu_kthread(void);
 static void rcu_process_callbacks(struct rcu_ctrlblk *rcp);
 static int rcu_kthread(void *arg);
 static void __call_rcu(struct rcu_head *head,
@@ -79,36 +79,45 @@ void rcu_exit_nohz(void)
 #endif /* #ifdef CONFIG_NO_HZ */
 
 /*
- * Helper function for rcu_qsctr_inc() and rcu_bh_qsctr_inc().
- * Also disable irqs to avoid confusion due to interrupt handlers
+ * Helper function for rcu_sched_qs() and rcu_bh_qs().
+ * Also irqs are disabled to avoid confusion due to interrupt handlers
  * invoking call_rcu().
  */
 static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
 	if (rcp->rcucblist != NULL &&
 	    rcp->donetail != rcp->curtail) {
 		rcp->donetail = rcp->curtail;
-		local_irq_restore(flags);
 		return 1;
 	}
-	local_irq_restore(flags);
 
 	return 0;
 }
 
 /*
+ * Wake up rcu_kthread() to process callbacks now eligible for invocation
+ * or to boost readers.
+ */
+static void invoke_rcu_kthread(void)
+{
+	have_rcu_kthread_work = 1;
+	wake_up(&rcu_kthread_wq);
+}
+
+/*
  * Record an rcu quiescent state.  And an rcu_bh quiescent state while we
  * are at it, given that any rcu quiescent state is also an rcu_bh
  * quiescent state.  Use "+" instead of "||" to defeat short circuiting.
  */
 void rcu_sched_qs(int cpu)
 {
+	unsigned long flags;
+
+	local_irq_save(flags);
 	if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
 	    rcu_qsctr_help(&rcu_bh_ctrlblk))
 		invoke_rcu_kthread();
+	local_irq_restore(flags);
 }
 
 /*
@@ -116,8 +125,12 @@ void rcu_sched_qs(int cpu)
  */
 void rcu_bh_qs(int cpu)
 {
+	unsigned long flags;
+
+	local_irq_save(flags);
 	if (rcu_qsctr_help(&rcu_bh_ctrlblk))
 		invoke_rcu_kthread();
+	local_irq_restore(flags);
 }
 
 /*
@@ -167,7 +180,7 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 		prefetch(next);
 		debug_rcu_head_unqueue(list);
 		local_bh_disable();
-		list->func(list);
+		__rcu_reclaim(list);
 		local_bh_enable();
 		list = next;
 		RCU_TRACE(cb_count++);
@@ -208,20 +221,6 @@ static int rcu_kthread(void *arg)
 }
 
 /*
- * Wake up rcu_kthread() to process callbacks now eligible for invocation
- * or to boost readers.
- */
-static void invoke_rcu_kthread(void)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	have_rcu_kthread_work = 1;
-	wake_up(&rcu_kthread_wq);
-	local_irq_restore(flags);
-}
-
-/*
  * Wait for a grace period to elapse.  But it is illegal to invoke
  * synchronize_sched() from within an RCU read-side critical section.
  * Therefore, any legal call to synchronize_sched() is a quiescent
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 3cb8e362e883..f259c676195f 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -100,23 +100,28 @@ struct rcu_preempt_ctrlblk {
 	u8 completed;		/* Last grace period completed. */
 				/*  If all three are equal, RCU is idle. */
 #ifdef CONFIG_RCU_BOOST
-	s8 boosted_this_gp;	/* Has boosting already happened? */
 	unsigned long boost_time; /* When to start boosting (jiffies) */
 #endif /* #ifdef CONFIG_RCU_BOOST */
 #ifdef CONFIG_RCU_TRACE
 	unsigned long n_grace_periods;
 #ifdef CONFIG_RCU_BOOST
 	unsigned long n_tasks_boosted;
+				/*  Total number of tasks boosted. */
 	unsigned long n_exp_boosts;
+				/*  Number of tasks boosted for expedited GP. */
 	unsigned long n_normal_boosts;
-	unsigned long n_normal_balk_blkd_tasks;
-	unsigned long n_normal_balk_gp_tasks;
-	unsigned long n_normal_balk_boost_tasks;
-	unsigned long n_normal_balk_boosted;
-	unsigned long n_normal_balk_notyet;
-	unsigned long n_normal_balk_nos;
-	unsigned long n_exp_balk_blkd_tasks;
-	unsigned long n_exp_balk_nos;
+				/*  Number of tasks boosted for normal GP. */
+	unsigned long n_balk_blkd_tasks;
+				/*  Refused to boost: no blocked tasks. */
+	unsigned long n_balk_exp_gp_tasks;
+				/*  Refused to boost: nothing blocking GP. */
+	unsigned long n_balk_boost_tasks;
+				/*  Refused to boost: already boosting. */
+	unsigned long n_balk_notyet;
+				/*  Refused to boost: not yet time. */
+	unsigned long n_balk_nos;
+				/*  Refused to boost: not sure why, though. */
+				/*  This can happen due to race conditions. */
 #endif /* #ifdef CONFIG_RCU_BOOST */
 #endif /* #ifdef CONFIG_RCU_TRACE */
 };
@@ -201,7 +206,6 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t)
 
 #ifdef CONFIG_RCU_BOOST
 static void rcu_initiate_boost_trace(void);
-static void rcu_initiate_exp_boost_trace(void);
 #endif /* #ifdef CONFIG_RCU_BOOST */
 
 /*
@@ -219,41 +223,21 @@ static void show_tiny_preempt_stats(struct seq_file *m)
219 "N."[!rcu_preempt_ctrlblk.gp_tasks], 223 "N."[!rcu_preempt_ctrlblk.gp_tasks],
220 "E."[!rcu_preempt_ctrlblk.exp_tasks]); 224 "E."[!rcu_preempt_ctrlblk.exp_tasks]);
221#ifdef CONFIG_RCU_BOOST 225#ifdef CONFIG_RCU_BOOST
222 seq_printf(m, " ttb=%c btg=", 226 seq_printf(m, "%sttb=%c ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n",
223 "B."[!rcu_preempt_ctrlblk.boost_tasks]); 227 " ",
224 switch (rcu_preempt_ctrlblk.boosted_this_gp) { 228 "B."[!rcu_preempt_ctrlblk.boost_tasks],
225 case -1:
226 seq_puts(m, "exp");
227 break;
228 case 0:
229 seq_puts(m, "no");
230 break;
231 case 1:
232 seq_puts(m, "begun");
233 break;
234 case 2:
235 seq_puts(m, "done");
236 break;
237 default:
238 seq_printf(m, "?%d?", rcu_preempt_ctrlblk.boosted_this_gp);
239 }
240 seq_printf(m, " ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n",
241 rcu_preempt_ctrlblk.n_tasks_boosted, 229 rcu_preempt_ctrlblk.n_tasks_boosted,
242 rcu_preempt_ctrlblk.n_exp_boosts, 230 rcu_preempt_ctrlblk.n_exp_boosts,
243 rcu_preempt_ctrlblk.n_normal_boosts, 231 rcu_preempt_ctrlblk.n_normal_boosts,
244 (int)(jiffies & 0xffff), 232 (int)(jiffies & 0xffff),
245 (int)(rcu_preempt_ctrlblk.boost_time & 0xffff)); 233 (int)(rcu_preempt_ctrlblk.boost_time & 0xffff));
246 seq_printf(m, " %s: nt=%lu gt=%lu bt=%lu b=%lu ny=%lu nos=%lu\n", 234 seq_printf(m, "%s: nt=%lu egt=%lu bt=%lu ny=%lu nos=%lu\n",
247 "normal balk", 235 " balk",
248 rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks, 236 rcu_preempt_ctrlblk.n_balk_blkd_tasks,
249 rcu_preempt_ctrlblk.n_normal_balk_gp_tasks, 237 rcu_preempt_ctrlblk.n_balk_exp_gp_tasks,
250 rcu_preempt_ctrlblk.n_normal_balk_boost_tasks, 238 rcu_preempt_ctrlblk.n_balk_boost_tasks,
251 rcu_preempt_ctrlblk.n_normal_balk_boosted, 239 rcu_preempt_ctrlblk.n_balk_notyet,
252 rcu_preempt_ctrlblk.n_normal_balk_notyet, 240 rcu_preempt_ctrlblk.n_balk_nos);
253 rcu_preempt_ctrlblk.n_normal_balk_nos);
254 seq_printf(m, " exp balk: bt=%lu nos=%lu\n",
255 rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks,
256 rcu_preempt_ctrlblk.n_exp_balk_nos);
257#endif /* #ifdef CONFIG_RCU_BOOST */ 241#endif /* #ifdef CONFIG_RCU_BOOST */
258} 242}
259 243
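
With the boosted_this_gp state machine gone, the CONFIG_RCU_BOOST branch of show_tiny_preempt_stats() is down to two seq_printf() calls. With invented counter values, the resulting debugfs lines would look roughly like:

     ttb=B ntb=12 neb=3 nnb=9 j=0f2a bt=0f31
     balk: nt=4 egt=0 bt=1 ny=2 nos=1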
@@ -271,25 +255,59 @@ static int rcu_boost(void)
271{ 255{
272 unsigned long flags; 256 unsigned long flags;
273 struct rt_mutex mtx; 257 struct rt_mutex mtx;
274 struct list_head *np;
275 struct task_struct *t; 258 struct task_struct *t;
259 struct list_head *tb;
276 260
277 if (rcu_preempt_ctrlblk.boost_tasks == NULL) 261 if (rcu_preempt_ctrlblk.boost_tasks == NULL &&
262 rcu_preempt_ctrlblk.exp_tasks == NULL)
278 return 0; /* Nothing to boost. */ 263 return 0; /* Nothing to boost. */
264
279 raw_local_irq_save(flags); 265 raw_local_irq_save(flags);
280 rcu_preempt_ctrlblk.boosted_this_gp++; 266
281 t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct, 267 /*
282 rcu_node_entry); 268 * Recheck with irqs disabled: all tasks in need of boosting
283 np = rcu_next_node_entry(t); 269 * might exit their RCU read-side critical sections on their own
270 * if we are preempted just before disabling irqs.
271 */
272 if (rcu_preempt_ctrlblk.boost_tasks == NULL &&
273 rcu_preempt_ctrlblk.exp_tasks == NULL) {
274 raw_local_irq_restore(flags);
275 return 0;
276 }
277
278 /*
279 * Preferentially boost tasks blocking expedited grace periods.
280 * This cannot starve the normal grace periods because a second
281 * expedited grace period must boost all blocked tasks, including
282 * those blocking the pre-existing normal grace period.
283 */
284 if (rcu_preempt_ctrlblk.exp_tasks != NULL) {
285 tb = rcu_preempt_ctrlblk.exp_tasks;
286 RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++);
287 } else {
288 tb = rcu_preempt_ctrlblk.boost_tasks;
289 RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++);
290 }
291 RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++);
292
293 /*
294 * We boost task t by manufacturing an rt_mutex that appears to
295 * be held by task t. We leave a pointer to that rt_mutex where
296 * task t can find it, and task t will release the mutex when it
297 * exits its outermost RCU read-side critical section. Then
298 * simply acquiring this artificial rt_mutex will boost task
299 * t's priority. (Thanks to tglx for suggesting this approach!)
300 */
301 t = container_of(tb, struct task_struct, rcu_node_entry);
284 rt_mutex_init_proxy_locked(&mtx, t); 302 rt_mutex_init_proxy_locked(&mtx, t);
285 t->rcu_boost_mutex = &mtx; 303 t->rcu_boost_mutex = &mtx;
286 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED; 304 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
287 raw_local_irq_restore(flags); 305 raw_local_irq_restore(flags);
288 rt_mutex_lock(&mtx); 306 rt_mutex_lock(&mtx);
289 RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++); 307 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
290 rcu_preempt_ctrlblk.boosted_this_gp++; 308
291 rt_mutex_unlock(&mtx); 309 return rcu_preempt_ctrlblk.boost_tasks != NULL ||
292 return rcu_preempt_ctrlblk.boost_tasks != NULL; 310 rcu_preempt_ctrlblk.exp_tasks != NULL;
293} 311}
294 312
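
Stripped of the tracing and list bookkeeping, the proxy-lock trick described in the comment above boils down to four steps; task t's side of the handshake (releasing t->rcu_boost_mutex from rcu_read_unlock_special()) is not shown in this hunk:

    struct rt_mutex mtx;

    rt_mutex_init_proxy_locked(&mtx, t); /* pretend t already holds mtx */
    t->rcu_boost_mutex = &mtx;           /* where t will find it to unlock */
    rt_mutex_lock(&mtx);                 /* PI boosts t until t releases mtx */
    rt_mutex_unlock(&mtx);               /* now ours; release to keep lockdep happy */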
295/* 313/*
@@ -304,42 +322,25 @@ static int rcu_boost(void)
304 */ 322 */
305static int rcu_initiate_boost(void) 323static int rcu_initiate_boost(void)
306{ 324{
307 if (!rcu_preempt_blocked_readers_cgp()) { 325 if (!rcu_preempt_blocked_readers_cgp() &&
308 RCU_TRACE(rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks++); 326 rcu_preempt_ctrlblk.exp_tasks == NULL) {
327 RCU_TRACE(rcu_preempt_ctrlblk.n_balk_exp_gp_tasks++);
309 return 0; 328 return 0;
310 } 329 }
311 if (rcu_preempt_ctrlblk.gp_tasks != NULL && 330 if (rcu_preempt_ctrlblk.exp_tasks != NULL ||
312 rcu_preempt_ctrlblk.boost_tasks == NULL && 331 (rcu_preempt_ctrlblk.gp_tasks != NULL &&
313 rcu_preempt_ctrlblk.boosted_this_gp == 0 && 332 rcu_preempt_ctrlblk.boost_tasks == NULL &&
314 ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) { 333 ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))) {
315 rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks; 334 if (rcu_preempt_ctrlblk.exp_tasks == NULL)
335 rcu_preempt_ctrlblk.boost_tasks =
336 rcu_preempt_ctrlblk.gp_tasks;
316 invoke_rcu_kthread(); 337 invoke_rcu_kthread();
317 RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++);
318 } else 338 } else
319 RCU_TRACE(rcu_initiate_boost_trace()); 339 RCU_TRACE(rcu_initiate_boost_trace());
320 return 1; 340 return 1;
321} 341}
322 342
323/* 343#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
324 * Initiate boosting for an expedited grace period.
325 */
326static void rcu_initiate_expedited_boost(void)
327{
328 unsigned long flags;
329
330 raw_local_irq_save(flags);
331 if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) {
332 rcu_preempt_ctrlblk.boost_tasks =
333 rcu_preempt_ctrlblk.blkd_tasks.next;
334 rcu_preempt_ctrlblk.boosted_this_gp = -1;
335 invoke_rcu_kthread();
336 RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++);
337 } else
338 RCU_TRACE(rcu_initiate_exp_boost_trace());
339 raw_local_irq_restore(flags);
340}
341
342#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000);
343 344
344/* 345/*
345 * Do priority-boost accounting for the start of a new grace period. 346 * Do priority-boost accounting for the start of a new grace period.
@@ -347,8 +348,6 @@ static void rcu_initiate_expedited_boost(void)
347static void rcu_preempt_boost_start_gp(void) 348static void rcu_preempt_boost_start_gp(void)
348{ 349{
349 rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; 350 rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
350 if (rcu_preempt_ctrlblk.boosted_this_gp > 0)
351 rcu_preempt_ctrlblk.boosted_this_gp = 0;
352} 351}
353 352
354#else /* #ifdef CONFIG_RCU_BOOST */ 353#else /* #ifdef CONFIG_RCU_BOOST */
@@ -372,13 +371,6 @@ static int rcu_initiate_boost(void)
372} 371}
373 372
374/* 373/*
375 * If there is no RCU priority boosting, we don't initiate expedited boosting.
376 */
377static void rcu_initiate_expedited_boost(void)
378{
379}
380
381/*
382 * If there is no RCU priority boosting, nothing to do at grace-period start. 374 * If there is no RCU priority boosting, nothing to do at grace-period start.
383 */ 375 */
384static void rcu_preempt_boost_start_gp(void) 376static void rcu_preempt_boost_start_gp(void)
@@ -418,7 +410,7 @@ static void rcu_preempt_cpu_qs(void)
418 if (!rcu_preempt_gp_in_progress()) 410 if (!rcu_preempt_gp_in_progress())
419 return; 411 return;
420 /* 412 /*
421 * Check up on boosting. If there are no readers blocking the 413 * Check up on boosting. If there are readers blocking the
422 * current grace period, leave. 414 * current grace period, leave.
423 */ 415 */
424 if (rcu_initiate_boost()) 416 if (rcu_initiate_boost())
@@ -578,7 +570,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
578 empty = !rcu_preempt_blocked_readers_cgp(); 570 empty = !rcu_preempt_blocked_readers_cgp();
579 empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; 571 empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
580 np = rcu_next_node_entry(t); 572 np = rcu_next_node_entry(t);
581 list_del(&t->rcu_node_entry); 573 list_del_init(&t->rcu_node_entry);
582 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) 574 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
583 rcu_preempt_ctrlblk.gp_tasks = np; 575 rcu_preempt_ctrlblk.gp_tasks = np;
584 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) 576 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
@@ -587,7 +579,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
587 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks) 579 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks)
588 rcu_preempt_ctrlblk.boost_tasks = np; 580 rcu_preempt_ctrlblk.boost_tasks = np;
589#endif /* #ifdef CONFIG_RCU_BOOST */ 581#endif /* #ifdef CONFIG_RCU_BOOST */
590 INIT_LIST_HEAD(&t->rcu_node_entry);
591 582
592 /* 583 /*
593 * If this was the last task on the current list, and if 584 * If this was the last task on the current list, and if
@@ -812,13 +803,16 @@ void synchronize_rcu_expedited(void)
812 rpcp->exp_tasks = rpcp->blkd_tasks.next; 803 rpcp->exp_tasks = rpcp->blkd_tasks.next;
813 if (rpcp->exp_tasks == &rpcp->blkd_tasks) 804 if (rpcp->exp_tasks == &rpcp->blkd_tasks)
814 rpcp->exp_tasks = NULL; 805 rpcp->exp_tasks = NULL;
815 local_irq_restore(flags);
816 806
817 /* Wait for tail of ->blkd_tasks list to drain. */ 807 /* Wait for tail of ->blkd_tasks list to drain. */
818 if (rcu_preempted_readers_exp()) 808 if (!rcu_preempted_readers_exp())
819 rcu_initiate_expedited_boost(); 809 local_irq_restore(flags);
810 else {
811 rcu_initiate_boost();
812 local_irq_restore(flags);
820 wait_event(sync_rcu_preempt_exp_wq, 813 wait_event(sync_rcu_preempt_exp_wq,
821 !rcu_preempted_readers_exp()); 814 !rcu_preempted_readers_exp());
815 }
822 816
823 /* Clean up and exit. */ 817 /* Clean up and exit. */
824 barrier(); /* ensure expedited GP seen before counter increment. */ 818 barrier(); /* ensure expedited GP seen before counter increment. */
@@ -931,24 +925,17 @@ void __init rcu_scheduler_starting(void)
931 925
932static void rcu_initiate_boost_trace(void) 926static void rcu_initiate_boost_trace(void)
933{ 927{
934 if (rcu_preempt_ctrlblk.gp_tasks == NULL) 928 if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks))
935 rcu_preempt_ctrlblk.n_normal_balk_gp_tasks++; 929 rcu_preempt_ctrlblk.n_balk_blkd_tasks++;
930 else if (rcu_preempt_ctrlblk.gp_tasks == NULL &&
931 rcu_preempt_ctrlblk.exp_tasks == NULL)
932 rcu_preempt_ctrlblk.n_balk_exp_gp_tasks++;
936 else if (rcu_preempt_ctrlblk.boost_tasks != NULL) 933 else if (rcu_preempt_ctrlblk.boost_tasks != NULL)
937 rcu_preempt_ctrlblk.n_normal_balk_boost_tasks++; 934 rcu_preempt_ctrlblk.n_balk_boost_tasks++;
938 else if (rcu_preempt_ctrlblk.boosted_this_gp != 0)
939 rcu_preempt_ctrlblk.n_normal_balk_boosted++;
940 else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) 935 else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))
941 rcu_preempt_ctrlblk.n_normal_balk_notyet++; 936 rcu_preempt_ctrlblk.n_balk_notyet++;
942 else
943 rcu_preempt_ctrlblk.n_normal_balk_nos++;
944}
945
946static void rcu_initiate_exp_boost_trace(void)
947{
948 if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks))
949 rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks++;
950 else 937 else
951 rcu_preempt_ctrlblk.n_exp_balk_nos++; 938 rcu_preempt_ctrlblk.n_balk_nos++;
952} 939}
953 940
954#endif /* #ifdef CONFIG_RCU_BOOST */ 941#endif /* #ifdef CONFIG_RCU_BOOST */
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index c224da41890c..2e138db03382 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -131,7 +131,7 @@ struct rcu_torture {
131 131
132static LIST_HEAD(rcu_torture_freelist); 132static LIST_HEAD(rcu_torture_freelist);
133static struct rcu_torture __rcu *rcu_torture_current; 133static struct rcu_torture __rcu *rcu_torture_current;
134static long rcu_torture_current_version; 134static unsigned long rcu_torture_current_version;
135static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; 135static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN];
136static DEFINE_SPINLOCK(rcu_torture_lock); 136static DEFINE_SPINLOCK(rcu_torture_lock);
137static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) = 137static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) =
@@ -146,8 +146,6 @@ static atomic_t n_rcu_torture_mberror;
146static atomic_t n_rcu_torture_error; 146static atomic_t n_rcu_torture_error;
147static long n_rcu_torture_boost_ktrerror; 147static long n_rcu_torture_boost_ktrerror;
148static long n_rcu_torture_boost_rterror; 148static long n_rcu_torture_boost_rterror;
149static long n_rcu_torture_boost_allocerror;
150static long n_rcu_torture_boost_afferror;
151static long n_rcu_torture_boost_failure; 149static long n_rcu_torture_boost_failure;
152static long n_rcu_torture_boosts; 150static long n_rcu_torture_boosts;
153static long n_rcu_torture_timers; 151static long n_rcu_torture_timers;
@@ -163,11 +161,11 @@ static int stutter_pause_test;
163#endif 161#endif
164int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; 162int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
165 163
166#ifdef CONFIG_RCU_BOOST 164#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU)
167#define rcu_can_boost() 1 165#define rcu_can_boost() 1
168#else /* #ifdef CONFIG_RCU_BOOST */ 166#else /* #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */
169#define rcu_can_boost() 0 167#define rcu_can_boost() 0
170#endif /* #else #ifdef CONFIG_RCU_BOOST */ 168#endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */
171 169
172static unsigned long boost_starttime; /* jiffies of next boost test start. */ 170static unsigned long boost_starttime; /* jiffies of next boost test start. */
173DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ 171DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
@@ -751,6 +749,7 @@ static int rcu_torture_boost(void *arg)
751 n_rcu_torture_boost_rterror++; 749 n_rcu_torture_boost_rterror++;
752 } 750 }
753 751
752 init_rcu_head_on_stack(&rbi.rcu);
754 /* Each pass through the following loop does one boost-test cycle. */ 753 /* Each pass through the following loop does one boost-test cycle. */
755 do { 754 do {
756 /* Wait for the next test interval. */ 755 /* Wait for the next test interval. */
@@ -810,6 +809,7 @@ checkwait: rcu_stutter_wait("rcu_torture_boost");
810 809
811 /* Clean up and exit. */ 810 /* Clean up and exit. */
812 VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); 811 VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping");
812 destroy_rcu_head_on_stack(&rbi.rcu);
813 rcutorture_shutdown_absorb("rcu_torture_boost"); 813 rcutorture_shutdown_absorb("rcu_torture_boost");
814 while (!kthread_should_stop() || rbi.inflight) 814 while (!kthread_should_stop() || rbi.inflight)
815 schedule_timeout_uninterruptible(1); 815 schedule_timeout_uninterruptible(1);
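
The new init_rcu_head_on_stack()/destroy_rcu_head_on_stack() pair keeps CONFIG_DEBUG_OBJECTS_RCU_HEAD from flagging the on-stack rbi.rcu as a bogus callback. The general pattern, sketched with an illustrative my_callback and an elided completion wait:

    struct rcu_head rh;             /* lives on this function's stack */

    init_rcu_head_on_stack(&rh);    /* register with debug-objects */
    call_rcu(&rh, my_callback);     /* my_callback is a placeholder name */
    /* ... wait until my_callback has run before leaving this frame ... */
    destroy_rcu_head_on_stack(&rh); /* unregister before the frame dies */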
@@ -886,7 +886,7 @@ rcu_torture_writer(void *arg)
886 old_rp->rtort_pipe_count++; 886 old_rp->rtort_pipe_count++;
887 cur_ops->deferred_free(old_rp); 887 cur_ops->deferred_free(old_rp);
888 } 888 }
889 rcu_torture_current_version++; 889 rcutorture_record_progress(++rcu_torture_current_version);
890 oldbatch = cur_ops->completed(); 890 oldbatch = cur_ops->completed();
891 rcu_stutter_wait("rcu_torture_writer"); 891 rcu_stutter_wait("rcu_torture_writer");
892 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); 892 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
@@ -1066,8 +1066,8 @@ rcu_torture_printk(char *page)
1066 } 1066 }
1067 cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); 1067 cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
1068 cnt += sprintf(&page[cnt], 1068 cnt += sprintf(&page[cnt],
1069 "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d " 1069 "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d "
1070 "rtmbe: %d rtbke: %ld rtbre: %ld rtbae: %ld rtbafe: %ld " 1070 "rtmbe: %d rtbke: %ld rtbre: %ld "
1071 "rtbf: %ld rtb: %ld nt: %ld", 1071 "rtbf: %ld rtb: %ld nt: %ld",
1072 rcu_torture_current, 1072 rcu_torture_current,
1073 rcu_torture_current_version, 1073 rcu_torture_current_version,
@@ -1078,16 +1078,12 @@ rcu_torture_printk(char *page)
1078 atomic_read(&n_rcu_torture_mberror), 1078 atomic_read(&n_rcu_torture_mberror),
1079 n_rcu_torture_boost_ktrerror, 1079 n_rcu_torture_boost_ktrerror,
1080 n_rcu_torture_boost_rterror, 1080 n_rcu_torture_boost_rterror,
1081 n_rcu_torture_boost_allocerror,
1082 n_rcu_torture_boost_afferror,
1083 n_rcu_torture_boost_failure, 1081 n_rcu_torture_boost_failure,
1084 n_rcu_torture_boosts, 1082 n_rcu_torture_boosts,
1085 n_rcu_torture_timers); 1083 n_rcu_torture_timers);
1086 if (atomic_read(&n_rcu_torture_mberror) != 0 || 1084 if (atomic_read(&n_rcu_torture_mberror) != 0 ||
1087 n_rcu_torture_boost_ktrerror != 0 || 1085 n_rcu_torture_boost_ktrerror != 0 ||
1088 n_rcu_torture_boost_rterror != 0 || 1086 n_rcu_torture_boost_rterror != 0 ||
1089 n_rcu_torture_boost_allocerror != 0 ||
1090 n_rcu_torture_boost_afferror != 0 ||
1091 n_rcu_torture_boost_failure != 0) 1087 n_rcu_torture_boost_failure != 0)
1092 cnt += sprintf(&page[cnt], " !!!"); 1088 cnt += sprintf(&page[cnt], " !!!");
1093 cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG); 1089 cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
@@ -1331,6 +1327,7 @@ rcu_torture_cleanup(void)
1331 int i; 1327 int i;
1332 1328
1333 mutex_lock(&fullstop_mutex); 1329 mutex_lock(&fullstop_mutex);
1330 rcutorture_record_test_transition();
1334 if (fullstop == FULLSTOP_SHUTDOWN) { 1331 if (fullstop == FULLSTOP_SHUTDOWN) {
1335 printk(KERN_WARNING /* but going down anyway, so... */ 1332 printk(KERN_WARNING /* but going down anyway, so... */
1336 "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); 1333 "Concurrent 'rmmod rcutorture' and shutdown illegal!\n");
@@ -1486,8 +1483,6 @@ rcu_torture_init(void)
1486 atomic_set(&n_rcu_torture_error, 0); 1483 atomic_set(&n_rcu_torture_error, 0);
1487 n_rcu_torture_boost_ktrerror = 0; 1484 n_rcu_torture_boost_ktrerror = 0;
1488 n_rcu_torture_boost_rterror = 0; 1485 n_rcu_torture_boost_rterror = 0;
1489 n_rcu_torture_boost_allocerror = 0;
1490 n_rcu_torture_boost_afferror = 0;
1491 n_rcu_torture_boost_failure = 0; 1486 n_rcu_torture_boost_failure = 0;
1492 n_rcu_torture_boosts = 0; 1487 n_rcu_torture_boosts = 0;
1493 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) 1488 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
@@ -1624,6 +1619,7 @@ rcu_torture_init(void)
1624 } 1619 }
1625 } 1620 }
1626 register_reboot_notifier(&rcutorture_shutdown_nb); 1621 register_reboot_notifier(&rcutorture_shutdown_nb);
1622 rcutorture_record_test_transition();
1627 mutex_unlock(&fullstop_mutex); 1623 mutex_unlock(&fullstop_mutex);
1628 return 0; 1624 return 0;
1629 1625
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index dd4aea806f8e..e486f7c3ffb8 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -47,6 +47,8 @@
47#include <linux/mutex.h> 47#include <linux/mutex.h>
48#include <linux/time.h> 48#include <linux/time.h>
49#include <linux/kernel_stat.h> 49#include <linux/kernel_stat.h>
50#include <linux/wait.h>
51#include <linux/kthread.h>
50 52
51#include "rcutree.h" 53#include "rcutree.h"
52 54
@@ -79,10 +81,41 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
79struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); 81struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
80DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); 82DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
81 83
84static struct rcu_state *rcu_state;
85
82int rcu_scheduler_active __read_mostly; 86int rcu_scheduler_active __read_mostly;
83EXPORT_SYMBOL_GPL(rcu_scheduler_active); 87EXPORT_SYMBOL_GPL(rcu_scheduler_active);
84 88
85/* 89/*
90 * Control variables for per-CPU and per-rcu_node kthreads. These
91 * handle all flavors of RCU.
92 */
93static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
94DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
95DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu);
96DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
97static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq);
98DEFINE_PER_CPU(char, rcu_cpu_has_work);
99static char rcu_kthreads_spawnable;
100
101static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
102static void invoke_rcu_cpu_kthread(void);
103
104#define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */
105
106/*
107 * Track the rcutorture test sequence number and the update version
108 * number within a given test. The rcutorture_testseq is incremented
109 * on every rcutorture module load and unload, so has an odd value
110 * when a test is running. The rcutorture_vernum is set to zero
111 * when rcutorture starts and is incremented on each rcutorture update.
112 * These variables enable correlating rcutorture output with the
113 * RCU tracing information.
114 */
115unsigned long rcutorture_testseq;
116unsigned long rcutorture_vernum;
117
118/*
86 * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s 119 * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s
87 * permit this function to be invoked without holding the root rcu_node 120 * permit this function to be invoked without holding the root rcu_node
88 * structure's ->lock, but of course results can be subject to change. 121 * structure's ->lock, but of course results can be subject to change.
@@ -124,6 +157,7 @@ void rcu_note_context_switch(int cpu)
124 rcu_sched_qs(cpu); 157 rcu_sched_qs(cpu);
125 rcu_preempt_note_context_switch(cpu); 158 rcu_preempt_note_context_switch(cpu);
126} 159}
160EXPORT_SYMBOL_GPL(rcu_note_context_switch);
127 161
128#ifdef CONFIG_NO_HZ 162#ifdef CONFIG_NO_HZ
129DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { 163DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
@@ -140,10 +174,8 @@ module_param(blimit, int, 0);
140module_param(qhimark, int, 0); 174module_param(qhimark, int, 0);
141module_param(qlowmark, int, 0); 175module_param(qlowmark, int, 0);
142 176
143#ifdef CONFIG_RCU_CPU_STALL_DETECTOR 177int rcu_cpu_stall_suppress __read_mostly;
144int rcu_cpu_stall_suppress __read_mostly = RCU_CPU_STALL_SUPPRESS_INIT;
145module_param(rcu_cpu_stall_suppress, int, 0644); 178module_param(rcu_cpu_stall_suppress, int, 0644);
146#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
147 179
148static void force_quiescent_state(struct rcu_state *rsp, int relaxed); 180static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
149static int rcu_pending(int cpu); 181static int rcu_pending(int cpu);
@@ -176,6 +208,31 @@ void rcu_bh_force_quiescent_state(void)
176EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); 208EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
177 209
178/* 210/*
211 * Record the number of times rcutorture tests have been initiated and
212 * terminated. This information allows the debugfs tracing stats to be
213 * correlated to the rcutorture messages, even when the rcutorture module
214 * is being repeatedly loaded and unloaded. In other words, we cannot
215 * store this state in rcutorture itself.
216 */
217void rcutorture_record_test_transition(void)
218{
219 rcutorture_testseq++;
220 rcutorture_vernum = 0;
221}
222EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);
223
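
Because the sequence number advances once at test start and once at test end, its low-order bit says whether a test is in flight. A hypothetical consumer-side helper:

    /* Hypothetical: nonzero while an rcutorture test is running. */
    static inline int rcutorture_test_running(void)
    {
            return rcutorture_testseq & 1;
    }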
224/*
225 * Record the number of writer passes through the current rcutorture test.
226 * This is also used to correlate debugfs tracing stats with the rcutorture
227 * messages.
228 */
229void rcutorture_record_progress(unsigned long vernum)
230{
231 rcutorture_vernum++;
232}
233EXPORT_SYMBOL_GPL(rcutorture_record_progress);
234
235/*
179 * Force a quiescent state for RCU-sched. 236 * Force a quiescent state for RCU-sched.
180 */ 237 */
181void rcu_sched_force_quiescent_state(void) 238void rcu_sched_force_quiescent_state(void)
@@ -234,8 +291,8 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
234 return 1; 291 return 1;
235 } 292 }
236 293
237 /* If preemptable RCU, no point in sending reschedule IPI. */ 294 /* If preemptible RCU, no point in sending reschedule IPI. */
238 if (rdp->preemptable) 295 if (rdp->preemptible)
239 return 0; 296 return 0;
240 297
241 /* The CPU is online, so send it a reschedule IPI. */ 298 /* The CPU is online, so send it a reschedule IPI. */
@@ -450,8 +507,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
450 507
451#endif /* #else #ifdef CONFIG_NO_HZ */ 508#endif /* #else #ifdef CONFIG_NO_HZ */
452 509
453#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
454
455int rcu_cpu_stall_suppress __read_mostly; 510int rcu_cpu_stall_suppress __read_mostly;
456 511
457static void record_gp_stall_check_time(struct rcu_state *rsp) 512static void record_gp_stall_check_time(struct rcu_state *rsp)
@@ -537,21 +592,24 @@ static void print_cpu_stall(struct rcu_state *rsp)
537 592
538static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) 593static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
539{ 594{
540 long delta; 595 unsigned long j;
596 unsigned long js;
541 struct rcu_node *rnp; 597 struct rcu_node *rnp;
542 598
543 if (rcu_cpu_stall_suppress) 599 if (rcu_cpu_stall_suppress)
544 return; 600 return;
545 delta = jiffies - ACCESS_ONCE(rsp->jiffies_stall); 601 j = ACCESS_ONCE(jiffies);
602 js = ACCESS_ONCE(rsp->jiffies_stall);
546 rnp = rdp->mynode; 603 rnp = rdp->mynode;
547 if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && delta >= 0) { 604 if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) {
548 605
549 /* We haven't checked in, so go dump stack. */ 606 /* We haven't checked in, so go dump stack. */
550 print_cpu_stall(rsp); 607 print_cpu_stall(rsp);
551 608
552 } else if (rcu_gp_in_progress(rsp) && delta >= RCU_STALL_RAT_DELAY) { 609 } else if (rcu_gp_in_progress(rsp) &&
610 ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) {
553 611
554 /* They had two time units to dump stack, so complain. */ 612 /* They had a few time units to dump stack, so complain. */
555 print_other_cpu_stall(rsp); 613 print_other_cpu_stall(rsp);
556 } 614 }
557} 615}
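
ULONG_CMP_GE() makes both stall checks immune to jiffies wraparound. It is defined in rcupdate.h as an unsigned-subtraction test:

    #define ULONG_CMP_GE(a, b)      (ULONG_MAX / 2 >= (a) - (b))

Unsigned subtraction wraps modulo 2^BITS_PER_LONG, so (j - js) stays small whenever j is at or after js, even if the jiffies counter itself wrapped between the two ACCESS_ONCE() samples.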
@@ -587,26 +645,6 @@ static void __init check_cpu_stall_init(void)
587 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); 645 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
588} 646}
589 647
590#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
591
592static void record_gp_stall_check_time(struct rcu_state *rsp)
593{
594}
595
596static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
597{
598}
599
600void rcu_cpu_stall_reset(void)
601{
602}
603
604static void __init check_cpu_stall_init(void)
605{
606}
607
608#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
609
610/* 648/*
611 * Update CPU-local rcu_data state to record the newly noticed grace period. 649 * Update CPU-local rcu_data state to record the newly noticed grace period.
612 * This is used both when we started the grace period and when we notice 650 * This is used both when we started the grace period and when we notice
@@ -809,6 +847,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
809 rnp->completed = rsp->completed; 847 rnp->completed = rsp->completed;
810 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ 848 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
811 rcu_start_gp_per_cpu(rsp, rnp, rdp); 849 rcu_start_gp_per_cpu(rsp, rnp, rdp);
850 rcu_preempt_boost_start_gp(rnp);
812 raw_spin_unlock_irqrestore(&rnp->lock, flags); 851 raw_spin_unlock_irqrestore(&rnp->lock, flags);
813 return; 852 return;
814 } 853 }
@@ -844,6 +883,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
844 rnp->completed = rsp->completed; 883 rnp->completed = rsp->completed;
845 if (rnp == rdp->mynode) 884 if (rnp == rdp->mynode)
846 rcu_start_gp_per_cpu(rsp, rnp, rdp); 885 rcu_start_gp_per_cpu(rsp, rnp, rdp);
886 rcu_preempt_boost_start_gp(rnp);
847 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 887 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
848 } 888 }
849 889
@@ -864,7 +904,12 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
864static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) 904static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
865 __releases(rcu_get_root(rsp)->lock) 905 __releases(rcu_get_root(rsp)->lock)
866{ 906{
907 unsigned long gp_duration;
908
867 WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 909 WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
910 gp_duration = jiffies - rsp->gp_start;
911 if (gp_duration > rsp->gp_max)
912 rsp->gp_max = gp_duration;
868 rsp->completed = rsp->gpnum; 913 rsp->completed = rsp->gpnum;
869 rsp->signaled = RCU_GP_IDLE; 914 rsp->signaled = RCU_GP_IDLE;
870 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ 915 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
@@ -894,7 +939,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
894 return; 939 return;
895 } 940 }
896 rnp->qsmask &= ~mask; 941 rnp->qsmask &= ~mask;
897 if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) { 942 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
898 943
899 /* Other bits still set at this level, so done. */ 944 /* Other bits still set at this level, so done. */
900 raw_spin_unlock_irqrestore(&rnp->lock, flags); 945 raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -1037,6 +1082,8 @@ static void rcu_send_cbs_to_online(struct rcu_state *rsp)
1037/* 1082/*
1038 * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy 1083 * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy
1039 * and move all callbacks from the outgoing CPU to the current one. 1084 * and move all callbacks from the outgoing CPU to the current one.
1085 * There can only be one CPU hotplug operation at a time, so no other
1086 * CPU can be attempting to update rcu_cpu_kthread_task.
1040 */ 1087 */
1041static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) 1088static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
1042{ 1089{
@@ -1045,6 +1092,14 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
1045 int need_report = 0; 1092 int need_report = 0;
1046 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 1093 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1047 struct rcu_node *rnp; 1094 struct rcu_node *rnp;
1095 struct task_struct *t;
1096
1097 /* Stop the CPU's kthread. */
1098 t = per_cpu(rcu_cpu_kthread_task, cpu);
1099 if (t != NULL) {
1100 per_cpu(rcu_cpu_kthread_task, cpu) = NULL;
1101 kthread_stop(t);
1102 }
1048 1103
1049 /* Exclude any attempts to start a new grace period. */ 1104 /* Exclude any attempts to start a new grace period. */
1050 raw_spin_lock_irqsave(&rsp->onofflock, flags); 1105 raw_spin_lock_irqsave(&rsp->onofflock, flags);
@@ -1082,6 +1137,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
1082 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1137 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1083 if (need_report & RCU_OFL_TASKS_EXP_GP) 1138 if (need_report & RCU_OFL_TASKS_EXP_GP)
1084 rcu_report_exp_rnp(rsp, rnp); 1139 rcu_report_exp_rnp(rsp, rnp);
1140 rcu_node_kthread_setaffinity(rnp, -1);
1085} 1141}
1086 1142
1087/* 1143/*
@@ -1143,7 +1199,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1143 next = list->next; 1199 next = list->next;
1144 prefetch(next); 1200 prefetch(next);
1145 debug_rcu_head_unqueue(list); 1201 debug_rcu_head_unqueue(list);
1146 list->func(list); 1202 __rcu_reclaim(list);
1147 list = next; 1203 list = next;
1148 if (++count >= rdp->blimit) 1204 if (++count >= rdp->blimit)
1149 break; 1205 break;
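
Swapping list->func(list) for __rcu_reclaim() is what makes the merge's many kfree_rcu() conversions work: a callback whose function pointer is really a small offset is recognized and turned into a kfree() of the enclosing object. From memory, the helper in kernel/rcu.h reads roughly:

    static inline void __rcu_reclaim(struct rcu_head *head)
    {
            unsigned long offset = (unsigned long)head->func;

            if (__is_kfree_rcu_offset(offset))
                    kfree((void *)head - offset);   /* kfree_rcu() case */
            else
                    head->func(head);               /* ordinary callback */
    }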
@@ -1179,7 +1235,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1179 1235
1180 /* Re-raise the RCU softirq if there are callbacks remaining. */ 1236 /* Re-raise the RCU softirq if there are callbacks remaining. */
1181 if (cpu_has_callbacks_ready_to_invoke(rdp)) 1237 if (cpu_has_callbacks_ready_to_invoke(rdp))
1182 raise_softirq(RCU_SOFTIRQ); 1238 invoke_rcu_cpu_kthread();
1183} 1239}
1184 1240
1185/* 1241/*
@@ -1225,7 +1281,7 @@ void rcu_check_callbacks(int cpu, int user)
1225 } 1281 }
1226 rcu_preempt_check_callbacks(cpu); 1282 rcu_preempt_check_callbacks(cpu);
1227 if (rcu_pending(cpu)) 1283 if (rcu_pending(cpu))
1228 raise_softirq(RCU_SOFTIRQ); 1284 invoke_rcu_cpu_kthread();
1229} 1285}
1230 1286
1231#ifdef CONFIG_SMP 1287#ifdef CONFIG_SMP
@@ -1233,6 +1289,8 @@ void rcu_check_callbacks(int cpu, int user)
1233/* 1289/*
1234 * Scan the leaf rcu_node structures, processing dyntick state for any that 1290 * Scan the leaf rcu_node structures, processing dyntick state for any that
1235 * have not yet encountered a quiescent state, using the function specified. 1291 * have not yet encountered a quiescent state, using the function specified.
1292 * Also initiate boosting for any threads blocked on the root rcu_node.
1293 *
1236 * The caller must have suppressed start of new grace periods. 1294 * The caller must have suppressed start of new grace periods.
1237 */ 1295 */
1238static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) 1296static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
@@ -1251,7 +1309,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
1251 return; 1309 return;
1252 } 1310 }
1253 if (rnp->qsmask == 0) { 1311 if (rnp->qsmask == 0) {
1254 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1312 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
1255 continue; 1313 continue;
1256 } 1314 }
1257 cpu = rnp->grplo; 1315 cpu = rnp->grplo;
@@ -1269,6 +1327,11 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
1269 } 1327 }
1270 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1328 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1271 } 1329 }
1330 rnp = rcu_get_root(rsp);
1331 if (rnp->qsmask == 0) {
1332 raw_spin_lock_irqsave(&rnp->lock, flags);
1333 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
1334 }
1272} 1335}
1273 1336
1274/* 1337/*
@@ -1389,7 +1452,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1389/* 1452/*
1390 * Do softirq processing for the current CPU. 1453 * Do softirq processing for the current CPU.
1391 */ 1454 */
1392static void rcu_process_callbacks(struct softirq_action *unused) 1455static void rcu_process_callbacks(void)
1393{ 1456{
1394 /* 1457 /*
1395 * Memory references from any prior RCU read-side critical sections 1458 * Memory references from any prior RCU read-side critical sections
@@ -1414,6 +1477,347 @@ static void rcu_process_callbacks(struct softirq_action *unused)
1414 rcu_needs_cpu_flush(); 1477 rcu_needs_cpu_flush();
1415} 1478}
1416 1479
1480/*
1481 * Wake up the current CPU's kthread. This replaces raise_softirq()
1482 * in earlier versions of RCU. Note that because we are running on
1483 * the current CPU with interrupts disabled, the rcu_cpu_kthread_task
1484 * cannot disappear out from under us.
1485 */
1486static void invoke_rcu_cpu_kthread(void)
1487{
1488 unsigned long flags;
1489
1490 local_irq_save(flags);
1491 __this_cpu_write(rcu_cpu_has_work, 1);
1492 if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) {
1493 local_irq_restore(flags);
1494 return;
1495 }
1496 wake_up(&__get_cpu_var(rcu_cpu_wq));
1497 local_irq_restore(flags);
1498}
1499
1500/*
1501 * Wake up the specified per-rcu_node-structure kthread.
1502 * Because the per-rcu_node kthreads are immortal, we don't need
1503 * to do anything to keep them alive.
1504 */
1505static void invoke_rcu_node_kthread(struct rcu_node *rnp)
1506{
1507 struct task_struct *t;
1508
1509 t = rnp->node_kthread_task;
1510 if (t != NULL)
1511 wake_up_process(t);
1512}
1513
1514/*
1515 * Set the specified CPU's kthread to run RT or not, as specified by
1516 * the to_rt argument. The CPU-hotplug locks are held, so the task
1517 * is not going away.
1518 */
1519static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
1520{
1521 int policy;
1522 struct sched_param sp;
1523 struct task_struct *t;
1524
1525 t = per_cpu(rcu_cpu_kthread_task, cpu);
1526 if (t == NULL)
1527 return;
1528 if (to_rt) {
1529 policy = SCHED_FIFO;
1530 sp.sched_priority = RCU_KTHREAD_PRIO;
1531 } else {
1532 policy = SCHED_NORMAL;
1533 sp.sched_priority = 0;
1534 }
1535 sched_setscheduler_nocheck(t, policy, &sp);
1536}
1537
1538/*
1539 * Timer handler to initiate the waking up of per-CPU kthreads that
1540 * have yielded the CPU due to excess numbers of RCU callbacks.
1541 * We wake up the per-rcu_node kthread, which in turn will wake up
1542 * the booster kthread.
1543 */
1544static void rcu_cpu_kthread_timer(unsigned long arg)
1545{
1546 unsigned long flags;
1547 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg);
1548 struct rcu_node *rnp = rdp->mynode;
1549
1550 raw_spin_lock_irqsave(&rnp->lock, flags);
1551 rnp->wakemask |= rdp->grpmask;
1552 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1553 invoke_rcu_node_kthread(rnp);
1554}
1555
1556/*
1557 * Drop to non-real-time priority and yield, but only after posting a
1558 * timer that will cause us to regain our real-time priority if we
1559 * remain preempted. Either way, we restore our real-time priority
1560 * before returning.
1561 */
1562static void rcu_yield(void (*f)(unsigned long), unsigned long arg)
1563{
1564 struct sched_param sp;
1565 struct timer_list yield_timer;
1566
1567 setup_timer_on_stack(&yield_timer, f, arg);
1568 mod_timer(&yield_timer, jiffies + 2);
1569 sp.sched_priority = 0;
1570 sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp);
1571 set_user_nice(current, 19);
1572 schedule();
1573 sp.sched_priority = RCU_KTHREAD_PRIO;
1574 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1575 del_timer(&yield_timer);
1576}
1577
1578/*
1579 * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU.
1580 * This can happen while the corresponding CPU is either coming online
1581 * or going offline. We cannot wait until the CPU is fully online
1582 * before starting the kthread, because the various notifier functions
1583 * can wait for RCU grace periods. So we park rcu_cpu_kthread() until
1584 * the corresponding CPU is online.
1585 *
1586 * Return 1 if the kthread needs to stop, 0 otherwise.
1587 *
1588 * Caller must disable bh. This function can momentarily enable it.
1589 */
1590static int rcu_cpu_kthread_should_stop(int cpu)
1591{
1592 while (cpu_is_offline(cpu) ||
1593 !cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)) ||
1594 smp_processor_id() != cpu) {
1595 if (kthread_should_stop())
1596 return 1;
1597 per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
1598 per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id();
1599 local_bh_enable();
1600 schedule_timeout_uninterruptible(1);
1601 if (!cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)))
1602 set_cpus_allowed_ptr(current, cpumask_of(cpu));
1603 local_bh_disable();
1604 }
1605 per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
1606 return 0;
1607}
1608
1609/*
1610 * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
1611 * earlier RCU softirq.
1612 */
1613static int rcu_cpu_kthread(void *arg)
1614{
1615 int cpu = (int)(long)arg;
1616 unsigned long flags;
1617 int spincnt = 0;
1618 unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu);
1619 wait_queue_head_t *wqp = &per_cpu(rcu_cpu_wq, cpu);
1620 char work;
1621 char *workp = &per_cpu(rcu_cpu_has_work, cpu);
1622
1623 for (;;) {
1624 *statusp = RCU_KTHREAD_WAITING;
1625 wait_event_interruptible(*wqp,
1626 *workp != 0 || kthread_should_stop());
1627 local_bh_disable();
1628 if (rcu_cpu_kthread_should_stop(cpu)) {
1629 local_bh_enable();
1630 break;
1631 }
1632 *statusp = RCU_KTHREAD_RUNNING;
1633 per_cpu(rcu_cpu_kthread_loops, cpu)++;
1634 local_irq_save(flags);
1635 work = *workp;
1636 *workp = 0;
1637 local_irq_restore(flags);
1638 if (work)
1639 rcu_process_callbacks();
1640 local_bh_enable();
1641 if (*workp != 0)
1642 spincnt++;
1643 else
1644 spincnt = 0;
1645 if (spincnt > 10) {
1646 *statusp = RCU_KTHREAD_YIELDING;
1647 rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu);
1648 spincnt = 0;
1649 }
1650 }
1651 *statusp = RCU_KTHREAD_STOPPED;
1652 return 0;
1653}
1654
1655/*
1656 * Spawn a per-CPU kthread, setting up affinity and priority.
1657 * Because the CPU hotplug lock is held, no other CPU will be attempting
1658 * to manipulate rcu_cpu_kthread_task. There might be another CPU
1659 * attempting to access it during boot, but the locking in kthread_bind()
1660 * will enforce sufficient ordering.
1661 */
1662static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
1663{
1664 struct sched_param sp;
1665 struct task_struct *t;
1666
1667 if (!rcu_kthreads_spawnable ||
1668 per_cpu(rcu_cpu_kthread_task, cpu) != NULL)
1669 return 0;
1670 t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu);
1671 if (IS_ERR(t))
1672 return PTR_ERR(t);
1673 kthread_bind(t, cpu);
1674 per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
1675 WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
1676 per_cpu(rcu_cpu_kthread_task, cpu) = t;
1677 wake_up_process(t);
1678 sp.sched_priority = RCU_KTHREAD_PRIO;
1679 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1680 return 0;
1681}
1682
1683/*
1684 * Per-rcu_node kthread, which is in charge of waking up the per-CPU
1685 * kthreads when needed. We ignore requests to wake up kthreads
1686 * for offline CPUs, which is OK because force_quiescent_state()
1687 * takes care of this case.
1688 */
1689static int rcu_node_kthread(void *arg)
1690{
1691 int cpu;
1692 unsigned long flags;
1693 unsigned long mask;
1694 struct rcu_node *rnp = (struct rcu_node *)arg;
1695 struct sched_param sp;
1696 struct task_struct *t;
1697
1698 for (;;) {
1699 rnp->node_kthread_status = RCU_KTHREAD_WAITING;
1700 wait_event_interruptible(rnp->node_wq, rnp->wakemask != 0);
1701 rnp->node_kthread_status = RCU_KTHREAD_RUNNING;
1702 raw_spin_lock_irqsave(&rnp->lock, flags);
1703 mask = rnp->wakemask;
1704 rnp->wakemask = 0;
1705 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
1706 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) {
1707 if ((mask & 0x1) == 0)
1708 continue;
1709 preempt_disable();
1710 t = per_cpu(rcu_cpu_kthread_task, cpu);
1711 if (!cpu_online(cpu) || t == NULL) {
1712 preempt_enable();
1713 continue;
1714 }
1715 per_cpu(rcu_cpu_has_work, cpu) = 1;
1716 sp.sched_priority = RCU_KTHREAD_PRIO;
1717 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1718 preempt_enable();
1719 }
1720 }
1721 /* NOTREACHED */
1722 rnp->node_kthread_status = RCU_KTHREAD_STOPPED;
1723 return 0;
1724}
1725
1726/*
1727 * Set the per-rcu_node kthread's affinity to cover all CPUs that are
1728 * served by the rcu_node in question. The CPU hotplug lock is still
1729 * held, so the value of rnp->qsmaskinit will be stable.
1730 *
1731 * We don't include outgoingcpu in the affinity set, use -1 if there is
1732 * no outgoing CPU. If there are no CPUs left in the affinity set,
1733 * this function allows the kthread to execute on any CPU.
1734 */
1735static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1736{
1737 cpumask_var_t cm;
1738 int cpu;
1739 unsigned long mask = rnp->qsmaskinit;
1740
1741 if (rnp->node_kthread_task == NULL)
1742 return;
1743 if (!alloc_cpumask_var(&cm, GFP_KERNEL))
1744 return;
1745 cpumask_clear(cm);
1746 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
1747 if ((mask & 0x1) && cpu != outgoingcpu)
1748 cpumask_set_cpu(cpu, cm);
1749 if (cpumask_weight(cm) == 0) {
1750 cpumask_setall(cm);
1751 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++)
1752 cpumask_clear_cpu(cpu, cm);
1753 WARN_ON_ONCE(cpumask_weight(cm) == 0);
1754 }
1755 set_cpus_allowed_ptr(rnp->node_kthread_task, cm);
1756 rcu_boost_kthread_setaffinity(rnp, cm);
1757 free_cpumask_var(cm);
1758}
1759
1760/*
1761 * Spawn a per-rcu_node kthread, setting priority and affinity.
1762 * Called during boot before online/offline can happen, or, if
1763 * during runtime, with the main CPU-hotplug locks held. So only
1764 * one of these can be executing at a time.
1765 */
1766static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
1767 struct rcu_node *rnp)
1768{
1769 unsigned long flags;
1770 int rnp_index = rnp - &rsp->node[0];
1771 struct sched_param sp;
1772 struct task_struct *t;
1773
1774 if (!rcu_kthreads_spawnable ||
1775 rnp->qsmaskinit == 0)
1776 return 0;
1777 if (rnp->node_kthread_task == NULL) {
1778 t = kthread_create(rcu_node_kthread, (void *)rnp,
1779 "rcun%d", rnp_index);
1780 if (IS_ERR(t))
1781 return PTR_ERR(t);
1782 raw_spin_lock_irqsave(&rnp->lock, flags);
1783 rnp->node_kthread_task = t;
1784 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1785 wake_up_process(t);
1786 sp.sched_priority = 99;
1787 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1788 }
1789 return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index);
1790}
1791
1792/*
1793 * Spawn all kthreads -- called as soon as the scheduler is running.
1794 */
1795static int __init rcu_spawn_kthreads(void)
1796{
1797 int cpu;
1798 struct rcu_node *rnp;
1799
1800 rcu_kthreads_spawnable = 1;
1801 for_each_possible_cpu(cpu) {
1802 init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu));
1803 per_cpu(rcu_cpu_has_work, cpu) = 0;
1804 if (cpu_online(cpu))
1805 (void)rcu_spawn_one_cpu_kthread(cpu);
1806 }
1807 rnp = rcu_get_root(rcu_state);
1808 init_waitqueue_head(&rnp->node_wq);
1809 rcu_init_boost_waitqueue(rnp);
1810 (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
1811 if (NUM_RCU_NODES > 1)
1812 rcu_for_each_leaf_node(rcu_state, rnp) {
1813 init_waitqueue_head(&rnp->node_wq);
1814 rcu_init_boost_waitqueue(rnp);
1815 (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
1816 }
1817 return 0;
1818}
1819early_initcall(rcu_spawn_kthreads);
1820
1417static void 1821static void
1418__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), 1822__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1419 struct rcu_state *rsp) 1823 struct rcu_state *rsp)
@@ -1439,6 +1843,13 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1439 /* Add the callback to our list. */ 1843 /* Add the callback to our list. */
1440 *rdp->nxttail[RCU_NEXT_TAIL] = head; 1844 *rdp->nxttail[RCU_NEXT_TAIL] = head;
1441 rdp->nxttail[RCU_NEXT_TAIL] = &head->next; 1845 rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
1846 rdp->qlen++;
1847
1848 /* If interrupts were disabled, don't dive into RCU core. */
1849 if (irqs_disabled_flags(flags)) {
1850 local_irq_restore(flags);
1851 return;
1852 }
1442 1853
1443 /* 1854 /*
1444 * Force the grace period if too many callbacks or too long waiting. 1855 * Force the grace period if too many callbacks or too long waiting.
@@ -1447,7 +1858,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1447 * invoking force_quiescent_state() if the newly enqueued callback 1858 * invoking force_quiescent_state() if the newly enqueued callback
1448 * is the only one waiting for a grace period to complete. 1859 * is the only one waiting for a grace period to complete.
1449 */ 1860 */
1450 if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { 1861 if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
1451 1862
1452 /* Are we ignoring a completed grace period? */ 1863 /* Are we ignoring a completed grace period? */
1453 rcu_process_gp_end(rsp, rdp); 1864 rcu_process_gp_end(rsp, rdp);
@@ -1583,7 +1994,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
1583 * or RCU-bh, force a local reschedule. 1994 * or RCU-bh, force a local reschedule.
1584 */ 1995 */
1585 rdp->n_rp_qs_pending++; 1996 rdp->n_rp_qs_pending++;
1586 if (!rdp->preemptable && 1997 if (!rdp->preemptible &&
1587 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, 1998 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1,
1588 jiffies)) 1999 jiffies))
1589 set_need_resched(); 2000 set_need_resched();
@@ -1760,7 +2171,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
1760 * that this CPU cannot possibly have any RCU callbacks in flight yet. 2171 * that this CPU cannot possibly have any RCU callbacks in flight yet.
1761 */ 2172 */
1762static void __cpuinit 2173static void __cpuinit
1763rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) 2174rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
1764{ 2175{
1765 unsigned long flags; 2176 unsigned long flags;
1766 unsigned long mask; 2177 unsigned long mask;
@@ -1772,7 +2183,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
1772 rdp->passed_quiesc = 0; /* We could be racing with new GP, */ 2183 rdp->passed_quiesc = 0; /* We could be racing with new GP, */
1773 rdp->qs_pending = 1; /* so set up to respond to current GP. */ 2184 rdp->qs_pending = 1; /* so set up to respond to current GP. */
1774 rdp->beenonline = 1; /* We have now been online. */ 2185 rdp->beenonline = 1; /* We have now been online. */
1775 rdp->preemptable = preemptable; 2186 rdp->preemptible = preemptible;
1776 rdp->qlen_last_fqs_check = 0; 2187 rdp->qlen_last_fqs_check = 0;
1777 rdp->n_force_qs_snap = rsp->n_force_qs; 2188 rdp->n_force_qs_snap = rsp->n_force_qs;
1778 rdp->blimit = blimit; 2189 rdp->blimit = blimit;
@@ -1813,6 +2224,19 @@ static void __cpuinit rcu_online_cpu(int cpu)
1813 rcu_preempt_init_percpu_data(cpu); 2224 rcu_preempt_init_percpu_data(cpu);
1814} 2225}
1815 2226
2227static void __cpuinit rcu_online_kthreads(int cpu)
2228{
2229 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
2230 struct rcu_node *rnp = rdp->mynode;
2231
2232 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
2233 if (rcu_kthreads_spawnable) {
2234 (void)rcu_spawn_one_cpu_kthread(cpu);
2235 if (rnp->node_kthread_task == NULL)
2236 (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
2237 }
2238}
2239
1816/* 2240/*
1817 * Handle CPU online/offline notification events. 2241 * Handle CPU online/offline notification events.
1818 */ 2242 */
@@ -1820,11 +2244,23 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
1820 unsigned long action, void *hcpu) 2244 unsigned long action, void *hcpu)
1821{ 2245{
1822 long cpu = (long)hcpu; 2246 long cpu = (long)hcpu;
2247 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
2248 struct rcu_node *rnp = rdp->mynode;
1823 2249
1824 switch (action) { 2250 switch (action) {
1825 case CPU_UP_PREPARE: 2251 case CPU_UP_PREPARE:
1826 case CPU_UP_PREPARE_FROZEN: 2252 case CPU_UP_PREPARE_FROZEN:
1827 rcu_online_cpu(cpu); 2253 rcu_online_cpu(cpu);
2254 rcu_online_kthreads(cpu);
2255 break;
2256 case CPU_ONLINE:
2257 case CPU_DOWN_FAILED:
2258 rcu_node_kthread_setaffinity(rnp, -1);
2259 rcu_cpu_kthread_setrt(cpu, 1);
2260 break;
2261 case CPU_DOWN_PREPARE:
2262 rcu_node_kthread_setaffinity(rnp, cpu);
2263 rcu_cpu_kthread_setrt(cpu, 0);
1828 break; 2264 break;
1829 case CPU_DYING: 2265 case CPU_DYING:
1830 case CPU_DYING_FROZEN: 2266 case CPU_DYING_FROZEN:
@@ -1943,10 +2379,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
1943 j / rsp->levelspread[i - 1]; 2379 j / rsp->levelspread[i - 1];
1944 } 2380 }
1945 rnp->level = i; 2381 rnp->level = i;
1946 INIT_LIST_HEAD(&rnp->blocked_tasks[0]); 2382 INIT_LIST_HEAD(&rnp->blkd_tasks);
1947 INIT_LIST_HEAD(&rnp->blocked_tasks[1]);
1948 INIT_LIST_HEAD(&rnp->blocked_tasks[2]);
1949 INIT_LIST_HEAD(&rnp->blocked_tasks[3]);
1950 } 2383 }
1951 } 2384 }
1952 2385
@@ -1968,7 +2401,6 @@ void __init rcu_init(void)
1968 rcu_init_one(&rcu_sched_state, &rcu_sched_data); 2401 rcu_init_one(&rcu_sched_state, &rcu_sched_data);
1969 rcu_init_one(&rcu_bh_state, &rcu_bh_data); 2402 rcu_init_one(&rcu_bh_state, &rcu_bh_data);
1970 __rcu_init_preempt(); 2403 __rcu_init_preempt();
1971 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
1972 2404
1973 /* 2405 /*
1974 * We don't need protection against CPU-hotplug here because 2406 * We don't need protection against CPU-hotplug here because
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index e8f057e44e3e..257664815d5d 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -91,6 +91,14 @@ struct rcu_dynticks {
91 /* remains even for nmi from irq handler. */ 91 /* remains even for nmi from irq handler. */
92}; 92};
93 93
94/* RCU's kthread states for tracing. */
95#define RCU_KTHREAD_STOPPED 0
96#define RCU_KTHREAD_RUNNING 1
97#define RCU_KTHREAD_WAITING 2
98#define RCU_KTHREAD_OFFCPU 3
99#define RCU_KTHREAD_YIELDING 4
100#define RCU_KTHREAD_MAX 4
101
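
These states appear in the debugfs trace output as small integers; a hypothetical decoder table for readers of that output:

    /* Hypothetical: one-letter codes for the RCU_KTHREAD_* states. */
    static const char rcu_kthread_state_char[RCU_KTHREAD_MAX + 1] = {
            [RCU_KTHREAD_STOPPED]  = 'S',
            [RCU_KTHREAD_RUNNING]  = 'R',
            [RCU_KTHREAD_WAITING]  = 'W',
            [RCU_KTHREAD_OFFCPU]   = 'O',
            [RCU_KTHREAD_YIELDING] = 'Y',
    };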
94/* 102/*
95 * Definition for node within the RCU grace-period-detection hierarchy. 103 * Definition for node within the RCU grace-period-detection hierarchy.
96 */ 104 */
@@ -109,10 +117,11 @@ struct rcu_node {
109 /* an rcu_data structure, otherwise, each */ 117 /* an rcu_data structure, otherwise, each */
110 /* bit corresponds to a child rcu_node */ 118 /* bit corresponds to a child rcu_node */
111 /* structure. */ 119 /* structure. */
112 unsigned long expmask; /* Groups that have ->blocked_tasks[] */ 120 unsigned long expmask; /* Groups that have ->blkd_tasks */
113 /* elements that need to drain to allow the */ 121 /* elements that need to drain to allow the */
114 /* current expedited grace period to */ 122 /* current expedited grace period to */
115 /* complete (only for TREE_PREEMPT_RCU). */ 123 /* complete (only for TREE_PREEMPT_RCU). */
124 unsigned long wakemask; /* CPUs whose kthread needs to be awakened. */
116 unsigned long qsmaskinit; 125 unsigned long qsmaskinit;
117 /* Per-GP initial value for qsmask & expmask. */ 126 /* Per-GP initial value for qsmask & expmask. */
118 unsigned long grpmask; /* Mask to apply to parent qsmask. */ 127 unsigned long grpmask; /* Mask to apply to parent qsmask. */
@@ -122,11 +131,68 @@ struct rcu_node {
122 u8 grpnum; /* CPU/group number for next level up. */ 131 u8 grpnum; /* CPU/group number for next level up. */
123 u8 level; /* root is at level 0. */ 132 u8 level; /* root is at level 0. */
124 struct rcu_node *parent; 133 struct rcu_node *parent;
125 struct list_head blocked_tasks[4]; 134 struct list_head blkd_tasks;
126 /* Tasks blocked in RCU read-side critsect. */ 135 /* Tasks blocked in RCU read-side critical */
127 /* Grace period number (->gpnum) x blocked */ 136 /* section. Tasks are placed at the head */
128 /* by tasks on the (x & 0x1) element of the */ 137 /* of this list and age towards the tail. */
129 /* blocked_tasks[] array. */ 138 struct list_head *gp_tasks;
139 /* Pointer to the first task blocking the */
140 /* current grace period, or NULL if there */
141 /* is no such task. */
142 struct list_head *exp_tasks;
143 /* Pointer to the first task blocking the */
144 /* current expedited grace period, or NULL */
145 /* if there is no such task. If there */
146 /* is no current expedited grace period, */
147 /* then there cannot be any such task. */
148#ifdef CONFIG_RCU_BOOST
149 struct list_head *boost_tasks;
150 /* Pointer to first task that needs to be */
151 /* priority boosted, or NULL if no priority */
152 /* boosting is needed for this rcu_node */
153 /* structure. If there are no tasks */
154 /* queued on this rcu_node structure that */
155 /* are blocking the current grace period, */
156 /* there can be no such task. */
157 unsigned long boost_time;
158 /* When to start boosting (jiffies). */
159 struct task_struct *boost_kthread_task;
160 /* kthread that takes care of priority */
161 /* boosting for this rcu_node structure. */
162 wait_queue_head_t boost_wq;
163 /* Wait queue on which to park the boost */
164 /* kthread. */
165 unsigned int boost_kthread_status;
166 /* State of boost_kthread_task for tracing. */
167 unsigned long n_tasks_boosted;
168 /* Total number of tasks boosted. */
169 unsigned long n_exp_boosts;
170 /* Number of tasks boosted for expedited GP. */
171 unsigned long n_normal_boosts;
172 /* Number of tasks boosted for normal GP. */
173 unsigned long n_balk_blkd_tasks;
174 /* Refused to boost: no blocked tasks. */
175 unsigned long n_balk_exp_gp_tasks;
176 /* Refused to boost: nothing blocking GP. */
177 unsigned long n_balk_boost_tasks;
178 /* Refused to boost: already boosting. */
179 unsigned long n_balk_notblocked;
180 /* Refused to boost: RCU RS CS still running. */
181 unsigned long n_balk_notyet;
182 /* Refused to boost: not yet time. */
183 unsigned long n_balk_nos;
184 /* Refused to boost: not sure why, though. */
185 /* This can happen due to race conditions. */
186#endif /* #ifdef CONFIG_RCU_BOOST */
187 struct task_struct *node_kthread_task;
188 /* kthread that takes care of this rcu_node */
189 /* structure, for example, awakening the */
190 /* per-CPU kthreads as needed. */
191 wait_queue_head_t node_wq;
192 /* Wait queue on which to park the per-node */
193 /* kthread. */
194 unsigned int node_kthread_status;
195 /* State of node_kthread_task for tracing. */
130} ____cacheline_internodealigned_in_smp; 196} ____cacheline_internodealigned_in_smp;
131 197
132/* 198/*
@@ -175,7 +241,7 @@ struct rcu_data {
175 bool passed_quiesc; /* User-mode/idle loop etc. */ 241 bool passed_quiesc; /* User-mode/idle loop etc. */
176 bool qs_pending; /* Core waits for quiesc state. */ 242 bool qs_pending; /* Core waits for quiesc state. */
177 bool beenonline; /* CPU online at least once. */ 243 bool beenonline; /* CPU online at least once. */
178 bool preemptable; /* Preemptable RCU? */ 244 bool preemptible; /* Preemptible RCU? */
179 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ 245 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
180 unsigned long grpmask; /* Mask to apply to leaf qsmask. */ 246 unsigned long grpmask; /* Mask to apply to leaf qsmask. */
181 247
@@ -254,7 +320,6 @@ struct rcu_data {
254#endif /* #else #ifdef CONFIG_NO_HZ */ 320#endif /* #else #ifdef CONFIG_NO_HZ */
255 321
256#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ 322#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
257#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
258 323
259#ifdef CONFIG_PROVE_RCU 324#ifdef CONFIG_PROVE_RCU
260#define RCU_STALL_DELAY_DELTA (5 * HZ) 325#define RCU_STALL_DELAY_DELTA (5 * HZ)
@@ -272,13 +337,6 @@ struct rcu_data {
272 /* scheduling clock irq */ 337 /* scheduling clock irq */
273 /* before ratting on them. */ 338 /* before ratting on them. */
274 339
275#ifdef CONFIG_RCU_CPU_STALL_DETECTOR_RUNNABLE
276#define RCU_CPU_STALL_SUPPRESS_INIT 0
277#else
278#define RCU_CPU_STALL_SUPPRESS_INIT 1
279#endif
280
281#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
282 340
283/* 341/*
284 * RCU global state, including node hierarchy. This hierarchy is 342 * RCU global state, including node hierarchy. This hierarchy is
@@ -325,12 +383,12 @@ struct rcu_state {
325 /* due to lock unavailable. */ 383 /* due to lock unavailable. */
326 unsigned long n_force_qs_ngp; /* Number of calls leaving */ 384 unsigned long n_force_qs_ngp; /* Number of calls leaving */
327 /* due to no GP active. */ 385 /* due to no GP active. */
328#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
329 unsigned long gp_start; /* Time at which GP started, */ 386 unsigned long gp_start; /* Time at which GP started, */
330 /* but in jiffies. */ 387 /* but in jiffies. */
331 unsigned long jiffies_stall; /* Time at which to check */ 388 unsigned long jiffies_stall; /* Time at which to check */
332 /* for CPU stalls. */ 389 /* for CPU stalls. */
333#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 390 unsigned long gp_max; /* Maximum GP duration in */
391 /* jiffies. */
334 char *name; /* Name of structure. */ 392 char *name; /* Name of structure. */
335}; 393};
336 394
@@ -361,16 +419,14 @@ DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
361static void rcu_bootup_announce(void); 419static void rcu_bootup_announce(void);
362long rcu_batches_completed(void); 420long rcu_batches_completed(void);
363static void rcu_preempt_note_context_switch(int cpu); 421static void rcu_preempt_note_context_switch(int cpu);
364static int rcu_preempted_readers(struct rcu_node *rnp); 422static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
365#ifdef CONFIG_HOTPLUG_CPU 423#ifdef CONFIG_HOTPLUG_CPU
366static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, 424static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
367 unsigned long flags); 425 unsigned long flags);
368#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 426#endif /* #ifdef CONFIG_HOTPLUG_CPU */
369#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
370static void rcu_print_detail_task_stall(struct rcu_state *rsp); 427static void rcu_print_detail_task_stall(struct rcu_state *rsp);
371static void rcu_print_task_stall(struct rcu_node *rnp); 428static void rcu_print_task_stall(struct rcu_node *rnp);
372static void rcu_preempt_stall_reset(void); 429static void rcu_preempt_stall_reset(void);
373#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
374static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); 430static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
375#ifdef CONFIG_HOTPLUG_CPU 431#ifdef CONFIG_HOTPLUG_CPU
376static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 432static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
@@ -390,5 +446,13 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
390static void rcu_preempt_send_cbs_to_online(void); 446static void rcu_preempt_send_cbs_to_online(void);
391static void __init __rcu_init_preempt(void); 447static void __init __rcu_init_preempt(void);
392static void rcu_needs_cpu_flush(void); 448static void rcu_needs_cpu_flush(void);
449static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp);
450static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
451static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
452 cpumask_var_t cm);
453static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
454static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
455 struct rcu_node *rnp,
456 int rnp_index);
393 457
394#endif /* #ifndef RCU_TREE_NONCORE */ 458#endif /* #ifndef RCU_TREE_NONCORE */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index a3638710dc67..3f6559a5f5cd 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * Read-Copy Update mechanism for mutual exclusion (tree-based version) 2 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
3 * Internal non-public definitions that provide either classic 3 * Internal non-public definitions that provide either classic
4 * or preemptable semantics. 4 * or preemptible semantics.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
@@ -54,10 +54,6 @@ static void __init rcu_bootup_announce_oddness(void)
54#ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE 54#ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
55 printk(KERN_INFO "\tRCU torture testing starts during boot.\n"); 55 printk(KERN_INFO "\tRCU torture testing starts during boot.\n");
56#endif 56#endif
57#ifndef CONFIG_RCU_CPU_STALL_DETECTOR
58 printk(KERN_INFO
59 "\tRCU-based detection of stalled CPUs is disabled.\n");
60#endif
61#if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) 57#if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE)
62 printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); 58 printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n");
63#endif 59#endif
@@ -70,6 +66,7 @@ static void __init rcu_bootup_announce_oddness(void)
70 66
71struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); 67struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
72DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); 68DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
69static struct rcu_state *rcu_state = &rcu_preempt_state;
73 70
74static int rcu_preempted_readers_exp(struct rcu_node *rnp); 71static int rcu_preempted_readers_exp(struct rcu_node *rnp);
75 72
@@ -78,7 +75,7 @@ static int rcu_preempted_readers_exp(struct rcu_node *rnp);
78 */ 75 */
79static void __init rcu_bootup_announce(void) 76static void __init rcu_bootup_announce(void)
80{ 77{
81 printk(KERN_INFO "Preemptable hierarchical RCU implementation.\n"); 78 printk(KERN_INFO "Preemptible hierarchical RCU implementation.\n");
82 rcu_bootup_announce_oddness(); 79 rcu_bootup_announce_oddness();
83} 80}
84 81
@@ -111,7 +108,7 @@ void rcu_force_quiescent_state(void)
111EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 108EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
112 109
113/* 110/*
114 * Record a preemptable-RCU quiescent state for the specified CPU. Note 111 * Record a preemptible-RCU quiescent state for the specified CPU. Note
115 * that this just means that the task currently running on the CPU is 112 * that this just means that the task currently running on the CPU is
116 * not in a quiescent state. There might be any number of tasks blocked 113 * not in a quiescent state. There might be any number of tasks blocked
117 * while in an RCU read-side critical section. 114 * while in an RCU read-side critical section.
@@ -134,12 +131,12 @@ static void rcu_preempt_qs(int cpu)
134 * We have entered the scheduler, and the current task might soon be 131 * We have entered the scheduler, and the current task might soon be
135 * context-switched away from. If this task is in an RCU read-side 132 * context-switched away from. If this task is in an RCU read-side
136 * critical section, we will no longer be able to rely on the CPU to 133 * critical section, we will no longer be able to rely on the CPU to
137 * record that fact, so we enqueue the task on the appropriate entry 134 * record that fact, so we enqueue the task on the blkd_tasks list.
138 * of the blocked_tasks[] array. The task will dequeue itself when 135 * The task will dequeue itself when it exits the outermost enclosing
139 * it exits the outermost enclosing RCU read-side critical section. 136 * RCU read-side critical section. Therefore, the current grace period
140 * Therefore, the current grace period cannot be permitted to complete 137 * cannot be permitted to complete until the blkd_tasks list entries
141 * until the blocked_tasks[] entry indexed by the low-order bit of 138 * predating the current grace period drain, in other words, until
142 * rnp->gpnum empties. 139 * rnp->gp_tasks becomes NULL.
143 * 140 *
144 * Caller must disable preemption. 141 * Caller must disable preemption.
145 */ 142 */
@@ -147,7 +144,6 @@ static void rcu_preempt_note_context_switch(int cpu)
147{ 144{
148 struct task_struct *t = current; 145 struct task_struct *t = current;
149 unsigned long flags; 146 unsigned long flags;
150 int phase;
151 struct rcu_data *rdp; 147 struct rcu_data *rdp;
152 struct rcu_node *rnp; 148 struct rcu_node *rnp;
153 149
@@ -169,15 +165,30 @@ static void rcu_preempt_note_context_switch(int cpu)
169 * (i.e., this CPU has not yet passed through a quiescent 165 * (i.e., this CPU has not yet passed through a quiescent
170 * state for the current grace period), then as long 166 * state for the current grace period), then as long
171 * as that task remains queued, the current grace period 167 * as that task remains queued, the current grace period
172 * cannot end. 168 * cannot end. Note that there is some uncertainty as
169 * to exactly when the current grace period started.
170 * We take a conservative approach, which can result
171 * in unnecessarily waiting on tasks that started very
172 * slightly after the current grace period began. C'est
173 * la vie!!!
173 * 174 *
174 * But first, note that the current CPU must still be 175 * But first, note that the current CPU must still be
175 * on line! 176 * on line!
176 */ 177 */
177 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); 178 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
178 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); 179 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
179 phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1; 180 if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
180 list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]); 181 list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
182 rnp->gp_tasks = &t->rcu_node_entry;
183#ifdef CONFIG_RCU_BOOST
184 if (rnp->boost_tasks != NULL)
185 rnp->boost_tasks = rnp->gp_tasks;
186#endif /* #ifdef CONFIG_RCU_BOOST */
187 } else {
188 list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
189 if (rnp->qsmask & rdp->grpmask)
190 rnp->gp_tasks = &t->rcu_node_entry;
191 }
181 raw_spin_unlock_irqrestore(&rnp->lock, flags); 192 raw_spin_unlock_irqrestore(&rnp->lock, flags);
182 } 193 }
183 194
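
A minimal userspace sketch of the queueing rule above -- illustrative names only, not the kernel implementation -- showing how inserting a new GP-blocking reader just ahead of ->gp_tasks keeps the segment from ->gp_tasks to the tail equal to the set of tasks blocking the current grace period:

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void list_add_between(struct list_head *n, struct list_head *prev,
			     struct list_head *next)
{
	n->prev = prev; n->next = next;
	prev->next = n; next->prev = n;
}

struct task { struct list_head entry; int id; };	/* entry must be first */

static struct list_head blkd = { &blkd, &blkd };	/* plays ->blkd_tasks */
static struct list_head *gp_tasks;			/* plays ->gp_tasks */

static void note_blocked(struct task *t, int blocks_gp)
{
	if (blocks_gp && gp_tasks) {
		/* GP blocker: insert just ahead of the old ->gp_tasks. */
		list_add_between(&t->entry, gp_tasks->prev, gp_tasks);
		gp_tasks = &t->entry;
	} else {
		list_add_between(&t->entry, &blkd, blkd.next);	/* at head */
		if (blocks_gp)
			gp_tasks = &t->entry;
	}
}

int main(void)
{
	struct task a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };

	note_blocked(&a, 1);	/* blocks the current GP */
	note_blocked(&b, 0);	/* started after the GP; queued at head */
	note_blocked(&c, 1);	/* blocks the GP; lands adjacent to a */

	/* Exactly the entries from gp_tasks to the tail block the GP. */
	for (struct list_head *p = gp_tasks; p != &blkd; p = p->next)
		printf("task %d blocks the GP\n", ((struct task *)p)->id);
	return 0;	/* prints tasks 3 and 1, but not 2 */
}
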
@@ -196,7 +207,7 @@ static void rcu_preempt_note_context_switch(int cpu)
196} 207}
197 208
198/* 209/*
199 * Tree-preemptable RCU implementation for rcu_read_lock(). 210 * Tree-preemptible RCU implementation for rcu_read_lock().
200 * Just increment ->rcu_read_lock_nesting, shared state will be updated 211 * Just increment ->rcu_read_lock_nesting, shared state will be updated
201 * if we block. 212 * if we block.
202 */ 213 */
@@ -212,12 +223,9 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock);
212 * for the specified rcu_node structure. If the caller needs a reliable 223 * for the specified rcu_node structure. If the caller needs a reliable
213 * answer, it must hold the rcu_node's ->lock. 224 * answer, it must hold the rcu_node's ->lock.
214 */ 225 */
215static int rcu_preempted_readers(struct rcu_node *rnp) 226static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
216{ 227{
217 int phase = rnp->gpnum & 0x1; 228 return rnp->gp_tasks != NULL;
218
219 return !list_empty(&rnp->blocked_tasks[phase]) ||
220 !list_empty(&rnp->blocked_tasks[phase + 2]);
221} 229}
222 230
223/* 231/*
@@ -233,7 +241,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
233 unsigned long mask; 241 unsigned long mask;
234 struct rcu_node *rnp_p; 242 struct rcu_node *rnp_p;
235 243
236 if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) { 244 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
237 raw_spin_unlock_irqrestore(&rnp->lock, flags); 245 raw_spin_unlock_irqrestore(&rnp->lock, flags);
238 return; /* Still need more quiescent states! */ 246 return; /* Still need more quiescent states! */
239 } 247 }
@@ -257,6 +265,21 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
257} 265}
258 266
259/* 267/*
268 * Advance a ->blkd_tasks-list pointer to the next entry, returning
269 * NULL instead if at the end of the list.
270 */
271static struct list_head *rcu_next_node_entry(struct task_struct *t,
272 struct rcu_node *rnp)
273{
274 struct list_head *np;
275
276 np = t->rcu_node_entry.next;
277 if (np == &rnp->blkd_tasks)
278 np = NULL;
279 return np;
280}
281
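
rcu_next_node_entry() is needed because kernel lists are circular: the list_head itself is the end sentinel, not a NULL pointer, so callers that want NULL-at-end must translate. A standalone illustration of that convention (plain C, illustrative only):

#include <stdio.h>

struct node { struct node *next; int id; };

int main(void)
{
	struct node head = { &head, 0 };	/* empty circular list */
	struct node a = { &head, 1 };

	head.next = &a;		/* list is now: head -> a -> head */

	/* Mirror rcu_next_node_entry(): map the sentinel to NULL. */
	struct node *np = a.next;
	if (np == &head)
		np = NULL;
	printf("%s\n", np ? "more entries follow" : "a is the last entry");
	return 0;
}
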
282/*
260 * Handle special cases during rcu_read_unlock(), such as needing to 283 * Handle special cases during rcu_read_unlock(), such as needing to
261 * notify RCU core processing or task having blocked during the RCU 284 * notify RCU core processing or task having blocked during the RCU
262 * read-side critical section. 285 * read-side critical section.
@@ -266,6 +289,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
266 int empty; 289 int empty;
267 int empty_exp; 290 int empty_exp;
268 unsigned long flags; 291 unsigned long flags;
292 struct list_head *np;
269 struct rcu_node *rnp; 293 struct rcu_node *rnp;
270 int special; 294 int special;
271 295
@@ -306,10 +330,19 @@ static void rcu_read_unlock_special(struct task_struct *t)
306 break; 330 break;
307 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 331 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
308 } 332 }
309 empty = !rcu_preempted_readers(rnp); 333 empty = !rcu_preempt_blocked_readers_cgp(rnp);
310 empty_exp = !rcu_preempted_readers_exp(rnp); 334 empty_exp = !rcu_preempted_readers_exp(rnp);
311 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ 335 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
336 np = rcu_next_node_entry(t, rnp);
312 list_del_init(&t->rcu_node_entry); 337 list_del_init(&t->rcu_node_entry);
338 if (&t->rcu_node_entry == rnp->gp_tasks)
339 rnp->gp_tasks = np;
340 if (&t->rcu_node_entry == rnp->exp_tasks)
341 rnp->exp_tasks = np;
342#ifdef CONFIG_RCU_BOOST
343 if (&t->rcu_node_entry == rnp->boost_tasks)
344 rnp->boost_tasks = np;
345#endif /* #ifdef CONFIG_RCU_BOOST */
313 t->rcu_blocked_node = NULL; 346 t->rcu_blocked_node = NULL;
314 347
315 /* 348 /*
@@ -322,6 +355,15 @@ static void rcu_read_unlock_special(struct task_struct *t)
322 else 355 else
323 rcu_report_unblock_qs_rnp(rnp, flags); 356 rcu_report_unblock_qs_rnp(rnp, flags);
324 357
358#ifdef CONFIG_RCU_BOOST
359 /* Unboost if we were boosted. */
360 if (special & RCU_READ_UNLOCK_BOOSTED) {
361 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
362 rt_mutex_unlock(t->rcu_boost_mutex);
363 t->rcu_boost_mutex = NULL;
364 }
365#endif /* #ifdef CONFIG_RCU_BOOST */
366
325 /* 367 /*
326 * If this was the last task on the expedited lists, 368 * If this was the last task on the expedited lists,
327 * then we need to report up the rcu_node hierarchy. 369 * then we need to report up the rcu_node hierarchy.
@@ -334,7 +376,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
334} 376}
335 377
336/* 378/*
337 * Tree-preemptable RCU implementation for rcu_read_unlock(). 379 * Tree-preemptible RCU implementation for rcu_read_unlock().
338 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost 380 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
339 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then 381 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
340 * invoke rcu_read_unlock_special() to clean up after a context switch 382 * invoke rcu_read_unlock_special() to clean up after a context switch
@@ -356,8 +398,6 @@ void __rcu_read_unlock(void)
356} 398}
357EXPORT_SYMBOL_GPL(__rcu_read_unlock); 399EXPORT_SYMBOL_GPL(__rcu_read_unlock);
358 400
359#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
360
361#ifdef CONFIG_RCU_CPU_STALL_VERBOSE 401#ifdef CONFIG_RCU_CPU_STALL_VERBOSE
362 402
363/* 403/*
@@ -367,18 +407,16 @@ EXPORT_SYMBOL_GPL(__rcu_read_unlock);
367static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) 407static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
368{ 408{
369 unsigned long flags; 409 unsigned long flags;
370 struct list_head *lp;
371 int phase;
372 struct task_struct *t; 410 struct task_struct *t;
373 411
374 if (rcu_preempted_readers(rnp)) { 412 if (!rcu_preempt_blocked_readers_cgp(rnp))
375 raw_spin_lock_irqsave(&rnp->lock, flags); 413 return;
376 phase = rnp->gpnum & 0x1; 414 raw_spin_lock_irqsave(&rnp->lock, flags);
377 lp = &rnp->blocked_tasks[phase]; 415 t = list_entry(rnp->gp_tasks,
378 list_for_each_entry(t, lp, rcu_node_entry) 416 struct task_struct, rcu_node_entry);
379 sched_show_task(t); 417 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
380 raw_spin_unlock_irqrestore(&rnp->lock, flags); 418 sched_show_task(t);
381 } 419 raw_spin_unlock_irqrestore(&rnp->lock, flags);
382} 420}
383 421
384/* 422/*
@@ -408,16 +446,14 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp)
408 */ 446 */
409static void rcu_print_task_stall(struct rcu_node *rnp) 447static void rcu_print_task_stall(struct rcu_node *rnp)
410{ 448{
411 struct list_head *lp;
412 int phase;
413 struct task_struct *t; 449 struct task_struct *t;
414 450
415 if (rcu_preempted_readers(rnp)) { 451 if (!rcu_preempt_blocked_readers_cgp(rnp))
416 phase = rnp->gpnum & 0x1; 452 return;
417 lp = &rnp->blocked_tasks[phase]; 453 t = list_entry(rnp->gp_tasks,
418 list_for_each_entry(t, lp, rcu_node_entry) 454 struct task_struct, rcu_node_entry);
419 printk(" P%d", t->pid); 455 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
420 } 456 printk(" P%d", t->pid);
421} 457}
422 458
423/* 459/*
@@ -430,18 +466,21 @@ static void rcu_preempt_stall_reset(void)
430 rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; 466 rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2;
431} 467}
432 468
433#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
434
435/* 469/*
436 * Check that the list of blocked tasks for the newly completed grace 470 * Check that the list of blocked tasks for the newly completed grace
437 * period is in fact empty. It is a serious bug to complete a grace 471 * period is in fact empty. It is a serious bug to complete a grace
438 * period that still has RCU readers blocked! This function must be 472 * period that still has RCU readers blocked! This function must be
439 * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock 473 * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
440 * must be held by the caller. 474 * must be held by the caller.
475 *
476 * Also, if there are blocked tasks on the list, they automatically
477 * block the newly created grace period, so set up ->gp_tasks accordingly.
441 */ 478 */
442static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 479static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
443{ 480{
444 WARN_ON_ONCE(rcu_preempted_readers(rnp)); 481 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
482 if (!list_empty(&rnp->blkd_tasks))
483 rnp->gp_tasks = rnp->blkd_tasks.next;
445 WARN_ON_ONCE(rnp->qsmask); 484 WARN_ON_ONCE(rnp->qsmask);
446} 485}
447 486
@@ -465,50 +504,68 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
465 struct rcu_node *rnp, 504 struct rcu_node *rnp,
466 struct rcu_data *rdp) 505 struct rcu_data *rdp)
467{ 506{
468 int i;
469 struct list_head *lp; 507 struct list_head *lp;
470 struct list_head *lp_root; 508 struct list_head *lp_root;
471 int retval = 0; 509 int retval = 0;
472 struct rcu_node *rnp_root = rcu_get_root(rsp); 510 struct rcu_node *rnp_root = rcu_get_root(rsp);
473 struct task_struct *tp; 511 struct task_struct *t;
474 512
475 if (rnp == rnp_root) { 513 if (rnp == rnp_root) {
476 WARN_ONCE(1, "Last CPU thought to be offlined?"); 514 WARN_ONCE(1, "Last CPU thought to be offlined?");
477 return 0; /* Shouldn't happen: at least one CPU online. */ 515 return 0; /* Shouldn't happen: at least one CPU online. */
478 } 516 }
479 WARN_ON_ONCE(rnp != rdp->mynode && 517
480 (!list_empty(&rnp->blocked_tasks[0]) || 518 /* If we are on an internal node, complain bitterly. */
481 !list_empty(&rnp->blocked_tasks[1]) || 519 WARN_ON_ONCE(rnp != rdp->mynode);
482 !list_empty(&rnp->blocked_tasks[2]) ||
483 !list_empty(&rnp->blocked_tasks[3])));
484 520
485 /* 521 /*
486 * Move tasks up to root rcu_node. Rely on the fact that the 522 * Move tasks up to root rcu_node. Don't try to get fancy for
487 * root rcu_node can be at most one ahead of the rest of the 523 * this corner-case operation -- just put this node's tasks
488 * rcu_nodes in terms of gp_num value. This fact allows us to 524 * at the head of the root node's list, and update the root node's
489 * move the blocked_tasks[] array directly, element by element. 525 * ->gp_tasks and ->exp_tasks pointers to those of this node's,
526 * if non-NULL. This might result in waiting for more tasks than
527 * absolutely necessary, but this is a good performance/complexity
528 * tradeoff.
490 */ 529 */
491 if (rcu_preempted_readers(rnp)) 530 if (rcu_preempt_blocked_readers_cgp(rnp))
492 retval |= RCU_OFL_TASKS_NORM_GP; 531 retval |= RCU_OFL_TASKS_NORM_GP;
493 if (rcu_preempted_readers_exp(rnp)) 532 if (rcu_preempted_readers_exp(rnp))
494 retval |= RCU_OFL_TASKS_EXP_GP; 533 retval |= RCU_OFL_TASKS_EXP_GP;
495 for (i = 0; i < 4; i++) { 534 lp = &rnp->blkd_tasks;
496 lp = &rnp->blocked_tasks[i]; 535 lp_root = &rnp_root->blkd_tasks;
497 lp_root = &rnp_root->blocked_tasks[i]; 536 while (!list_empty(lp)) {
498 while (!list_empty(lp)) { 537 t = list_entry(lp->next, typeof(*t), rcu_node_entry);
499 tp = list_entry(lp->next, typeof(*tp), rcu_node_entry); 538 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
500 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ 539 list_del(&t->rcu_node_entry);
501 list_del(&tp->rcu_node_entry); 540 t->rcu_blocked_node = rnp_root;
502 tp->rcu_blocked_node = rnp_root; 541 list_add(&t->rcu_node_entry, lp_root);
503 list_add(&tp->rcu_node_entry, lp_root); 542 if (&t->rcu_node_entry == rnp->gp_tasks)
504 raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */ 543 rnp_root->gp_tasks = rnp->gp_tasks;
505 } 544 if (&t->rcu_node_entry == rnp->exp_tasks)
545 rnp_root->exp_tasks = rnp->exp_tasks;
546#ifdef CONFIG_RCU_BOOST
547 if (&t->rcu_node_entry == rnp->boost_tasks)
548 rnp_root->boost_tasks = rnp->boost_tasks;
549#endif /* #ifdef CONFIG_RCU_BOOST */
550 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
506 } 551 }
552
553#ifdef CONFIG_RCU_BOOST
554 /* In case root is being boosted and leaf is not. */
555 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
556 if (rnp_root->boost_tasks != NULL &&
557 rnp_root->boost_tasks != rnp_root->gp_tasks)
558 rnp_root->boost_tasks = rnp_root->gp_tasks;
559 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
560#endif /* #ifdef CONFIG_RCU_BOOST */
561
562 rnp->gp_tasks = NULL;
563 rnp->exp_tasks = NULL;
507 return retval; 564 return retval;
508} 565}
509 566
510/* 567/*
511 * Do CPU-offline processing for preemptable RCU. 568 * Do CPU-offline processing for preemptible RCU.
512 */ 569 */
513static void rcu_preempt_offline_cpu(int cpu) 570static void rcu_preempt_offline_cpu(int cpu)
514{ 571{
@@ -537,7 +594,7 @@ static void rcu_preempt_check_callbacks(int cpu)
537} 594}
538 595
539/* 596/*
540 * Process callbacks for preemptable RCU. 597 * Process callbacks for preemptible RCU.
541 */ 598 */
542static void rcu_preempt_process_callbacks(void) 599static void rcu_preempt_process_callbacks(void)
543{ 600{
@@ -546,7 +603,7 @@ static void rcu_preempt_process_callbacks(void)
546} 603}
547 604
548/* 605/*
549 * Queue a preemptable-RCU callback for invocation after a grace period. 606 * Queue a preemptible-RCU callback for invocation after a grace period.
550 */ 607 */
551void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 608void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
552{ 609{
@@ -594,8 +651,7 @@ static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
594 */ 651 */
595static int rcu_preempted_readers_exp(struct rcu_node *rnp) 652static int rcu_preempted_readers_exp(struct rcu_node *rnp)
596{ 653{
597 return !list_empty(&rnp->blocked_tasks[2]) || 654 return rnp->exp_tasks != NULL;
598 !list_empty(&rnp->blocked_tasks[3]);
599} 655}
600 656
601/* 657/*
@@ -655,13 +711,17 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
655static void 711static void
656sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) 712sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
657{ 713{
658 int must_wait; 714 unsigned long flags;
715 int must_wait = 0;
659 716
660 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 717 raw_spin_lock_irqsave(&rnp->lock, flags);
661 list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]); 718 if (list_empty(&rnp->blkd_tasks))
662 list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]); 719 raw_spin_unlock_irqrestore(&rnp->lock, flags);
663 must_wait = rcu_preempted_readers_exp(rnp); 720 else {
664 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 721 rnp->exp_tasks = rnp->blkd_tasks.next;
722 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
723 must_wait = 1;
724 }
665 if (!must_wait) 725 if (!must_wait)
666 rcu_report_exp_rnp(rsp, rnp); 726 rcu_report_exp_rnp(rsp, rnp);
667} 727}
@@ -669,9 +729,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
669/* 729/*
670 * Wait for an rcu-preempt grace period, but expedite it. The basic idea 730 * Wait for an rcu-preempt grace period, but expedite it. The basic idea
671 * is to invoke synchronize_sched_expedited() to push all the tasks to 731 * is to invoke synchronize_sched_expedited() to push all the tasks to
 672 * the ->blocked_tasks[] lists, move all entries from the first set of 732 * the ->blkd_tasks lists and wait for these lists to drain.
673 * ->blocked_tasks[] lists to the second set, and finally wait for this
674 * second set to drain.
675 */ 733 */
676void synchronize_rcu_expedited(void) 734void synchronize_rcu_expedited(void)
677{ 735{
@@ -703,7 +761,7 @@ void synchronize_rcu_expedited(void)
703 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) 761 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
704 goto unlock_mb_ret; /* Others did our work for us. */ 762 goto unlock_mb_ret; /* Others did our work for us. */
705 763
706 /* force all RCU readers onto blocked_tasks[]. */ 764 /* force all RCU readers onto ->blkd_tasks lists. */
707 synchronize_sched_expedited(); 765 synchronize_sched_expedited();
708 766
709 raw_spin_lock_irqsave(&rsp->onofflock, flags); 767 raw_spin_lock_irqsave(&rsp->onofflock, flags);
@@ -715,7 +773,7 @@ void synchronize_rcu_expedited(void)
715 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 773 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
716 } 774 }
717 775
718 /* Snapshot current state of ->blocked_tasks[] lists. */ 776 /* Snapshot current state of ->blkd_tasks lists. */
719 rcu_for_each_leaf_node(rsp, rnp) 777 rcu_for_each_leaf_node(rsp, rnp)
720 sync_rcu_preempt_exp_init(rsp, rnp); 778 sync_rcu_preempt_exp_init(rsp, rnp);
721 if (NUM_RCU_NODES > 1) 779 if (NUM_RCU_NODES > 1)
@@ -723,7 +781,7 @@ void synchronize_rcu_expedited(void)
723 781
724 raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 782 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
725 783
726 /* Wait for snapshotted ->blocked_tasks[] lists to drain. */ 784 /* Wait for snapshotted ->blkd_tasks lists to drain. */
727 rnp = rcu_get_root(rsp); 785 rnp = rcu_get_root(rsp);
728 wait_event(sync_rcu_preempt_exp_wq, 786 wait_event(sync_rcu_preempt_exp_wq,
729 sync_rcu_preempt_exp_done(rnp)); 787 sync_rcu_preempt_exp_done(rnp));
@@ -739,7 +797,7 @@ mb_ret:
739EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 797EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
740 798
741/* 799/*
742 * Check to see if there is any immediate preemptable-RCU-related work 800 * Check to see if there is any immediate preemptible-RCU-related work
743 * to be done. 801 * to be done.
744 */ 802 */
745static int rcu_preempt_pending(int cpu) 803static int rcu_preempt_pending(int cpu)
@@ -749,7 +807,7 @@ static int rcu_preempt_pending(int cpu)
749} 807}
750 808
751/* 809/*
752 * Does preemptable RCU need the CPU to stay out of dynticks mode? 810 * Does preemptible RCU need the CPU to stay out of dynticks mode?
753 */ 811 */
754static int rcu_preempt_needs_cpu(int cpu) 812static int rcu_preempt_needs_cpu(int cpu)
755{ 813{
@@ -766,7 +824,7 @@ void rcu_barrier(void)
766EXPORT_SYMBOL_GPL(rcu_barrier); 824EXPORT_SYMBOL_GPL(rcu_barrier);
767 825
768/* 826/*
769 * Initialize preemptable RCU's per-CPU data. 827 * Initialize preemptible RCU's per-CPU data.
770 */ 828 */
771static void __cpuinit rcu_preempt_init_percpu_data(int cpu) 829static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
772{ 830{
@@ -774,7 +832,7 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
774} 832}
775 833
776/* 834/*
777 * Move preemptable RCU's callbacks from dying CPU to other online CPU. 835 * Move preemptible RCU's callbacks from dying CPU to other online CPU.
778 */ 836 */
779static void rcu_preempt_send_cbs_to_online(void) 837static void rcu_preempt_send_cbs_to_online(void)
780{ 838{
@@ -782,7 +840,7 @@ static void rcu_preempt_send_cbs_to_online(void)
782} 840}
783 841
784/* 842/*
785 * Initialize preemptable RCU's state structures. 843 * Initialize preemptible RCU's state structures.
786 */ 844 */
787static void __init __rcu_init_preempt(void) 845static void __init __rcu_init_preempt(void)
788{ 846{
@@ -790,7 +848,7 @@ static void __init __rcu_init_preempt(void)
790} 848}
791 849
792/* 850/*
793 * Check for a task exiting while in a preemptable-RCU read-side 851 * Check for a task exiting while in a preemptible-RCU read-side
794 * critical section, clean up if so. No need to issue warnings, 852 * critical section, clean up if so. No need to issue warnings,
795 * as debug_check_no_locks_held() already does this if lockdep 853 * as debug_check_no_locks_held() already does this if lockdep
796 * is enabled. 854 * is enabled.
@@ -802,11 +860,13 @@ void exit_rcu(void)
802 if (t->rcu_read_lock_nesting == 0) 860 if (t->rcu_read_lock_nesting == 0)
803 return; 861 return;
804 t->rcu_read_lock_nesting = 1; 862 t->rcu_read_lock_nesting = 1;
805 rcu_read_unlock(); 863 __rcu_read_unlock();
806} 864}
807 865
808#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 866#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
809 867
868static struct rcu_state *rcu_state = &rcu_sched_state;
869
810/* 870/*
811 * Tell them what RCU they are running. 871 * Tell them what RCU they are running.
812 */ 872 */
@@ -836,7 +896,7 @@ void rcu_force_quiescent_state(void)
836EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 896EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
837 897
838/* 898/*
839 * Because preemptable RCU does not exist, we never have to check for 899 * Because preemptible RCU does not exist, we never have to check for
840 * CPUs being in quiescent states. 900 * CPUs being in quiescent states.
841 */ 901 */
842static void rcu_preempt_note_context_switch(int cpu) 902static void rcu_preempt_note_context_switch(int cpu)
@@ -844,10 +904,10 @@ static void rcu_preempt_note_context_switch(int cpu)
844} 904}
845 905
846/* 906/*
847 * Because preemptable RCU does not exist, there are never any preempted 907 * Because preemptible RCU does not exist, there are never any preempted
848 * RCU readers. 908 * RCU readers.
849 */ 909 */
850static int rcu_preempted_readers(struct rcu_node *rnp) 910static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
851{ 911{
852 return 0; 912 return 0;
853} 913}
@@ -862,10 +922,8 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
862 922
863#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 923#endif /* #ifdef CONFIG_HOTPLUG_CPU */
864 924
865#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
866
867/* 925/*
868 * Because preemptable RCU does not exist, we never have to check for 926 * Because preemptible RCU does not exist, we never have to check for
869 * tasks blocked within RCU read-side critical sections. 927 * tasks blocked within RCU read-side critical sections.
870 */ 928 */
871static void rcu_print_detail_task_stall(struct rcu_state *rsp) 929static void rcu_print_detail_task_stall(struct rcu_state *rsp)
@@ -873,7 +931,7 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp)
873} 931}
874 932
875/* 933/*
876 * Because preemptable RCU does not exist, we never have to check for 934 * Because preemptible RCU does not exist, we never have to check for
877 * tasks blocked within RCU read-side critical sections. 935 * tasks blocked within RCU read-side critical sections.
878 */ 936 */
879static void rcu_print_task_stall(struct rcu_node *rnp) 937static void rcu_print_task_stall(struct rcu_node *rnp)
@@ -888,10 +946,8 @@ static void rcu_preempt_stall_reset(void)
888{ 946{
889} 947}
890 948
891#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
892
893/* 949/*
894 * Because there is no preemptable RCU, there can be no readers blocked, 950 * Because there is no preemptible RCU, there can be no readers blocked,
895 * so there is no need to check for blocked tasks. So check only for 951 * so there is no need to check for blocked tasks. So check only for
896 * bogus qsmask values. 952 * bogus qsmask values.
897 */ 953 */
@@ -903,7 +959,7 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
903#ifdef CONFIG_HOTPLUG_CPU 959#ifdef CONFIG_HOTPLUG_CPU
904 960
905/* 961/*
906 * Because preemptable RCU does not exist, it never needs to migrate 962 * Because preemptible RCU does not exist, it never needs to migrate
907 * tasks that were blocked within RCU read-side critical sections, and 963 * tasks that were blocked within RCU read-side critical sections, and
908 * such non-existent tasks cannot possibly have been blocking the current 964 * such non-existent tasks cannot possibly have been blocking the current
909 * grace period. 965 * grace period.
@@ -916,7 +972,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
916} 972}
917 973
918/* 974/*
919 * Because preemptable RCU does not exist, it never needs CPU-offline 975 * Because preemptible RCU does not exist, it never needs CPU-offline
920 * processing. 976 * processing.
921 */ 977 */
922static void rcu_preempt_offline_cpu(int cpu) 978static void rcu_preempt_offline_cpu(int cpu)
@@ -926,7 +982,7 @@ static void rcu_preempt_offline_cpu(int cpu)
926#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 982#endif /* #ifdef CONFIG_HOTPLUG_CPU */
927 983
928/* 984/*
929 * Because preemptable RCU does not exist, it never has any callbacks 985 * Because preemptible RCU does not exist, it never has any callbacks
930 * to check. 986 * to check.
931 */ 987 */
932static void rcu_preempt_check_callbacks(int cpu) 988static void rcu_preempt_check_callbacks(int cpu)
@@ -934,7 +990,7 @@ static void rcu_preempt_check_callbacks(int cpu)
934} 990}
935 991
936/* 992/*
937 * Because preemptable RCU does not exist, it never has any callbacks 993 * Because preemptible RCU does not exist, it never has any callbacks
938 * to process. 994 * to process.
939 */ 995 */
940static void rcu_preempt_process_callbacks(void) 996static void rcu_preempt_process_callbacks(void)
@@ -943,7 +999,7 @@ static void rcu_preempt_process_callbacks(void)
943 999
944/* 1000/*
945 * Wait for an rcu-preempt grace period, but make it happen quickly. 1001 * Wait for an rcu-preempt grace period, but make it happen quickly.
946 * But because preemptable RCU does not exist, map to rcu-sched. 1002 * But because preemptible RCU does not exist, map to rcu-sched.
947 */ 1003 */
948void synchronize_rcu_expedited(void) 1004void synchronize_rcu_expedited(void)
949{ 1005{
@@ -954,7 +1010,7 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
954#ifdef CONFIG_HOTPLUG_CPU 1010#ifdef CONFIG_HOTPLUG_CPU
955 1011
956/* 1012/*
957 * Because preemptable RCU does not exist, there is never any need to 1013 * Because preemptible RCU does not exist, there is never any need to
958 * report on tasks preempted in RCU read-side critical sections during 1014 * report on tasks preempted in RCU read-side critical sections during
959 * expedited RCU grace periods. 1015 * expedited RCU grace periods.
960 */ 1016 */
@@ -966,7 +1022,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
966#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 1022#endif /* #ifdef CONFIG_HOTPLUG_CPU */
967 1023
968/* 1024/*
969 * Because preemptable RCU does not exist, it never has any work to do. 1025 * Because preemptible RCU does not exist, it never has any work to do.
970 */ 1026 */
971static int rcu_preempt_pending(int cpu) 1027static int rcu_preempt_pending(int cpu)
972{ 1028{
@@ -974,7 +1030,7 @@ static int rcu_preempt_pending(int cpu)
974} 1030}
975 1031
976/* 1032/*
977 * Because preemptable RCU does not exist, it never needs any CPU. 1033 * Because preemptible RCU does not exist, it never needs any CPU.
978 */ 1034 */
979static int rcu_preempt_needs_cpu(int cpu) 1035static int rcu_preempt_needs_cpu(int cpu)
980{ 1036{
@@ -982,7 +1038,7 @@ static int rcu_preempt_needs_cpu(int cpu)
982} 1038}
983 1039
984/* 1040/*
985 * Because preemptable RCU does not exist, rcu_barrier() is just 1041 * Because preemptible RCU does not exist, rcu_barrier() is just
986 * another name for rcu_barrier_sched(). 1042 * another name for rcu_barrier_sched().
987 */ 1043 */
988void rcu_barrier(void) 1044void rcu_barrier(void)
@@ -992,7 +1048,7 @@ void rcu_barrier(void)
992EXPORT_SYMBOL_GPL(rcu_barrier); 1048EXPORT_SYMBOL_GPL(rcu_barrier);
993 1049
994/* 1050/*
995 * Because preemptable RCU does not exist, there is no per-CPU 1051 * Because preemptible RCU does not exist, there is no per-CPU
996 * data to initialize. 1052 * data to initialize.
997 */ 1053 */
998static void __cpuinit rcu_preempt_init_percpu_data(int cpu) 1054static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
@@ -1000,14 +1056,14 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
1000} 1056}
1001 1057
1002/* 1058/*
1003 * Because there is no preemptable RCU, there are no callbacks to move. 1059 * Because there is no preemptible RCU, there are no callbacks to move.
1004 */ 1060 */
1005static void rcu_preempt_send_cbs_to_online(void) 1061static void rcu_preempt_send_cbs_to_online(void)
1006{ 1062{
1007} 1063}
1008 1064
1009/* 1065/*
1010 * Because preemptable RCU does not exist, it need not be initialized. 1066 * Because preemptible RCU does not exist, it need not be initialized.
1011 */ 1067 */
1012static void __init __rcu_init_preempt(void) 1068static void __init __rcu_init_preempt(void)
1013{ 1069{
@@ -1015,6 +1071,276 @@ static void __init __rcu_init_preempt(void)
1015 1071
1016#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ 1072#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
1017 1073
1074#ifdef CONFIG_RCU_BOOST
1075
1076#include "rtmutex_common.h"
1077
1078#ifdef CONFIG_RCU_TRACE
1079
1080static void rcu_initiate_boost_trace(struct rcu_node *rnp)
1081{
1082 if (list_empty(&rnp->blkd_tasks))
1083 rnp->n_balk_blkd_tasks++;
1084 else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
1085 rnp->n_balk_exp_gp_tasks++;
1086 else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL)
1087 rnp->n_balk_boost_tasks++;
1088 else if (rnp->gp_tasks != NULL && rnp->qsmask != 0)
1089 rnp->n_balk_notblocked++;
1090 else if (rnp->gp_tasks != NULL &&
1091 ULONG_CMP_LT(jiffies, rnp->boost_time))
1092 rnp->n_balk_notyet++;
1093 else
1094 rnp->n_balk_nos++;
1095}
1096
1097#else /* #ifdef CONFIG_RCU_TRACE */
1098
1099static void rcu_initiate_boost_trace(struct rcu_node *rnp)
1100{
1101}
1102
1103#endif /* #else #ifdef CONFIG_RCU_TRACE */
1104
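
ULONG_CMP_LT() above (and ULONG_CMP_GE() in rcu_initiate_boost() below) compare jiffies values modulo the unsigned-long wrap, so "before" and "after" stay correct even when the counter overflows. A hedged standalone sketch; the macro body here approximates the kernel's definition rather than quoting it:

#include <limits.h>
#include <stdio.h>

/* a is "before" b when the modular difference a - b lands in the
 * upper half of the unsigned range, i.e. would be negative if signed. */
#define CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))

int main(void)
{
	unsigned long now = ULONG_MAX - 1;	/* jiffies just before wrap */
	unsigned long boost_time = now + 10;	/* wraps around to 8 */

	printf("plain   <: %d\n", now < boost_time);		/* 0 -- wrong */
	printf("modular <: %d\n", CMP_LT(now, boost_time));	/* 1 -- right */
	return 0;
}

So ULONG_CMP_LT(jiffies, rnp->boost_time) still means "not yet time to boost" across a jiffies wrap, where a plain < would misfire.
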
1105/*
1106 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
1107 * or ->boost_tasks, advancing the pointer to the next task in the
1108 * ->blkd_tasks list.
1109 *
1110 * Note that irqs must be enabled: boosting the task can block.
1111 * Returns 1 if there are more tasks needing to be boosted.
1112 */
1113static int rcu_boost(struct rcu_node *rnp)
1114{
1115 unsigned long flags;
1116 struct rt_mutex mtx;
1117 struct task_struct *t;
1118 struct list_head *tb;
1119
1120 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL)
1121 return 0; /* Nothing left to boost. */
1122
1123 raw_spin_lock_irqsave(&rnp->lock, flags);
1124
1125 /*
1126 * Recheck under the lock: all tasks in need of boosting
1127 * might exit their RCU read-side critical sections on their own.
1128 */
1129 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
1130 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1131 return 0;
1132 }
1133
1134 /*
1135 * Preferentially boost tasks blocking expedited grace periods.
1136 * This cannot starve the normal grace periods because a second
1137 * expedited grace period must boost all blocked tasks, including
1138 * those blocking the pre-existing normal grace period.
1139 */
1140 if (rnp->exp_tasks != NULL) {
1141 tb = rnp->exp_tasks;
1142 rnp->n_exp_boosts++;
1143 } else {
1144 tb = rnp->boost_tasks;
1145 rnp->n_normal_boosts++;
1146 }
1147 rnp->n_tasks_boosted++;
1148
1149 /*
1150 * We boost task t by manufacturing an rt_mutex that appears to
1151 * be held by task t. We leave a pointer to that rt_mutex where
1152 * task t can find it, and task t will release the mutex when it
1153 * exits its outermost RCU read-side critical section. Then
1154 * simply acquiring this artificial rt_mutex will boost task
1155 * t's priority. (Thanks to tglx for suggesting this approach!)
1156 *
1157 * Note that task t must acquire rnp->lock to remove itself from
1158 * the ->blkd_tasks list, which it will do from exit() if from
1159 * nowhere else. We therefore are guaranteed that task t will
1160 * stay around at least until we drop rnp->lock. Note that
1161 * rnp->lock also resolves races between our priority boosting
1162 * and task t's exiting its outermost RCU read-side critical
1163 * section.
1164 */
1165 t = container_of(tb, struct task_struct, rcu_node_entry);
1166 rt_mutex_init_proxy_locked(&mtx, t);
1167 t->rcu_boost_mutex = &mtx;
1168 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
1169 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1170 rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */
1171 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
1172
1173 return rnp->exp_tasks != NULL || rnp->boost_tasks != NULL;
1174}
1175
1176/*
1178 * Timer handler that wakes up boost kthreads that have yielded
1179 * the CPU due to an excessive number of tasks to boost. We wake
1180 * up the per-rcu_node kthread, which in turn wakes up the
1181 * booster kthread.
1181 */
1182static void rcu_boost_kthread_timer(unsigned long arg)
1183{
1184 invoke_rcu_node_kthread((struct rcu_node *)arg);
1185}
1186
1187/*
1188 * Priority-boosting kthread. One per leaf rcu_node and one for the
1189 * root rcu_node.
1190 */
1191static int rcu_boost_kthread(void *arg)
1192{
1193 struct rcu_node *rnp = (struct rcu_node *)arg;
1194 int spincnt = 0;
1195 int more2boost;
1196
1197 for (;;) {
1198 rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
1199 wait_event_interruptible(rnp->boost_wq, rnp->boost_tasks ||
1200 rnp->exp_tasks);
1201 rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
1202 more2boost = rcu_boost(rnp);
1203 if (more2boost)
1204 spincnt++;
1205 else
1206 spincnt = 0;
1207 if (spincnt > 10) {
1208 rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp);
1209 spincnt = 0;
1210 }
1211 }
1212 /* NOTREACHED */
1213 return 0;
1214}
1215
1216/*
1217 * Check to see if it is time to start boosting RCU readers that are
1218 * blocking the current grace period, and, if so, tell the per-rcu_node
1219 * kthread to start boosting them. If there is an expedited grace
1220 * period in progress, it is always time to boost.
1221 *
1222 * The caller must hold rnp->lock, which this function releases,
1223 * but irqs remain disabled. The ->boost_kthread_task is immortal,
1224 * so we don't need to worry about it going away.
1225 */
1226static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
1227{
1228 struct task_struct *t;
1229
1230 if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
1231 rnp->n_balk_exp_gp_tasks++;
1232 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1233 return;
1234 }
1235 if (rnp->exp_tasks != NULL ||
1236 (rnp->gp_tasks != NULL &&
1237 rnp->boost_tasks == NULL &&
1238 rnp->qsmask == 0 &&
1239 ULONG_CMP_GE(jiffies, rnp->boost_time))) {
1240 if (rnp->exp_tasks == NULL)
1241 rnp->boost_tasks = rnp->gp_tasks;
1242 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1243 t = rnp->boost_kthread_task;
1244 if (t != NULL)
1245 wake_up_process(t);
1246 } else {
1247 rcu_initiate_boost_trace(rnp);
1248 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1249 }
1250}
1251
1252/*
1253 * Set the affinity of the boost kthread. The CPU-hotplug locks are
1254 * held, so no one should be messing with the existence of the boost
1255 * kthread.
1256 */
1257static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
1258 cpumask_var_t cm)
1259{
1260 struct task_struct *t;
1261
1262 t = rnp->boost_kthread_task;
1263 if (t != NULL)
1264 set_cpus_allowed_ptr(rnp->boost_kthread_task, cm);
1265}
1266
1267#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
1268
1269/*
1270 * Do priority-boost accounting for the start of a new grace period.
1271 */
1272static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1273{
1274 rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
1275}
1276
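
Worked example for RCU_BOOST_DELAY_JIFFIES above, with values assumed for illustration rather than taken from this patch: if HZ=250 and CONFIG_RCU_BOOST_DELAY=500 (milliseconds), then DIV_ROUND_UP(500 * 250, 1000) = 125 jiffies, so rcu_preempt_boost_start_gp() arms ->boost_time half a second after the grace period starts. Because DIV_ROUND_UP rounds up, even a sub-jiffy delay yields at least 1 jiffy and never truncates to zero.
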
1277/*
1278 * Initialize the RCU-boost waitqueue.
1279 */
1280static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
1281{
1282 init_waitqueue_head(&rnp->boost_wq);
1283}
1284
1285/*
1286 * Create an RCU-boost kthread for the specified node if one does not
1287 * already exist. We only create this kthread for preemptible RCU.
1288 * Returns zero if all is well, a negated errno otherwise.
1289 */
1290static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1291 struct rcu_node *rnp,
1292 int rnp_index)
1293{
1294 unsigned long flags;
1295 struct sched_param sp;
1296 struct task_struct *t;
1297
1298 if (&rcu_preempt_state != rsp)
1299 return 0;
1300 if (rnp->boost_kthread_task != NULL)
1301 return 0;
1302 t = kthread_create(rcu_boost_kthread, (void *)rnp,
1303 "rcub%d", rnp_index);
1304 if (IS_ERR(t))
1305 return PTR_ERR(t);
1306 raw_spin_lock_irqsave(&rnp->lock, flags);
1307 rnp->boost_kthread_task = t;
1308 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1309 wake_up_process(t);
1310 sp.sched_priority = RCU_KTHREAD_PRIO;
1311 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1312 return 0;
1313}
1314
1315#else /* #ifdef CONFIG_RCU_BOOST */
1316
1317static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
1318{
1319 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1320}
1321
1322static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
1323 cpumask_var_t cm)
1324{
1325}
1326
1327static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1328{
1329}
1330
1331static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
1332{
1333}
1334
1335static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1336 struct rcu_node *rnp,
1337 int rnp_index)
1338{
1339 return 0;
1340}
1341
1342#endif /* #else #ifdef CONFIG_RCU_BOOST */
1343
1018#ifndef CONFIG_SMP 1344#ifndef CONFIG_SMP
1019 1345
1020void synchronize_sched_expedited(void) 1346void synchronize_sched_expedited(void)
@@ -1187,8 +1513,8 @@ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
1187 * 1513 *
1188 * Because it is not legal to invoke rcu_process_callbacks() with irqs 1514 * Because it is not legal to invoke rcu_process_callbacks() with irqs
1189 * disabled, we do one pass of force_quiescent_state(), then do a 1515 * disabled, we do one pass of force_quiescent_state(), then do a
1190 * raise_softirq() to cause rcu_process_callbacks() to be invoked later. 1516 * invoke_rcu_cpu_kthread() to cause rcu_process_callbacks() to be invoked
1191 * The per-cpu rcu_dyntick_drain variable controls the sequencing. 1517 * later. The per-cpu rcu_dyntick_drain variable controls the sequencing.
1192 */ 1518 */
1193int rcu_needs_cpu(int cpu) 1519int rcu_needs_cpu(int cpu)
1194{ 1520{
@@ -1239,7 +1565,7 @@ int rcu_needs_cpu(int cpu)
1239 1565
1240 /* If RCU callbacks are still pending, RCU still needs this CPU. */ 1566 /* If RCU callbacks are still pending, RCU still needs this CPU. */
1241 if (c) 1567 if (c)
1242 raise_softirq(RCU_SOFTIRQ); 1568 invoke_rcu_cpu_kthread();
1243 return c; 1569 return c;
1244} 1570}
1245 1571
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index c8e97853b970..aa0fd72b4bc7 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -46,6 +46,18 @@
46#define RCU_TREE_NONCORE 46#define RCU_TREE_NONCORE
47#include "rcutree.h" 47#include "rcutree.h"
48 48
49DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
50DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_cpu);
51DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
52DECLARE_PER_CPU(char, rcu_cpu_has_work);
53
54static char convert_kthread_status(unsigned int kthread_status)
55{
56 if (kthread_status > RCU_KTHREAD_MAX)
57 return '?';
58 return "SRWOY"[kthread_status];
59}
60
49static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) 61static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
50{ 62{
51 if (!rdp->beenonline) 63 if (!rdp->beenonline)
@@ -64,7 +76,21 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
64 rdp->dynticks_fqs); 76 rdp->dynticks_fqs);
65#endif /* #ifdef CONFIG_NO_HZ */ 77#endif /* #ifdef CONFIG_NO_HZ */
66 seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); 78 seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
67 seq_printf(m, " ql=%ld b=%ld", rdp->qlen, rdp->blimit); 79 seq_printf(m, " ql=%ld qs=%c%c%c%c kt=%d/%c/%d ktl=%x b=%ld",
80 rdp->qlen,
81 ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
82 rdp->nxttail[RCU_NEXT_TAIL]],
83 ".R"[rdp->nxttail[RCU_WAIT_TAIL] !=
84 rdp->nxttail[RCU_NEXT_READY_TAIL]],
85 ".W"[rdp->nxttail[RCU_DONE_TAIL] !=
86 rdp->nxttail[RCU_WAIT_TAIL]],
87 ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]],
88 per_cpu(rcu_cpu_has_work, rdp->cpu),
89 convert_kthread_status(per_cpu(rcu_cpu_kthread_status,
90 rdp->cpu)),
91 per_cpu(rcu_cpu_kthread_cpu, rdp->cpu),
92 per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff,
93 rdp->blimit);
68 seq_printf(m, " ci=%lu co=%lu ca=%lu\n", 94 seq_printf(m, " ci=%lu co=%lu ca=%lu\n",
69 rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); 95 rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted);
70} 96}
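
The qs=%c%c%c%c flags above rely on a compact C idiom: indexing a string literal with a zero-or-one condition picks one of two characters, and convert_kthread_status() indexes "SRWOY" the same way with a small state value. A minimal standalone illustration:

#include <stdio.h>

int main(void)
{
	int flag = 1;			/* any 0-or-1 condition */
	unsigned int status = 2;	/* small enumerated state */

	putchar(".N"[flag]);		/* 'N' if set, '.' if clear */
	putchar("SRWOY"[status]);	/* one letter per state: here 'W' */
	putchar('\n');			/* prints "NW" */
	return 0;
}
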
@@ -121,7 +147,18 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
121 rdp->dynticks_fqs); 147 rdp->dynticks_fqs);
122#endif /* #ifdef CONFIG_NO_HZ */ 148#endif /* #ifdef CONFIG_NO_HZ */
123 seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); 149 seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
124 seq_printf(m, ",%ld,%ld", rdp->qlen, rdp->blimit); 150 seq_printf(m, ",%ld,\"%c%c%c%c\",%d,\"%c\",%ld", rdp->qlen,
151 ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
152 rdp->nxttail[RCU_NEXT_TAIL]],
153 ".R"[rdp->nxttail[RCU_WAIT_TAIL] !=
154 rdp->nxttail[RCU_NEXT_READY_TAIL]],
155 ".W"[rdp->nxttail[RCU_DONE_TAIL] !=
156 rdp->nxttail[RCU_WAIT_TAIL]],
157 ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]],
158 per_cpu(rcu_cpu_has_work, rdp->cpu),
159 convert_kthread_status(per_cpu(rcu_cpu_kthread_status,
160 rdp->cpu)),
161 rdp->blimit);
125 seq_printf(m, ",%lu,%lu,%lu\n", 162 seq_printf(m, ",%lu,%lu,%lu\n",
126 rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); 163 rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted);
127} 164}
@@ -157,11 +194,76 @@ static const struct file_operations rcudata_csv_fops = {
157 .release = single_release, 194 .release = single_release,
158}; 195};
159 196
197#ifdef CONFIG_RCU_BOOST
198
199static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp)
200{
201 seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu "
202 "j=%04x bt=%04x\n",
203 rnp->grplo, rnp->grphi,
204 "T."[list_empty(&rnp->blkd_tasks)],
205 "N."[!rnp->gp_tasks],
206 "E."[!rnp->exp_tasks],
207 "B."[!rnp->boost_tasks],
208 convert_kthread_status(rnp->boost_kthread_status),
209 rnp->n_tasks_boosted, rnp->n_exp_boosts,
210 rnp->n_normal_boosts,
211 (int)(jiffies & 0xffff),
212 (int)(rnp->boost_time & 0xffff));
213 seq_printf(m, "%s: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n",
214 " balk",
215 rnp->n_balk_blkd_tasks,
216 rnp->n_balk_exp_gp_tasks,
217 rnp->n_balk_boost_tasks,
218 rnp->n_balk_notblocked,
219 rnp->n_balk_notyet,
220 rnp->n_balk_nos);
221}
222
223static int show_rcu_node_boost(struct seq_file *m, void *unused)
224{
225 struct rcu_node *rnp;
226
227 rcu_for_each_leaf_node(&rcu_preempt_state, rnp)
228 print_one_rcu_node_boost(m, rnp);
229 return 0;
230}
231
232static int rcu_node_boost_open(struct inode *inode, struct file *file)
233{
234 return single_open(file, show_rcu_node_boost, NULL);
235}
236
237static const struct file_operations rcu_node_boost_fops = {
238 .owner = THIS_MODULE,
239 .open = rcu_node_boost_open,
240 .read = seq_read,
241 .llseek = seq_lseek,
242 .release = single_release,
243};
244
245/*
246 * Create the rcuboost debugfs entry. Standard error return.
247 */
248static int rcu_boost_trace_create_file(struct dentry *rcudir)
249{
250 return !debugfs_create_file("rcuboost", 0444, rcudir, NULL,
251 &rcu_node_boost_fops);
252}
253
254#else /* #ifdef CONFIG_RCU_BOOST */
255
256static int rcu_boost_trace_create_file(struct dentry *rcudir)
257{
258 return 0; /* There cannot be an error if we didn't create it! */
259}
260
261#endif /* #else #ifdef CONFIG_RCU_BOOST */
262
160static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) 263static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
161{ 264{
162 unsigned long gpnum; 265 unsigned long gpnum;
163 int level = 0; 266 int level = 0;
164 int phase;
165 struct rcu_node *rnp; 267 struct rcu_node *rnp;
166 268
167 gpnum = rsp->gpnum; 269 gpnum = rsp->gpnum;
@@ -178,13 +280,11 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
178 seq_puts(m, "\n"); 280 seq_puts(m, "\n");
179 level = rnp->level; 281 level = rnp->level;
180 } 282 }
181 phase = gpnum & 0x1; 283 seq_printf(m, "%lx/%lx %c%c>%c %d:%d ^%d ",
182 seq_printf(m, "%lx/%lx %c%c>%c%c %d:%d ^%d ",
183 rnp->qsmask, rnp->qsmaskinit, 284 rnp->qsmask, rnp->qsmaskinit,
184 "T."[list_empty(&rnp->blocked_tasks[phase])], 285 ".G"[rnp->gp_tasks != NULL],
185 "E."[list_empty(&rnp->blocked_tasks[phase + 2])], 286 ".E"[rnp->exp_tasks != NULL],
186 "T."[list_empty(&rnp->blocked_tasks[!phase])], 287 ".T"[!list_empty(&rnp->blkd_tasks)],
187 "E."[list_empty(&rnp->blocked_tasks[!phase + 2])],
188 rnp->grplo, rnp->grphi, rnp->grpnum); 288 rnp->grplo, rnp->grphi, rnp->grpnum);
189 } 289 }
190 seq_puts(m, "\n"); 290 seq_puts(m, "\n");
@@ -216,16 +316,35 @@ static const struct file_operations rcuhier_fops = {
216 .release = single_release, 316 .release = single_release,
217}; 317};
218 318
319static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
320{
321 unsigned long flags;
322 unsigned long completed;
323 unsigned long gpnum;
324 unsigned long gpage;
325 unsigned long gpmax;
326 struct rcu_node *rnp = &rsp->node[0];
327
328 raw_spin_lock_irqsave(&rnp->lock, flags);
329 completed = rsp->completed;
330 gpnum = rsp->gpnum;
331 if (rsp->completed == rsp->gpnum)
332 gpage = 0;
333 else
334 gpage = jiffies - rsp->gp_start;
335 gpmax = rsp->gp_max;
336 raw_spin_unlock_irqrestore(&rnp->lock, flags);
337 seq_printf(m, "%s: completed=%ld gpnum=%lu age=%ld max=%ld\n",
338 rsp->name, completed, gpnum, gpage, gpmax);
339}
340
219static int show_rcugp(struct seq_file *m, void *unused) 341static int show_rcugp(struct seq_file *m, void *unused)
220{ 342{
221#ifdef CONFIG_TREE_PREEMPT_RCU 343#ifdef CONFIG_TREE_PREEMPT_RCU
222 seq_printf(m, "rcu_preempt: completed=%ld gpnum=%lu\n", 344 show_one_rcugp(m, &rcu_preempt_state);
223 rcu_preempt_state.completed, rcu_preempt_state.gpnum);
224#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 345#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
225 seq_printf(m, "rcu_sched: completed=%ld gpnum=%lu\n", 346 show_one_rcugp(m, &rcu_sched_state);
226 rcu_sched_state.completed, rcu_sched_state.gpnum); 347 show_one_rcugp(m, &rcu_bh_state);
227 seq_printf(m, "rcu_bh: completed=%ld gpnum=%lu\n",
228 rcu_bh_state.completed, rcu_bh_state.gpnum);
229 return 0; 348 return 0;
230} 349}
231 350
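
The new show_one_rcugp() above samples completed, gpnum, and the grace-period age under the root rcu_node lock with interrupts off, so one line of output reflects one consistent instant rather than values torn across a grace-period transition. A minimal sketch of the same snapshot pattern over a hypothetical state struct:

#include <linux/jiffies.h>
#include <linux/spinlock.h>

struct gp_state {
        raw_spinlock_t lock;
        unsigned long completed;        /* last finished grace period */
        unsigned long gpnum;            /* current grace-period number */
        unsigned long gp_start;         /* jiffies when it began */
};

static void gp_snapshot(struct gp_state *s, unsigned long *completed,
                        unsigned long *gpnum, unsigned long *age)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&s->lock, flags);
        *completed = s->completed;
        *gpnum = s->gpnum;
        /* Age is zero when no grace period is in flight. */
        *age = (*completed == *gpnum) ? 0 : jiffies - s->gp_start;
        raw_spin_unlock_irqrestore(&s->lock, flags);
}
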
@@ -298,6 +417,29 @@ static const struct file_operations rcu_pending_fops = {
298 .release = single_release, 417 .release = single_release,
299}; 418};
300 419
420static int show_rcutorture(struct seq_file *m, void *unused)
421{
422 seq_printf(m, "rcutorture test sequence: %lu %s\n",
423 rcutorture_testseq >> 1,
424 (rcutorture_testseq & 0x1) ? "(test in progress)" : "");
425 seq_printf(m, "rcutorture update version number: %lu\n",
426 rcutorture_vernum);
427 return 0;
428}
429
430static int rcutorture_open(struct inode *inode, struct file *file)
431{
432 return single_open(file, show_rcutorture, NULL);
433}
434
435static const struct file_operations rcutorture_fops = {
436 .owner = THIS_MODULE,
437 .open = rcutorture_open,
438 .read = seq_read,
439 .llseek = seq_lseek,
440 .release = single_release,
441};
442
301static struct dentry *rcudir; 443static struct dentry *rcudir;
302 444
303static int __init rcutree_trace_init(void) 445static int __init rcutree_trace_init(void)
@@ -318,6 +460,9 @@ static int __init rcutree_trace_init(void)
318 if (!retval) 460 if (!retval)
319 goto free_out; 461 goto free_out;
320 462
463 if (rcu_boost_trace_create_file(rcudir))
464 goto free_out;
465
321 retval = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops); 466 retval = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops);
322 if (!retval) 467 if (!retval)
323 goto free_out; 468 goto free_out;
@@ -331,6 +476,11 @@ static int __init rcutree_trace_init(void)
331 NULL, &rcu_pending_fops); 476 NULL, &rcu_pending_fops);
332 if (!retval) 477 if (!retval)
333 goto free_out; 478 goto free_out;
479
480 retval = debugfs_create_file("rcutorture", 0444, rcudir,
481 NULL, &rcutorture_fops);
482 if (!retval)
483 goto free_out;
334 return 0; 484 return 0;
335free_out: 485free_out:
336 debugfs_remove_recursive(rcudir); 486 debugfs_remove_recursive(rcudir);
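
Both new files follow the stock debugfs/seq_file recipe: a show function, a single_open() wrapper, file_operations built from seq_read/seq_lseek/single_release, and a debugfs_create_file() whose NULL return signals failure (hence the inverted !debugfs_create_file() in rcu_boost_trace_create_file(), which makes it return nonzero on error). A self-contained sketch of the recipe, with hypothetical names:

#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/seq_file.h>

static int demo_show(struct seq_file *m, void *unused)
{
        seq_printf(m, "hello from debugfs\n");
        return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
        return single_open(file, demo_show, NULL);
}

static const struct file_operations demo_fops = {
        .owner   = THIS_MODULE,
        .open    = demo_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release,
};

static struct dentry *demo_dir;

static int __init demo_init(void)
{
        demo_dir = debugfs_create_dir("demo", NULL);
        if (!demo_dir)
                return -ENOMEM;
        if (!debugfs_create_file("state", 0444, demo_dir, NULL, &demo_fops)) {
                debugfs_remove_recursive(demo_dir);
                return -ENOMEM;
        }
        return 0;
}

static void __exit demo_exit(void)
{
        debugfs_remove_recursive(demo_dir);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
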
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 174f976c2874..13960170cad4 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -58,7 +58,7 @@ DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
58 58
59char *softirq_to_name[NR_SOFTIRQS] = { 59char *softirq_to_name[NR_SOFTIRQS] = {
60 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", 60 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
61 "TASKLET", "SCHED", "HRTIMER", "RCU" 61 "TASKLET", "SCHED", "HRTIMER"
62}; 62};
63 63
64/* 64/*
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 9b1707b5f646..10ef61981149 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -352,7 +352,7 @@ config DEBUG_OBJECTS_WORK
352 352
353config DEBUG_OBJECTS_RCU_HEAD 353config DEBUG_OBJECTS_RCU_HEAD
354 bool "Debug RCU callbacks objects" 354 bool "Debug RCU callbacks objects"
355 depends on DEBUG_OBJECTS && PREEMPT 355 depends on DEBUG_OBJECTS
356 help 356 help
357 Enable this to turn on debugging of RCU list heads (call_rcu() usage). 357 Enable this to turn on debugging of RCU list heads (call_rcu() usage).
358 358
@@ -890,22 +890,9 @@ config RCU_TORTURE_TEST_RUNNABLE
890 Say N here if you want the RCU torture tests to start only 890 Say N here if you want the RCU torture tests to start only
891 after being manually enabled via /proc. 891 after being manually enabled via /proc.
892 892
893config RCU_CPU_STALL_DETECTOR
894 bool "Check for stalled CPUs delaying RCU grace periods"
895 depends on TREE_RCU || TREE_PREEMPT_RCU
896 default y
897 help
898 This option causes RCU to printk information on which
899 CPUs are delaying the current grace period, but only when
900 the grace period extends for excessive time periods.
901
902 Say N if you want to disable such checks.
903
904 Say Y if you are unsure.
905
906config RCU_CPU_STALL_TIMEOUT 893config RCU_CPU_STALL_TIMEOUT
907 int "RCU CPU stall timeout in seconds" 894 int "RCU CPU stall timeout in seconds"
908 depends on RCU_CPU_STALL_DETECTOR 895 depends on TREE_RCU || TREE_PREEMPT_RCU
909 range 3 300 896 range 3 300
910 default 60 897 default 60
911 help 898 help
@@ -914,22 +901,9 @@ config RCU_CPU_STALL_TIMEOUT
914 RCU grace period persists, additional CPU stall warnings are 901 RCU grace period persists, additional CPU stall warnings are
915 printed at more widely spaced intervals. 902 printed at more widely spaced intervals.
916 903
917config RCU_CPU_STALL_DETECTOR_RUNNABLE
918 bool "RCU CPU stall checking starts automatically at boot"
919 depends on RCU_CPU_STALL_DETECTOR
920 default y
921 help
922 If set, start checking for RCU CPU stalls immediately on
923 boot. Otherwise, RCU CPU stall checking must be manually
924 enabled.
925
926 Say Y if you are unsure.
927
928 Say N if you wish to suppress RCU CPU stall checking during boot.
929
930config RCU_CPU_STALL_VERBOSE 904config RCU_CPU_STALL_VERBOSE
931 bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR" 905 bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR"
932 depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU 906 depends on TREE_PREEMPT_RCU
933 default y 907 default y
934 help 908 help
935 This option causes RCU to printk detailed per-task information 909 This option causes RCU to printk detailed per-task information
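
Net effect of the two Kconfig deletions: stall checking is now always built whenever TREE_RCU or TREE_PREEMPT_RCU is, and only the timeout and verbosity remain configurable. A typical .config fragment after this change (the values are the defaults shown above):

CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_RCU_CPU_STALL_VERBOSE=y
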
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 3cc43558cf9c..150b6ce23df3 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -28,18 +28,10 @@
28#include <linux/udp.h> 28#include <linux/udp.h>
29#include <linux/if_vlan.h> 29#include <linux/if_vlan.h>
30 30
31static void gw_node_free_rcu(struct rcu_head *rcu)
32{
33 struct gw_node *gw_node;
34
35 gw_node = container_of(rcu, struct gw_node, rcu);
36 kfree(gw_node);
37}
38
39static void gw_node_free_ref(struct gw_node *gw_node) 31static void gw_node_free_ref(struct gw_node *gw_node)
40{ 32{
41 if (atomic_dec_and_test(&gw_node->refcount)) 33 if (atomic_dec_and_test(&gw_node->refcount))
42 call_rcu(&gw_node->rcu, gw_node_free_rcu); 34 kfree_rcu(gw_node, rcu);
43} 35}
44 36
45void *gw_get_selected(struct bat_priv *bat_priv) 37void *gw_get_selected(struct bat_priv *bat_priv)
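
This hunk is the template for nearly every conversion below: when a call_rcu() callback does nothing but container_of() plus kfree(), it collapses into kfree_rcu(ptr, field). The second argument is the name of the struct's rcu_head member (rcu here, rcu_head in other files), and the offset is folded into the callback slot, so no per-type helper survives. A minimal before/after sketch with a hypothetical struct foo:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
        int data;
        struct rcu_head rcu;
};

/* Before: a helper whose only job is to free the enclosing object. */
static void foo_free_rcu(struct rcu_head *head)
{
        kfree(container_of(head, struct foo, rcu));
}

static void foo_put_old(struct foo *f)
{
        call_rcu(&f->rcu, foo_free_rcu);
}

/* After: identical grace-period guarantee, no helper, no container_of(). */
static void foo_put_new(struct foo *f)
{
        kfree_rcu(f, rcu);
}
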
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 0b9133022d2d..ed23a5895d6c 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -56,18 +56,10 @@ err:
56 return 0; 56 return 0;
57} 57}
58 58
59static void neigh_node_free_rcu(struct rcu_head *rcu)
60{
61 struct neigh_node *neigh_node;
62
63 neigh_node = container_of(rcu, struct neigh_node, rcu);
64 kfree(neigh_node);
65}
66
67void neigh_node_free_ref(struct neigh_node *neigh_node) 59void neigh_node_free_ref(struct neigh_node *neigh_node)
68{ 60{
69 if (atomic_dec_and_test(&neigh_node->refcount)) 61 if (atomic_dec_and_test(&neigh_node->refcount))
70 call_rcu(&neigh_node->rcu, neigh_node_free_rcu); 62 kfree_rcu(neigh_node, rcu);
71} 63}
72 64
73struct neigh_node *create_neighbor(struct orig_node *orig_node, 65struct neigh_node *create_neighbor(struct orig_node *orig_node,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 824e1f6e50f2..04efe022c13b 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -76,18 +76,10 @@ int my_skb_head_push(struct sk_buff *skb, unsigned int len)
76 return 0; 76 return 0;
77} 77}
78 78
79static void softif_neigh_free_rcu(struct rcu_head *rcu)
80{
81 struct softif_neigh *softif_neigh;
82
83 softif_neigh = container_of(rcu, struct softif_neigh, rcu);
84 kfree(softif_neigh);
85}
86
87static void softif_neigh_free_ref(struct softif_neigh *softif_neigh) 79static void softif_neigh_free_ref(struct softif_neigh *softif_neigh)
88{ 80{
89 if (atomic_dec_and_test(&softif_neigh->refcount)) 81 if (atomic_dec_and_test(&softif_neigh->refcount))
90 call_rcu(&softif_neigh->rcu, softif_neigh_free_rcu); 82 kfree_rcu(softif_neigh, rcu);
91} 83}
92 84
93void softif_neigh_purge(struct bat_priv *bat_priv) 85void softif_neigh_purge(struct bat_priv *bat_priv)
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 7b39f3ed2fda..e2e66939ed00 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -68,14 +68,6 @@ static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
68 return __hw_addr_add_ex(list, addr, addr_len, addr_type, false); 68 return __hw_addr_add_ex(list, addr, addr_len, addr_type, false);
69} 69}
70 70
71static void ha_rcu_free(struct rcu_head *head)
72{
73 struct netdev_hw_addr *ha;
74
75 ha = container_of(head, struct netdev_hw_addr, rcu_head);
76 kfree(ha);
77}
78
79static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, 71static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
80 unsigned char *addr, int addr_len, 72 unsigned char *addr, int addr_len,
81 unsigned char addr_type, bool global) 73 unsigned char addr_type, bool global)
@@ -94,7 +86,7 @@ static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
94 if (--ha->refcount) 86 if (--ha->refcount)
95 return 0; 87 return 0;
96 list_del_rcu(&ha->list); 88 list_del_rcu(&ha->list);
97 call_rcu(&ha->rcu_head, ha_rcu_free); 89 kfree_rcu(ha, rcu_head);
98 list->count--; 90 list->count--;
99 return 0; 91 return 0;
100 } 92 }
@@ -197,7 +189,7 @@ void __hw_addr_flush(struct netdev_hw_addr_list *list)
197 189
198 list_for_each_entry_safe(ha, tmp, &list->list, list) { 190 list_for_each_entry_safe(ha, tmp, &list->list, list) {
199 list_del_rcu(&ha->list); 191 list_del_rcu(&ha->list);
200 call_rcu(&ha->rcu_head, ha_rcu_free); 192 kfree_rcu(ha, rcu_head);
201 } 193 }
202 list->count = 0; 194 list->count = 0;
203} 195}
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 706502ff64aa..7f36b38e060f 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -207,14 +207,6 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi)
207 rcu_read_unlock(); 207 rcu_read_unlock();
208} 208}
209 209
210
211static void free_dm_hw_stat(struct rcu_head *head)
212{
213 struct dm_hw_stat_delta *n;
214 n = container_of(head, struct dm_hw_stat_delta, rcu);
215 kfree(n);
216}
217
218static int set_all_monitor_traces(int state) 210static int set_all_monitor_traces(int state)
219{ 211{
220 int rc = 0; 212 int rc = 0;
@@ -245,7 +237,7 @@ static int set_all_monitor_traces(int state)
245 list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) { 237 list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
246 if (new_stat->dev == NULL) { 238 if (new_stat->dev == NULL) {
247 list_del_rcu(&new_stat->list); 239 list_del_rcu(&new_stat->list);
248 call_rcu(&new_stat->rcu, free_dm_hw_stat); 240 kfree_rcu(new_stat, rcu);
249 } 241 }
250 } 242 }
251 break; 243 break;
@@ -314,7 +306,7 @@ static int dropmon_net_event(struct notifier_block *ev_block,
314 new_stat->dev = NULL; 306 new_stat->dev = NULL;
315 if (trace_state == TRACE_OFF) { 307 if (trace_state == TRACE_OFF) {
316 list_del_rcu(&new_stat->list); 308 list_del_rcu(&new_stat->list);
317 call_rcu(&new_stat->rcu, free_dm_hw_stat); 309 kfree_rcu(new_stat, rcu);
318 break; 310 break;
319 } 311 }
320 } 312 }
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 7c2373321b74..43b03dd71e85 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -249,13 +249,6 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
249} 249}
250EXPORT_SYMBOL(gen_new_estimator); 250EXPORT_SYMBOL(gen_new_estimator);
251 251
252static void __gen_kill_estimator(struct rcu_head *head)
253{
254 struct gen_estimator *e = container_of(head,
255 struct gen_estimator, e_rcu);
256 kfree(e);
257}
258
259/** 252/**
260 * gen_kill_estimator - remove a rate estimator 253 * gen_kill_estimator - remove a rate estimator
261 * @bstats: basic statistics 254 * @bstats: basic statistics
@@ -279,7 +272,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
279 write_unlock(&est_lock); 272 write_unlock(&est_lock);
280 273
281 list_del_rcu(&e->list); 274 list_del_rcu(&e->list);
282 call_rcu(&e->e_rcu, __gen_kill_estimator); 275 kfree_rcu(e, e_rcu);
283 } 276 }
284 spin_unlock_bh(&est_tree_lock); 277 spin_unlock_bh(&est_tree_lock);
285} 278}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 5ceb257e860c..80b2aad3b73d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -565,13 +565,6 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue,
565 return len; 565 return len;
566} 566}
567 567
568static void rps_map_release(struct rcu_head *rcu)
569{
570 struct rps_map *map = container_of(rcu, struct rps_map, rcu);
571
572 kfree(map);
573}
574
575static ssize_t store_rps_map(struct netdev_rx_queue *queue, 568static ssize_t store_rps_map(struct netdev_rx_queue *queue,
576 struct rx_queue_attribute *attribute, 569 struct rx_queue_attribute *attribute,
577 const char *buf, size_t len) 570 const char *buf, size_t len)
@@ -619,7 +612,7 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
619 spin_unlock(&rps_map_lock); 612 spin_unlock(&rps_map_lock);
620 613
621 if (old_map) 614 if (old_map)
622 call_rcu(&old_map->rcu, rps_map_release); 615 kfree_rcu(old_map, rcu);
623 616
624 free_cpumask_var(mask); 617 free_cpumask_var(mask);
625 return len; 618 return len;
@@ -728,7 +721,7 @@ static void rx_queue_release(struct kobject *kobj)
728 map = rcu_dereference_raw(queue->rps_map); 721 map = rcu_dereference_raw(queue->rps_map);
729 if (map) { 722 if (map) {
730 RCU_INIT_POINTER(queue->rps_map, NULL); 723 RCU_INIT_POINTER(queue->rps_map, NULL);
731 call_rcu(&map->rcu, rps_map_release); 724 kfree_rcu(map, rcu);
732 } 725 }
733 726
734 flow_table = rcu_dereference_raw(queue->rps_flow_table); 727 flow_table = rcu_dereference_raw(queue->rps_flow_table);
@@ -898,21 +891,6 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
898 return len; 891 return len;
899} 892}
900 893
901static void xps_map_release(struct rcu_head *rcu)
902{
903 struct xps_map *map = container_of(rcu, struct xps_map, rcu);
904
905 kfree(map);
906}
907
908static void xps_dev_maps_release(struct rcu_head *rcu)
909{
910 struct xps_dev_maps *dev_maps =
911 container_of(rcu, struct xps_dev_maps, rcu);
912
913 kfree(dev_maps);
914}
915
916static DEFINE_MUTEX(xps_map_mutex); 894static DEFINE_MUTEX(xps_map_mutex);
917#define xmap_dereference(P) \ 895#define xmap_dereference(P) \
918 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) 896 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
@@ -1009,7 +987,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
1009 map = dev_maps ? 987 map = dev_maps ?
1010 xmap_dereference(dev_maps->cpu_map[cpu]) : NULL; 988 xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
1011 if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map) 989 if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map)
1012 call_rcu(&map->rcu, xps_map_release); 990 kfree_rcu(map, rcu);
1013 if (new_dev_maps->cpu_map[cpu]) 991 if (new_dev_maps->cpu_map[cpu])
1014 nonempty = 1; 992 nonempty = 1;
1015 } 993 }
@@ -1022,7 +1000,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
1022 } 1000 }
1023 1001
1024 if (dev_maps) 1002 if (dev_maps)
1025 call_rcu(&dev_maps->rcu, xps_dev_maps_release); 1003 kfree_rcu(dev_maps, rcu);
1026 1004
1027 netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node : 1005 netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node :
1028 NUMA_NO_NODE); 1006 NUMA_NO_NODE);
@@ -1084,7 +1062,7 @@ static void netdev_queue_release(struct kobject *kobj)
1084 else { 1062 else {
1085 RCU_INIT_POINTER(dev_maps->cpu_map[i], 1063 RCU_INIT_POINTER(dev_maps->cpu_map[i],
1086 NULL); 1064 NULL);
1087 call_rcu(&map->rcu, xps_map_release); 1065 kfree_rcu(map, rcu);
1088 map = NULL; 1066 map = NULL;
1089 } 1067 }
1090 } 1068 }
@@ -1094,7 +1072,7 @@ static void netdev_queue_release(struct kobject *kobj)
1094 1072
1095 if (!nonempty) { 1073 if (!nonempty) {
1096 RCU_INIT_POINTER(dev->xps_maps, NULL); 1074 RCU_INIT_POINTER(dev->xps_maps, NULL);
1097 call_rcu(&dev_maps->rcu, xps_dev_maps_release); 1075 kfree_rcu(dev_maps, rcu);
1098 } 1076 }
1099 } 1077 }
1100 1078
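
store_rps_map() and the xps paths above use kfree_rcu() at the tail of the classic RCU replace sequence: publish the new table with rcu_assign_pointer() while holding the update-side lock, then let the old one age out past its readers. Condensed into a sketch with hypothetical names:

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct demo_map {
        int len;
        struct rcu_head rcu;
};

static void demo_replace(struct demo_map __rcu **slot, struct demo_map *new,
                         spinlock_t *lock)
{
        struct demo_map *old;

        spin_lock(lock);
        old = rcu_dereference_protected(*slot, lockdep_is_held(lock));
        rcu_assign_pointer(*slot, new);         /* readers now see new */
        spin_unlock(lock);

        if (old)
                kfree_rcu(old, rcu);    /* freed once old's readers drain */
}
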
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 3f860261c5ee..297bb9272240 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -27,14 +27,6 @@ EXPORT_SYMBOL(init_net);
27 27
28#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ 28#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
29 29
30static void net_generic_release(struct rcu_head *rcu)
31{
32 struct net_generic *ng;
33
34 ng = container_of(rcu, struct net_generic, rcu);
35 kfree(ng);
36}
37
38static int net_assign_generic(struct net *net, int id, void *data) 30static int net_assign_generic(struct net *net, int id, void *data)
39{ 31{
40 struct net_generic *ng, *old_ng; 32 struct net_generic *ng, *old_ng;
@@ -68,7 +60,7 @@ static int net_assign_generic(struct net *net, int id, void *data)
68 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); 60 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
69 61
70 rcu_assign_pointer(net->gen, ng); 62 rcu_assign_pointer(net->gen, ng);
71 call_rcu(&old_ng->rcu, net_generic_release); 63 kfree_rcu(old_ng, rcu);
72assign: 64assign:
73 ng->ptr[id - 1] = data; 65 ng->ptr[id - 1] = data;
74 return 0; 66 return 0;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 0dcaa903e00e..4c27615340dc 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -332,14 +332,9 @@ static struct dn_ifaddr *dn_dev_alloc_ifa(void)
332 return ifa; 332 return ifa;
333} 333}
334 334
335static void dn_dev_free_ifa_rcu(struct rcu_head *head)
336{
337 kfree(container_of(head, struct dn_ifaddr, rcu));
338}
339
340static void dn_dev_free_ifa(struct dn_ifaddr *ifa) 335static void dn_dev_free_ifa(struct dn_ifaddr *ifa)
341{ 336{
342 call_rcu(&ifa->rcu, dn_dev_free_ifa_rcu); 337 kfree_rcu(ifa, rcu);
343} 338}
344 339
345static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr __rcu **ifap, int destroy) 340static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr __rcu **ifap, int destroy)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 641a5a2a9f9c..33e2c35b74b7 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -141,18 +141,8 @@ const struct fib_prop fib_props[RTN_MAX + 1] = {
141 }, 141 },
142}; 142};
143 143
144
145/* Release a nexthop info record */ 144/* Release a nexthop info record */
146 145
147static void free_fib_info_rcu(struct rcu_head *head)
148{
149 struct fib_info *fi = container_of(head, struct fib_info, rcu);
150
151 if (fi->fib_metrics != (u32 *) dst_default_metrics)
152 kfree(fi->fib_metrics);
153 kfree(fi);
154}
155
156void free_fib_info(struct fib_info *fi) 146void free_fib_info(struct fib_info *fi)
157{ 147{
158 if (fi->fib_dead == 0) { 148 if (fi->fib_dead == 0) {
@@ -166,7 +156,7 @@ void free_fib_info(struct fib_info *fi)
166 } endfor_nexthops(fi); 156 } endfor_nexthops(fi);
167 fib_info_cnt--; 157 fib_info_cnt--;
168 release_net(fi->fib_net); 158 release_net(fi->fib_net);
169 call_rcu(&fi->rcu, free_fib_info_rcu); 159 kfree_rcu(fi, rcu);
170} 160}
171 161
172void fib_release_info(struct fib_info *fi) 162void fib_release_info(struct fib_info *fi)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5fe9b8b41df3..11d4d28190bd 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -350,14 +350,9 @@ static inline void free_leaf(struct leaf *l)
350 call_rcu_bh(&l->rcu, __leaf_free_rcu); 350 call_rcu_bh(&l->rcu, __leaf_free_rcu);
351} 351}
352 352
353static void __leaf_info_free_rcu(struct rcu_head *head)
354{
355 kfree(container_of(head, struct leaf_info, rcu));
356}
357
358static inline void free_leaf_info(struct leaf_info *leaf) 353static inline void free_leaf_info(struct leaf_info *leaf)
359{ 354{
360 call_rcu(&leaf->rcu, __leaf_info_free_rcu); 355 kfree_rcu(leaf, rcu);
361} 356}
362 357
363static struct tnode *tnode_alloc(size_t size) 358static struct tnode *tnode_alloc(size_t size)
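
Note what stays behind in this file: free_leaf() keeps its open-coded __leaf_free_rcu() because it goes through call_rcu_bh(), and kfree_rcu() at this point is sugar over plain call_rcu() only. Hence just the call_rcu()-based leaf_info path converts:

        kfree_rcu(leaf, rcu);                   /* fine: rides call_rcu() */
        call_rcu_bh(&l->rcu, __leaf_free_rcu);  /* kept: no _bh flavor of kfree_rcu() */
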
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 1fd3d9ce8398..8f62d66d0857 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -149,17 +149,11 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc);
149static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, 149static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
150 int sfcount, __be32 *psfsrc, int delta); 150 int sfcount, __be32 *psfsrc, int delta);
151 151
152
153static void ip_mc_list_reclaim(struct rcu_head *head)
154{
155 kfree(container_of(head, struct ip_mc_list, rcu));
156}
157
158static void ip_ma_put(struct ip_mc_list *im) 152static void ip_ma_put(struct ip_mc_list *im)
159{ 153{
160 if (atomic_dec_and_test(&im->refcnt)) { 154 if (atomic_dec_and_test(&im->refcnt)) {
161 in_dev_put(im->interface); 155 in_dev_put(im->interface);
162 call_rcu(&im->rcu, ip_mc_list_reclaim); 156 kfree_rcu(im, rcu);
163 } 157 }
164} 158}
165 159
@@ -1836,12 +1830,6 @@ done:
1836} 1830}
1837EXPORT_SYMBOL(ip_mc_join_group); 1831EXPORT_SYMBOL(ip_mc_join_group);
1838 1832
1839static void ip_sf_socklist_reclaim(struct rcu_head *rp)
1840{
1841 kfree(container_of(rp, struct ip_sf_socklist, rcu));
1842 /* sk_omem_alloc should have been decreased by the caller*/
1843}
1844
1845static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, 1833static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
1846 struct in_device *in_dev) 1834 struct in_device *in_dev)
1847{ 1835{
@@ -1858,18 +1846,10 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
1858 rcu_assign_pointer(iml->sflist, NULL); 1846 rcu_assign_pointer(iml->sflist, NULL);
1859 /* decrease mem now to avoid the memleak warning */ 1847 /* decrease mem now to avoid the memleak warning */
1860 atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc); 1848 atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc);
1861 call_rcu(&psf->rcu, ip_sf_socklist_reclaim); 1849 kfree_rcu(psf, rcu);
1862 return err; 1850 return err;
1863} 1851}
1864 1852
1865
1866static void ip_mc_socklist_reclaim(struct rcu_head *rp)
1867{
1868 kfree(container_of(rp, struct ip_mc_socklist, rcu));
1869 /* sk_omem_alloc should have been decreased by the caller*/
1870}
1871
1872
1873/* 1853/*
1874 * Ask a socket to leave a group. 1854 * Ask a socket to leave a group.
1875 */ 1855 */
@@ -1909,7 +1889,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
1909 rtnl_unlock(); 1889 rtnl_unlock();
1910 /* decrease mem now to avoid the memleak warning */ 1890 /* decrease mem now to avoid the memleak warning */
1911 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); 1891 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
1912 call_rcu(&iml->rcu, ip_mc_socklist_reclaim); 1892 kfree_rcu(iml, rcu);
1913 return 0; 1893 return 0;
1914 } 1894 }
1915 if (!in_dev) 1895 if (!in_dev)
@@ -2026,7 +2006,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
2026 newpsl->sl_addr[i] = psl->sl_addr[i]; 2006 newpsl->sl_addr[i] = psl->sl_addr[i];
2027 /* decrease mem now to avoid the memleak warning */ 2007 /* decrease mem now to avoid the memleak warning */
2028 atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); 2008 atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
2029 call_rcu(&psl->rcu, ip_sf_socklist_reclaim); 2009 kfree_rcu(psl, rcu);
2030 } 2010 }
2031 rcu_assign_pointer(pmc->sflist, newpsl); 2011 rcu_assign_pointer(pmc->sflist, newpsl);
2032 psl = newpsl; 2012 psl = newpsl;
@@ -2127,7 +2107,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
2127 psl->sl_count, psl->sl_addr, 0); 2107 psl->sl_count, psl->sl_addr, 0);
2128 /* decrease mem now to avoid the memleak warning */ 2108 /* decrease mem now to avoid the memleak warning */
2129 atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); 2109 atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
2130 call_rcu(&psl->rcu, ip_sf_socklist_reclaim); 2110 kfree_rcu(psl, rcu);
2131 } else 2111 } else
2132 (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 2112 (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
2133 0, NULL, 0); 2113 0, NULL, 0);
@@ -2324,7 +2304,7 @@ void ip_mc_drop_socket(struct sock *sk)
2324 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); 2304 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
2325 /* decrease mem now to avoid the memleak warning */ 2305 /* decrease mem now to avoid the memleak warning */
2326 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); 2306 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
2327 call_rcu(&iml->rcu, ip_mc_socklist_reclaim); 2307 kfree_rcu(iml, rcu);
2328 } 2308 }
2329 rtnl_unlock(); 2309 rtnl_unlock();
2330} 2310}
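
A recurring detail in these igmp conversions (and the ipv6/mcast ones below): the old reclaim callbacks pointedly did not touch sk_omem_alloc ("should have been decreased by the caller"), and kfree_rcu() certainly cannot, so the atomic_sub() accounting stays synchronous at the call site and only the kfree() itself waits out the grace period:

        /* pattern used above: debit accounting now, defer only the free */
        atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
        kfree_rcu(psl, rcu);
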
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a7bda0757053..8f13d88d7dba 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -317,12 +317,6 @@ static void snmp6_free_dev(struct inet6_dev *idev)
317 317
318/* Nobody refers to this device, we may destroy it. */ 318/* Nobody refers to this device, we may destroy it. */
319 319
320static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
321{
322 struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu);
323 kfree(idev);
324}
325
326void in6_dev_finish_destroy(struct inet6_dev *idev) 320void in6_dev_finish_destroy(struct inet6_dev *idev)
327{ 321{
328 struct net_device *dev = idev->dev; 322 struct net_device *dev = idev->dev;
@@ -339,7 +333,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
339 return; 333 return;
340 } 334 }
341 snmp6_free_dev(idev); 335 snmp6_free_dev(idev);
342 call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu); 336 kfree_rcu(idev, rcu);
343} 337}
344 338
345EXPORT_SYMBOL(in6_dev_finish_destroy); 339EXPORT_SYMBOL(in6_dev_finish_destroy);
@@ -535,12 +529,6 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
535} 529}
536#endif 530#endif
537 531
538static void inet6_ifa_finish_destroy_rcu(struct rcu_head *head)
539{
540 struct inet6_ifaddr *ifp = container_of(head, struct inet6_ifaddr, rcu);
541 kfree(ifp);
542}
543
544/* Nobody refers to this ifaddr, destroy it */ 532/* Nobody refers to this ifaddr, destroy it */
545void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) 533void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
546{ 534{
@@ -561,7 +549,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
561 } 549 }
562 dst_release(&ifp->rt->dst); 550 dst_release(&ifp->rt->dst);
563 551
564 call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu); 552 kfree_rcu(ifp, rcu);
565} 553}
566 554
567static void 555static void
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 76b893771e6e..f2d98ca7588a 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -201,10 +201,6 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
201 return 0; 201 return 0;
202} 202}
203 203
204static void ipv6_mc_socklist_reclaim(struct rcu_head *head)
205{
206 kfree(container_of(head, struct ipv6_mc_socklist, rcu));
207}
208/* 204/*
209 * socket leave on multicast group 205 * socket leave on multicast group
210 */ 206 */
@@ -239,7 +235,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
239 (void) ip6_mc_leave_src(sk, mc_lst, NULL); 235 (void) ip6_mc_leave_src(sk, mc_lst, NULL);
240 rcu_read_unlock(); 236 rcu_read_unlock();
241 atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); 237 atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
242 call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim); 238 kfree_rcu(mc_lst, rcu);
243 return 0; 239 return 0;
244 } 240 }
245 } 241 }
@@ -307,7 +303,7 @@ void ipv6_sock_mc_close(struct sock *sk)
307 rcu_read_unlock(); 303 rcu_read_unlock();
308 304
309 atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); 305 atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
310 call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim); 306 kfree_rcu(mc_lst, rcu);
311 307
312 spin_lock(&ipv6_sk_mc_lock); 308 spin_lock(&ipv6_sk_mc_lock);
313 } 309 }
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 43b33373adb2..5f35d595e4a5 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -401,11 +401,6 @@ out:
401 return err; 401 return err;
402} 402}
403 403
404static void prl_entry_destroy_rcu(struct rcu_head *head)
405{
406 kfree(container_of(head, struct ip_tunnel_prl_entry, rcu_head));
407}
408
409static void prl_list_destroy_rcu(struct rcu_head *head) 404static void prl_list_destroy_rcu(struct rcu_head *head)
410{ 405{
411 struct ip_tunnel_prl_entry *p, *n; 406 struct ip_tunnel_prl_entry *p, *n;
@@ -433,7 +428,7 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
433 p = &x->next) { 428 p = &x->next) {
434 if (x->addr == a->addr) { 429 if (x->addr == a->addr) {
435 *p = x->next; 430 *p = x->next;
436 call_rcu(&x->rcu_head, prl_entry_destroy_rcu); 431 kfree_rcu(x, rcu_head);
437 t->prl_count--; 432 t->prl_count--;
438 goto out; 433 goto out;
439 } 434 }
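
prl_list_destroy_rcu() above, which survives the conversion, illustrates the other disqualifier: a callback that walks and frees a whole chain does more than kfree() its container, so it cannot become kfree_rcu(). The rule of thumb, as a hypothetical sketch:

struct bar {
        struct bar_child *children;
        struct rcu_head rcu_head;
};

/* Extra work in the callback rules out kfree_rcu(). */
static void bar_free_rcu(struct rcu_head *head)
{
        struct bar *b = container_of(head, struct bar, rcu_head);

        free_children(b->children);     /* anything beyond a bare kfree()... */
        kfree(b);
}

static void bar_release(struct bar *b)
{
        call_rcu(&b->rcu_head, bar_free_rcu);   /* ...keeps call_rcu() */
}
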
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 63d852cb4ca2..53defafb9aae 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -136,14 +136,6 @@ void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1
136 ieee80211_tx_skb(sdata, skb); 136 ieee80211_tx_skb(sdata, skb);
137} 137}
138 138
139static void kfree_tid_tx(struct rcu_head *rcu_head)
140{
141 struct tid_ampdu_tx *tid_tx =
142 container_of(rcu_head, struct tid_ampdu_tx, rcu_head);
143
144 kfree(tid_tx);
145}
146
147int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, 139int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
148 enum ieee80211_back_parties initiator, 140 enum ieee80211_back_parties initiator,
149 bool tx) 141 bool tx)
@@ -163,7 +155,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
163 /* not even started yet! */ 155 /* not even started yet! */
164 rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL); 156 rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL);
165 spin_unlock_bh(&sta->lock); 157 spin_unlock_bh(&sta->lock);
166 call_rcu(&tid_tx->rcu_head, kfree_tid_tx); 158 kfree_rcu(tid_tx, rcu_head);
167 return 0; 159 return 0;
168 } 160 }
169 161
@@ -322,7 +314,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
322 spin_unlock_bh(&sta->lock); 314 spin_unlock_bh(&sta->lock);
323 315
324 ieee80211_wake_queue_agg(local, tid); 316 ieee80211_wake_queue_agg(local, tid);
325 call_rcu(&tid_tx->rcu_head, kfree_tid_tx); 317 kfree_rcu(tid_tx, rcu_head);
326 return; 318 return;
327 } 319 }
328 320
@@ -701,7 +693,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid)
701 693
702 ieee80211_agg_splice_finish(local, tid); 694 ieee80211_agg_splice_finish(local, tid);
703 695
704 call_rcu(&tid_tx->rcu_head, kfree_tid_tx); 696 kfree_rcu(tid_tx, rcu_head);
705 697
706 unlock_sta: 698 unlock_sta:
707 spin_unlock_bh(&sta->lock); 699 spin_unlock_bh(&sta->lock);
diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index e73c8cae036b..ac3549690b8e 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -65,17 +65,9 @@ static void run_again(struct ieee80211_local *local,
65 mod_timer(&local->work_timer, timeout); 65 mod_timer(&local->work_timer, timeout);
66} 66}
67 67
68static void work_free_rcu(struct rcu_head *head)
69{
70 struct ieee80211_work *wk =
71 container_of(head, struct ieee80211_work, rcu_head);
72
73 kfree(wk);
74}
75
76void free_work(struct ieee80211_work *wk) 68void free_work(struct ieee80211_work *wk)
77{ 69{
78 call_rcu(&wk->rcu_head, work_free_rcu); 70 kfree_rcu(wk, rcu_head);
79} 71}
80 72
81static int ieee80211_compatible_rates(const u8 *supp_rates, int supp_rates_len, 73static int ieee80211_compatible_rates(const u8 *supp_rates, int supp_rates_len,
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 80a23ed62bb0..05ecdc281a53 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -68,12 +68,6 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp)
68 return (void *)(*ext) + off; 68 return (void *)(*ext) + off;
69} 69}
70 70
71static void __nf_ct_ext_free_rcu(struct rcu_head *head)
72{
73 struct nf_ct_ext *ext = container_of(head, struct nf_ct_ext, rcu);
74 kfree(ext);
75}
76
77void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) 71void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
78{ 72{
79 struct nf_ct_ext *old, *new; 73 struct nf_ct_ext *old, *new;
@@ -114,7 +108,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
114 (void *)old + old->offset[i]); 108 (void *)old + old->offset[i]);
115 rcu_read_unlock(); 109 rcu_read_unlock();
116 } 110 }
117 call_rcu(&old->rcu, __nf_ct_ext_free_rcu); 111 kfree_rcu(old, rcu);
118 ct->ext = new; 112 ct->ext = new;
119 } 113 }
120 114
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 4327e101c047..846f895cb656 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -62,13 +62,6 @@ static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = {
62 [OSF_ATTR_FINGER] = { .len = sizeof(struct xt_osf_user_finger) }, 62 [OSF_ATTR_FINGER] = { .len = sizeof(struct xt_osf_user_finger) },
63}; 63};
64 64
65static void xt_osf_finger_free_rcu(struct rcu_head *rcu_head)
66{
67 struct xt_osf_finger *f = container_of(rcu_head, struct xt_osf_finger, rcu_head);
68
69 kfree(f);
70}
71
72static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb, 65static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb,
73 const struct nlmsghdr *nlh, 66 const struct nlmsghdr *nlh,
74 const struct nlattr * const osf_attrs[]) 67 const struct nlattr * const osf_attrs[])
@@ -133,7 +126,7 @@ static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb,
133 * We are protected by nfnl mutex. 126 * We are protected by nfnl mutex.
134 */ 127 */
135 list_del_rcu(&sf->finger_entry); 128 list_del_rcu(&sf->finger_entry);
136 call_rcu(&sf->rcu_head, xt_osf_finger_free_rcu); 129 kfree_rcu(sf, rcu_head);
137 130
138 err = 0; 131 err = 0;
139 break; 132 break;
@@ -414,7 +407,7 @@ static void __exit xt_osf_fini(void)
414 407
415 list_for_each_entry_rcu(f, &xt_osf_fingers[i], finger_entry) { 408 list_for_each_entry_rcu(f, &xt_osf_fingers[i], finger_entry) {
416 list_del_rcu(&f->finger_entry); 409 list_del_rcu(&f->finger_entry);
417 call_rcu(&f->rcu_head, xt_osf_finger_free_rcu); 410 kfree_rcu(f, rcu_head);
418 } 411 }
419 } 412 }
420 rcu_read_unlock(); 413 rcu_read_unlock();
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index e2b0a680dd56..9c38658fba8b 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -154,44 +154,6 @@ static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1
154 */ 154 */
155 155
156/** 156/**
157 * netlbl_unlhsh_free_addr4 - Frees an IPv4 address entry from the hash table
158 * @entry: the entry's RCU field
159 *
160 * Description:
161 * This function is designed to be used as a callback to the call_rcu()
162 * function so that memory allocated to a hash table address entry can be
163 * released safely.
164 *
165 */
166static void netlbl_unlhsh_free_addr4(struct rcu_head *entry)
167{
168 struct netlbl_unlhsh_addr4 *ptr;
169
170 ptr = container_of(entry, struct netlbl_unlhsh_addr4, rcu);
171 kfree(ptr);
172}
173
174#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
175/**
176 * netlbl_unlhsh_free_addr6 - Frees an IPv6 address entry from the hash table
177 * @entry: the entry's RCU field
178 *
179 * Description:
180 * This function is designed to be used as a callback to the call_rcu()
181 * function so that memory allocated to a hash table address entry can be
182 * released safely.
183 *
184 */
185static void netlbl_unlhsh_free_addr6(struct rcu_head *entry)
186{
187 struct netlbl_unlhsh_addr6 *ptr;
188
189 ptr = container_of(entry, struct netlbl_unlhsh_addr6, rcu);
190 kfree(ptr);
191}
192#endif /* IPv6 */
193
194/**
195 * netlbl_unlhsh_free_iface - Frees an interface entry from the hash table 157 * netlbl_unlhsh_free_iface - Frees an interface entry from the hash table
196 * @entry: the entry's RCU field 158 * @entry: the entry's RCU field
197 * 159 *
@@ -568,7 +530,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net,
568 if (entry == NULL) 530 if (entry == NULL)
569 return -ENOENT; 531 return -ENOENT;
570 532
571 call_rcu(&entry->rcu, netlbl_unlhsh_free_addr4); 533 kfree_rcu(entry, rcu);
572 return 0; 534 return 0;
573} 535}
574 536
@@ -629,7 +591,7 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
629 if (entry == NULL) 591 if (entry == NULL)
630 return -ENOENT; 592 return -ENOENT;
631 593
632 call_rcu(&entry->rcu, netlbl_unlhsh_free_addr6); 594 kfree_rcu(entry, rcu);
633 return 0; 595 return 0;
634} 596}
635#endif /* IPv6 */ 597#endif /* IPv6 */
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c8f35b5d2ee9..5fe4f3b04ed3 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1566,12 +1566,6 @@ netlink_kernel_release(struct sock *sk)
1566} 1566}
1567EXPORT_SYMBOL(netlink_kernel_release); 1567EXPORT_SYMBOL(netlink_kernel_release);
1568 1568
1569
1570static void listeners_free_rcu(struct rcu_head *head)
1571{
1572 kfree(container_of(head, struct listeners, rcu));
1573}
1574
1575int __netlink_change_ngroups(struct sock *sk, unsigned int groups) 1569int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
1576{ 1570{
1577 struct listeners *new, *old; 1571 struct listeners *new, *old;
@@ -1588,7 +1582,7 @@ int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
1588 memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups)); 1582 memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
1589 rcu_assign_pointer(tbl->listeners, new); 1583 rcu_assign_pointer(tbl->listeners, new);
1590 1584
1591 call_rcu(&old->rcu, listeners_free_rcu); 1585 kfree_rcu(old, rcu);
1592 } 1586 }
1593 tbl->groups = groups; 1587 tbl->groups = groups;
1594 1588
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 947038ddd04c..1566672235dd 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -162,14 +162,6 @@ int phonet_address_add(struct net_device *dev, u8 addr)
162 return err; 162 return err;
163} 163}
164 164
165static void phonet_device_rcu_free(struct rcu_head *head)
166{
167 struct phonet_device *pnd;
168
169 pnd = container_of(head, struct phonet_device, rcu);
170 kfree(pnd);
171}
172
173int phonet_address_del(struct net_device *dev, u8 addr) 165int phonet_address_del(struct net_device *dev, u8 addr)
174{ 166{
175 struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); 167 struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
@@ -188,7 +180,7 @@ int phonet_address_del(struct net_device *dev, u8 addr)
188 mutex_unlock(&pndevs->lock); 180 mutex_unlock(&pndevs->lock);
189 181
190 if (pnd) 182 if (pnd)
191 call_rcu(&pnd->rcu, phonet_device_rcu_free); 183 kfree_rcu(pnd, rcu);
192 184
193 return err; 185 return err;
194} 186}
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 14b42f4ad791..a606025814a1 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -26,11 +26,6 @@
26#include <net/act_api.h> 26#include <net/act_api.h>
27#include <net/netlink.h> 27#include <net/netlink.h>
28 28
29static void tcf_common_free_rcu(struct rcu_head *head)
30{
31 kfree(container_of(head, struct tcf_common, tcfc_rcu));
32}
33
34void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) 29void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
35{ 30{
36 unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); 31 unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
@@ -47,7 +42,7 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
47 * gen_estimator est_timer() might access p->tcfc_lock 42 * gen_estimator est_timer() might access p->tcfc_lock
48 * or bstats, wait a RCU grace period before freeing p 43 * or bstats, wait a RCU grace period before freeing p
49 */ 44 */
50 call_rcu(&p->tcfc_rcu, tcf_common_free_rcu); 45 kfree_rcu(p, tcfc_rcu);
51 return; 46 return;
52 } 47 }
53 } 48 }
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 8a1630774fd6..b3b9b32f4e00 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -96,11 +96,6 @@ nla_put_failure:
96 goto done; 96 goto done;
97} 97}
98 98
99static void tcf_police_free_rcu(struct rcu_head *head)
100{
101 kfree(container_of(head, struct tcf_police, tcf_rcu));
102}
103
104static void tcf_police_destroy(struct tcf_police *p) 99static void tcf_police_destroy(struct tcf_police *p)
105{ 100{
106 unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); 101 unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
@@ -121,7 +116,7 @@ static void tcf_police_destroy(struct tcf_police *p)
121 * gen_estimator est_timer() might access p->tcf_lock 116 * gen_estimator est_timer() might access p->tcf_lock
122 * or bstats, wait a RCU grace period before freeing p 117 * or bstats, wait a RCU grace period before freeing p
123 */ 118 */
124 call_rcu(&p->tcf_rcu, tcf_police_free_rcu); 119 kfree_rcu(p, tcf_rcu);
125 return; 120 return;
126 } 121 }
127 } 122 }
@@ -401,7 +396,6 @@ static void __exit
401police_cleanup_module(void) 396police_cleanup_module(void)
402{ 397{
403 tcf_unregister_action(&act_police_ops); 398 tcf_unregister_action(&act_police_ops);
404 rcu_barrier(); /* Wait for completion of call_rcu()'s (tcf_police_free_rcu) */
405} 399}
406 400
407module_init(police_init_module); 401module_init(police_init_module);
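
The dropped rcu_barrier() is the quiet payoff in this file. With call_rcu() targeting tcf_police_free_rcu(), module exit had to drain in-flight callbacks, since RCU could otherwise invoke a function in already-freed module text. kfree_rcu() queues a kfree() that core RCU services itself, so no callback points into the module and the barrier becomes unnecessary. Schematically (hypothetical unregister helper):

/* Old shape: the callback lives in module text, so exit must wait. */
static void __exit old_cleanup_module(void)
{
        unregister_everything();
        rcu_barrier();          /* drain pending module-owned callbacks */
}

/* New shape: nothing module-owned remains in flight after kfree_rcu(). */
static void __exit new_cleanup_module(void)
{
        unregister_everything();
}
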
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index faf71d179e46..3c06c87cd280 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -219,7 +219,7 @@ int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr)
219 } 219 }
220 220
221 if (found) { 221 if (found) {
222 call_rcu(&addr->rcu, sctp_local_addr_free); 222 kfree_rcu(addr, rcu);
223 SCTP_DBG_OBJCNT_DEC(addr); 223 SCTP_DBG_OBJCNT_DEC(addr);
224 return 0; 224 return 0;
225 } 225 }
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 865ce7ba4e14..185fe058db11 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -123,7 +123,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
123 } 123 }
124 spin_unlock_bh(&sctp_local_addr_lock); 124 spin_unlock_bh(&sctp_local_addr_lock);
125 if (found) 125 if (found)
126 call_rcu(&addr->rcu, sctp_local_addr_free); 126 kfree_rcu(addr, rcu);
127 break; 127 break;
128 } 128 }
129 129
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index d5bf91d04f63..065d99958ced 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -230,13 +230,6 @@ static void sctp_free_local_addr_list(void)
230 } 230 }
231} 231}
232 232
233void sctp_local_addr_free(struct rcu_head *head)
234{
235 struct sctp_sockaddr_entry *e = container_of(head,
236 struct sctp_sockaddr_entry, rcu);
237 kfree(e);
238}
239
240/* Copy the local addresses which are valid for 'scope' into 'bp'. */ 233/* Copy the local addresses which are valid for 'scope' into 'bp'. */
241int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, 234int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
242 gfp_t gfp, int copy_flags) 235 gfp_t gfp, int copy_flags)
@@ -681,7 +674,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
681 } 674 }
682 spin_unlock_bh(&sctp_local_addr_lock); 675 spin_unlock_bh(&sctp_local_addr_lock);
683 if (found) 676 if (found)
684 call_rcu(&addr->rcu, sctp_local_addr_free); 677 kfree_rcu(addr, rcu);
685 break; 678 break;
686 } 679 }
687 680
diff --git a/net/socket.c b/net/socket.c
index 310d16b1b3c9..c2ed7c95ce87 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -263,15 +263,6 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
263 return &ei->vfs_inode; 263 return &ei->vfs_inode;
264} 264}
265 265
266
267
268static void wq_free_rcu(struct rcu_head *head)
269{
270 struct socket_wq *wq = container_of(head, struct socket_wq, rcu);
271
272 kfree(wq);
273}
274
275static void sock_destroy_inode(struct inode *inode) 266static void sock_destroy_inode(struct inode *inode)
276{ 267{
277 struct socket_alloc *ei; 268 struct socket_alloc *ei;
@@ -279,7 +270,7 @@ static void sock_destroy_inode(struct inode *inode)
279 270
280 ei = container_of(inode, struct socket_alloc, vfs_inode); 271 ei = container_of(inode, struct socket_alloc, vfs_inode);
281 wq = rcu_dereference_protected(ei->socket.wq, 1); 272 wq = rcu_dereference_protected(ei->socket.wq, 1);
282 call_rcu(&wq->rcu, wq_free_rcu); 273 kfree_rcu(wq, rcu);
283 kmem_cache_free(sock_inode_cachep, ei); 274 kmem_cache_free(sock_inode_cachep, ei);
284} 275}
285 276
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
index c6ca8662a468..f66baf44f32d 100644
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -69,18 +69,6 @@ error:
69EXPORT_SYMBOL_GPL(user_instantiate); 69EXPORT_SYMBOL_GPL(user_instantiate);
70 70
71/* 71/*
72 * dispose of the old data from an updated user defined key
73 */
74static void user_update_rcu_disposal(struct rcu_head *rcu)
75{
76 struct user_key_payload *upayload;
77
78 upayload = container_of(rcu, struct user_key_payload, rcu);
79
80 kfree(upayload);
81}
82
83/*
84 * update a user defined key 72 * update a user defined key
85 * - the key's semaphore is write-locked 73 * - the key's semaphore is write-locked
86 */ 74 */
@@ -114,7 +102,7 @@ int user_update(struct key *key, const void *data, size_t datalen)
114 key->expiry = 0; 102 key->expiry = 0;
115 } 103 }
116 104
117 call_rcu(&zap->rcu, user_update_rcu_disposal); 105 kfree_rcu(zap, rcu);
118 106
119error: 107error:
120 return ret; 108 return ret;
@@ -145,7 +133,7 @@ void user_revoke(struct key *key)
145 133
146 if (upayload) { 134 if (upayload) {
147 rcu_assign_pointer(key->payload.data, NULL); 135 rcu_assign_pointer(key->payload.data, NULL);
148 call_rcu(&upayload->rcu, user_update_rcu_disposal); 136 kfree_rcu(upayload, rcu);
149 } 137 }
150} 138}
151 139
diff --git a/security/selinux/netif.c b/security/selinux/netif.c
index d6095d63d831..58cc481c93d5 100644
--- a/security/selinux/netif.c
+++ b/security/selinux/netif.c
@@ -104,22 +104,6 @@ static int sel_netif_insert(struct sel_netif *netif)
104} 104}
105 105
106/** 106/**
107 * sel_netif_free - Frees an interface entry
108 * @p: the entry's RCU field
109 *
110 * Description:
111 * This function is designed to be used as a callback to the call_rcu()
112 * function so that memory allocated to a hash table interface entry can be
113 * released safely.
114 *
115 */
116static void sel_netif_free(struct rcu_head *p)
117{
118 struct sel_netif *netif = container_of(p, struct sel_netif, rcu_head);
119 kfree(netif);
120}
121
122/**
123 * sel_netif_destroy - Remove an interface record from the table 107 * sel_netif_destroy - Remove an interface record from the table
124 * @netif: the existing interface record 108 * @netif: the existing interface record
125 * 109 *
@@ -131,7 +115,7 @@ static void sel_netif_destroy(struct sel_netif *netif)
131{ 115{
132 list_del_rcu(&netif->list); 116 list_del_rcu(&netif->list);
133 sel_netif_total--; 117 sel_netif_total--;
134 call_rcu(&netif->rcu_head, sel_netif_free); 118 kfree_rcu(netif, rcu_head);
135} 119}
136 120
137/** 121/**
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 0a7ed5b5e281..1e88485c16a0 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -2187,7 +2187,6 @@ static const struct flag flags[] = {
2187 { "TASKLET_SOFTIRQ", 6 }, 2187 { "TASKLET_SOFTIRQ", 6 },
2188 { "SCHED_SOFTIRQ", 7 }, 2188 { "SCHED_SOFTIRQ", 7 },
2189 { "HRTIMER_SOFTIRQ", 8 }, 2189 { "HRTIMER_SOFTIRQ", 8 },
2190 { "RCU_SOFTIRQ", 9 },
2191 2190
2192 { "HRTIMER_NORESTART", 0 }, 2191 { "HRTIMER_NORESTART", 0 },
2193 { "HRTIMER_RESTART", 1 }, 2192 { "HRTIMER_RESTART", 1 },
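
This final hunk mirrors the kernel/softirq.c change earlier in the series: perf carries its own copy of the softirq name table for decoding trace events, so when RCU leaves the softirq vector the userspace table must drop entry 9 in lockstep, or perf would decode vector 9 with a stale name. A hypothetical by-value lookup over the flags[] table shown above makes the coupling concrete:

static const char *softirq_name(const struct flag *flags, int nr_flags,
                                unsigned long long value)
{
        int i;

        for (i = 0; i < nr_flags; i++)
                if (flags[i].value == value)
                        return flags[i].name;
        return "UNKNOWN";
}
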