diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-10-26 10:26:53 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-10-26 10:26:53 -0400 |
commit | 19b4a8d520a6e0176dd52aaa429261ad4fcaa545 (patch) | |
tree | 6dcf5a780718fc50b9cd79cc803daa7c7e080a02 | |
parent | 3cfef9524677a4ecb392d6fbffe6ebce6302f1d4 (diff) | |
parent | 048b718029033af117870d3da47da12995be14a3 (diff) |
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (45 commits)
rcu: Move propagation of ->completed from rcu_start_gp() to rcu_report_qs_rsp()
rcu: Remove rcu_needs_cpu_flush() to avoid false quiescent states
rcu: Wire up RCU_BOOST_PRIO for rcutree
rcu: Make rcu_torture_boost() exit loops at end of test
rcu: Make rcu_torture_fqs() exit loops at end of test
rcu: Permit rt_mutex_unlock() with irqs disabled
rcu: Avoid having just-onlined CPU resched itself when RCU is idle
rcu: Suppress NMI backtraces when stall ends before dump
rcu: Prohibit grace periods during early boot
rcu: Simplify unboosting checks
rcu: Prevent early boot set_need_resched() from __rcu_pending()
rcu: Dump local stack if cannot dump all CPUs' stacks
rcu: Move __rcu_read_unlock()'s barrier() within if-statement
rcu: Improve rcu_assign_pointer() and RCU_INIT_POINTER() documentation
rcu: Make rcu_assign_pointer() unconditionally insert a memory barrier
rcu: Make rcu_implicit_dynticks_qs() locals be correct size
rcu: Eliminate in_irq() checks in rcu_enter_nohz()
nohz: Remove nohz_cpu_mask
rcu: Document interpretation of RCU-lockdep splats
rcu: Allow rcutorture's stat_interval parameter to be changed at runtime
...
-rw-r--r-- | Documentation/RCU/NMI-RCU.txt | 2 | ||||
-rw-r--r-- | Documentation/RCU/lockdep-splat.txt | 110 | ||||
-rw-r--r-- | Documentation/RCU/lockdep.txt | 34 | ||||
-rw-r--r-- | Documentation/RCU/torture.txt | 137 | ||||
-rw-r--r-- | Documentation/RCU/trace.txt | 38 | ||||
-rw-r--r-- | include/linux/lockdep.h | 2 | ||||
-rw-r--r-- | include/linux/rcupdate.h | 300 | ||||
-rw-r--r-- | include/linux/rcutiny.h | 20 | ||||
-rw-r--r-- | include/linux/rcutree.h | 2 | ||||
-rw-r--r-- | include/linux/sched.h | 4 | ||||
-rw-r--r-- | include/linux/types.h | 10 | ||||
-rw-r--r-- | include/trace/events/rcu.h | 459 | ||||
-rw-r--r-- | init/Kconfig | 6 | ||||
-rw-r--r-- | kernel/lockdep.c | 84 | ||||
-rw-r--r-- | kernel/pid.c | 4 | ||||
-rw-r--r-- | kernel/rcu.h | 85 | ||||
-rw-r--r-- | kernel/rcupdate.c | 26 | ||||
-rw-r--r-- | kernel/rcutiny.c | 117 | ||||
-rw-r--r-- | kernel/rcutiny_plugin.h | 134 | ||||
-rw-r--r-- | kernel/rcutorture.c | 77 | ||||
-rw-r--r-- | kernel/rcutree.c | 290 | ||||
-rw-r--r-- | kernel/rcutree.h | 17 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 150 | ||||
-rw-r--r-- | kernel/rcutree_trace.c | 13 | ||||
-rw-r--r-- | kernel/rtmutex.c | 8 | ||||
-rw-r--r-- | kernel/sched.c | 13 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 6 |
27 files changed, 1489 insertions, 659 deletions
diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt index bf82851a0e57..687777f83b23 100644 --- a/Documentation/RCU/NMI-RCU.txt +++ b/Documentation/RCU/NMI-RCU.txt | |||
@@ -95,7 +95,7 @@ not to return until all ongoing NMI handlers exit. It is therefore safe | |||
95 | to free up the handler's data as soon as synchronize_sched() returns. | 95 | to free up the handler's data as soon as synchronize_sched() returns. |
96 | 96 | ||
97 | Important note: for this to work, the architecture in question must | 97 | Important note: for this to work, the architecture in question must |
98 | invoke irq_enter() and irq_exit() on NMI entry and exit, respectively. | 98 | invoke nmi_enter() and nmi_exit() on NMI entry and exit, respectively. |
99 | 99 | ||
100 | 100 | ||
101 | Answer to Quick Quiz | 101 | Answer to Quick Quiz |
diff --git a/Documentation/RCU/lockdep-splat.txt b/Documentation/RCU/lockdep-splat.txt new file mode 100644 index 000000000000..bf9061142827 --- /dev/null +++ b/Documentation/RCU/lockdep-splat.txt | |||
@@ -0,0 +1,110 @@ | |||
1 | Lockdep-RCU was added to the Linux kernel in early 2010 | ||
2 | (http://lwn.net/Articles/371986/). This facility checks for some common | ||
3 | misuses of the RCU API, most notably using one of the rcu_dereference() | ||
4 | family to access an RCU-protected pointer without the proper protection. | ||
5 | When such misuse is detected, a lockdep-RCU splat is emitted. | ||
6 | |||
7 | The usual cause of a lockdep-RCU splat is someone accessing an | ||
8 | RCU-protected data structure without either (1) being in the right kind of | ||
9 | RCU read-side critical section or (2) holding the right update-side lock. | ||
10 | This problem can therefore be serious: it might result in random memory | ||
11 | overwriting or worse. There can of course be false positives, this | ||
12 | being the real world and all that. | ||
13 | |||
14 | So let's look at an example RCU lockdep splat from 3.0-rc5, one that | ||
15 | has long since been fixed: | ||
16 | |||
17 | =============================== | ||
18 | [ INFO: suspicious RCU usage. ] | ||
19 | ------------------------------- | ||
20 | block/cfq-iosched.c:2776 suspicious rcu_dereference_protected() usage! | ||
21 | |||
22 | other info that might help us debug this: | ||
23 | |||
24 | |||
25 | rcu_scheduler_active = 1, debug_locks = 0 | ||
26 | 3 locks held by scsi_scan_6/1552: | ||
27 | #0: (&shost->scan_mutex){+.+.+.}, at: [<ffffffff8145efca>] | ||
28 | scsi_scan_host_selected+0x5a/0x150 | ||
29 | #1: (&eq->sysfs_lock){+.+...}, at: [<ffffffff812a5032>] | ||
30 | elevator_exit+0x22/0x60 | ||
31 | #2: (&(&q->__queue_lock)->rlock){-.-...}, at: [<ffffffff812b6233>] | ||
32 | cfq_exit_queue+0x43/0x190 | ||
33 | |||
34 | stack backtrace: | ||
35 | Pid: 1552, comm: scsi_scan_6 Not tainted 3.0.0-rc5 #17 | ||
36 | Call Trace: | ||
37 | [<ffffffff810abb9b>] lockdep_rcu_dereference+0xbb/0xc0 | ||
38 | [<ffffffff812b6139>] __cfq_exit_single_io_context+0xe9/0x120 | ||
39 | [<ffffffff812b626c>] cfq_exit_queue+0x7c/0x190 | ||
40 | [<ffffffff812a5046>] elevator_exit+0x36/0x60 | ||
41 | [<ffffffff812a802a>] blk_cleanup_queue+0x4a/0x60 | ||
42 | [<ffffffff8145cc09>] scsi_free_queue+0x9/0x10 | ||
43 | [<ffffffff81460944>] __scsi_remove_device+0x84/0xd0 | ||
44 | [<ffffffff8145dca3>] scsi_probe_and_add_lun+0x353/0xb10 | ||
45 | [<ffffffff817da069>] ? error_exit+0x29/0xb0 | ||
46 | [<ffffffff817d98ed>] ? _raw_spin_unlock_irqrestore+0x3d/0x80 | ||
47 | [<ffffffff8145e722>] __scsi_scan_target+0x112/0x680 | ||
48 | [<ffffffff812c690d>] ? trace_hardirqs_off_thunk+0x3a/0x3c | ||
49 | [<ffffffff817da069>] ? error_exit+0x29/0xb0 | ||
50 | [<ffffffff812bcc60>] ? kobject_del+0x40/0x40 | ||
51 | [<ffffffff8145ed16>] scsi_scan_channel+0x86/0xb0 | ||
52 | [<ffffffff8145f0b0>] scsi_scan_host_selected+0x140/0x150 | ||
53 | [<ffffffff8145f149>] do_scsi_scan_host+0x89/0x90 | ||
54 | [<ffffffff8145f170>] do_scan_async+0x20/0x160 | ||
55 | [<ffffffff8145f150>] ? do_scsi_scan_host+0x90/0x90 | ||
56 | [<ffffffff810975b6>] kthread+0xa6/0xb0 | ||
57 | [<ffffffff817db154>] kernel_thread_helper+0x4/0x10 | ||
58 | [<ffffffff81066430>] ? finish_task_switch+0x80/0x110 | ||
59 | [<ffffffff817d9c04>] ? retint_restore_args+0xe/0xe | ||
60 | [<ffffffff81097510>] ? __init_kthread_worker+0x70/0x70 | ||
61 | [<ffffffff817db150>] ? gs_change+0xb/0xb | ||
62 | |||
63 | Line 2776 of block/cfq-iosched.c in v3.0-rc5 is as follows: | ||
64 | |||
65 | if (rcu_dereference(ioc->ioc_data) == cic) { | ||
66 | |||
67 | This form says that it must be in a plain vanilla RCU read-side critical | ||
68 | section, but the "other info" list above shows that this is not the | ||
69 | case. Instead, we hold three locks, one of which might be RCU related. | ||
70 | And maybe that lock really does protect this reference. If so, the fix | ||
71 | is to inform RCU, perhaps by changing __cfq_exit_single_io_context() to | ||
72 | take the struct request_queue "q" from cfq_exit_queue() as an argument, | ||
73 | which would permit us to invoke rcu_dereference_protected as follows: | ||
74 | |||
75 | if (rcu_dereference_protected(ioc->ioc_data, | ||
76 | lockdep_is_held(&q->queue_lock)) == cic) { | ||
77 | |||
78 | With this change, there would be no lockdep-RCU splat emitted if this | ||
79 | code was invoked either from within an RCU read-side critical section | ||
80 | or with the ->queue_lock held. In particular, this would have suppressed | ||
81 | the above lockdep-RCU splat because ->queue_lock is held (see #2 in the | ||
82 | list above). | ||
83 | |||
84 | On the other hand, perhaps we really do need an RCU read-side critical | ||
85 | section. In this case, the critical section must span the use of the | ||
86 | return value from rcu_dereference(), or at least until there is some | ||
87 | reference count incremented or some such. One way to handle this is to | ||
88 | add rcu_read_lock() and rcu_read_unlock() as follows: | ||
89 | |||
90 | rcu_read_lock(); | ||
91 | if (rcu_dereference(ioc->ioc_data) == cic) { | ||
92 | spin_lock(&ioc->lock); | ||
93 | rcu_assign_pointer(ioc->ioc_data, NULL); | ||
94 | spin_unlock(&ioc->lock); | ||
95 | } | ||
96 | rcu_read_unlock(); | ||
97 | |||
98 | With this change, the rcu_dereference() is always within an RCU | ||
99 | read-side critical section, which again would have suppressed the | ||
100 | above lockdep-RCU splat. | ||
101 | |||
102 | But in this particular case, we don't actually dereference the pointer | ||
103 | returned from rcu_dereference(). Instead, that pointer is just compared | ||
104 | to the cic pointer, which means that the rcu_dereference() can be replaced | ||
105 | by rcu_access_pointer() as follows: | ||
106 | |||
107 | if (rcu_access_pointer(ioc->ioc_data) == cic) { | ||
108 | |||
109 | Because it is legal to invoke rcu_access_pointer() without protection, | ||
110 | this change would also suppress the above lockdep-RCU splat. | ||
diff --git a/Documentation/RCU/lockdep.txt b/Documentation/RCU/lockdep.txt index d7a49b2f6994..a102d4b3724b 100644 --- a/Documentation/RCU/lockdep.txt +++ b/Documentation/RCU/lockdep.txt | |||
@@ -32,9 +32,27 @@ checking of rcu_dereference() primitives: | |||
32 | srcu_dereference(p, sp): | 32 | srcu_dereference(p, sp): |
33 | Check for SRCU read-side critical section. | 33 | Check for SRCU read-side critical section. |
34 | rcu_dereference_check(p, c): | 34 | rcu_dereference_check(p, c): |
35 | Use explicit check expression "c". This is useful in | 35 | Use explicit check expression "c" along with |
36 | code that is invoked by both readers and updaters. | 36 | rcu_read_lock_held(). This is useful in code that is |
37 | rcu_dereference_raw(p) | 37 | invoked by both RCU readers and updaters. |
38 | rcu_dereference_bh_check(p, c): | ||
39 | Use explicit check expression "c" along with | ||
40 | rcu_read_lock_bh_held(). This is useful in code that | ||
41 | is invoked by both RCU-bh readers and updaters. | ||
42 | rcu_dereference_sched_check(p, c): | ||
43 | Use explicit check expression "c" along with | ||
44 | rcu_read_lock_sched_held(). This is useful in code that | ||
45 | is invoked by both RCU-sched readers and updaters. | ||
46 | srcu_dereference_check(p, c): | ||
47 | Use explicit check expression "c" along with | ||
48 | srcu_read_lock_held(). This is useful in code that | ||
49 | is invoked by both SRCU readers and updaters. | ||
50 | rcu_dereference_index_check(p, c): | ||
51 | Use explicit check expression "c", but the caller | ||
52 | must supply one of the rcu_read_lock_held() functions. | ||
53 | This is useful in code that uses RCU-protected arrays | ||
54 | and that is invoked by both RCU readers and updaters. | ||
55 | rcu_dereference_raw(p): | ||
38 | Don't check. (Use sparingly, if at all.) | 56 | Don't check. (Use sparingly, if at all.) |
39 | rcu_dereference_protected(p, c): | 57 | rcu_dereference_protected(p, c): |
40 | Use explicit check expression "c", and omit all barriers | 58 | Use explicit check expression "c", and omit all barriers |
@@ -48,13 +66,11 @@ checking of rcu_dereference() primitives: | |||
48 | value of the pointer itself, for example, against NULL. | 66 | value of the pointer itself, for example, against NULL. |
49 | 67 | ||
50 | The rcu_dereference_check() check expression can be any boolean | 68 | The rcu_dereference_check() check expression can be any boolean |
51 | expression, but would normally include one of the rcu_read_lock_held() | 69 | expression, but would normally include a lockdep expression. However, |
52 | family of functions and a lockdep expression. However, any boolean | 70 | any boolean expression can be used. For a moderately ornate example, |
53 | expression can be used. For a moderately ornate example, consider | 71 | consider the following: |
54 | the following: | ||
55 | 72 | ||
56 | file = rcu_dereference_check(fdt->fd[fd], | 73 | file = rcu_dereference_check(fdt->fd[fd], |
57 | rcu_read_lock_held() || | ||
58 | lockdep_is_held(&files->file_lock) || | 74 | lockdep_is_held(&files->file_lock) || |
59 | atomic_read(&files->count) == 1); | 75 | atomic_read(&files->count) == 1); |
60 | 76 | ||
@@ -62,7 +78,7 @@ This expression picks up the pointer "fdt->fd[fd]" in an RCU-safe manner, | |||
62 | and, if CONFIG_PROVE_RCU is configured, verifies that this expression | 78 | and, if CONFIG_PROVE_RCU is configured, verifies that this expression |
63 | is used in: | 79 | is used in: |
64 | 80 | ||
65 | 1. An RCU read-side critical section, or | 81 | 1. An RCU read-side critical section (implicit), or |
66 | 2. with files->file_lock held, or | 82 | 2. with files->file_lock held, or |
67 | 3. on an unshared files_struct. | 83 | 3. on an unshared files_struct. |
68 | 84 | ||
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt index 5d9016795fd8..783d6c134d3f 100644 --- a/Documentation/RCU/torture.txt +++ b/Documentation/RCU/torture.txt | |||
@@ -42,7 +42,7 @@ fqs_holdoff Holdoff time (in microseconds) between consecutive calls | |||
42 | fqs_stutter Wait time (in seconds) between consecutive bursts | 42 | fqs_stutter Wait time (in seconds) between consecutive bursts |
43 | of calls to force_quiescent_state(). | 43 | of calls to force_quiescent_state(). |
44 | 44 | ||
45 | irqreaders Says to invoke RCU readers from irq level. This is currently | 45 | irqreader Says to invoke RCU readers from irq level. This is currently |
46 | done via timers. Defaults to "1" for variants of RCU that | 46 | done via timers. Defaults to "1" for variants of RCU that |
47 | permit this. (Or, more accurately, variants of RCU that do | 47 | permit this. (Or, more accurately, variants of RCU that do |
48 | -not- permit this know to ignore this variable.) | 48 | -not- permit this know to ignore this variable.) |
@@ -79,19 +79,68 @@ stutter The length of time to run the test before pausing for this | |||
79 | Specifying "stutter=0" causes the test to run continuously | 79 | Specifying "stutter=0" causes the test to run continuously |
80 | without pausing, which is the old default behavior. | 80 | without pausing, which is the old default behavior. |
81 | 81 | ||
82 | test_boost Whether or not to test the ability of RCU to do priority | ||
83 | boosting. Defaults to "test_boost=1", which performs | ||
84 | RCU priority-inversion testing only if the selected | ||
85 | RCU implementation supports priority boosting. Specifying | ||
86 | "test_boost=0" never performs RCU priority-inversion | ||
87 | testing. Specifying "test_boost=2" performs RCU | ||
88 | priority-inversion testing even if the selected RCU | ||
89 | implementation does not support RCU priority boosting, | ||
90 | which can be used to test rcutorture's ability to | ||
91 | carry out RCU priority-inversion testing. | ||
92 | |||
93 | test_boost_interval | ||
94 | The number of seconds in an RCU priority-inversion test | ||
95 | cycle. Defaults to "test_boost_interval=7". It is | ||
96 | usually wise for this value to be relatively prime to | ||
97 | the value selected for "stutter". | ||
98 | |||
99 | test_boost_duration | ||
100 | The number of seconds to do RCU priority-inversion testing | ||
101 | within any given "test_boost_interval". Defaults to | ||
102 | "test_boost_duration=4". | ||
103 | |||
82 | test_no_idle_hz Whether or not to test the ability of RCU to operate in | 104 | test_no_idle_hz Whether or not to test the ability of RCU to operate in |
83 | a kernel that disables the scheduling-clock interrupt to | 105 | a kernel that disables the scheduling-clock interrupt to |
84 | idle CPUs. Boolean parameter, "1" to test, "0" otherwise. | 106 | idle CPUs. Boolean parameter, "1" to test, "0" otherwise. |
85 | Defaults to omitting this test. | 107 | Defaults to omitting this test. |
86 | 108 | ||
87 | torture_type The type of RCU to test: "rcu" for the rcu_read_lock() API, | 109 | torture_type The type of RCU to test, with string values as follows: |
88 | "rcu_sync" for rcu_read_lock() with synchronous reclamation, | 110 | |
89 | "rcu_bh" for the rcu_read_lock_bh() API, "rcu_bh_sync" for | 111 | "rcu": rcu_read_lock(), rcu_read_unlock() and call_rcu(). |
90 | rcu_read_lock_bh() with synchronous reclamation, "srcu" for | 112 | |
91 | the "srcu_read_lock()" API, "sched" for the use of | 113 | "rcu_sync": rcu_read_lock(), rcu_read_unlock(), and |
92 | preempt_disable() together with synchronize_sched(), | 114 | synchronize_rcu(). |
93 | and "sched_expedited" for the use of preempt_disable() | 115 | |
94 | with synchronize_sched_expedited(). | 116 | "rcu_expedited": rcu_read_lock(), rcu_read_unlock(), and |
117 | synchronize_rcu_expedited(). | ||
118 | |||
119 | "rcu_bh": rcu_read_lock_bh(), rcu_read_unlock_bh(), and | ||
120 | call_rcu_bh(). | ||
121 | |||
122 | "rcu_bh_sync": rcu_read_lock_bh(), rcu_read_unlock_bh(), | ||
123 | and synchronize_rcu_bh(). | ||
124 | |||
125 | "rcu_bh_expedited": rcu_read_lock_bh(), rcu_read_unlock_bh(), | ||
126 | and synchronize_rcu_bh_expedited(). | ||
127 | |||
128 | "srcu": srcu_read_lock(), srcu_read_unlock() and | ||
129 | synchronize_srcu(). | ||
130 | |||
131 | "srcu_expedited": srcu_read_lock(), srcu_read_unlock() and | ||
132 | synchronize_srcu_expedited(). | ||
133 | |||
134 | "sched": preempt_disable(), preempt_enable(), and | ||
135 | call_rcu_sched(). | ||
136 | |||
137 | "sched_sync": preempt_disable(), preempt_enable(), and | ||
138 | synchronize_sched(). | ||
139 | |||
140 | "sched_expedited": preempt_disable(), preempt_enable(), and | ||
141 | synchronize_sched_expedited(). | ||
142 | |||
143 | Defaults to "rcu". | ||
95 | 144 | ||
96 | verbose Enable debug printk()s. Default is disabled. | 145 | verbose Enable debug printk()s. Default is disabled. |
97 | 146 | ||
@@ -100,12 +149,12 @@ OUTPUT | |||
100 | 149 | ||
101 | The statistics output is as follows: | 150 | The statistics output is as follows: |
102 | 151 | ||
103 | rcu-torture: --- Start of test: nreaders=16 stat_interval=0 verbose=0 | 152 | rcu-torture:--- Start of test: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4 |
104 | rcu-torture: rtc: 0000000000000000 ver: 1916 tfle: 0 rta: 1916 rtaf: 0 rtf: 1915 | 153 | rcu-torture: rtc: (null) ver: 155441 tfle: 0 rta: 155441 rtaf: 8884 rtf: 155440 rtmbe: 0 rtbke: 0 rtbre: 0 rtbf: 0 rtb: 0 nt: 3055767 |
105 | rcu-torture: Reader Pipe: 1466408 9747 0 0 0 0 0 0 0 0 0 | 154 | rcu-torture: Reader Pipe: 727860534 34213 0 0 0 0 0 0 0 0 0 |
106 | rcu-torture: Reader Batch: 1464477 11678 0 0 0 0 0 0 0 0 | 155 | rcu-torture: Reader Batch: 727877838 17003 0 0 0 0 0 0 0 0 0 |
107 | rcu-torture: Free-Block Circulation: 1915 1915 1915 1915 1915 1915 1915 1915 1915 1915 0 | 156 | rcu-torture: Free-Block Circulation: 155440 155440 155440 155440 155440 155440 155440 155440 155440 155440 0 |
108 | rcu-torture: --- End of test | 157 | rcu-torture:--- End of test: SUCCESS: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4 |
109 | 158 | ||
110 | The command "dmesg | grep torture:" will extract this information on | 159 | The command "dmesg | grep torture:" will extract this information on |
111 | most systems. On more esoteric configurations, it may be necessary to | 160 | most systems. On more esoteric configurations, it may be necessary to |
@@ -113,26 +162,55 @@ use other commands to access the output of the printk()s used by | |||
113 | the RCU torture test. The printk()s use KERN_ALERT, so they should | 162 | the RCU torture test. The printk()s use KERN_ALERT, so they should |
114 | be evident. ;-) | 163 | be evident. ;-) |
115 | 164 | ||
165 | The first and last lines show the rcutorture module parameters, and the | ||
166 | last line shows either "SUCCESS" or "FAILURE", based on rcutorture's | ||
167 | automatic determination as to whether RCU operated correctly. | ||
168 | |||
116 | The entries are as follows: | 169 | The entries are as follows: |
117 | 170 | ||
118 | o "rtc": The hexadecimal address of the structure currently visible | 171 | o "rtc": The hexadecimal address of the structure currently visible |
119 | to readers. | 172 | to readers. |
120 | 173 | ||
121 | o "ver": The number of times since boot that the rcutw writer task | 174 | o "ver": The number of times since boot that the RCU writer task |
122 | has changed the structure visible to readers. | 175 | has changed the structure visible to readers. |
123 | 176 | ||
124 | o "tfle": If non-zero, indicates that the "torture freelist" | 177 | o "tfle": If non-zero, indicates that the "torture freelist" |
125 | containing structure to be placed into the "rtc" area is empty. | 178 | containing structures to be placed into the "rtc" area is empty. |
126 | This condition is important, since it can fool you into thinking | 179 | This condition is important, since it can fool you into thinking |
127 | that RCU is working when it is not. :-/ | 180 | that RCU is working when it is not. :-/ |
128 | 181 | ||
129 | o "rta": Number of structures allocated from the torture freelist. | 182 | o "rta": Number of structures allocated from the torture freelist. |
130 | 183 | ||
131 | o "rtaf": Number of allocations from the torture freelist that have | 184 | o "rtaf": Number of allocations from the torture freelist that have |
132 | failed due to the list being empty. | 185 | failed due to the list being empty. It is not unusual for this |
186 | to be non-zero, but it is bad for it to be a large fraction of | ||
187 | the value indicated by "rta". | ||
133 | 188 | ||
134 | o "rtf": Number of frees into the torture freelist. | 189 | o "rtf": Number of frees into the torture freelist. |
135 | 190 | ||
191 | o "rtmbe": A non-zero value indicates that rcutorture believes that | ||
192 | rcu_assign_pointer() and rcu_dereference() are not working | ||
193 | correctly. This value should be zero. | ||
194 | |||
195 | o "rtbke": rcutorture was unable to create the real-time kthreads | ||
196 | used to force RCU priority inversion. This value should be zero. | ||
197 | |||
198 | o "rtbre": Although rcutorture successfully created the kthreads | ||
199 | used to force RCU priority inversion, it was unable to set them | ||
200 | to the real-time priority level of 1. This value should be zero. | ||
201 | |||
202 | o "rtbf": The number of times that RCU priority boosting failed | ||
203 | to resolve RCU priority inversion. | ||
204 | |||
205 | o "rtb": The number of times that rcutorture attempted to force | ||
206 | an RCU priority inversion condition. If you are testing RCU | ||
207 | priority boosting via the "test_boost" module parameter, this | ||
208 | value should be non-zero. | ||
209 | |||
210 | o "nt": The number of times rcutorture ran RCU read-side code from | ||
211 | within a timer handler. This value should be non-zero only | ||
212 | if you specified the "irqreader" module parameter. | ||
213 | |||
136 | o "Reader Pipe": Histogram of "ages" of structures seen by readers. | 214 | o "Reader Pipe": Histogram of "ages" of structures seen by readers. |
137 | If any entries past the first two are non-zero, RCU is broken. | 215 | If any entries past the first two are non-zero, RCU is broken. |
138 | And rcutorture prints the error flag string "!!!" to make sure | 216 | And rcutorture prints the error flag string "!!!" to make sure |
@@ -162,26 +240,15 @@ o "Free-Block Circulation": Shows the number of torture structures | |||
162 | somehow gets incremented farther than it should. | 240 | somehow gets incremented farther than it should. |
163 | 241 | ||
164 | Different implementations of RCU can provide implementation-specific | 242 | Different implementations of RCU can provide implementation-specific |
165 | additional information. For example, SRCU provides the following: | 243 | additional information. For example, SRCU provides the following |
244 | additional line: | ||
166 | 245 | ||
167 | srcu-torture: rtc: f8cf46a8 ver: 355 tfle: 0 rta: 356 rtaf: 0 rtf: 346 rtmbe: 0 | ||
168 | srcu-torture: Reader Pipe: 559738 939 0 0 0 0 0 0 0 0 0 | ||
169 | srcu-torture: Reader Batch: 560434 243 0 0 0 0 0 0 0 0 | ||
170 | srcu-torture: Free-Block Circulation: 355 354 353 352 351 350 349 348 347 346 0 | ||
171 | srcu-torture: per-CPU(idx=1): 0(0,1) 1(0,1) 2(0,0) 3(0,1) | 246 | srcu-torture: per-CPU(idx=1): 0(0,1) 1(0,1) 2(0,0) 3(0,1) |
172 | 247 | ||
173 | The first four lines are similar to those for RCU. The last line shows | 248 | This line shows the per-CPU counter state. The numbers in parentheses are |
174 | the per-CPU counter state. The numbers in parentheses are the values | 249 | the values of the "old" and "current" counters for the corresponding CPU. |
175 | of the "old" and "current" counters for the corresponding CPU. The | 250 | The "idx" value maps the "old" and "current" values to the underlying |
176 | "idx" value maps the "old" and "current" values to the underlying array, | 251 | array, and is useful for debugging. |
177 | and is useful for debugging. | ||
178 | |||
179 | Similarly, sched_expedited RCU provides the following: | ||
180 | |||
181 | sched_expedited-torture: rtc: d0000000016c1880 ver: 1090796 tfle: 0 rta: 1090796 rtaf: 0 rtf: 1090787 rtmbe: 0 nt: 27713319 | ||
182 | sched_expedited-torture: Reader Pipe: 12660320201 95875 0 0 0 0 0 0 0 0 0 | ||
183 | sched_expedited-torture: Reader Batch: 12660424885 0 0 0 0 0 0 0 0 0 0 | ||
184 | sched_expedited-torture: Free-Block Circulation: 1090795 1090795 1090794 1090793 1090792 1090791 1090790 1090789 1090788 1090787 0 | ||
185 | 252 | ||
186 | 253 | ||
187 | USAGE | 254 | USAGE |
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index 8173cec473aa..aaf65f6c6cd7 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt | |||
@@ -33,23 +33,23 @@ rcu/rcuboost: | |||
33 | The output of "cat rcu/rcudata" looks as follows: | 33 | The output of "cat rcu/rcudata" looks as follows: |
34 | 34 | ||
35 | rcu_sched: | 35 | rcu_sched: |
36 | 0 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=545/1/0 df=50 of=0 ri=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0 | 36 | 0 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=545/1/0 df=50 of=0 ri=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0 |
37 | 1 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=967/1/0 df=58 of=0 ri=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0 | 37 | 1 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=967/1/0 df=58 of=0 ri=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0 |
38 | 2 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1081/1/0 df=175 of=0 ri=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0 | 38 | 2 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1081/1/0 df=175 of=0 ri=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0 |
39 | 3 c=20942 g=20943 pq=1 pqc=20942 qp=1 dt=1846/0/0 df=404 of=0 ri=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0 | 39 | 3 c=20942 g=20943 pq=1 pgp=20942 qp=1 dt=1846/0/0 df=404 of=0 ri=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0 |
40 | 4 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=369/1/0 df=83 of=0 ri=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0 | 40 | 4 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=369/1/0 df=83 of=0 ri=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0 |
41 | 5 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=381/1/0 df=64 of=0 ri=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0 | 41 | 5 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=381/1/0 df=64 of=0 ri=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0 |
42 | 6 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1037/1/0 df=183 of=0 ri=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0 | 42 | 6 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1037/1/0 df=183 of=0 ri=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0 |
43 | 7 c=20897 g=20897 pq=1 pqc=20896 qp=0 dt=1572/0/0 df=382 of=0 ri=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0 | 43 | 7 c=20897 g=20897 pq=1 pgp=20896 qp=0 dt=1572/0/0 df=382 of=0 ri=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0 |
44 | rcu_bh: | 44 | rcu_bh: |
45 | 0 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=545/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0 | 45 | 0 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=545/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0 |
46 | 1 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=967/1/0 df=3 of=0 ri=1 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0 | 46 | 1 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=967/1/0 df=3 of=0 ri=1 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0 |
47 | 2 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1081/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0 | 47 | 2 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1081/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0 |
48 | 3 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1846/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0 | 48 | 3 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1846/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0 |
49 | 4 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=369/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0 | 49 | 4 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=369/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0 |
50 | 5 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=381/1/0 df=4 of=0 ri=1 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0 | 50 | 5 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=381/1/0 df=4 of=0 ri=1 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0 |
51 | 6 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1037/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0 | 51 | 6 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1037/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0 |
52 | 7 c=1474 g=1474 pq=1 pqc=1473 qp=0 dt=1572/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0 | 52 | 7 c=1474 g=1474 pq=1 pgp=1473 qp=0 dt=1572/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0 |
53 | 53 | ||
54 | The first section lists the rcu_data structures for rcu_sched, the second | 54 | The first section lists the rcu_data structures for rcu_sched, the second |
55 | for rcu_bh. Note that CONFIG_TREE_PREEMPT_RCU kernels will have an | 55 | for rcu_bh. Note that CONFIG_TREE_PREEMPT_RCU kernels will have an |
@@ -84,7 +84,7 @@ o "pq" indicates that this CPU has passed through a quiescent state | |||
84 | CPU has not yet reported that fact, (2) some other CPU has not | 84 | CPU has not yet reported that fact, (2) some other CPU has not |
85 | yet reported for this grace period, or (3) both. | 85 | yet reported for this grace period, or (3) both. |
86 | 86 | ||
87 | o "pqc" indicates which grace period the last-observed quiescent | 87 | o "pgp" indicates which grace period the last-observed quiescent |
88 | state for this CPU corresponds to. This is important for handling | 88 | state for this CPU corresponds to. This is important for handling |
89 | the race between CPU 0 reporting an extended dynticks-idle | 89 | the race between CPU 0 reporting an extended dynticks-idle |
90 | quiescent state for CPU 1 and CPU 1 suddenly waking up and | 90 | quiescent state for CPU 1 and CPU 1 suddenly waking up and |
@@ -184,10 +184,14 @@ o "kt" is the per-CPU kernel-thread state. The digit preceding | |||
184 | The number after the final slash is the CPU that the kthread | 184 | The number after the final slash is the CPU that the kthread |
185 | is actually running on. | 185 | is actually running on. |
186 | 186 | ||
187 | This field is displayed only for CONFIG_RCU_BOOST kernels. | ||
188 | |||
187 | o "ktl" is the low-order 16 bits (in hexadecimal) of the count of | 189 | o "ktl" is the low-order 16 bits (in hexadecimal) of the count of |
188 | the number of times that this CPU's per-CPU kthread has gone | 190 | the number of times that this CPU's per-CPU kthread has gone |
189 | through its loop servicing invoke_rcu_cpu_kthread() requests. | 191 | through its loop servicing invoke_rcu_cpu_kthread() requests. |
190 | 192 | ||
193 | This field is displayed only for CONFIG_RCU_BOOST kernels. | ||
194 | |||
191 | o "b" is the batch limit for this CPU. If more than this number | 195 | o "b" is the batch limit for this CPU. If more than this number |
192 | of RCU callbacks is ready to invoke, then the remainder will | 196 | of RCU callbacks is ready to invoke, then the remainder will |
193 | be deferred. | 197 | be deferred. |
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index ef820a3c378b..b6a56e37284c 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h | |||
@@ -548,7 +548,7 @@ do { \ | |||
548 | #endif | 548 | #endif |
549 | 549 | ||
550 | #ifdef CONFIG_PROVE_RCU | 550 | #ifdef CONFIG_PROVE_RCU |
551 | extern void lockdep_rcu_dereference(const char *file, const int line); | 551 | void lockdep_rcu_suspicious(const char *file, const int line, const char *s); |
552 | #endif | 552 | #endif |
553 | 553 | ||
554 | #endif /* __LINUX_LOCKDEP_H */ | 554 | #endif /* __LINUX_LOCKDEP_H */ |
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 8f4f881a0ad8..2cf4226ade7e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h | |||
@@ -33,6 +33,7 @@ | |||
33 | #ifndef __LINUX_RCUPDATE_H | 33 | #ifndef __LINUX_RCUPDATE_H |
34 | #define __LINUX_RCUPDATE_H | 34 | #define __LINUX_RCUPDATE_H |
35 | 35 | ||
36 | #include <linux/types.h> | ||
36 | #include <linux/cache.h> | 37 | #include <linux/cache.h> |
37 | #include <linux/spinlock.h> | 38 | #include <linux/spinlock.h> |
38 | #include <linux/threads.h> | 39 | #include <linux/threads.h> |
@@ -64,32 +65,74 @@ static inline void rcutorture_record_progress(unsigned long vernum) | |||
64 | #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) | 65 | #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) |
65 | #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) | 66 | #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) |
66 | 67 | ||
68 | /* Exported common interfaces */ | ||
69 | |||
70 | #ifdef CONFIG_PREEMPT_RCU | ||
71 | |||
67 | /** | 72 | /** |
68 | * struct rcu_head - callback structure for use with RCU | 73 | * call_rcu() - Queue an RCU callback for invocation after a grace period. |
69 | * @next: next update requests in a list | 74 | * @head: structure to be used for queueing the RCU updates. |
70 | * @func: actual update function to call after the grace period. | 75 | * @func: actual callback function to be invoked after the grace period |
76 | * | ||
77 | * The callback function will be invoked some time after a full grace | ||
78 | * period elapses, in other words after all pre-existing RCU read-side | ||
79 | * critical sections have completed. However, the callback function | ||
80 | * might well execute concurrently with RCU read-side critical sections | ||
81 | * that started after call_rcu() was invoked. RCU read-side critical | ||
82 | * sections are delimited by rcu_read_lock() and rcu_read_unlock(), | ||
83 | * and may be nested. | ||
71 | */ | 84 | */ |
72 | struct rcu_head { | 85 | extern void call_rcu(struct rcu_head *head, |
73 | struct rcu_head *next; | 86 | void (*func)(struct rcu_head *head)); |
74 | void (*func)(struct rcu_head *head); | ||
75 | }; | ||
76 | 87 | ||
77 | /* Exported common interfaces */ | 88 | #else /* #ifdef CONFIG_PREEMPT_RCU */ |
89 | |||
90 | /* In classic RCU, call_rcu() is just call_rcu_sched(). */ | ||
91 | #define call_rcu call_rcu_sched | ||
92 | |||
93 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | ||
94 | |||
95 | /** | ||
96 | * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. | ||
97 | * @head: structure to be used for queueing the RCU updates. | ||
98 | * @func: actual callback function to be invoked after the grace period | ||
99 | * | ||
100 | * The callback function will be invoked some time after a full grace | ||
101 | * period elapses, in other words after all currently executing RCU | ||
102 | * read-side critical sections have completed. call_rcu_bh() assumes | ||
103 | * that the read-side critical sections end on completion of a softirq | ||
104 | * handler. This means that read-side critical sections in process | ||
105 | * context must not be interrupted by softirqs. This interface is to be | ||
106 | * used when most of the read-side critical sections are in softirq context. | ||
107 | * RCU read-side critical sections are delimited by : | ||
108 | * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context. | ||
109 | * OR | ||
110 | * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. | ||
111 | * These may be nested. | ||
112 | */ | ||
113 | extern void call_rcu_bh(struct rcu_head *head, | ||
114 | void (*func)(struct rcu_head *head)); | ||
115 | |||
116 | /** | ||
117 | * call_rcu_sched() - Queue an RCU for invocation after sched grace period. | ||
118 | * @head: structure to be used for queueing the RCU updates. | ||
119 | * @func: actual callback function to be invoked after the grace period | ||
120 | * | ||
121 | * The callback function will be invoked some time after a full grace | ||
122 | * period elapses, in other words after all currently executing RCU | ||
123 | * read-side critical sections have completed. call_rcu_sched() assumes | ||
124 | * that the read-side critical sections end on enabling of preemption | ||
125 | * or on voluntary preemption. | ||
126 | * RCU read-side critical sections are delimited by : | ||
127 | * - rcu_read_lock_sched() and rcu_read_unlock_sched(), | ||
128 | * OR | ||
129 | * anything that disables preemption. | ||
130 | * These may be nested. | ||
131 | */ | ||
78 | extern void call_rcu_sched(struct rcu_head *head, | 132 | extern void call_rcu_sched(struct rcu_head *head, |
79 | void (*func)(struct rcu_head *rcu)); | 133 | void (*func)(struct rcu_head *rcu)); |
80 | extern void synchronize_sched(void); | ||
81 | extern void rcu_barrier_bh(void); | ||
82 | extern void rcu_barrier_sched(void); | ||
83 | |||
84 | static inline void __rcu_read_lock_bh(void) | ||
85 | { | ||
86 | local_bh_disable(); | ||
87 | } | ||
88 | 134 | ||
89 | static inline void __rcu_read_unlock_bh(void) | 135 | extern void synchronize_sched(void); |
90 | { | ||
91 | local_bh_enable(); | ||
92 | } | ||
93 | 136 | ||
94 | #ifdef CONFIG_PREEMPT_RCU | 137 | #ifdef CONFIG_PREEMPT_RCU |
95 | 138 | ||
@@ -152,6 +195,15 @@ static inline void rcu_exit_nohz(void) | |||
152 | 195 | ||
153 | #endif /* #else #ifdef CONFIG_NO_HZ */ | 196 | #endif /* #else #ifdef CONFIG_NO_HZ */ |
154 | 197 | ||
198 | /* | ||
199 | * Infrastructure to implement the synchronize_() primitives in | ||
200 | * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. | ||
201 | */ | ||
202 | |||
203 | typedef void call_rcu_func_t(struct rcu_head *head, | ||
204 | void (*func)(struct rcu_head *head)); | ||
205 | void wait_rcu_gp(call_rcu_func_t crf); | ||
206 | |||
155 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) | 207 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) |
156 | #include <linux/rcutree.h> | 208 | #include <linux/rcutree.h> |
157 | #elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) | 209 | #elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) |
@@ -297,19 +349,31 @@ extern int rcu_my_thread_group_empty(void); | |||
297 | /** | 349 | /** |
298 | * rcu_lockdep_assert - emit lockdep splat if specified condition not met | 350 | * rcu_lockdep_assert - emit lockdep splat if specified condition not met |
299 | * @c: condition to check | 351 | * @c: condition to check |
352 | * @s: informative message | ||
300 | */ | 353 | */ |
301 | #define rcu_lockdep_assert(c) \ | 354 | #define rcu_lockdep_assert(c, s) \ |
302 | do { \ | 355 | do { \ |
303 | static bool __warned; \ | 356 | static bool __warned; \ |
304 | if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ | 357 | if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ |
305 | __warned = true; \ | 358 | __warned = true; \ |
306 | lockdep_rcu_dereference(__FILE__, __LINE__); \ | 359 | lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ |
307 | } \ | 360 | } \ |
308 | } while (0) | 361 | } while (0) |
309 | 362 | ||
363 | #define rcu_sleep_check() \ | ||
364 | do { \ | ||
365 | rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \ | ||
366 | "Illegal context switch in RCU-bh" \ | ||
367 | " read-side critical section"); \ | ||
368 | rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), \ | ||
369 | "Illegal context switch in RCU-sched"\ | ||
370 | " read-side critical section"); \ | ||
371 | } while (0) | ||
372 | |||
310 | #else /* #ifdef CONFIG_PROVE_RCU */ | 373 | #else /* #ifdef CONFIG_PROVE_RCU */ |
311 | 374 | ||
312 | #define rcu_lockdep_assert(c) do { } while (0) | 375 | #define rcu_lockdep_assert(c, s) do { } while (0) |
376 | #define rcu_sleep_check() do { } while (0) | ||
313 | 377 | ||
314 | #endif /* #else #ifdef CONFIG_PROVE_RCU */ | 378 | #endif /* #else #ifdef CONFIG_PROVE_RCU */ |
315 | 379 | ||
@@ -338,14 +402,16 @@ extern int rcu_my_thread_group_empty(void); | |||
338 | #define __rcu_dereference_check(p, c, space) \ | 402 | #define __rcu_dereference_check(p, c, space) \ |
339 | ({ \ | 403 | ({ \ |
340 | typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ | 404 | typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ |
341 | rcu_lockdep_assert(c); \ | 405 | rcu_lockdep_assert(c, "suspicious rcu_dereference_check()" \ |
406 | " usage"); \ | ||
342 | rcu_dereference_sparse(p, space); \ | 407 | rcu_dereference_sparse(p, space); \ |
343 | smp_read_barrier_depends(); \ | 408 | smp_read_barrier_depends(); \ |
344 | ((typeof(*p) __force __kernel *)(_________p1)); \ | 409 | ((typeof(*p) __force __kernel *)(_________p1)); \ |
345 | }) | 410 | }) |
346 | #define __rcu_dereference_protected(p, c, space) \ | 411 | #define __rcu_dereference_protected(p, c, space) \ |
347 | ({ \ | 412 | ({ \ |
348 | rcu_lockdep_assert(c); \ | 413 | rcu_lockdep_assert(c, "suspicious rcu_dereference_protected()" \ |
414 | " usage"); \ | ||
349 | rcu_dereference_sparse(p, space); \ | 415 | rcu_dereference_sparse(p, space); \ |
350 | ((typeof(*p) __force __kernel *)(p)); \ | 416 | ((typeof(*p) __force __kernel *)(p)); \ |
351 | }) | 417 | }) |
@@ -359,15 +425,15 @@ extern int rcu_my_thread_group_empty(void); | |||
359 | #define __rcu_dereference_index_check(p, c) \ | 425 | #define __rcu_dereference_index_check(p, c) \ |
360 | ({ \ | 426 | ({ \ |
361 | typeof(p) _________p1 = ACCESS_ONCE(p); \ | 427 | typeof(p) _________p1 = ACCESS_ONCE(p); \ |
362 | rcu_lockdep_assert(c); \ | 428 | rcu_lockdep_assert(c, \ |
429 | "suspicious rcu_dereference_index_check()" \ | ||
430 | " usage"); \ | ||
363 | smp_read_barrier_depends(); \ | 431 | smp_read_barrier_depends(); \ |
364 | (_________p1); \ | 432 | (_________p1); \ |
365 | }) | 433 | }) |
366 | #define __rcu_assign_pointer(p, v, space) \ | 434 | #define __rcu_assign_pointer(p, v, space) \ |
367 | ({ \ | 435 | ({ \ |
368 | if (!__builtin_constant_p(v) || \ | 436 | smp_wmb(); \ |
369 | ((v) != NULL)) \ | ||
370 | smp_wmb(); \ | ||
371 | (p) = (typeof(*v) __force space *)(v); \ | 437 | (p) = (typeof(*v) __force space *)(v); \ |
372 | }) | 438 | }) |
373 | 439 | ||
@@ -500,26 +566,6 @@ extern int rcu_my_thread_group_empty(void); | |||
500 | #define rcu_dereference_protected(p, c) \ | 566 | #define rcu_dereference_protected(p, c) \ |
501 | __rcu_dereference_protected((p), (c), __rcu) | 567 | __rcu_dereference_protected((p), (c), __rcu) |
502 | 568 | ||
503 | /** | ||
504 | * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented | ||
505 | * @p: The pointer to read, prior to dereferencing | ||
506 | * @c: The conditions under which the dereference will take place | ||
507 | * | ||
508 | * This is the RCU-bh counterpart to rcu_dereference_protected(). | ||
509 | */ | ||
510 | #define rcu_dereference_bh_protected(p, c) \ | ||
511 | __rcu_dereference_protected((p), (c), __rcu) | ||
512 | |||
513 | /** | ||
514 | * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented | ||
515 | * @p: The pointer to read, prior to dereferencing | ||
516 | * @c: The conditions under which the dereference will take place | ||
517 | * | ||
518 | * This is the RCU-sched counterpart to rcu_dereference_protected(). | ||
519 | */ | ||
520 | #define rcu_dereference_sched_protected(p, c) \ | ||
521 | __rcu_dereference_protected((p), (c), __rcu) | ||
522 | |||
523 | 569 | ||
524 | /** | 570 | /** |
525 | * rcu_dereference() - fetch RCU-protected pointer for dereferencing | 571 | * rcu_dereference() - fetch RCU-protected pointer for dereferencing |
@@ -630,7 +676,7 @@ static inline void rcu_read_unlock(void) | |||
630 | */ | 676 | */ |
631 | static inline void rcu_read_lock_bh(void) | 677 | static inline void rcu_read_lock_bh(void) |
632 | { | 678 | { |
633 | __rcu_read_lock_bh(); | 679 | local_bh_disable(); |
634 | __acquire(RCU_BH); | 680 | __acquire(RCU_BH); |
635 | rcu_read_acquire_bh(); | 681 | rcu_read_acquire_bh(); |
636 | } | 682 | } |
@@ -644,7 +690,7 @@ static inline void rcu_read_unlock_bh(void) | |||
644 | { | 690 | { |
645 | rcu_read_release_bh(); | 691 | rcu_read_release_bh(); |
646 | __release(RCU_BH); | 692 | __release(RCU_BH); |
647 | __rcu_read_unlock_bh(); | 693 | local_bh_enable(); |
648 | } | 694 | } |
649 | 695 | ||
650 | /** | 696 | /** |
@@ -698,11 +744,18 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) | |||
698 | * any prior initialization. Returns the value assigned. | 744 | * any prior initialization. Returns the value assigned. |
699 | * | 745 | * |
700 | * Inserts memory barriers on architectures that require them | 746 | * Inserts memory barriers on architectures that require them |
701 | * (pretty much all of them other than x86), and also prevents | 747 | * (which is most of them), and also prevents the compiler from |
702 | * the compiler from reordering the code that initializes the | 748 | * reordering the code that initializes the structure after the pointer |
703 | * structure after the pointer assignment. More importantly, this | 749 | * assignment. More importantly, this call documents which pointers |
704 | * call documents which pointers will be dereferenced by RCU read-side | 750 | * will be dereferenced by RCU read-side code. |
705 | * code. | 751 | * |
752 | * In some special cases, you may use RCU_INIT_POINTER() instead | ||
753 | * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due | ||
754 | * to the fact that it does not constrain either the CPU or the compiler. | ||
755 | * That said, using RCU_INIT_POINTER() when you should have used | ||
756 | * rcu_assign_pointer() is a very bad thing that results in | ||
757 | * impossible-to-diagnose memory corruption. So please be careful. | ||
758 | * See the RCU_INIT_POINTER() comment header for details. | ||
706 | */ | 759 | */ |
707 | #define rcu_assign_pointer(p, v) \ | 760 | #define rcu_assign_pointer(p, v) \ |
708 | __rcu_assign_pointer((p), (v), __rcu) | 761 | __rcu_assign_pointer((p), (v), __rcu) |
@@ -710,105 +763,38 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) | |||
710 | /** | 763 | /** |
711 | * RCU_INIT_POINTER() - initialize an RCU protected pointer | 764 | * RCU_INIT_POINTER() - initialize an RCU protected pointer |
712 | * | 765 | * |
713 | * Initialize an RCU-protected pointer in such a way to avoid RCU-lockdep | 766 | * Initialize an RCU-protected pointer in special cases where readers |
714 | * splats. | 767 | * do not need ordering constraints on the CPU or the compiler. These |
768 | * special cases are: | ||
769 | * | ||
770 | * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer -or- | ||
771 | * 2. The caller has taken whatever steps are required to prevent | ||
772 | * RCU readers from concurrently accessing this pointer -or- | ||
773 | * 3. The referenced data structure has already been exposed to | ||
774 | * readers either at compile time or via rcu_assign_pointer() -and- | ||
775 | * a. You have not made -any- reader-visible changes to | ||
776 | * this structure since then -or- | ||
777 | * b. It is OK for readers accessing this structure from its | ||
778 | * new location to see the old state of the structure. (For | ||
779 | * example, the changes were to statistical counters or to | ||
780 | * other state where exact synchronization is not required.) | ||
781 | * | ||
782 | * Failure to follow these rules governing use of RCU_INIT_POINTER() will | ||
783 | * result in impossible-to-diagnose memory corruption. As in the structures | ||
784 | * will look OK in crash dumps, but any concurrent RCU readers might | ||
785 | * see pre-initialized values of the referenced data structure. So | ||
786 | * please be very careful how you use RCU_INIT_POINTER()!!! | ||
787 | * | ||
788 | * If you are creating an RCU-protected linked structure that is accessed | ||
789 | * by a single external-to-structure RCU-protected pointer, then you may | ||
790 | * use RCU_INIT_POINTER() to initialize the internal RCU-protected | ||
791 | * pointers, but you must use rcu_assign_pointer() to initialize the | ||
792 | * external-to-structure pointer -after- you have completely initialized | ||
793 | * the reader-accessible portions of the linked structure. | ||
715 | */ | 794 | */ |
716 | #define RCU_INIT_POINTER(p, v) \ | 795 | #define RCU_INIT_POINTER(p, v) \ |
717 | p = (typeof(*v) __force __rcu *)(v) | 796 | p = (typeof(*v) __force __rcu *)(v) |
718 | 797 | ||
719 | /* Infrastructure to implement the synchronize_() primitives. */ | ||
720 | |||
721 | struct rcu_synchronize { | ||
722 | struct rcu_head head; | ||
723 | struct completion completion; | ||
724 | }; | ||
725 | |||
726 | extern void wakeme_after_rcu(struct rcu_head *head); | ||
727 | |||
728 | #ifdef CONFIG_PREEMPT_RCU | ||
729 | |||
730 | /** | ||
731 | * call_rcu() - Queue an RCU callback for invocation after a grace period. | ||
732 | * @head: structure to be used for queueing the RCU updates. | ||
733 | * @func: actual callback function to be invoked after the grace period | ||
734 | * | ||
735 | * The callback function will be invoked some time after a full grace | ||
736 | * period elapses, in other words after all pre-existing RCU read-side | ||
737 | * critical sections have completed. However, the callback function | ||
738 | * might well execute concurrently with RCU read-side critical sections | ||
739 | * that started after call_rcu() was invoked. RCU read-side critical | ||
740 | * sections are delimited by rcu_read_lock() and rcu_read_unlock(), | ||
741 | * and may be nested. | ||
742 | */ | ||
743 | extern void call_rcu(struct rcu_head *head, | ||
744 | void (*func)(struct rcu_head *head)); | ||
745 | |||
746 | #else /* #ifdef CONFIG_PREEMPT_RCU */ | ||
747 | |||
748 | /* In classic RCU, call_rcu() is just call_rcu_sched(). */ | ||
749 | #define call_rcu call_rcu_sched | ||
750 | |||
751 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | ||
752 | |||
753 | /** | ||
754 | * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. | ||
755 | * @head: structure to be used for queueing the RCU updates. | ||
756 | * @func: actual callback function to be invoked after the grace period | ||
757 | * | ||
758 | * The callback function will be invoked some time after a full grace | ||
759 | * period elapses, in other words after all currently executing RCU | ||
760 | * read-side critical sections have completed. call_rcu_bh() assumes | ||
761 | * that the read-side critical sections end on completion of a softirq | ||
762 | * handler. This means that read-side critical sections in process | ||
763 | * context must not be interrupted by softirqs. This interface is to be | ||
764 | * used when most of the read-side critical sections are in softirq context. | ||
765 | * RCU read-side critical sections are delimited by : | ||
766 | * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context. | ||
767 | * OR | ||
768 | * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. | ||
769 | * These may be nested. | ||
770 | */ | ||
771 | extern void call_rcu_bh(struct rcu_head *head, | ||
772 | void (*func)(struct rcu_head *head)); | ||
773 | |||
774 | /* | ||
775 | * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally | ||
776 | * by call_rcu() and rcu callback execution, and are therefore not part of the | ||
777 | * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors. | ||
778 | */ | ||
779 | |||
780 | #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD | ||
781 | # define STATE_RCU_HEAD_READY 0 | ||
782 | # define STATE_RCU_HEAD_QUEUED 1 | ||
783 | |||
784 | extern struct debug_obj_descr rcuhead_debug_descr; | ||
785 | |||
786 | static inline void debug_rcu_head_queue(struct rcu_head *head) | ||
787 | { | ||
788 | WARN_ON_ONCE((unsigned long)head & 0x3); | ||
789 | debug_object_activate(head, &rcuhead_debug_descr); | ||
790 | debug_object_active_state(head, &rcuhead_debug_descr, | ||
791 | STATE_RCU_HEAD_READY, | ||
792 | STATE_RCU_HEAD_QUEUED); | ||
793 | } | ||
794 | |||
795 | static inline void debug_rcu_head_unqueue(struct rcu_head *head) | ||
796 | { | ||
797 | debug_object_active_state(head, &rcuhead_debug_descr, | ||
798 | STATE_RCU_HEAD_QUEUED, | ||
799 | STATE_RCU_HEAD_READY); | ||
800 | debug_object_deactivate(head, &rcuhead_debug_descr); | ||
801 | } | ||
802 | #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ | ||
803 | static inline void debug_rcu_head_queue(struct rcu_head *head) | ||
804 | { | ||
805 | } | ||
806 | |||
807 | static inline void debug_rcu_head_unqueue(struct rcu_head *head) | ||
808 | { | ||
809 | } | ||
810 | #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ | ||
811 | |||
812 | static __always_inline bool __is_kfree_rcu_offset(unsigned long offset) | 798 | static __always_inline bool __is_kfree_rcu_offset(unsigned long offset) |
813 | { | 799 | { |
814 | return offset < 4096; | 800 | return offset < 4096; |
@@ -827,18 +813,6 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset) | |||
827 | call_rcu(head, (rcu_callback)offset); | 813 | call_rcu(head, (rcu_callback)offset); |
828 | } | 814 | } |
829 | 815 | ||
830 | extern void kfree(const void *); | ||
831 | |||
832 | static inline void __rcu_reclaim(struct rcu_head *head) | ||
833 | { | ||
834 | unsigned long offset = (unsigned long)head->func; | ||
835 | |||
836 | if (__is_kfree_rcu_offset(offset)) | ||
837 | kfree((void *)head - offset); | ||
838 | else | ||
839 | head->func(head); | ||
840 | } | ||
841 | |||
842 | /** | 816 | /** |
843 | * kfree_rcu() - kfree an object after a grace period. | 817 | * kfree_rcu() - kfree an object after a grace period. |
844 | * @ptr: pointer to kfree | 818 | * @ptr: pointer to kfree |
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 52b3e0281fd0..00b7a5e493d2 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h | |||
@@ -27,9 +27,23 @@ | |||
27 | 27 | ||
28 | #include <linux/cache.h> | 28 | #include <linux/cache.h> |
29 | 29 | ||
30 | #ifdef CONFIG_RCU_BOOST | ||
30 | static inline void rcu_init(void) | 31 | static inline void rcu_init(void) |
31 | { | 32 | { |
32 | } | 33 | } |
34 | #else /* #ifdef CONFIG_RCU_BOOST */ | ||
35 | void rcu_init(void); | ||
36 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | ||
37 | |||
38 | static inline void rcu_barrier_bh(void) | ||
39 | { | ||
40 | wait_rcu_gp(call_rcu_bh); | ||
41 | } | ||
42 | |||
43 | static inline void rcu_barrier_sched(void) | ||
44 | { | ||
45 | wait_rcu_gp(call_rcu_sched); | ||
46 | } | ||
33 | 47 | ||
34 | #ifdef CONFIG_TINY_RCU | 48 | #ifdef CONFIG_TINY_RCU |
35 | 49 | ||
@@ -45,9 +59,13 @@ static inline void rcu_barrier(void) | |||
45 | 59 | ||
46 | #else /* #ifdef CONFIG_TINY_RCU */ | 60 | #else /* #ifdef CONFIG_TINY_RCU */ |
47 | 61 | ||
48 | void rcu_barrier(void); | ||
49 | void synchronize_rcu_expedited(void); | 62 | void synchronize_rcu_expedited(void); |
50 | 63 | ||
64 | static inline void rcu_barrier(void) | ||
65 | { | ||
66 | wait_rcu_gp(call_rcu); | ||
67 | } | ||
68 | |||
51 | #endif /* #else #ifdef CONFIG_TINY_RCU */ | 69 | #endif /* #else #ifdef CONFIG_TINY_RCU */ |
52 | 70 | ||
53 | static inline void synchronize_rcu_bh(void) | 71 | static inline void synchronize_rcu_bh(void) |
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index e65d06634dd8..67458468f1a8 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h | |||
@@ -67,6 +67,8 @@ static inline void synchronize_rcu_bh_expedited(void) | |||
67 | } | 67 | } |
68 | 68 | ||
69 | extern void rcu_barrier(void); | 69 | extern void rcu_barrier(void); |
70 | extern void rcu_barrier_bh(void); | ||
71 | extern void rcu_barrier_sched(void); | ||
70 | 72 | ||
71 | extern unsigned long rcutorture_testseq; | 73 | extern unsigned long rcutorture_testseq; |
72 | extern unsigned long rcutorture_vernum; | 74 | extern unsigned long rcutorture_vernum; |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 1be699dd32a5..ede8a6585e38 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -270,7 +270,6 @@ extern void init_idle_bootup_task(struct task_struct *idle); | |||
270 | 270 | ||
271 | extern int runqueue_is_locked(int cpu); | 271 | extern int runqueue_is_locked(int cpu); |
272 | 272 | ||
273 | extern cpumask_var_t nohz_cpu_mask; | ||
274 | #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) | 273 | #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) |
275 | extern void select_nohz_load_balancer(int stop_tick); | 274 | extern void select_nohz_load_balancer(int stop_tick); |
276 | extern int get_nohz_timer_target(void); | 275 | extern int get_nohz_timer_target(void); |
@@ -1260,9 +1259,6 @@ struct task_struct { | |||
1260 | #ifdef CONFIG_PREEMPT_RCU | 1259 | #ifdef CONFIG_PREEMPT_RCU |
1261 | int rcu_read_lock_nesting; | 1260 | int rcu_read_lock_nesting; |
1262 | char rcu_read_unlock_special; | 1261 | char rcu_read_unlock_special; |
1263 | #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) | ||
1264 | int rcu_boosted; | ||
1265 | #endif /* #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) */ | ||
1266 | struct list_head rcu_node_entry; | 1262 | struct list_head rcu_node_entry; |
1267 | #endif /* #ifdef CONFIG_PREEMPT_RCU */ | 1263 | #endif /* #ifdef CONFIG_PREEMPT_RCU */ |
1268 | #ifdef CONFIG_TREE_PREEMPT_RCU | 1264 | #ifdef CONFIG_TREE_PREEMPT_RCU |
diff --git a/include/linux/types.h b/include/linux/types.h index 176da8c1fbb1..57a97234bec1 100644 --- a/include/linux/types.h +++ b/include/linux/types.h | |||
@@ -238,6 +238,16 @@ struct ustat { | |||
238 | char f_fpack[6]; | 238 | char f_fpack[6]; |
239 | }; | 239 | }; |
240 | 240 | ||
241 | /** | ||
242 | * struct rcu_head - callback structure for use with RCU | ||
243 | * @next: next update requests in a list | ||
244 | * @func: actual update function to call after the grace period. | ||
245 | */ | ||
246 | struct rcu_head { | ||
247 | struct rcu_head *next; | ||
248 | void (*func)(struct rcu_head *head); | ||
249 | }; | ||
250 | |||
241 | #endif /* __KERNEL__ */ | 251 | #endif /* __KERNEL__ */ |
242 | #endif /* __ASSEMBLY__ */ | 252 | #endif /* __ASSEMBLY__ */ |
243 | #endif /* _LINUX_TYPES_H */ | 253 | #endif /* _LINUX_TYPES_H */ |
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h new file mode 100644 index 000000000000..669fbd62ec25 --- /dev/null +++ b/include/trace/events/rcu.h | |||
@@ -0,0 +1,459 @@ | |||
1 | #undef TRACE_SYSTEM | ||
2 | #define TRACE_SYSTEM rcu | ||
3 | |||
4 | #if !defined(_TRACE_RCU_H) || defined(TRACE_HEADER_MULTI_READ) | ||
5 | #define _TRACE_RCU_H | ||
6 | |||
7 | #include <linux/tracepoint.h> | ||
8 | |||
9 | /* | ||
10 | * Tracepoint for start/end markers used for utilization calculations. | ||
11 | * By convention, the string is of the following forms: | ||
12 | * | ||
13 | * "Start <activity>" -- Mark the start of the specified activity, | ||
14 | * such as "context switch". Nesting is permitted. | ||
15 | * "End <activity>" -- Mark the end of the specified activity. | ||
16 | * | ||
17 | * An "@" character within "<activity>" is a comment character: Data | ||
18 | * reduction scripts will ignore the "@" and the remainder of the line. | ||
19 | */ | ||
20 | TRACE_EVENT(rcu_utilization, | ||
21 | |||
22 | TP_PROTO(char *s), | ||
23 | |||
24 | TP_ARGS(s), | ||
25 | |||
26 | TP_STRUCT__entry( | ||
27 | __field(char *, s) | ||
28 | ), | ||
29 | |||
30 | TP_fast_assign( | ||
31 | __entry->s = s; | ||
32 | ), | ||
33 | |||
34 | TP_printk("%s", __entry->s) | ||
35 | ); | ||
36 | |||
37 | #ifdef CONFIG_RCU_TRACE | ||
38 | |||
39 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) | ||
40 | |||
41 | /* | ||
42 | * Tracepoint for grace-period events: starting and ending a grace | ||
43 | * period ("start" and "end", respectively), a CPU noting the start | ||
44 | * of a new grace period or the end of an old grace period ("cpustart" | ||
45 | * and "cpuend", respectively), a CPU passing through a quiescent | ||
46 | * state ("cpuqs"), a CPU coming online or going offline ("cpuonl" | ||
47 | * and "cpuofl", respectively), and a CPU being kicked for being too | ||
48 | * long in dyntick-idle mode ("kick"). | ||
49 | */ | ||
50 | TRACE_EVENT(rcu_grace_period, | ||
51 | |||
52 | TP_PROTO(char *rcuname, unsigned long gpnum, char *gpevent), | ||
53 | |||
54 | TP_ARGS(rcuname, gpnum, gpevent), | ||
55 | |||
56 | TP_STRUCT__entry( | ||
57 | __field(char *, rcuname) | ||
58 | __field(unsigned long, gpnum) | ||
59 | __field(char *, gpevent) | ||
60 | ), | ||
61 | |||
62 | TP_fast_assign( | ||
63 | __entry->rcuname = rcuname; | ||
64 | __entry->gpnum = gpnum; | ||
65 | __entry->gpevent = gpevent; | ||
66 | ), | ||
67 | |||
68 | TP_printk("%s %lu %s", | ||
69 | __entry->rcuname, __entry->gpnum, __entry->gpevent) | ||
70 | ); | ||
71 | |||
72 | /* | ||
73 | * Tracepoint for grace-period-initialization events. These are | ||
74 | * distinguished by the type of RCU, the new grace-period number, the | ||
75 | * rcu_node structure level, the starting and ending CPU covered by the | ||
76 | * rcu_node structure, and the mask of CPUs that will be waited for. | ||
77 | * All but the type of RCU are extracted from the rcu_node structure. | ||
78 | */ | ||
79 | TRACE_EVENT(rcu_grace_period_init, | ||
80 | |||
81 | TP_PROTO(char *rcuname, unsigned long gpnum, u8 level, | ||
82 | int grplo, int grphi, unsigned long qsmask), | ||
83 | |||
84 | TP_ARGS(rcuname, gpnum, level, grplo, grphi, qsmask), | ||
85 | |||
86 | TP_STRUCT__entry( | ||
87 | __field(char *, rcuname) | ||
88 | __field(unsigned long, gpnum) | ||
89 | __field(u8, level) | ||
90 | __field(int, grplo) | ||
91 | __field(int, grphi) | ||
92 | __field(unsigned long, qsmask) | ||
93 | ), | ||
94 | |||
95 | TP_fast_assign( | ||
96 | __entry->rcuname = rcuname; | ||
97 | __entry->gpnum = gpnum; | ||
98 | __entry->level = level; | ||
99 | __entry->grplo = grplo; | ||
100 | __entry->grphi = grphi; | ||
101 | __entry->qsmask = qsmask; | ||
102 | ), | ||
103 | |||
104 | TP_printk("%s %lu %u %d %d %lx", | ||
105 | __entry->rcuname, __entry->gpnum, __entry->level, | ||
106 | __entry->grplo, __entry->grphi, __entry->qsmask) | ||
107 | ); | ||
108 | |||
109 | /* | ||
110 | * Tracepoint for tasks blocking within preemptible-RCU read-side | ||
111 | * critical sections. Track the type of RCU (which one day might | ||
112 | * include SRCU), the grace-period number that the task is blocking | ||
113 | * (the current or the next), and the task's PID. | ||
114 | */ | ||
115 | TRACE_EVENT(rcu_preempt_task, | ||
116 | |||
117 | TP_PROTO(char *rcuname, int pid, unsigned long gpnum), | ||
118 | |||
119 | TP_ARGS(rcuname, pid, gpnum), | ||
120 | |||
121 | TP_STRUCT__entry( | ||
122 | __field(char *, rcuname) | ||
123 | __field(unsigned long, gpnum) | ||
124 | __field(int, pid) | ||
125 | ), | ||
126 | |||
127 | TP_fast_assign( | ||
128 | __entry->rcuname = rcuname; | ||
129 | __entry->gpnum = gpnum; | ||
130 | __entry->pid = pid; | ||
131 | ), | ||
132 | |||
133 | TP_printk("%s %lu %d", | ||
134 | __entry->rcuname, __entry->gpnum, __entry->pid) | ||
135 | ); | ||
136 | |||
137 | /* | ||
138 | * Tracepoint for tasks that blocked within a given preemptible-RCU | ||
139 | * read-side critical section exiting that critical section. Track the | ||
140 | * type of RCU (which one day might include SRCU) and the task's PID. | ||
141 | */ | ||
142 | TRACE_EVENT(rcu_unlock_preempted_task, | ||
143 | |||
144 | TP_PROTO(char *rcuname, unsigned long gpnum, int pid), | ||
145 | |||
146 | TP_ARGS(rcuname, gpnum, pid), | ||
147 | |||
148 | TP_STRUCT__entry( | ||
149 | __field(char *, rcuname) | ||
150 | __field(unsigned long, gpnum) | ||
151 | __field(int, pid) | ||
152 | ), | ||
153 | |||
154 | TP_fast_assign( | ||
155 | __entry->rcuname = rcuname; | ||
156 | __entry->gpnum = gpnum; | ||
157 | __entry->pid = pid; | ||
158 | ), | ||
159 | |||
160 | TP_printk("%s %lu %d", __entry->rcuname, __entry->gpnum, __entry->pid) | ||
161 | ); | ||
162 | |||
163 | /* | ||
164 | * Tracepoint for quiescent-state-reporting events. These are | ||
165 | * distinguished by the type of RCU, the grace-period number, the | ||
166 | * mask of quiescent lower-level entities, the rcu_node structure level, | ||
167 | * the starting and ending CPU covered by the rcu_node structure, and | ||
168 | * whether there are any blocked tasks blocking the current grace period. | ||
169 | * All but the type of RCU are extracted from the rcu_node structure. | ||
170 | */ | ||
171 | TRACE_EVENT(rcu_quiescent_state_report, | ||
172 | |||
173 | TP_PROTO(char *rcuname, unsigned long gpnum, | ||
174 | unsigned long mask, unsigned long qsmask, | ||
175 | u8 level, int grplo, int grphi, int gp_tasks), | ||
176 | |||
177 | TP_ARGS(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks), | ||
178 | |||
179 | TP_STRUCT__entry( | ||
180 | __field(char *, rcuname) | ||
181 | __field(unsigned long, gpnum) | ||
182 | __field(unsigned long, mask) | ||
183 | __field(unsigned long, qsmask) | ||
184 | __field(u8, level) | ||
185 | __field(int, grplo) | ||
186 | __field(int, grphi) | ||
187 | __field(u8, gp_tasks) | ||
188 | ), | ||
189 | |||
190 | TP_fast_assign( | ||
191 | __entry->rcuname = rcuname; | ||
192 | __entry->gpnum = gpnum; | ||
193 | __entry->mask = mask; | ||
194 | __entry->qsmask = qsmask; | ||
195 | __entry->level = level; | ||
196 | __entry->grplo = grplo; | ||
197 | __entry->grphi = grphi; | ||
198 | __entry->gp_tasks = gp_tasks; | ||
199 | ), | ||
200 | |||
201 | TP_printk("%s %lu %lx>%lx %u %d %d %u", | ||
202 | __entry->rcuname, __entry->gpnum, | ||
203 | __entry->mask, __entry->qsmask, __entry->level, | ||
204 | __entry->grplo, __entry->grphi, __entry->gp_tasks) | ||
205 | ); | ||
206 | |||
207 | /* | ||
208 | * Tracepoint for quiescent states detected by force_quiescent_state(). | ||
209 | * These trace events include the type of RCU, the grace-period number | ||
210 | * that was blocked by the CPU, the CPU itself, and the type of quiescent | ||
211 | * state, which can be "dti" for dyntick-idle mode, "ofl" for CPU offline, | ||
212 | * or "kick" when kicking a CPU that has been in dyntick-idle mode for | ||
213 | * too long. | ||
214 | */ | ||
215 | TRACE_EVENT(rcu_fqs, | ||
216 | |||
217 | TP_PROTO(char *rcuname, unsigned long gpnum, int cpu, char *qsevent), | ||
218 | |||
219 | TP_ARGS(rcuname, gpnum, cpu, qsevent), | ||
220 | |||
221 | TP_STRUCT__entry( | ||
222 | __field(char *, rcuname) | ||
223 | __field(unsigned long, gpnum) | ||
224 | __field(int, cpu) | ||
225 | __field(char *, qsevent) | ||
226 | ), | ||
227 | |||
228 | TP_fast_assign( | ||
229 | __entry->rcuname = rcuname; | ||
230 | __entry->gpnum = gpnum; | ||
231 | __entry->cpu = cpu; | ||
232 | __entry->qsevent = qsevent; | ||
233 | ), | ||
234 | |||
235 | TP_printk("%s %lu %d %s", | ||
236 | __entry->rcuname, __entry->gpnum, | ||
237 | __entry->cpu, __entry->qsevent) | ||
238 | ); | ||
239 | |||
240 | #endif /* #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) */ | ||
241 | |||
242 | /* | ||
243 | * Tracepoint for dyntick-idle entry/exit events. These take a string | ||
244 | * as argument: "Start" for entering dyntick-idle mode and "End" for | ||
245 | * leaving it. | ||
246 | */ | ||
247 | TRACE_EVENT(rcu_dyntick, | ||
248 | |||
249 | TP_PROTO(char *polarity), | ||
250 | |||
251 | TP_ARGS(polarity), | ||
252 | |||
253 | TP_STRUCT__entry( | ||
254 | __field(char *, polarity) | ||
255 | ), | ||
256 | |||
257 | TP_fast_assign( | ||
258 | __entry->polarity = polarity; | ||
259 | ), | ||
260 | |||
261 | TP_printk("%s", __entry->polarity) | ||
262 | ); | ||
263 | |||
264 | /* | ||
265 | * Tracepoint for the registration of a single RCU callback function. | ||
266 | * The first argument is the type of RCU, the second argument is | ||
267 | * a pointer to the RCU callback itself, and the third argument is the | ||
268 | * new RCU callback queue length for the current CPU. | ||
269 | */ | ||
270 | TRACE_EVENT(rcu_callback, | ||
271 | |||
272 | TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen), | ||
273 | |||
274 | TP_ARGS(rcuname, rhp, qlen), | ||
275 | |||
276 | TP_STRUCT__entry( | ||
277 | __field(char *, rcuname) | ||
278 | __field(void *, rhp) | ||
279 | __field(void *, func) | ||
280 | __field(long, qlen) | ||
281 | ), | ||
282 | |||
283 | TP_fast_assign( | ||
284 | __entry->rcuname = rcuname; | ||
285 | __entry->rhp = rhp; | ||
286 | __entry->func = rhp->func; | ||
287 | __entry->qlen = qlen; | ||
288 | ), | ||
289 | |||
290 | TP_printk("%s rhp=%p func=%pf %ld", | ||
291 | __entry->rcuname, __entry->rhp, __entry->func, __entry->qlen) | ||
292 | ); | ||
293 | |||
294 | /* | ||
295 | * Tracepoint for the registration of a single RCU callback of the special | ||
296 | * kfree() form. The first argument is the RCU type, the second argument | ||
297 | * is a pointer to the RCU callback, the third argument is the offset | ||
298 | * of the callback within the enclosing RCU-protected data structure, | ||
299 | * and the fourth argument is the new RCU callback queue length for the | ||
300 | * current CPU. | ||
301 | */ | ||
302 | TRACE_EVENT(rcu_kfree_callback, | ||
303 | |||
304 | TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset, | ||
305 | long qlen), | ||
306 | |||
307 | TP_ARGS(rcuname, rhp, offset, qlen), | ||
308 | |||
309 | TP_STRUCT__entry( | ||
310 | __field(char *, rcuname) | ||
311 | __field(void *, rhp) | ||
312 | __field(unsigned long, offset) | ||
313 | __field(long, qlen) | ||
314 | ), | ||
315 | |||
316 | TP_fast_assign( | ||
317 | __entry->rcuname = rcuname; | ||
318 | __entry->rhp = rhp; | ||
319 | __entry->offset = offset; | ||
320 | __entry->qlen = qlen; | ||
321 | ), | ||
322 | |||
323 | TP_printk("%s rhp=%p func=%ld %ld", | ||
324 | __entry->rcuname, __entry->rhp, __entry->offset, | ||
325 | __entry->qlen) | ||
326 | ); | ||
327 | |||
328 | /* | ||
329 | * Tracepoint for marking the beginning of rcu_do_batch, performed to start | ||
330 | * RCU callback invocation. The first argument is the RCU flavor, | ||
331 | * the second is the total number of callbacks (including those that | ||
332 | * are not yet ready to be invoked), and the third argument is the | ||
333 | * current RCU-callback batch limit. | ||
334 | */ | ||
335 | TRACE_EVENT(rcu_batch_start, | ||
336 | |||
337 | TP_PROTO(char *rcuname, long qlen, int blimit), | ||
338 | |||
339 | TP_ARGS(rcuname, qlen, blimit), | ||
340 | |||
341 | TP_STRUCT__entry( | ||
342 | __field(char *, rcuname) | ||
343 | __field(long, qlen) | ||
344 | __field(int, blimit) | ||
345 | ), | ||
346 | |||
347 | TP_fast_assign( | ||
348 | __entry->rcuname = rcuname; | ||
349 | __entry->qlen = qlen; | ||
350 | __entry->blimit = blimit; | ||
351 | ), | ||
352 | |||
353 | TP_printk("%s CBs=%ld bl=%d", | ||
354 | __entry->rcuname, __entry->qlen, __entry->blimit) | ||
355 | ); | ||
356 | |||
357 | /* | ||
358 | * Tracepoint for the invocation of a single RCU callback function. | ||
359 | * The first argument is the type of RCU, and the second argument is | ||
360 | * a pointer to the RCU callback itself. | ||
361 | */ | ||
362 | TRACE_EVENT(rcu_invoke_callback, | ||
363 | |||
364 | TP_PROTO(char *rcuname, struct rcu_head *rhp), | ||
365 | |||
366 | TP_ARGS(rcuname, rhp), | ||
367 | |||
368 | TP_STRUCT__entry( | ||
369 | __field(char *, rcuname) | ||
370 | __field(void *, rhp) | ||
371 | __field(void *, func) | ||
372 | ), | ||
373 | |||
374 | TP_fast_assign( | ||
375 | __entry->rcuname = rcuname; | ||
376 | __entry->rhp = rhp; | ||
377 | __entry->func = rhp->func; | ||
378 | ), | ||
379 | |||
380 | TP_printk("%s rhp=%p func=%pf", | ||
381 | __entry->rcuname, __entry->rhp, __entry->func) | ||
382 | ); | ||
383 | |||
384 | /* | ||
385 | * Tracepoint for the invocation of a single RCU callback of the special | ||
386 | * kfree() form. The first argument is the RCU flavor, the second | ||
387 | * argument is a pointer to the RCU callback, and the third argument | ||
388 | * is the offset of the callback within the enclosing RCU-protected | ||
389 | * data structure. | ||
390 | */ | ||
391 | TRACE_EVENT(rcu_invoke_kfree_callback, | ||
392 | |||
393 | TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset), | ||
394 | |||
395 | TP_ARGS(rcuname, rhp, offset), | ||
396 | |||
397 | TP_STRUCT__entry( | ||
398 | __field(char *, rcuname) | ||
399 | __field(void *, rhp) | ||
400 | __field(unsigned long, offset) | ||
401 | ), | ||
402 | |||
403 | TP_fast_assign( | ||
404 | __entry->rcuname = rcuname; | ||
405 | __entry->rhp = rhp; | ||
406 | __entry->offset = offset; | ||
407 | ), | ||
408 | |||
409 | TP_printk("%s rhp=%p func=%ld", | ||
410 | __entry->rcuname, __entry->rhp, __entry->offset) | ||
411 | ); | ||
412 | |||
413 | /* | ||
414 | * Tracepoint for exiting rcu_do_batch after RCU callbacks have been | ||
415 | * invoked. The first argument is the name of the RCU flavor and | ||
416 | * the second argument is the number of callbacks actually invoked. | ||
417 | */ | ||
418 | TRACE_EVENT(rcu_batch_end, | ||
419 | |||
420 | TP_PROTO(char *rcuname, int callbacks_invoked), | ||
421 | |||
422 | TP_ARGS(rcuname, callbacks_invoked), | ||
423 | |||
424 | TP_STRUCT__entry( | ||
425 | __field(char *, rcuname) | ||
426 | __field(int, callbacks_invoked) | ||
427 | ), | ||
428 | |||
429 | TP_fast_assign( | ||
430 | __entry->rcuname = rcuname; | ||
431 | __entry->callbacks_invoked = callbacks_invoked; | ||
432 | ), | ||
433 | |||
434 | TP_printk("%s CBs-invoked=%d", | ||
435 | __entry->rcuname, __entry->callbacks_invoked) | ||
436 | ); | ||
437 | |||
438 | #else /* #ifdef CONFIG_RCU_TRACE */ | ||
439 | |||
440 | #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) | ||
441 | #define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, qsmask) do { } while (0) | ||
442 | #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) | ||
443 | #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) | ||
444 | #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) | ||
445 | #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) | ||
446 | #define trace_rcu_dyntick(polarity) do { } while (0) | ||
447 | #define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) | ||
448 | #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) | ||
449 | #define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) | ||
450 | #define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) | ||
451 | #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) | ||
452 | #define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0) | ||
453 | |||
454 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | ||
455 | |||
456 | #endif /* _TRACE_RCU_H */ | ||
457 | |||
458 | /* This part must be outside protection */ | ||
459 | #include <trace/define_trace.h> | ||
diff --git a/init/Kconfig b/init/Kconfig index d62778390e55..dc7e27bf89a8 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -391,7 +391,7 @@ config TREE_RCU | |||
391 | 391 | ||
392 | config TREE_PREEMPT_RCU | 392 | config TREE_PREEMPT_RCU |
393 | bool "Preemptible tree-based hierarchical RCU" | 393 | bool "Preemptible tree-based hierarchical RCU" |
394 | depends on PREEMPT | 394 | depends on PREEMPT && SMP |
395 | help | 395 | help |
396 | This option selects the RCU implementation that is | 396 | This option selects the RCU implementation that is |
397 | designed for very large SMP systems with hundreds or | 397 | designed for very large SMP systems with hundreds or |
@@ -401,7 +401,7 @@ config TREE_PREEMPT_RCU | |||
401 | 401 | ||
402 | config TINY_RCU | 402 | config TINY_RCU |
403 | bool "UP-only small-memory-footprint RCU" | 403 | bool "UP-only small-memory-footprint RCU" |
404 | depends on !SMP | 404 | depends on !PREEMPT && !SMP |
405 | help | 405 | help |
406 | This option selects the RCU implementation that is | 406 | This option selects the RCU implementation that is |
407 | designed for UP systems from which real-time response | 407 | designed for UP systems from which real-time response |
@@ -410,7 +410,7 @@ config TINY_RCU | |||
410 | 410 | ||
411 | config TINY_PREEMPT_RCU | 411 | config TINY_PREEMPT_RCU |
412 | bool "Preemptible UP-only small-memory-footprint RCU" | 412 | bool "Preemptible UP-only small-memory-footprint RCU" |
413 | depends on !SMP && PREEMPT | 413 | depends on PREEMPT && !SMP |
414 | help | 414 | help |
415 | This option selects the RCU implementation that is designed | 415 | This option selects the RCU implementation that is designed |
416 | for real-time UP systems. This option greatly reduces the | 416 | for real-time UP systems. This option greatly reduces the |
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index c081fa967c8f..e69434b070da 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
@@ -1145,10 +1145,11 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth, | |||
1145 | if (debug_locks_silent) | 1145 | if (debug_locks_silent) |
1146 | return 0; | 1146 | return 0; |
1147 | 1147 | ||
1148 | printk("\n=======================================================\n"); | 1148 | printk("\n"); |
1149 | printk( "[ INFO: possible circular locking dependency detected ]\n"); | 1149 | printk("======================================================\n"); |
1150 | printk("[ INFO: possible circular locking dependency detected ]\n"); | ||
1150 | print_kernel_version(); | 1151 | print_kernel_version(); |
1151 | printk( "-------------------------------------------------------\n"); | 1152 | printk("-------------------------------------------------------\n"); |
1152 | printk("%s/%d is trying to acquire lock:\n", | 1153 | printk("%s/%d is trying to acquire lock:\n", |
1153 | curr->comm, task_pid_nr(curr)); | 1154 | curr->comm, task_pid_nr(curr)); |
1154 | print_lock(check_src); | 1155 | print_lock(check_src); |
@@ -1482,11 +1483,12 @@ print_bad_irq_dependency(struct task_struct *curr, | |||
1482 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | 1483 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) |
1483 | return 0; | 1484 | return 0; |
1484 | 1485 | ||
1485 | printk("\n======================================================\n"); | 1486 | printk("\n"); |
1486 | printk( "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", | 1487 | printk("======================================================\n"); |
1488 | printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", | ||
1487 | irqclass, irqclass); | 1489 | irqclass, irqclass); |
1488 | print_kernel_version(); | 1490 | print_kernel_version(); |
1489 | printk( "------------------------------------------------------\n"); | 1491 | printk("------------------------------------------------------\n"); |
1490 | printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", | 1492 | printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", |
1491 | curr->comm, task_pid_nr(curr), | 1493 | curr->comm, task_pid_nr(curr), |
1492 | curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, | 1494 | curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, |
@@ -1711,10 +1713,11 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, | |||
1711 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | 1713 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) |
1712 | return 0; | 1714 | return 0; |
1713 | 1715 | ||
1714 | printk("\n=============================================\n"); | 1716 | printk("\n"); |
1715 | printk( "[ INFO: possible recursive locking detected ]\n"); | 1717 | printk("=============================================\n"); |
1718 | printk("[ INFO: possible recursive locking detected ]\n"); | ||
1716 | print_kernel_version(); | 1719 | print_kernel_version(); |
1717 | printk( "---------------------------------------------\n"); | 1720 | printk("---------------------------------------------\n"); |
1718 | printk("%s/%d is trying to acquire lock:\n", | 1721 | printk("%s/%d is trying to acquire lock:\n", |
1719 | curr->comm, task_pid_nr(curr)); | 1722 | curr->comm, task_pid_nr(curr)); |
1720 | print_lock(next); | 1723 | print_lock(next); |
@@ -2217,10 +2220,11 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, | |||
2217 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | 2220 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) |
2218 | return 0; | 2221 | return 0; |
2219 | 2222 | ||
2220 | printk("\n=================================\n"); | 2223 | printk("\n"); |
2221 | printk( "[ INFO: inconsistent lock state ]\n"); | 2224 | printk("=================================\n"); |
2225 | printk("[ INFO: inconsistent lock state ]\n"); | ||
2222 | print_kernel_version(); | 2226 | print_kernel_version(); |
2223 | printk( "---------------------------------\n"); | 2227 | printk("---------------------------------\n"); |
2224 | 2228 | ||
2225 | printk("inconsistent {%s} -> {%s} usage.\n", | 2229 | printk("inconsistent {%s} -> {%s} usage.\n", |
2226 | usage_str[prev_bit], usage_str[new_bit]); | 2230 | usage_str[prev_bit], usage_str[new_bit]); |
@@ -2281,10 +2285,11 @@ print_irq_inversion_bug(struct task_struct *curr, | |||
2281 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | 2285 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) |
2282 | return 0; | 2286 | return 0; |
2283 | 2287 | ||
2284 | printk("\n=========================================================\n"); | 2288 | printk("\n"); |
2285 | printk( "[ INFO: possible irq lock inversion dependency detected ]\n"); | 2289 | printk("=========================================================\n"); |
2290 | printk("[ INFO: possible irq lock inversion dependency detected ]\n"); | ||
2286 | print_kernel_version(); | 2291 | print_kernel_version(); |
2287 | printk( "---------------------------------------------------------\n"); | 2292 | printk("---------------------------------------------------------\n"); |
2288 | printk("%s/%d just changed the state of lock:\n", | 2293 | printk("%s/%d just changed the state of lock:\n", |
2289 | curr->comm, task_pid_nr(curr)); | 2294 | curr->comm, task_pid_nr(curr)); |
2290 | print_lock(this); | 2295 | print_lock(this); |
@@ -3161,9 +3166,10 @@ print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock, | |||
3161 | if (debug_locks_silent) | 3166 | if (debug_locks_silent) |
3162 | return 0; | 3167 | return 0; |
3163 | 3168 | ||
3164 | printk("\n=====================================\n"); | 3169 | printk("\n"); |
3165 | printk( "[ BUG: bad unlock balance detected! ]\n"); | 3170 | printk("=====================================\n"); |
3166 | printk( "-------------------------------------\n"); | 3171 | printk("[ BUG: bad unlock balance detected! ]\n"); |
3172 | printk("-------------------------------------\n"); | ||
3167 | printk("%s/%d is trying to release lock (", | 3173 | printk("%s/%d is trying to release lock (", |
3168 | curr->comm, task_pid_nr(curr)); | 3174 | curr->comm, task_pid_nr(curr)); |
3169 | print_lockdep_cache(lock); | 3175 | print_lockdep_cache(lock); |
@@ -3604,9 +3610,10 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, | |||
3604 | if (debug_locks_silent) | 3610 | if (debug_locks_silent) |
3605 | return 0; | 3611 | return 0; |
3606 | 3612 | ||
3607 | printk("\n=================================\n"); | 3613 | printk("\n"); |
3608 | printk( "[ BUG: bad contention detected! ]\n"); | 3614 | printk("=================================\n"); |
3609 | printk( "---------------------------------\n"); | 3615 | printk("[ BUG: bad contention detected! ]\n"); |
3616 | printk("---------------------------------\n"); | ||
3610 | printk("%s/%d is trying to contend lock (", | 3617 | printk("%s/%d is trying to contend lock (", |
3611 | curr->comm, task_pid_nr(curr)); | 3618 | curr->comm, task_pid_nr(curr)); |
3612 | print_lockdep_cache(lock); | 3619 | print_lockdep_cache(lock); |
@@ -3977,9 +3984,10 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from, | |||
3977 | if (debug_locks_silent) | 3984 | if (debug_locks_silent) |
3978 | return; | 3985 | return; |
3979 | 3986 | ||
3980 | printk("\n=========================\n"); | 3987 | printk("\n"); |
3981 | printk( "[ BUG: held lock freed! ]\n"); | 3988 | printk("=========================\n"); |
3982 | printk( "-------------------------\n"); | 3989 | printk("[ BUG: held lock freed! ]\n"); |
3990 | printk("-------------------------\n"); | ||
3983 | printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", | 3991 | printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", |
3984 | curr->comm, task_pid_nr(curr), mem_from, mem_to-1); | 3992 | curr->comm, task_pid_nr(curr), mem_from, mem_to-1); |
3985 | print_lock(hlock); | 3993 | print_lock(hlock); |
@@ -4033,9 +4041,10 @@ static void print_held_locks_bug(struct task_struct *curr) | |||
4033 | if (debug_locks_silent) | 4041 | if (debug_locks_silent) |
4034 | return; | 4042 | return; |
4035 | 4043 | ||
4036 | printk("\n=====================================\n"); | 4044 | printk("\n"); |
4037 | printk( "[ BUG: lock held at task exit time! ]\n"); | 4045 | printk("=====================================\n"); |
4038 | printk( "-------------------------------------\n"); | 4046 | printk("[ BUG: lock held at task exit time! ]\n"); |
4047 | printk("-------------------------------------\n"); | ||
4039 | printk("%s/%d is exiting with locks still held!\n", | 4048 | printk("%s/%d is exiting with locks still held!\n", |
4040 | curr->comm, task_pid_nr(curr)); | 4049 | curr->comm, task_pid_nr(curr)); |
4041 | lockdep_print_held_locks(curr); | 4050 | lockdep_print_held_locks(curr); |
@@ -4129,16 +4138,17 @@ void lockdep_sys_exit(void) | |||
4129 | if (unlikely(curr->lockdep_depth)) { | 4138 | if (unlikely(curr->lockdep_depth)) { |
4130 | if (!debug_locks_off()) | 4139 | if (!debug_locks_off()) |
4131 | return; | 4140 | return; |
4132 | printk("\n================================================\n"); | 4141 | printk("\n"); |
4133 | printk( "[ BUG: lock held when returning to user space! ]\n"); | 4142 | printk("================================================\n"); |
4134 | printk( "------------------------------------------------\n"); | 4143 | printk("[ BUG: lock held when returning to user space! ]\n"); |
4144 | printk("------------------------------------------------\n"); | ||
4135 | printk("%s/%d is leaving the kernel with locks still held!\n", | 4145 | printk("%s/%d is leaving the kernel with locks still held!\n", |
4136 | curr->comm, curr->pid); | 4146 | curr->comm, curr->pid); |
4137 | lockdep_print_held_locks(curr); | 4147 | lockdep_print_held_locks(curr); |
4138 | } | 4148 | } |
4139 | } | 4149 | } |
4140 | 4150 | ||
4141 | void lockdep_rcu_dereference(const char *file, const int line) | 4151 | void lockdep_rcu_suspicious(const char *file, const int line, const char *s) |
4142 | { | 4152 | { |
4143 | struct task_struct *curr = current; | 4153 | struct task_struct *curr = current; |
4144 | 4154 | ||
@@ -4147,15 +4157,15 @@ void lockdep_rcu_dereference(const char *file, const int line) | |||
4147 | return; | 4157 | return; |
4148 | #endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ | 4158 | #endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ |
4149 | /* Note: the following can be executed concurrently, so be careful. */ | 4159 | /* Note: the following can be executed concurrently, so be careful. */ |
4150 | printk("\n===================================================\n"); | 4160 | printk("\n"); |
4151 | printk( "[ INFO: suspicious rcu_dereference_check() usage. ]\n"); | 4161 | printk("===============================\n"); |
4152 | printk( "---------------------------------------------------\n"); | 4162 | printk("[ INFO: suspicious RCU usage. ]\n"); |
4153 | printk("%s:%d invoked rcu_dereference_check() without protection!\n", | 4163 | printk("-------------------------------\n"); |
4154 | file, line); | 4164 | printk("%s:%d %s!\n", file, line, s); |
4155 | printk("\nother info that might help us debug this:\n\n"); | 4165 | printk("\nother info that might help us debug this:\n\n"); |
4156 | printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks); | 4166 | printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks); |
4157 | lockdep_print_held_locks(curr); | 4167 | lockdep_print_held_locks(curr); |
4158 | printk("\nstack backtrace:\n"); | 4168 | printk("\nstack backtrace:\n"); |
4159 | dump_stack(); | 4169 | dump_stack(); |
4160 | } | 4170 | } |
4161 | EXPORT_SYMBOL_GPL(lockdep_rcu_dereference); | 4171 | EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); |
diff --git a/kernel/pid.c b/kernel/pid.c index e432057f3b21..8cafe7e72ad2 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -418,7 +418,9 @@ EXPORT_SYMBOL(pid_task); | |||
418 | */ | 418 | */ |
419 | struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) | 419 | struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) |
420 | { | 420 | { |
421 | rcu_lockdep_assert(rcu_read_lock_held()); | 421 | rcu_lockdep_assert(rcu_read_lock_held(), |
422 | "find_task_by_pid_ns() needs rcu_read_lock()" | ||
423 | " protection"); | ||
422 | return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); | 424 | return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); |
423 | } | 425 | } |
424 | 426 | ||
diff --git a/kernel/rcu.h b/kernel/rcu.h new file mode 100644 index 000000000000..f600868d550d --- /dev/null +++ b/kernel/rcu.h | |||
@@ -0,0 +1,85 @@ | |||
1 | /* | ||
2 | * Read-Copy Update definitions shared among RCU implementations. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright IBM Corporation, 2011 | ||
19 | * | ||
20 | * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> | ||
21 | */ | ||
22 | |||
23 | #ifndef __LINUX_RCU_H | ||
24 | #define __LINUX_RCU_H | ||
25 | |||
26 | #ifdef CONFIG_RCU_TRACE | ||
27 | #define RCU_TRACE(stmt) stmt | ||
28 | #else /* #ifdef CONFIG_RCU_TRACE */ | ||
29 | #define RCU_TRACE(stmt) | ||
30 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | ||
31 | |||
32 | /* | ||
33 | * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally | ||
34 | * by call_rcu() and rcu callback execution, and are therefore not part of the | ||
35 | * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors. | ||
36 | */ | ||
37 | |||
38 | #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD | ||
39 | # define STATE_RCU_HEAD_READY 0 | ||
40 | # define STATE_RCU_HEAD_QUEUED 1 | ||
41 | |||
42 | extern struct debug_obj_descr rcuhead_debug_descr; | ||
43 | |||
44 | static inline void debug_rcu_head_queue(struct rcu_head *head) | ||
45 | { | ||
46 | WARN_ON_ONCE((unsigned long)head & 0x3); | ||
47 | debug_object_activate(head, &rcuhead_debug_descr); | ||
48 | debug_object_active_state(head, &rcuhead_debug_descr, | ||
49 | STATE_RCU_HEAD_READY, | ||
50 | STATE_RCU_HEAD_QUEUED); | ||
51 | } | ||
52 | |||
53 | static inline void debug_rcu_head_unqueue(struct rcu_head *head) | ||
54 | { | ||
55 | debug_object_active_state(head, &rcuhead_debug_descr, | ||
56 | STATE_RCU_HEAD_QUEUED, | ||
57 | STATE_RCU_HEAD_READY); | ||
58 | debug_object_deactivate(head, &rcuhead_debug_descr); | ||
59 | } | ||
60 | #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ | ||
61 | static inline void debug_rcu_head_queue(struct rcu_head *head) | ||
62 | { | ||
63 | } | ||
64 | |||
65 | static inline void debug_rcu_head_unqueue(struct rcu_head *head) | ||
66 | { | ||
67 | } | ||
68 | #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ | ||
69 | |||
70 | extern void kfree(const void *); | ||
71 | |||
72 | static inline void __rcu_reclaim(char *rn, struct rcu_head *head) | ||
73 | { | ||
74 | unsigned long offset = (unsigned long)head->func; | ||
75 | |||
76 | if (__is_kfree_rcu_offset(offset)) { | ||
77 | RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset)); | ||
78 | kfree((void *)head - offset); | ||
79 | } else { | ||
80 | RCU_TRACE(trace_rcu_invoke_callback(rn, head)); | ||
81 | head->func(head); | ||
82 | } | ||
83 | } | ||
84 | |||
85 | #endif /* __LINUX_RCU_H */ | ||
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index ddddb320be61..ca0d23b6b3e8 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -46,6 +46,11 @@ | |||
46 | #include <linux/module.h> | 46 | #include <linux/module.h> |
47 | #include <linux/hardirq.h> | 47 | #include <linux/hardirq.h> |
48 | 48 | ||
49 | #define CREATE_TRACE_POINTS | ||
50 | #include <trace/events/rcu.h> | ||
51 | |||
52 | #include "rcu.h" | ||
53 | |||
49 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 54 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
50 | static struct lock_class_key rcu_lock_key; | 55 | static struct lock_class_key rcu_lock_key; |
51 | struct lockdep_map rcu_lock_map = | 56 | struct lockdep_map rcu_lock_map = |
@@ -94,11 +99,16 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); | |||
94 | 99 | ||
95 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 100 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
96 | 101 | ||
102 | struct rcu_synchronize { | ||
103 | struct rcu_head head; | ||
104 | struct completion completion; | ||
105 | }; | ||
106 | |||
97 | /* | 107 | /* |
98 | * Awaken the corresponding synchronize_rcu() instance now that a | 108 | * Awaken the corresponding synchronize_rcu() instance now that a |
99 | * grace period has elapsed. | 109 | * grace period has elapsed. |
100 | */ | 110 | */ |
101 | void wakeme_after_rcu(struct rcu_head *head) | 111 | static void wakeme_after_rcu(struct rcu_head *head) |
102 | { | 112 | { |
103 | struct rcu_synchronize *rcu; | 113 | struct rcu_synchronize *rcu; |
104 | 114 | ||
@@ -106,6 +116,20 @@ void wakeme_after_rcu(struct rcu_head *head) | |||
106 | complete(&rcu->completion); | 116 | complete(&rcu->completion); |
107 | } | 117 | } |
108 | 118 | ||
119 | void wait_rcu_gp(call_rcu_func_t crf) | ||
120 | { | ||
121 | struct rcu_synchronize rcu; | ||
122 | |||
123 | init_rcu_head_on_stack(&rcu.head); | ||
124 | init_completion(&rcu.completion); | ||
125 | /* Will wake me after RCU finished. */ | ||
126 | crf(&rcu.head, wakeme_after_rcu); | ||
127 | /* Wait for it. */ | ||
128 | wait_for_completion(&rcu.completion); | ||
129 | destroy_rcu_head_on_stack(&rcu.head); | ||
130 | } | ||
131 | EXPORT_SYMBOL_GPL(wait_rcu_gp); | ||
132 | |||
109 | #ifdef CONFIG_PROVE_RCU | 133 | #ifdef CONFIG_PROVE_RCU |
110 | /* | 134 | /* |
111 | * wrapper function to avoid #include problems. | 135 | * wrapper function to avoid #include problems. |
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 7bbac7d0f5ab..da775c87f27f 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c | |||
@@ -37,16 +37,17 @@ | |||
37 | #include <linux/cpu.h> | 37 | #include <linux/cpu.h> |
38 | #include <linux/prefetch.h> | 38 | #include <linux/prefetch.h> |
39 | 39 | ||
40 | /* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */ | 40 | #ifdef CONFIG_RCU_TRACE |
41 | static struct task_struct *rcu_kthread_task; | 41 | #include <trace/events/rcu.h> |
42 | static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); | 42 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ |
43 | static unsigned long have_rcu_kthread_work; | 43 | |
44 | #include "rcu.h" | ||
44 | 45 | ||
45 | /* Forward declarations for rcutiny_plugin.h. */ | 46 | /* Forward declarations for rcutiny_plugin.h. */ |
46 | struct rcu_ctrlblk; | 47 | struct rcu_ctrlblk; |
47 | static void invoke_rcu_kthread(void); | 48 | static void invoke_rcu_callbacks(void); |
48 | static void rcu_process_callbacks(struct rcu_ctrlblk *rcp); | 49 | static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); |
49 | static int rcu_kthread(void *arg); | 50 | static void rcu_process_callbacks(struct softirq_action *unused); |
50 | static void __call_rcu(struct rcu_head *head, | 51 | static void __call_rcu(struct rcu_head *head, |
51 | void (*func)(struct rcu_head *rcu), | 52 | void (*func)(struct rcu_head *rcu), |
52 | struct rcu_ctrlblk *rcp); | 53 | struct rcu_ctrlblk *rcp); |
@@ -96,16 +97,6 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) | |||
96 | } | 97 | } |
97 | 98 | ||
98 | /* | 99 | /* |
99 | * Wake up rcu_kthread() to process callbacks now eligible for invocation | ||
100 | * or to boost readers. | ||
101 | */ | ||
102 | static void invoke_rcu_kthread(void) | ||
103 | { | ||
104 | have_rcu_kthread_work = 1; | ||
105 | wake_up(&rcu_kthread_wq); | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * Record an rcu quiescent state. And an rcu_bh quiescent state while we | 100 | * Record an rcu quiescent state. And an rcu_bh quiescent state while we |
110 | * are at it, given that any rcu quiescent state is also an rcu_bh | 101 | * are at it, given that any rcu quiescent state is also an rcu_bh |
111 | * quiescent state. Use "+" instead of "||" to defeat short circuiting. | 102 | * quiescent state. Use "+" instead of "||" to defeat short circuiting. |
@@ -117,7 +108,7 @@ void rcu_sched_qs(int cpu) | |||
117 | local_irq_save(flags); | 108 | local_irq_save(flags); |
118 | if (rcu_qsctr_help(&rcu_sched_ctrlblk) + | 109 | if (rcu_qsctr_help(&rcu_sched_ctrlblk) + |
119 | rcu_qsctr_help(&rcu_bh_ctrlblk)) | 110 | rcu_qsctr_help(&rcu_bh_ctrlblk)) |
120 | invoke_rcu_kthread(); | 111 | invoke_rcu_callbacks(); |
121 | local_irq_restore(flags); | 112 | local_irq_restore(flags); |
122 | } | 113 | } |
123 | 114 | ||
@@ -130,7 +121,7 @@ void rcu_bh_qs(int cpu) | |||
130 | 121 | ||
131 | local_irq_save(flags); | 122 | local_irq_save(flags); |
132 | if (rcu_qsctr_help(&rcu_bh_ctrlblk)) | 123 | if (rcu_qsctr_help(&rcu_bh_ctrlblk)) |
133 | invoke_rcu_kthread(); | 124 | invoke_rcu_callbacks(); |
134 | local_irq_restore(flags); | 125 | local_irq_restore(flags); |
135 | } | 126 | } |
136 | 127 | ||
@@ -154,18 +145,23 @@ void rcu_check_callbacks(int cpu, int user) | |||
154 | * Invoke the RCU callbacks on the specified rcu_ctrlkblk structure | 145 | * Invoke the RCU callbacks on the specified rcu_ctrlkblk structure |
155 | * whose grace period has elapsed. | 146 | * whose grace period has elapsed. |
156 | */ | 147 | */ |
157 | static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) | 148 | static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) |
158 | { | 149 | { |
150 | char *rn = NULL; | ||
159 | struct rcu_head *next, *list; | 151 | struct rcu_head *next, *list; |
160 | unsigned long flags; | 152 | unsigned long flags; |
161 | RCU_TRACE(int cb_count = 0); | 153 | RCU_TRACE(int cb_count = 0); |
162 | 154 | ||
163 | /* If no RCU callbacks ready to invoke, just return. */ | 155 | /* If no RCU callbacks ready to invoke, just return. */ |
164 | if (&rcp->rcucblist == rcp->donetail) | 156 | if (&rcp->rcucblist == rcp->donetail) { |
157 | RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); | ||
158 | RCU_TRACE(trace_rcu_batch_end(rcp->name, 0)); | ||
165 | return; | 159 | return; |
160 | } | ||
166 | 161 | ||
167 | /* Move the ready-to-invoke callbacks to a local list. */ | 162 | /* Move the ready-to-invoke callbacks to a local list. */ |
168 | local_irq_save(flags); | 163 | local_irq_save(flags); |
164 | RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); | ||
169 | list = rcp->rcucblist; | 165 | list = rcp->rcucblist; |
170 | rcp->rcucblist = *rcp->donetail; | 166 | rcp->rcucblist = *rcp->donetail; |
171 | *rcp->donetail = NULL; | 167 | *rcp->donetail = NULL; |
@@ -176,49 +172,26 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) | |||
176 | local_irq_restore(flags); | 172 | local_irq_restore(flags); |
177 | 173 | ||
178 | /* Invoke the callbacks on the local list. */ | 174 | /* Invoke the callbacks on the local list. */ |
175 | RCU_TRACE(rn = rcp->name); | ||
179 | while (list) { | 176 | while (list) { |
180 | next = list->next; | 177 | next = list->next; |
181 | prefetch(next); | 178 | prefetch(next); |
182 | debug_rcu_head_unqueue(list); | 179 | debug_rcu_head_unqueue(list); |
183 | local_bh_disable(); | 180 | local_bh_disable(); |
184 | __rcu_reclaim(list); | 181 | __rcu_reclaim(rn, list); |
185 | local_bh_enable(); | 182 | local_bh_enable(); |
186 | list = next; | 183 | list = next; |
187 | RCU_TRACE(cb_count++); | 184 | RCU_TRACE(cb_count++); |
188 | } | 185 | } |
189 | RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); | 186 | RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); |
187 | RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count)); | ||
190 | } | 188 | } |
191 | 189 | ||
192 | /* | 190 | static void rcu_process_callbacks(struct softirq_action *unused) |
193 | * This kthread invokes RCU callbacks whose grace periods have | ||
194 | * elapsed. It is awakened as needed, and takes the place of the | ||
195 | * RCU_SOFTIRQ that was used previously for this purpose. | ||
196 | * This is a kthread, but it is never stopped, at least not until | ||
197 | * the system goes down. | ||
198 | */ | ||
199 | static int rcu_kthread(void *arg) | ||
200 | { | 191 | { |
201 | unsigned long work; | 192 | __rcu_process_callbacks(&rcu_sched_ctrlblk); |
202 | unsigned long morework; | 193 | __rcu_process_callbacks(&rcu_bh_ctrlblk); |
203 | unsigned long flags; | 194 | rcu_preempt_process_callbacks(); |
204 | |||
205 | for (;;) { | ||
206 | wait_event_interruptible(rcu_kthread_wq, | ||
207 | have_rcu_kthread_work != 0); | ||
208 | morework = rcu_boost(); | ||
209 | local_irq_save(flags); | ||
210 | work = have_rcu_kthread_work; | ||
211 | have_rcu_kthread_work = morework; | ||
212 | local_irq_restore(flags); | ||
213 | if (work) { | ||
214 | rcu_process_callbacks(&rcu_sched_ctrlblk); | ||
215 | rcu_process_callbacks(&rcu_bh_ctrlblk); | ||
216 | rcu_preempt_process_callbacks(); | ||
217 | } | ||
218 | schedule_timeout_interruptible(1); /* Leave CPU for others. */ | ||
219 | } | ||
220 | |||
221 | return 0; /* Not reached, but needed to shut gcc up. */ | ||
222 | } | 195 | } |
223 | 196 | ||
224 | /* | 197 | /* |
@@ -280,45 +253,3 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |||
280 | __call_rcu(head, func, &rcu_bh_ctrlblk); | 253 | __call_rcu(head, func, &rcu_bh_ctrlblk); |
281 | } | 254 | } |
282 | EXPORT_SYMBOL_GPL(call_rcu_bh); | 255 | EXPORT_SYMBOL_GPL(call_rcu_bh); |
283 | |||
284 | void rcu_barrier_bh(void) | ||
285 | { | ||
286 | struct rcu_synchronize rcu; | ||
287 | |||
288 | init_rcu_head_on_stack(&rcu.head); | ||
289 | init_completion(&rcu.completion); | ||
290 | /* Will wake me after RCU finished. */ | ||
291 | call_rcu_bh(&rcu.head, wakeme_after_rcu); | ||
292 | /* Wait for it. */ | ||
293 | wait_for_completion(&rcu.completion); | ||
294 | destroy_rcu_head_on_stack(&rcu.head); | ||
295 | } | ||
296 | EXPORT_SYMBOL_GPL(rcu_barrier_bh); | ||
297 | |||
298 | void rcu_barrier_sched(void) | ||
299 | { | ||
300 | struct rcu_synchronize rcu; | ||
301 | |||
302 | init_rcu_head_on_stack(&rcu.head); | ||
303 | init_completion(&rcu.completion); | ||
304 | /* Will wake me after RCU finished. */ | ||
305 | call_rcu_sched(&rcu.head, wakeme_after_rcu); | ||
306 | /* Wait for it. */ | ||
307 | wait_for_completion(&rcu.completion); | ||
308 | destroy_rcu_head_on_stack(&rcu.head); | ||
309 | } | ||
310 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); | ||
311 | |||
312 | /* | ||
313 | * Spawn the kthread that invokes RCU callbacks. | ||
314 | */ | ||
315 | static int __init rcu_spawn_kthreads(void) | ||
316 | { | ||
317 | struct sched_param sp; | ||
318 | |||
319 | rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread"); | ||
320 | sp.sched_priority = RCU_BOOST_PRIO; | ||
321 | sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp); | ||
322 | return 0; | ||
323 | } | ||
324 | early_initcall(rcu_spawn_kthreads); | ||
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index f259c676195f..02aa7139861c 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h | |||
@@ -26,29 +26,26 @@ | |||
26 | #include <linux/debugfs.h> | 26 | #include <linux/debugfs.h> |
27 | #include <linux/seq_file.h> | 27 | #include <linux/seq_file.h> |
28 | 28 | ||
29 | #ifdef CONFIG_RCU_TRACE | ||
30 | #define RCU_TRACE(stmt) stmt | ||
31 | #else /* #ifdef CONFIG_RCU_TRACE */ | ||
32 | #define RCU_TRACE(stmt) | ||
33 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | ||
34 | |||
35 | /* Global control variables for rcupdate callback mechanism. */ | 29 | /* Global control variables for rcupdate callback mechanism. */ |
36 | struct rcu_ctrlblk { | 30 | struct rcu_ctrlblk { |
37 | struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ | 31 | struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ |
38 | struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ | 32 | struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ |
39 | struct rcu_head **curtail; /* ->next pointer of last CB. */ | 33 | struct rcu_head **curtail; /* ->next pointer of last CB. */ |
40 | RCU_TRACE(long qlen); /* Number of pending CBs. */ | 34 | RCU_TRACE(long qlen); /* Number of pending CBs. */ |
35 | RCU_TRACE(char *name); /* Name of RCU type. */ | ||
41 | }; | 36 | }; |
42 | 37 | ||
43 | /* Definition for rcupdate control block. */ | 38 | /* Definition for rcupdate control block. */ |
44 | static struct rcu_ctrlblk rcu_sched_ctrlblk = { | 39 | static struct rcu_ctrlblk rcu_sched_ctrlblk = { |
45 | .donetail = &rcu_sched_ctrlblk.rcucblist, | 40 | .donetail = &rcu_sched_ctrlblk.rcucblist, |
46 | .curtail = &rcu_sched_ctrlblk.rcucblist, | 41 | .curtail = &rcu_sched_ctrlblk.rcucblist, |
42 | RCU_TRACE(.name = "rcu_sched") | ||
47 | }; | 43 | }; |
48 | 44 | ||
49 | static struct rcu_ctrlblk rcu_bh_ctrlblk = { | 45 | static struct rcu_ctrlblk rcu_bh_ctrlblk = { |
50 | .donetail = &rcu_bh_ctrlblk.rcucblist, | 46 | .donetail = &rcu_bh_ctrlblk.rcucblist, |
51 | .curtail = &rcu_bh_ctrlblk.rcucblist, | 47 | .curtail = &rcu_bh_ctrlblk.rcucblist, |
48 | RCU_TRACE(.name = "rcu_bh") | ||
52 | }; | 49 | }; |
53 | 50 | ||
54 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 51 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
@@ -131,6 +128,7 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { | |||
131 | .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist, | 128 | .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist, |
132 | .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist, | 129 | .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist, |
133 | .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks), | 130 | .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks), |
131 | RCU_TRACE(.rcb.name = "rcu_preempt") | ||
134 | }; | 132 | }; |
135 | 133 | ||
136 | static int rcu_preempted_readers_exp(void); | 134 | static int rcu_preempted_readers_exp(void); |
@@ -247,6 +245,13 @@ static void show_tiny_preempt_stats(struct seq_file *m) | |||
247 | 245 | ||
248 | #include "rtmutex_common.h" | 246 | #include "rtmutex_common.h" |
249 | 247 | ||
248 | #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO | ||
249 | |||
250 | /* Controls for rcu_kthread() kthread. */ | ||
251 | static struct task_struct *rcu_kthread_task; | ||
252 | static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); | ||
253 | static unsigned long have_rcu_kthread_work; | ||
254 | |||
250 | /* | 255 | /* |
251 | * Carry out RCU priority boosting on the task indicated by ->boost_tasks, | 256 | * Carry out RCU priority boosting on the task indicated by ->boost_tasks, |
252 | * and advance ->boost_tasks to the next task in the ->blkd_tasks list. | 257 | * and advance ->boost_tasks to the next task in the ->blkd_tasks list. |
@@ -334,7 +339,7 @@ static int rcu_initiate_boost(void) | |||
334 | if (rcu_preempt_ctrlblk.exp_tasks == NULL) | 339 | if (rcu_preempt_ctrlblk.exp_tasks == NULL) |
335 | rcu_preempt_ctrlblk.boost_tasks = | 340 | rcu_preempt_ctrlblk.boost_tasks = |
336 | rcu_preempt_ctrlblk.gp_tasks; | 341 | rcu_preempt_ctrlblk.gp_tasks; |
337 | invoke_rcu_kthread(); | 342 | invoke_rcu_callbacks(); |
338 | } else | 343 | } else |
339 | RCU_TRACE(rcu_initiate_boost_trace()); | 344 | RCU_TRACE(rcu_initiate_boost_trace()); |
340 | return 1; | 345 | return 1; |
@@ -353,14 +358,6 @@ static void rcu_preempt_boost_start_gp(void) | |||
353 | #else /* #ifdef CONFIG_RCU_BOOST */ | 358 | #else /* #ifdef CONFIG_RCU_BOOST */ |
354 | 359 | ||
355 | /* | 360 | /* |
356 | * If there is no RCU priority boosting, we don't boost. | ||
357 | */ | ||
358 | static int rcu_boost(void) | ||
359 | { | ||
360 | return 0; | ||
361 | } | ||
362 | |||
363 | /* | ||
364 | * If there is no RCU priority boosting, we don't initiate boosting, | 361 | * If there is no RCU priority boosting, we don't initiate boosting, |
365 | * but we do indicate whether there are blocked readers blocking the | 362 | * but we do indicate whether there are blocked readers blocking the |
366 | * current grace period. | 363 | * current grace period. |
@@ -427,7 +424,7 @@ static void rcu_preempt_cpu_qs(void) | |||
427 | 424 | ||
428 | /* If there are done callbacks, cause them to be invoked. */ | 425 | /* If there are done callbacks, cause them to be invoked. */ |
429 | if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) | 426 | if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) |
430 | invoke_rcu_kthread(); | 427 | invoke_rcu_callbacks(); |
431 | } | 428 | } |
432 | 429 | ||
433 | /* | 430 | /* |
@@ -648,7 +645,7 @@ static void rcu_preempt_check_callbacks(void) | |||
648 | rcu_preempt_cpu_qs(); | 645 | rcu_preempt_cpu_qs(); |
649 | if (&rcu_preempt_ctrlblk.rcb.rcucblist != | 646 | if (&rcu_preempt_ctrlblk.rcb.rcucblist != |
650 | rcu_preempt_ctrlblk.rcb.donetail) | 647 | rcu_preempt_ctrlblk.rcb.donetail) |
651 | invoke_rcu_kthread(); | 648 | invoke_rcu_callbacks(); |
652 | if (rcu_preempt_gp_in_progress() && | 649 | if (rcu_preempt_gp_in_progress() && |
653 | rcu_cpu_blocking_cur_gp() && | 650 | rcu_cpu_blocking_cur_gp() && |
654 | rcu_preempt_running_reader()) | 651 | rcu_preempt_running_reader()) |
@@ -674,7 +671,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) | |||
674 | */ | 671 | */ |
675 | static void rcu_preempt_process_callbacks(void) | 672 | static void rcu_preempt_process_callbacks(void) |
676 | { | 673 | { |
677 | rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); | 674 | __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); |
678 | } | 675 | } |
679 | 676 | ||
680 | /* | 677 | /* |
@@ -697,20 +694,6 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |||
697 | } | 694 | } |
698 | EXPORT_SYMBOL_GPL(call_rcu); | 695 | EXPORT_SYMBOL_GPL(call_rcu); |
699 | 696 | ||
700 | void rcu_barrier(void) | ||
701 | { | ||
702 | struct rcu_synchronize rcu; | ||
703 | |||
704 | init_rcu_head_on_stack(&rcu.head); | ||
705 | init_completion(&rcu.completion); | ||
706 | /* Will wake me after RCU finished. */ | ||
707 | call_rcu(&rcu.head, wakeme_after_rcu); | ||
708 | /* Wait for it. */ | ||
709 | wait_for_completion(&rcu.completion); | ||
710 | destroy_rcu_head_on_stack(&rcu.head); | ||
711 | } | ||
712 | EXPORT_SYMBOL_GPL(rcu_barrier); | ||
713 | |||
714 | /* | 697 | /* |
715 | * synchronize_rcu - wait until a grace period has elapsed. | 698 | * synchronize_rcu - wait until a grace period has elapsed. |
716 | * | 699 | * |
@@ -864,15 +847,6 @@ static void show_tiny_preempt_stats(struct seq_file *m) | |||
864 | #endif /* #ifdef CONFIG_RCU_TRACE */ | 847 | #endif /* #ifdef CONFIG_RCU_TRACE */ |
865 | 848 | ||
866 | /* | 849 | /* |
867 | * Because preemptible RCU does not exist, it is never necessary to | ||
868 | * boost preempted RCU readers. | ||
869 | */ | ||
870 | static int rcu_boost(void) | ||
871 | { | ||
872 | return 0; | ||
873 | } | ||
874 | |||
875 | /* | ||
876 | * Because preemptible RCU does not exist, it never has any callbacks | 850 | * Because preemptible RCU does not exist, it never has any callbacks |
877 | * to check. | 851 | * to check. |
878 | */ | 852 | */ |
@@ -898,6 +872,78 @@ static void rcu_preempt_process_callbacks(void) | |||
898 | 872 | ||
899 | #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ | 873 | #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ |
900 | 874 | ||
875 | #ifdef CONFIG_RCU_BOOST | ||
876 | |||
877 | /* | ||
878 | * Wake up rcu_kthread() to process callbacks now eligible for invocation | ||
879 | * or to boost readers. | ||
880 | */ | ||
881 | static void invoke_rcu_callbacks(void) | ||
882 | { | ||
883 | have_rcu_kthread_work = 1; | ||
884 | wake_up(&rcu_kthread_wq); | ||
885 | } | ||
886 | |||
887 | /* | ||
888 | * This kthread invokes RCU callbacks whose grace periods have | ||
889 | * elapsed. It is awakened as needed, and takes the place of the | ||
890 | * RCU_SOFTIRQ that is used for this purpose when boosting is disabled. | ||
891 | * This is a kthread, but it is never stopped, at least not until | ||
892 | * the system goes down. | ||
893 | */ | ||
894 | static int rcu_kthread(void *arg) | ||
895 | { | ||
896 | unsigned long work; | ||
897 | unsigned long morework; | ||
898 | unsigned long flags; | ||
899 | |||
900 | for (;;) { | ||
901 | wait_event_interruptible(rcu_kthread_wq, | ||
902 | have_rcu_kthread_work != 0); | ||
903 | morework = rcu_boost(); | ||
904 | local_irq_save(flags); | ||
905 | work = have_rcu_kthread_work; | ||
906 | have_rcu_kthread_work = morework; | ||
907 | local_irq_restore(flags); | ||
908 | if (work) | ||
909 | rcu_process_callbacks(NULL); | ||
910 | schedule_timeout_interruptible(1); /* Leave CPU for others. */ | ||
911 | } | ||
912 | |||
913 | return 0; /* Not reached, but needed to shut gcc up. */ | ||
914 | } | ||
915 | |||
916 | /* | ||
917 | * Spawn the kthread that invokes RCU callbacks. | ||
918 | */ | ||
919 | static int __init rcu_spawn_kthreads(void) | ||
920 | { | ||
921 | struct sched_param sp; | ||
922 | |||
923 | rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread"); | ||
924 | sp.sched_priority = RCU_BOOST_PRIO; | ||
925 | sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp); | ||
926 | return 0; | ||
927 | } | ||
928 | early_initcall(rcu_spawn_kthreads); | ||
929 | |||
930 | #else /* #ifdef CONFIG_RCU_BOOST */ | ||
931 | |||
932 | /* | ||
933 | * Start up softirq processing of callbacks. | ||
934 | */ | ||
935 | void invoke_rcu_callbacks(void) | ||
936 | { | ||
937 | raise_softirq(RCU_SOFTIRQ); | ||
938 | } | ||
939 | |||
940 | void rcu_init(void) | ||
941 | { | ||
942 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | ||
943 | } | ||
944 | |||
945 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | ||
946 | |||
901 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 947 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
902 | #include <linux/kernel_stat.h> | 948 | #include <linux/kernel_stat.h> |
903 | 949 | ||
@@ -913,12 +959,6 @@ void __init rcu_scheduler_starting(void) | |||
913 | 959 | ||
914 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 960 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
915 | 961 | ||
916 | #ifdef CONFIG_RCU_BOOST | ||
917 | #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO | ||
918 | #else /* #ifdef CONFIG_RCU_BOOST */ | ||
919 | #define RCU_BOOST_PRIO 1 | ||
920 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | ||
921 | |||
922 | #ifdef CONFIG_RCU_TRACE | 962 | #ifdef CONFIG_RCU_TRACE |
923 | 963 | ||
924 | #ifdef CONFIG_RCU_BOOST | 964 | #ifdef CONFIG_RCU_BOOST |
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 98f51b13bb7e..764825c2685c 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -73,7 +73,7 @@ module_param(nreaders, int, 0444); | |||
73 | MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); | 73 | MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); |
74 | module_param(nfakewriters, int, 0444); | 74 | module_param(nfakewriters, int, 0444); |
75 | MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads"); | 75 | MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads"); |
76 | module_param(stat_interval, int, 0444); | 76 | module_param(stat_interval, int, 0644); |
77 | MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); | 77 | MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); |
78 | module_param(verbose, bool, 0444); | 78 | module_param(verbose, bool, 0444); |
79 | MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s"); | 79 | MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s"); |
@@ -480,30 +480,6 @@ static void rcu_bh_torture_deferred_free(struct rcu_torture *p) | |||
480 | call_rcu_bh(&p->rtort_rcu, rcu_torture_cb); | 480 | call_rcu_bh(&p->rtort_rcu, rcu_torture_cb); |
481 | } | 481 | } |
482 | 482 | ||
483 | struct rcu_bh_torture_synchronize { | ||
484 | struct rcu_head head; | ||
485 | struct completion completion; | ||
486 | }; | ||
487 | |||
488 | static void rcu_bh_torture_wakeme_after_cb(struct rcu_head *head) | ||
489 | { | ||
490 | struct rcu_bh_torture_synchronize *rcu; | ||
491 | |||
492 | rcu = container_of(head, struct rcu_bh_torture_synchronize, head); | ||
493 | complete(&rcu->completion); | ||
494 | } | ||
495 | |||
496 | static void rcu_bh_torture_synchronize(void) | ||
497 | { | ||
498 | struct rcu_bh_torture_synchronize rcu; | ||
499 | |||
500 | init_rcu_head_on_stack(&rcu.head); | ||
501 | init_completion(&rcu.completion); | ||
502 | call_rcu_bh(&rcu.head, rcu_bh_torture_wakeme_after_cb); | ||
503 | wait_for_completion(&rcu.completion); | ||
504 | destroy_rcu_head_on_stack(&rcu.head); | ||
505 | } | ||
506 | |||
507 | static struct rcu_torture_ops rcu_bh_ops = { | 483 | static struct rcu_torture_ops rcu_bh_ops = { |
508 | .init = NULL, | 484 | .init = NULL, |
509 | .cleanup = NULL, | 485 | .cleanup = NULL, |
@@ -512,7 +488,7 @@ static struct rcu_torture_ops rcu_bh_ops = { | |||
512 | .readunlock = rcu_bh_torture_read_unlock, | 488 | .readunlock = rcu_bh_torture_read_unlock, |
513 | .completed = rcu_bh_torture_completed, | 489 | .completed = rcu_bh_torture_completed, |
514 | .deferred_free = rcu_bh_torture_deferred_free, | 490 | .deferred_free = rcu_bh_torture_deferred_free, |
515 | .sync = rcu_bh_torture_synchronize, | 491 | .sync = synchronize_rcu_bh, |
516 | .cb_barrier = rcu_barrier_bh, | 492 | .cb_barrier = rcu_barrier_bh, |
517 | .fqs = rcu_bh_force_quiescent_state, | 493 | .fqs = rcu_bh_force_quiescent_state, |
518 | .stats = NULL, | 494 | .stats = NULL, |
@@ -528,7 +504,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = { | |||
528 | .readunlock = rcu_bh_torture_read_unlock, | 504 | .readunlock = rcu_bh_torture_read_unlock, |
529 | .completed = rcu_bh_torture_completed, | 505 | .completed = rcu_bh_torture_completed, |
530 | .deferred_free = rcu_sync_torture_deferred_free, | 506 | .deferred_free = rcu_sync_torture_deferred_free, |
531 | .sync = rcu_bh_torture_synchronize, | 507 | .sync = synchronize_rcu_bh, |
532 | .cb_barrier = NULL, | 508 | .cb_barrier = NULL, |
533 | .fqs = rcu_bh_force_quiescent_state, | 509 | .fqs = rcu_bh_force_quiescent_state, |
534 | .stats = NULL, | 510 | .stats = NULL, |
@@ -536,6 +512,22 @@ static struct rcu_torture_ops rcu_bh_sync_ops = { | |||
536 | .name = "rcu_bh_sync" | 512 | .name = "rcu_bh_sync" |
537 | }; | 513 | }; |
538 | 514 | ||
515 | static struct rcu_torture_ops rcu_bh_expedited_ops = { | ||
516 | .init = rcu_sync_torture_init, | ||
517 | .cleanup = NULL, | ||
518 | .readlock = rcu_bh_torture_read_lock, | ||
519 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ | ||
520 | .readunlock = rcu_bh_torture_read_unlock, | ||
521 | .completed = rcu_bh_torture_completed, | ||
522 | .deferred_free = rcu_sync_torture_deferred_free, | ||
523 | .sync = synchronize_rcu_bh_expedited, | ||
524 | .cb_barrier = NULL, | ||
525 | .fqs = rcu_bh_force_quiescent_state, | ||
526 | .stats = NULL, | ||
527 | .irq_capable = 1, | ||
528 | .name = "rcu_bh_expedited" | ||
529 | }; | ||
530 | |||
539 | /* | 531 | /* |
540 | * Definitions for srcu torture testing. | 532 | * Definitions for srcu torture testing. |
541 | */ | 533 | */ |
@@ -659,11 +651,6 @@ static void rcu_sched_torture_deferred_free(struct rcu_torture *p) | |||
659 | call_rcu_sched(&p->rtort_rcu, rcu_torture_cb); | 651 | call_rcu_sched(&p->rtort_rcu, rcu_torture_cb); |
660 | } | 652 | } |
661 | 653 | ||
662 | static void sched_torture_synchronize(void) | ||
663 | { | ||
664 | synchronize_sched(); | ||
665 | } | ||
666 | |||
667 | static struct rcu_torture_ops sched_ops = { | 654 | static struct rcu_torture_ops sched_ops = { |
668 | .init = rcu_sync_torture_init, | 655 | .init = rcu_sync_torture_init, |
669 | .cleanup = NULL, | 656 | .cleanup = NULL, |
@@ -672,7 +659,7 @@ static struct rcu_torture_ops sched_ops = { | |||
672 | .readunlock = sched_torture_read_unlock, | 659 | .readunlock = sched_torture_read_unlock, |
673 | .completed = rcu_no_completed, | 660 | .completed = rcu_no_completed, |
674 | .deferred_free = rcu_sched_torture_deferred_free, | 661 | .deferred_free = rcu_sched_torture_deferred_free, |
675 | .sync = sched_torture_synchronize, | 662 | .sync = synchronize_sched, |
676 | .cb_barrier = rcu_barrier_sched, | 663 | .cb_barrier = rcu_barrier_sched, |
677 | .fqs = rcu_sched_force_quiescent_state, | 664 | .fqs = rcu_sched_force_quiescent_state, |
678 | .stats = NULL, | 665 | .stats = NULL, |
@@ -688,7 +675,7 @@ static struct rcu_torture_ops sched_sync_ops = { | |||
688 | .readunlock = sched_torture_read_unlock, | 675 | .readunlock = sched_torture_read_unlock, |
689 | .completed = rcu_no_completed, | 676 | .completed = rcu_no_completed, |
690 | .deferred_free = rcu_sync_torture_deferred_free, | 677 | .deferred_free = rcu_sync_torture_deferred_free, |
691 | .sync = sched_torture_synchronize, | 678 | .sync = synchronize_sched, |
692 | .cb_barrier = NULL, | 679 | .cb_barrier = NULL, |
693 | .fqs = rcu_sched_force_quiescent_state, | 680 | .fqs = rcu_sched_force_quiescent_state, |
694 | .stats = NULL, | 681 | .stats = NULL, |
@@ -754,7 +741,7 @@ static int rcu_torture_boost(void *arg) | |||
754 | do { | 741 | do { |
755 | /* Wait for the next test interval. */ | 742 | /* Wait for the next test interval. */ |
756 | oldstarttime = boost_starttime; | 743 | oldstarttime = boost_starttime; |
757 | while (jiffies - oldstarttime > ULONG_MAX / 2) { | 744 | while (ULONG_CMP_LT(jiffies, oldstarttime)) { |
758 | schedule_timeout_uninterruptible(1); | 745 | schedule_timeout_uninterruptible(1); |
759 | rcu_stutter_wait("rcu_torture_boost"); | 746 | rcu_stutter_wait("rcu_torture_boost"); |
760 | if (kthread_should_stop() || | 747 | if (kthread_should_stop() || |
@@ -765,7 +752,7 @@ static int rcu_torture_boost(void *arg) | |||
765 | /* Do one boost-test interval. */ | 752 | /* Do one boost-test interval. */ |
766 | endtime = oldstarttime + test_boost_duration * HZ; | 753 | endtime = oldstarttime + test_boost_duration * HZ; |
767 | call_rcu_time = jiffies; | 754 | call_rcu_time = jiffies; |
768 | while (jiffies - endtime > ULONG_MAX / 2) { | 755 | while (ULONG_CMP_LT(jiffies, endtime)) { |
769 | /* If we don't have a callback in flight, post one. */ | 756 | /* If we don't have a callback in flight, post one. */ |
770 | if (!rbi.inflight) { | 757 | if (!rbi.inflight) { |
771 | smp_mb(); /* RCU core before ->inflight = 1. */ | 758 | smp_mb(); /* RCU core before ->inflight = 1. */ |
@@ -792,7 +779,8 @@ static int rcu_torture_boost(void *arg) | |||
792 | * interval. Besides, we are running at RT priority, | 779 | * interval. Besides, we are running at RT priority, |
793 | * so delays should be relatively rare. | 780 | * so delays should be relatively rare. |
794 | */ | 781 | */ |
795 | while (oldstarttime == boost_starttime) { | 782 | while (oldstarttime == boost_starttime && |
783 | !kthread_should_stop()) { | ||
796 | if (mutex_trylock(&boost_mutex)) { | 784 | if (mutex_trylock(&boost_mutex)) { |
797 | boost_starttime = jiffies + | 785 | boost_starttime = jiffies + |
798 | test_boost_interval * HZ; | 786 | test_boost_interval * HZ; |
@@ -809,11 +797,11 @@ checkwait: rcu_stutter_wait("rcu_torture_boost"); | |||
809 | 797 | ||
810 | /* Clean up and exit. */ | 798 | /* Clean up and exit. */ |
811 | VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); | 799 | VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); |
812 | destroy_rcu_head_on_stack(&rbi.rcu); | ||
813 | rcutorture_shutdown_absorb("rcu_torture_boost"); | 800 | rcutorture_shutdown_absorb("rcu_torture_boost"); |
814 | while (!kthread_should_stop() || rbi.inflight) | 801 | while (!kthread_should_stop() || rbi.inflight) |
815 | schedule_timeout_uninterruptible(1); | 802 | schedule_timeout_uninterruptible(1); |
816 | smp_mb(); /* order accesses to ->inflight before stack-frame death. */ | 803 | smp_mb(); /* order accesses to ->inflight before stack-frame death. */ |
804 | destroy_rcu_head_on_stack(&rbi.rcu); | ||
817 | return 0; | 805 | return 0; |
818 | } | 806 | } |
819 | 807 | ||
@@ -831,11 +819,13 @@ rcu_torture_fqs(void *arg) | |||
831 | VERBOSE_PRINTK_STRING("rcu_torture_fqs task started"); | 819 | VERBOSE_PRINTK_STRING("rcu_torture_fqs task started"); |
832 | do { | 820 | do { |
833 | fqs_resume_time = jiffies + fqs_stutter * HZ; | 821 | fqs_resume_time = jiffies + fqs_stutter * HZ; |
834 | while (jiffies - fqs_resume_time > LONG_MAX) { | 822 | while (ULONG_CMP_LT(jiffies, fqs_resume_time) && |
823 | !kthread_should_stop()) { | ||
835 | schedule_timeout_interruptible(1); | 824 | schedule_timeout_interruptible(1); |
836 | } | 825 | } |
837 | fqs_burst_remaining = fqs_duration; | 826 | fqs_burst_remaining = fqs_duration; |
838 | while (fqs_burst_remaining > 0) { | 827 | while (fqs_burst_remaining > 0 && |
828 | !kthread_should_stop()) { | ||
839 | cur_ops->fqs(); | 829 | cur_ops->fqs(); |
840 | udelay(fqs_holdoff); | 830 | udelay(fqs_holdoff); |
841 | fqs_burst_remaining -= fqs_holdoff; | 831 | fqs_burst_remaining -= fqs_holdoff; |
@@ -1280,8 +1270,9 @@ static int rcutorture_booster_init(int cpu) | |||
1280 | /* Don't allow time recalculation while creating a new task. */ | 1270 | /* Don't allow time recalculation while creating a new task. */ |
1281 | mutex_lock(&boost_mutex); | 1271 | mutex_lock(&boost_mutex); |
1282 | VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task"); | 1272 | VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task"); |
1283 | boost_tasks[cpu] = kthread_create(rcu_torture_boost, NULL, | 1273 | boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL, |
1284 | "rcu_torture_boost"); | 1274 | cpu_to_node(cpu), |
1275 | "rcu_torture_boost"); | ||
1285 | if (IS_ERR(boost_tasks[cpu])) { | 1276 | if (IS_ERR(boost_tasks[cpu])) { |
1286 | retval = PTR_ERR(boost_tasks[cpu]); | 1277 | retval = PTR_ERR(boost_tasks[cpu]); |
1287 | VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed"); | 1278 | VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed"); |
@@ -1424,7 +1415,7 @@ rcu_torture_init(void) | |||
1424 | int firsterr = 0; | 1415 | int firsterr = 0; |
1425 | static struct rcu_torture_ops *torture_ops[] = | 1416 | static struct rcu_torture_ops *torture_ops[] = |
1426 | { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, | 1417 | { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, |
1427 | &rcu_bh_ops, &rcu_bh_sync_ops, | 1418 | &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, |
1428 | &srcu_ops, &srcu_expedited_ops, | 1419 | &srcu_ops, &srcu_expedited_ops, |
1429 | &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; | 1420 | &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; |
1430 | 1421 | ||
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ba06207b1dd3..e234eb92a177 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -52,13 +52,16 @@ | |||
52 | #include <linux/prefetch.h> | 52 | #include <linux/prefetch.h> |
53 | 53 | ||
54 | #include "rcutree.h" | 54 | #include "rcutree.h" |
55 | #include <trace/events/rcu.h> | ||
56 | |||
57 | #include "rcu.h" | ||
55 | 58 | ||
56 | /* Data structures. */ | 59 | /* Data structures. */ |
57 | 60 | ||
58 | static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | 61 | static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; |
59 | 62 | ||
60 | #define RCU_STATE_INITIALIZER(structname) { \ | 63 | #define RCU_STATE_INITIALIZER(structname) { \ |
61 | .level = { &structname.node[0] }, \ | 64 | .level = { &structname##_state.node[0] }, \ |
62 | .levelcnt = { \ | 65 | .levelcnt = { \ |
63 | NUM_RCU_LVL_0, /* root of hierarchy. */ \ | 66 | NUM_RCU_LVL_0, /* root of hierarchy. */ \ |
64 | NUM_RCU_LVL_1, \ | 67 | NUM_RCU_LVL_1, \ |
@@ -69,17 +72,17 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | |||
69 | .signaled = RCU_GP_IDLE, \ | 72 | .signaled = RCU_GP_IDLE, \ |
70 | .gpnum = -300, \ | 73 | .gpnum = -300, \ |
71 | .completed = -300, \ | 74 | .completed = -300, \ |
72 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \ | 75 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ |
73 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \ | 76 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ |
74 | .n_force_qs = 0, \ | 77 | .n_force_qs = 0, \ |
75 | .n_force_qs_ngp = 0, \ | 78 | .n_force_qs_ngp = 0, \ |
76 | .name = #structname, \ | 79 | .name = #structname, \ |
77 | } | 80 | } |
78 | 81 | ||
79 | struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state); | 82 | struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched); |
80 | DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); | 83 | DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); |
81 | 84 | ||
82 | struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); | 85 | struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh); |
83 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); | 86 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); |
84 | 87 | ||
85 | static struct rcu_state *rcu_state; | 88 | static struct rcu_state *rcu_state; |
@@ -128,8 +131,6 @@ static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); | |||
128 | static void invoke_rcu_core(void); | 131 | static void invoke_rcu_core(void); |
129 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); | 132 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); |
130 | 133 | ||
131 | #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ | ||
132 | |||
133 | /* | 134 | /* |
134 | * Track the rcutorture test sequence number and the update version | 135 | * Track the rcutorture test sequence number and the update version |
135 | * number within a given test. The rcutorture_testseq is incremented | 136 | * number within a given test. The rcutorture_testseq is incremented |
@@ -156,33 +157,41 @@ static int rcu_gp_in_progress(struct rcu_state *rsp) | |||
156 | * Note a quiescent state. Because we do not need to know | 157 | * Note a quiescent state. Because we do not need to know |
157 | * how many quiescent states passed, just if there was at least | 158 | * how many quiescent states passed, just if there was at least |
158 | * one since the start of the grace period, this just sets a flag. | 159 | * one since the start of the grace period, this just sets a flag. |
160 | * The caller must have disabled preemption. | ||
159 | */ | 161 | */ |
160 | void rcu_sched_qs(int cpu) | 162 | void rcu_sched_qs(int cpu) |
161 | { | 163 | { |
162 | struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); | 164 | struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); |
163 | 165 | ||
164 | rdp->passed_quiesc_completed = rdp->gpnum - 1; | 166 | rdp->passed_quiesce_gpnum = rdp->gpnum; |
165 | barrier(); | 167 | barrier(); |
166 | rdp->passed_quiesc = 1; | 168 | if (rdp->passed_quiesce == 0) |
169 | trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs"); | ||
170 | rdp->passed_quiesce = 1; | ||
167 | } | 171 | } |
168 | 172 | ||
169 | void rcu_bh_qs(int cpu) | 173 | void rcu_bh_qs(int cpu) |
170 | { | 174 | { |
171 | struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); | 175 | struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); |
172 | 176 | ||
173 | rdp->passed_quiesc_completed = rdp->gpnum - 1; | 177 | rdp->passed_quiesce_gpnum = rdp->gpnum; |
174 | barrier(); | 178 | barrier(); |
175 | rdp->passed_quiesc = 1; | 179 | if (rdp->passed_quiesce == 0) |
180 | trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs"); | ||
181 | rdp->passed_quiesce = 1; | ||
176 | } | 182 | } |
177 | 183 | ||
178 | /* | 184 | /* |
179 | * Note a context switch. This is a quiescent state for RCU-sched, | 185 | * Note a context switch. This is a quiescent state for RCU-sched, |
180 | * and requires special handling for preemptible RCU. | 186 | * and requires special handling for preemptible RCU. |
187 | * The caller must have disabled preemption. | ||
181 | */ | 188 | */ |
182 | void rcu_note_context_switch(int cpu) | 189 | void rcu_note_context_switch(int cpu) |
183 | { | 190 | { |
191 | trace_rcu_utilization("Start context switch"); | ||
184 | rcu_sched_qs(cpu); | 192 | rcu_sched_qs(cpu); |
185 | rcu_preempt_note_context_switch(cpu); | 193 | rcu_preempt_note_context_switch(cpu); |
194 | trace_rcu_utilization("End context switch"); | ||
186 | } | 195 | } |
187 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); | 196 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); |
188 | 197 | ||
@@ -193,7 +202,7 @@ DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { | |||
193 | }; | 202 | }; |
194 | #endif /* #ifdef CONFIG_NO_HZ */ | 203 | #endif /* #ifdef CONFIG_NO_HZ */ |
195 | 204 | ||
196 | static int blimit = 10; /* Maximum callbacks per softirq. */ | 205 | static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */ |
197 | static int qhimark = 10000; /* If this many pending, ignore blimit. */ | 206 | static int qhimark = 10000; /* If this many pending, ignore blimit. */ |
198 | static int qlowmark = 100; /* Once only this many pending, use blimit. */ | 207 | static int qlowmark = 100; /* Once only this many pending, use blimit. */ |
199 | 208 | ||
@@ -314,6 +323,7 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp) | |||
314 | * trust its state not to change because interrupts are disabled. | 323 | * trust its state not to change because interrupts are disabled. |
315 | */ | 324 | */ |
316 | if (cpu_is_offline(rdp->cpu)) { | 325 | if (cpu_is_offline(rdp->cpu)) { |
326 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); | ||
317 | rdp->offline_fqs++; | 327 | rdp->offline_fqs++; |
318 | return 1; | 328 | return 1; |
319 | } | 329 | } |
@@ -354,19 +364,13 @@ void rcu_enter_nohz(void) | |||
354 | local_irq_restore(flags); | 364 | local_irq_restore(flags); |
355 | return; | 365 | return; |
356 | } | 366 | } |
367 | trace_rcu_dyntick("Start"); | ||
357 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ | 368 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ |
358 | smp_mb__before_atomic_inc(); /* See above. */ | 369 | smp_mb__before_atomic_inc(); /* See above. */ |
359 | atomic_inc(&rdtp->dynticks); | 370 | atomic_inc(&rdtp->dynticks); |
360 | smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ | 371 | smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ |
361 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | 372 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); |
362 | local_irq_restore(flags); | 373 | local_irq_restore(flags); |
363 | |||
364 | /* If the interrupt queued a callback, get out of dyntick mode. */ | ||
365 | if (in_irq() && | ||
366 | (__get_cpu_var(rcu_sched_data).nxtlist || | ||
367 | __get_cpu_var(rcu_bh_data).nxtlist || | ||
368 | rcu_preempt_needs_cpu(smp_processor_id()))) | ||
369 | set_need_resched(); | ||
370 | } | 374 | } |
371 | 375 | ||
372 | /* | 376 | /* |
@@ -391,6 +395,7 @@ void rcu_exit_nohz(void) | |||
391 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ | 395 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ |
392 | smp_mb__after_atomic_inc(); /* See above. */ | 396 | smp_mb__after_atomic_inc(); /* See above. */ |
393 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); | 397 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); |
398 | trace_rcu_dyntick("End"); | ||
394 | local_irq_restore(flags); | 399 | local_irq_restore(flags); |
395 | } | 400 | } |
396 | 401 | ||
@@ -481,11 +486,11 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) | |||
481 | */ | 486 | */ |
482 | static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | 487 | static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) |
483 | { | 488 | { |
484 | unsigned long curr; | 489 | unsigned int curr; |
485 | unsigned long snap; | 490 | unsigned int snap; |
486 | 491 | ||
487 | curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks); | 492 | curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks); |
488 | snap = (unsigned long)rdp->dynticks_snap; | 493 | snap = (unsigned int)rdp->dynticks_snap; |
489 | 494 | ||
490 | /* | 495 | /* |
491 | * If the CPU passed through or entered a dynticks idle phase with | 496 | * If the CPU passed through or entered a dynticks idle phase with |
@@ -495,7 +500,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | |||
495 | * read-side critical section that started before the beginning | 500 | * read-side critical section that started before the beginning |
496 | * of the current RCU grace period. | 501 | * of the current RCU grace period. |
497 | */ | 502 | */ |
498 | if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) { | 503 | if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) { |
504 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti"); | ||
499 | rdp->dynticks_fqs++; | 505 | rdp->dynticks_fqs++; |
500 | return 1; | 506 | return 1; |
501 | } | 507 | } |
@@ -537,6 +543,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
537 | int cpu; | 543 | int cpu; |
538 | long delta; | 544 | long delta; |
539 | unsigned long flags; | 545 | unsigned long flags; |
546 | int ndetected; | ||
540 | struct rcu_node *rnp = rcu_get_root(rsp); | 547 | struct rcu_node *rnp = rcu_get_root(rsp); |
541 | 548 | ||
542 | /* Only let one CPU complain about others per time interval. */ | 549 | /* Only let one CPU complain about others per time interval. */ |
@@ -553,7 +560,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
553 | * Now rat on any tasks that got kicked up to the root rcu_node | 560 | * Now rat on any tasks that got kicked up to the root rcu_node |
554 | * due to CPU offlining. | 561 | * due to CPU offlining. |
555 | */ | 562 | */ |
556 | rcu_print_task_stall(rnp); | 563 | ndetected = rcu_print_task_stall(rnp); |
557 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 564 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
558 | 565 | ||
559 | /* | 566 | /* |
@@ -565,17 +572,22 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
565 | rsp->name); | 572 | rsp->name); |
566 | rcu_for_each_leaf_node(rsp, rnp) { | 573 | rcu_for_each_leaf_node(rsp, rnp) { |
567 | raw_spin_lock_irqsave(&rnp->lock, flags); | 574 | raw_spin_lock_irqsave(&rnp->lock, flags); |
568 | rcu_print_task_stall(rnp); | 575 | ndetected += rcu_print_task_stall(rnp); |
569 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 576 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
570 | if (rnp->qsmask == 0) | 577 | if (rnp->qsmask == 0) |
571 | continue; | 578 | continue; |
572 | for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) | 579 | for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) |
573 | if (rnp->qsmask & (1UL << cpu)) | 580 | if (rnp->qsmask & (1UL << cpu)) { |
574 | printk(" %d", rnp->grplo + cpu); | 581 | printk(" %d", rnp->grplo + cpu); |
582 | ndetected++; | ||
583 | } | ||
575 | } | 584 | } |
576 | printk("} (detected by %d, t=%ld jiffies)\n", | 585 | printk("} (detected by %d, t=%ld jiffies)\n", |
577 | smp_processor_id(), (long)(jiffies - rsp->gp_start)); | 586 | smp_processor_id(), (long)(jiffies - rsp->gp_start)); |
578 | trigger_all_cpu_backtrace(); | 587 | if (ndetected == 0) |
588 | printk(KERN_ERR "INFO: Stall ended before state dump start\n"); | ||
589 | else if (!trigger_all_cpu_backtrace()) | ||
590 | dump_stack(); | ||
579 | 591 | ||
580 | /* If so configured, complain about tasks blocking the grace period. */ | 592 | /* If so configured, complain about tasks blocking the grace period. */ |
581 | 593 | ||
@@ -596,7 +608,8 @@ static void print_cpu_stall(struct rcu_state *rsp) | |||
596 | */ | 608 | */ |
597 | printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", | 609 | printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", |
598 | rsp->name, smp_processor_id(), jiffies - rsp->gp_start); | 610 | rsp->name, smp_processor_id(), jiffies - rsp->gp_start); |
599 | trigger_all_cpu_backtrace(); | 611 | if (!trigger_all_cpu_backtrace()) |
612 | dump_stack(); | ||
600 | 613 | ||
601 | raw_spin_lock_irqsave(&rnp->lock, flags); | 614 | raw_spin_lock_irqsave(&rnp->lock, flags); |
602 | if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) | 615 | if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) |
@@ -678,9 +691,10 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct | |||
678 | * go looking for one. | 691 | * go looking for one. |
679 | */ | 692 | */ |
680 | rdp->gpnum = rnp->gpnum; | 693 | rdp->gpnum = rnp->gpnum; |
694 | trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart"); | ||
681 | if (rnp->qsmask & rdp->grpmask) { | 695 | if (rnp->qsmask & rdp->grpmask) { |
682 | rdp->qs_pending = 1; | 696 | rdp->qs_pending = 1; |
683 | rdp->passed_quiesc = 0; | 697 | rdp->passed_quiesce = 0; |
684 | } else | 698 | } else |
685 | rdp->qs_pending = 0; | 699 | rdp->qs_pending = 0; |
686 | } | 700 | } |
@@ -741,6 +755,7 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat | |||
741 | 755 | ||
742 | /* Remember that we saw this grace-period completion. */ | 756 | /* Remember that we saw this grace-period completion. */ |
743 | rdp->completed = rnp->completed; | 757 | rdp->completed = rnp->completed; |
758 | trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend"); | ||
744 | 759 | ||
745 | /* | 760 | /* |
746 | * If we were in an extended quiescent state, we may have | 761 | * If we were in an extended quiescent state, we may have |
@@ -826,31 +841,31 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
826 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | 841 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); |
827 | struct rcu_node *rnp = rcu_get_root(rsp); | 842 | struct rcu_node *rnp = rcu_get_root(rsp); |
828 | 843 | ||
829 | if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { | 844 | if (!rcu_scheduler_fully_active || |
830 | if (cpu_needs_another_gp(rsp, rdp)) | 845 | !cpu_needs_another_gp(rsp, rdp)) { |
831 | rsp->fqs_need_gp = 1; | 846 | /* |
832 | if (rnp->completed == rsp->completed) { | 847 | * Either the scheduler hasn't yet spawned the first |
833 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 848 | * non-idle task or this CPU does not need another |
834 | return; | 849 | * grace period. Either way, don't start a new grace |
835 | } | 850 | * period. |
836 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 851 | */ |
852 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
853 | return; | ||
854 | } | ||
837 | 855 | ||
856 | if (rsp->fqs_active) { | ||
838 | /* | 857 | /* |
839 | * Propagate new ->completed value to rcu_node structures | 858 | * This CPU needs a grace period, but force_quiescent_state() |
840 | * so that other CPUs don't have to wait until the start | 859 | * is running. Tell it to start one on this CPU's behalf. |
841 | * of the next grace period to process their callbacks. | ||
842 | */ | 860 | */ |
843 | rcu_for_each_node_breadth_first(rsp, rnp) { | 861 | rsp->fqs_need_gp = 1; |
844 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 862 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
845 | rnp->completed = rsp->completed; | ||
846 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
847 | } | ||
848 | local_irq_restore(flags); | ||
849 | return; | 863 | return; |
850 | } | 864 | } |
851 | 865 | ||
852 | /* Advance to a new grace period and initialize state. */ | 866 | /* Advance to a new grace period and initialize state. */ |
853 | rsp->gpnum++; | 867 | rsp->gpnum++; |
868 | trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); | ||
854 | WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT); | 869 | WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT); |
855 | rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ | 870 | rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ |
856 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; | 871 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; |
@@ -865,6 +880,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
865 | rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ | 880 | rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ |
866 | rcu_start_gp_per_cpu(rsp, rnp, rdp); | 881 | rcu_start_gp_per_cpu(rsp, rnp, rdp); |
867 | rcu_preempt_boost_start_gp(rnp); | 882 | rcu_preempt_boost_start_gp(rnp); |
883 | trace_rcu_grace_period_init(rsp->name, rnp->gpnum, | ||
884 | rnp->level, rnp->grplo, | ||
885 | rnp->grphi, rnp->qsmask); | ||
868 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 886 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
869 | return; | 887 | return; |
870 | } | 888 | } |
@@ -901,6 +919,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
901 | if (rnp == rdp->mynode) | 919 | if (rnp == rdp->mynode) |
902 | rcu_start_gp_per_cpu(rsp, rnp, rdp); | 920 | rcu_start_gp_per_cpu(rsp, rnp, rdp); |
903 | rcu_preempt_boost_start_gp(rnp); | 921 | rcu_preempt_boost_start_gp(rnp); |
922 | trace_rcu_grace_period_init(rsp->name, rnp->gpnum, | ||
923 | rnp->level, rnp->grplo, | ||
924 | rnp->grphi, rnp->qsmask); | ||
904 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 925 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
905 | } | 926 | } |
906 | 927 | ||
@@ -922,6 +943,8 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) | |||
922 | __releases(rcu_get_root(rsp)->lock) | 943 | __releases(rcu_get_root(rsp)->lock) |
923 | { | 944 | { |
924 | unsigned long gp_duration; | 945 | unsigned long gp_duration; |
946 | struct rcu_node *rnp = rcu_get_root(rsp); | ||
947 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | ||
925 | 948 | ||
926 | WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); | 949 | WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); |
927 | 950 | ||
@@ -933,7 +956,41 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) | |||
933 | gp_duration = jiffies - rsp->gp_start; | 956 | gp_duration = jiffies - rsp->gp_start; |
934 | if (gp_duration > rsp->gp_max) | 957 | if (gp_duration > rsp->gp_max) |
935 | rsp->gp_max = gp_duration; | 958 | rsp->gp_max = gp_duration; |
936 | rsp->completed = rsp->gpnum; | 959 | |
960 | /* | ||
961 | * We know the grace period is complete, but to everyone else | ||
962 | * it appears to still be ongoing. But it is also the case | ||
963 | * that to everyone else it looks like there is nothing that | ||
964 | * they can do to advance the grace period. It is therefore | ||
965 | * safe for us to drop the lock in order to mark the grace | ||
966 | * period as completed in all of the rcu_node structures. | ||
967 | * | ||
968 | * But if this CPU needs another grace period, it will take | ||
969 | * care of this while initializing the next grace period. | ||
970 | * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL | ||
971 | * because the callbacks have not yet been advanced: Those | ||
972 | * callbacks are waiting on the grace period that just now | ||
973 | * completed. | ||
974 | */ | ||
975 | if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) { | ||
976 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
977 | |||
978 | /* | ||
979 | * Propagate new ->completed value to rcu_node structures | ||
980 | * so that other CPUs don't have to wait until the start | ||
981 | * of the next grace period to process their callbacks. | ||
982 | */ | ||
983 | rcu_for_each_node_breadth_first(rsp, rnp) { | ||
984 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | ||
985 | rnp->completed = rsp->gpnum; | ||
986 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
987 | } | ||
988 | rnp = rcu_get_root(rsp); | ||
989 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | ||
990 | } | ||
991 | |||
992 | rsp->completed = rsp->gpnum; /* Declare the grace period complete. */ | ||
993 | trace_rcu_grace_period(rsp->name, rsp->completed, "end"); | ||
937 | rsp->signaled = RCU_GP_IDLE; | 994 | rsp->signaled = RCU_GP_IDLE; |
938 | rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ | 995 | rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ |
939 | } | 996 | } |
@@ -962,6 +1019,10 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, | |||
962 | return; | 1019 | return; |
963 | } | 1020 | } |
964 | rnp->qsmask &= ~mask; | 1021 | rnp->qsmask &= ~mask; |
1022 | trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, | ||
1023 | mask, rnp->qsmask, rnp->level, | ||
1024 | rnp->grplo, rnp->grphi, | ||
1025 | !!rnp->gp_tasks); | ||
965 | if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { | 1026 | if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { |
966 | 1027 | ||
967 | /* Other bits still set at this level, so done. */ | 1028 | /* Other bits still set at this level, so done. */ |
@@ -1000,7 +1061,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, | |||
1000 | * based on quiescent states detected in an earlier grace period! | 1061 | * based on quiescent states detected in an earlier grace period! |
1001 | */ | 1062 | */ |
1002 | static void | 1063 | static void |
1003 | rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) | 1064 | rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastgp) |
1004 | { | 1065 | { |
1005 | unsigned long flags; | 1066 | unsigned long flags; |
1006 | unsigned long mask; | 1067 | unsigned long mask; |
@@ -1008,17 +1069,15 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long las | |||
1008 | 1069 | ||
1009 | rnp = rdp->mynode; | 1070 | rnp = rdp->mynode; |
1010 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1071 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1011 | if (lastcomp != rnp->completed) { | 1072 | if (lastgp != rnp->gpnum || rnp->completed == rnp->gpnum) { |
1012 | 1073 | ||
1013 | /* | 1074 | /* |
1014 | * Someone beat us to it for this grace period, so leave. | 1075 | * The grace period in which this quiescent state was |
1015 | * The race with GP start is resolved by the fact that we | 1076 | * recorded has ended, so don't report it upwards. |
1016 | * hold the leaf rcu_node lock, so that the per-CPU bits | 1077 | * We will instead need a new quiescent state that lies |
1017 | * cannot yet be initialized -- so we would simply find our | 1078 | * within the current grace period. |
1018 | * CPU's bit already cleared in rcu_report_qs_rnp() if this | ||
1019 | * race occurred. | ||
1020 | */ | 1079 | */ |
1021 | rdp->passed_quiesc = 0; /* try again later! */ | 1080 | rdp->passed_quiesce = 0; /* need qs for new gp. */ |
1022 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1081 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1023 | return; | 1082 | return; |
1024 | } | 1083 | } |
@@ -1062,14 +1121,14 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1062 | * Was there a quiescent state since the beginning of the grace | 1121 | * Was there a quiescent state since the beginning of the grace |
1063 | * period? If no, then exit and wait for the next call. | 1122 | * period? If no, then exit and wait for the next call. |
1064 | */ | 1123 | */ |
1065 | if (!rdp->passed_quiesc) | 1124 | if (!rdp->passed_quiesce) |
1066 | return; | 1125 | return; |
1067 | 1126 | ||
1068 | /* | 1127 | /* |
1069 | * Tell RCU we are done (but rcu_report_qs_rdp() will be the | 1128 | * Tell RCU we are done (but rcu_report_qs_rdp() will be the |
1070 | * judge of that). | 1129 | * judge of that). |
1071 | */ | 1130 | */ |
1072 | rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed); | 1131 | rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesce_gpnum); |
1073 | } | 1132 | } |
1074 | 1133 | ||
1075 | #ifdef CONFIG_HOTPLUG_CPU | 1134 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -1130,11 +1189,20 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
1130 | if (rnp->qsmaskinit != 0) { | 1189 | if (rnp->qsmaskinit != 0) { |
1131 | if (rnp != rdp->mynode) | 1190 | if (rnp != rdp->mynode) |
1132 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1191 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
1192 | else | ||
1193 | trace_rcu_grace_period(rsp->name, | ||
1194 | rnp->gpnum + 1 - | ||
1195 | !!(rnp->qsmask & mask), | ||
1196 | "cpuofl"); | ||
1133 | break; | 1197 | break; |
1134 | } | 1198 | } |
1135 | if (rnp == rdp->mynode) | 1199 | if (rnp == rdp->mynode) { |
1200 | trace_rcu_grace_period(rsp->name, | ||
1201 | rnp->gpnum + 1 - | ||
1202 | !!(rnp->qsmask & mask), | ||
1203 | "cpuofl"); | ||
1136 | need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); | 1204 | need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); |
1137 | else | 1205 | } else |
1138 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1206 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
1139 | mask = rnp->grpmask; | 1207 | mask = rnp->grpmask; |
1140 | rnp = rnp->parent; | 1208 | rnp = rnp->parent; |
@@ -1190,17 +1258,22 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1190 | { | 1258 | { |
1191 | unsigned long flags; | 1259 | unsigned long flags; |
1192 | struct rcu_head *next, *list, **tail; | 1260 | struct rcu_head *next, *list, **tail; |
1193 | int count; | 1261 | int bl, count; |
1194 | 1262 | ||
1195 | /* If no callbacks are ready, just return.*/ | 1263 | /* If no callbacks are ready, just return.*/ |
1196 | if (!cpu_has_callbacks_ready_to_invoke(rdp)) | 1264 | if (!cpu_has_callbacks_ready_to_invoke(rdp)) { |
1265 | trace_rcu_batch_start(rsp->name, 0, 0); | ||
1266 | trace_rcu_batch_end(rsp->name, 0); | ||
1197 | return; | 1267 | return; |
1268 | } | ||
1198 | 1269 | ||
1199 | /* | 1270 | /* |
1200 | * Extract the list of ready callbacks, disabling to prevent | 1271 | * Extract the list of ready callbacks, disabling to prevent |
1201 | * races with call_rcu() from interrupt handlers. | 1272 | * races with call_rcu() from interrupt handlers. |
1202 | */ | 1273 | */ |
1203 | local_irq_save(flags); | 1274 | local_irq_save(flags); |
1275 | bl = rdp->blimit; | ||
1276 | trace_rcu_batch_start(rsp->name, rdp->qlen, bl); | ||
1204 | list = rdp->nxtlist; | 1277 | list = rdp->nxtlist; |
1205 | rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; | 1278 | rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; |
1206 | *rdp->nxttail[RCU_DONE_TAIL] = NULL; | 1279 | *rdp->nxttail[RCU_DONE_TAIL] = NULL; |
@@ -1216,13 +1289,14 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1216 | next = list->next; | 1289 | next = list->next; |
1217 | prefetch(next); | 1290 | prefetch(next); |
1218 | debug_rcu_head_unqueue(list); | 1291 | debug_rcu_head_unqueue(list); |
1219 | __rcu_reclaim(list); | 1292 | __rcu_reclaim(rsp->name, list); |
1220 | list = next; | 1293 | list = next; |
1221 | if (++count >= rdp->blimit) | 1294 | if (++count >= bl) |
1222 | break; | 1295 | break; |
1223 | } | 1296 | } |
1224 | 1297 | ||
1225 | local_irq_save(flags); | 1298 | local_irq_save(flags); |
1299 | trace_rcu_batch_end(rsp->name, count); | ||
1226 | 1300 | ||
1227 | /* Update count, and requeue any remaining callbacks. */ | 1301 | /* Update count, and requeue any remaining callbacks. */ |
1228 | rdp->qlen -= count; | 1302 | rdp->qlen -= count; |
@@ -1250,7 +1324,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1250 | 1324 | ||
1251 | local_irq_restore(flags); | 1325 | local_irq_restore(flags); |
1252 | 1326 | ||
1253 | /* Re-raise the RCU softirq if there are callbacks remaining. */ | 1327 | /* Re-invoke RCU core processing if there are callbacks remaining. */ |
1254 | if (cpu_has_callbacks_ready_to_invoke(rdp)) | 1328 | if (cpu_has_callbacks_ready_to_invoke(rdp)) |
1255 | invoke_rcu_core(); | 1329 | invoke_rcu_core(); |
1256 | } | 1330 | } |
@@ -1258,7 +1332,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1258 | /* | 1332 | /* |
1259 | * Check to see if this CPU is in a non-context-switch quiescent state | 1333 | * Check to see if this CPU is in a non-context-switch quiescent state |
1260 | * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). | 1334 | * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). |
1261 | * Also schedule the RCU softirq handler. | 1335 | * Also schedule RCU core processing. |
1262 | * | 1336 | * |
1263 | * This function must be called with hardirqs disabled. It is normally | 1337 | * This function must be called with hardirqs disabled. It is normally |
1264 | * invoked from the scheduling-clock interrupt. If rcu_pending returns | 1338 | * invoked from the scheduling-clock interrupt. If rcu_pending returns |
@@ -1266,6 +1340,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1266 | */ | 1340 | */ |
1267 | void rcu_check_callbacks(int cpu, int user) | 1341 | void rcu_check_callbacks(int cpu, int user) |
1268 | { | 1342 | { |
1343 | trace_rcu_utilization("Start scheduler-tick"); | ||
1269 | if (user || | 1344 | if (user || |
1270 | (idle_cpu(cpu) && rcu_scheduler_active && | 1345 | (idle_cpu(cpu) && rcu_scheduler_active && |
1271 | !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { | 1346 | !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { |
@@ -1299,6 +1374,7 @@ void rcu_check_callbacks(int cpu, int user) | |||
1299 | rcu_preempt_check_callbacks(cpu); | 1374 | rcu_preempt_check_callbacks(cpu); |
1300 | if (rcu_pending(cpu)) | 1375 | if (rcu_pending(cpu)) |
1301 | invoke_rcu_core(); | 1376 | invoke_rcu_core(); |
1377 | trace_rcu_utilization("End scheduler-tick"); | ||
1302 | } | 1378 | } |
1303 | 1379 | ||
1304 | #ifdef CONFIG_SMP | 1380 | #ifdef CONFIG_SMP |
@@ -1360,10 +1436,14 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | |||
1360 | unsigned long flags; | 1436 | unsigned long flags; |
1361 | struct rcu_node *rnp = rcu_get_root(rsp); | 1437 | struct rcu_node *rnp = rcu_get_root(rsp); |
1362 | 1438 | ||
1363 | if (!rcu_gp_in_progress(rsp)) | 1439 | trace_rcu_utilization("Start fqs"); |
1440 | if (!rcu_gp_in_progress(rsp)) { | ||
1441 | trace_rcu_utilization("End fqs"); | ||
1364 | return; /* No grace period in progress, nothing to force. */ | 1442 | return; /* No grace period in progress, nothing to force. */ |
1443 | } | ||
1365 | if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { | 1444 | if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { |
1366 | rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ | 1445 | rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ |
1446 | trace_rcu_utilization("End fqs"); | ||
1367 | return; /* Someone else is already on the job. */ | 1447 | return; /* Someone else is already on the job. */ |
1368 | } | 1448 | } |
1369 | if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) | 1449 | if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) |
@@ -1412,11 +1492,13 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | |||
1412 | raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ | 1492 | raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ |
1413 | rsp->fqs_need_gp = 0; | 1493 | rsp->fqs_need_gp = 0; |
1414 | rcu_start_gp(rsp, flags); /* releases rnp->lock */ | 1494 | rcu_start_gp(rsp, flags); /* releases rnp->lock */ |
1495 | trace_rcu_utilization("End fqs"); | ||
1415 | return; | 1496 | return; |
1416 | } | 1497 | } |
1417 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ | 1498 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ |
1418 | unlock_fqs_ret: | 1499 | unlock_fqs_ret: |
1419 | raw_spin_unlock_irqrestore(&rsp->fqslock, flags); | 1500 | raw_spin_unlock_irqrestore(&rsp->fqslock, flags); |
1501 | trace_rcu_utilization("End fqs"); | ||
1420 | } | 1502 | } |
1421 | 1503 | ||
1422 | #else /* #ifdef CONFIG_SMP */ | 1504 | #else /* #ifdef CONFIG_SMP */ |
@@ -1429,9 +1511,9 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | |||
1429 | #endif /* #else #ifdef CONFIG_SMP */ | 1511 | #endif /* #else #ifdef CONFIG_SMP */ |
1430 | 1512 | ||
1431 | /* | 1513 | /* |
1432 | * This does the RCU processing work from softirq context for the | 1514 | * This does the RCU core processing work for the specified rcu_state |
1433 | * specified rcu_state and rcu_data structures. This may be called | 1515 | * and rcu_data structures. This may be called only from the CPU to |
1434 | * only from the CPU to whom the rdp belongs. | 1516 | * whom the rdp belongs. |
1435 | */ | 1517 | */ |
1436 | static void | 1518 | static void |
1437 | __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | 1519 | __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) |
@@ -1468,24 +1550,24 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1468 | } | 1550 | } |
1469 | 1551 | ||
1470 | /* | 1552 | /* |
1471 | * Do softirq processing for the current CPU. | 1553 | * Do RCU core processing for the current CPU. |
1472 | */ | 1554 | */ |
1473 | static void rcu_process_callbacks(struct softirq_action *unused) | 1555 | static void rcu_process_callbacks(struct softirq_action *unused) |
1474 | { | 1556 | { |
1557 | trace_rcu_utilization("Start RCU core"); | ||
1475 | __rcu_process_callbacks(&rcu_sched_state, | 1558 | __rcu_process_callbacks(&rcu_sched_state, |
1476 | &__get_cpu_var(rcu_sched_data)); | 1559 | &__get_cpu_var(rcu_sched_data)); |
1477 | __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); | 1560 | __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); |
1478 | rcu_preempt_process_callbacks(); | 1561 | rcu_preempt_process_callbacks(); |
1479 | 1562 | trace_rcu_utilization("End RCU core"); | |
1480 | /* If we are last CPU on way to dyntick-idle mode, accelerate it. */ | ||
1481 | rcu_needs_cpu_flush(); | ||
1482 | } | 1563 | } |
1483 | 1564 | ||
1484 | /* | 1565 | /* |
1485 | * Wake up the current CPU's kthread. This replaces raise_softirq() | 1566 | * Schedule RCU callback invocation. If the specified type of RCU |
1486 | * in earlier versions of RCU. Note that because we are running on | 1567 | * does not support RCU priority boosting, just do a direct call, |
1487 | * the current CPU with interrupts disabled, the rcu_cpu_kthread_task | 1568 | * otherwise wake up the per-CPU kernel kthread. Note that because we |
1488 | * cannot disappear out from under us. | 1569 | * are running on the current CPU with interrupts disabled, the |
1570 | * rcu_cpu_kthread_task cannot disappear out from under us. | ||
1489 | */ | 1571 | */ |
1490 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | 1572 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) |
1491 | { | 1573 | { |
@@ -1530,6 +1612,12 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
1530 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; | 1612 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; |
1531 | rdp->qlen++; | 1613 | rdp->qlen++; |
1532 | 1614 | ||
1615 | if (__is_kfree_rcu_offset((unsigned long)func)) | ||
1616 | trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, | ||
1617 | rdp->qlen); | ||
1618 | else | ||
1619 | trace_rcu_callback(rsp->name, head, rdp->qlen); | ||
1620 | |||
1533 | /* If interrupts were disabled, don't dive into RCU core. */ | 1621 | /* If interrupts were disabled, don't dive into RCU core. */ |
1534 | if (irqs_disabled_flags(flags)) { | 1622 | if (irqs_disabled_flags(flags)) { |
1535 | local_irq_restore(flags); | 1623 | local_irq_restore(flags); |
@@ -1613,18 +1701,9 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); | |||
1613 | */ | 1701 | */ |
1614 | void synchronize_sched(void) | 1702 | void synchronize_sched(void) |
1615 | { | 1703 | { |
1616 | struct rcu_synchronize rcu; | ||
1617 | |||
1618 | if (rcu_blocking_is_gp()) | 1704 | if (rcu_blocking_is_gp()) |
1619 | return; | 1705 | return; |
1620 | 1706 | wait_rcu_gp(call_rcu_sched); | |
1621 | init_rcu_head_on_stack(&rcu.head); | ||
1622 | init_completion(&rcu.completion); | ||
1623 | /* Will wake me after RCU finished. */ | ||
1624 | call_rcu_sched(&rcu.head, wakeme_after_rcu); | ||
1625 | /* Wait for it. */ | ||
1626 | wait_for_completion(&rcu.completion); | ||
1627 | destroy_rcu_head_on_stack(&rcu.head); | ||
1628 | } | 1707 | } |
1629 | EXPORT_SYMBOL_GPL(synchronize_sched); | 1708 | EXPORT_SYMBOL_GPL(synchronize_sched); |
1630 | 1709 | ||
@@ -1639,18 +1718,9 @@ EXPORT_SYMBOL_GPL(synchronize_sched); | |||
1639 | */ | 1718 | */ |
1640 | void synchronize_rcu_bh(void) | 1719 | void synchronize_rcu_bh(void) |
1641 | { | 1720 | { |
1642 | struct rcu_synchronize rcu; | ||
1643 | |||
1644 | if (rcu_blocking_is_gp()) | 1721 | if (rcu_blocking_is_gp()) |
1645 | return; | 1722 | return; |
1646 | 1723 | wait_rcu_gp(call_rcu_bh); | |
1647 | init_rcu_head_on_stack(&rcu.head); | ||
1648 | init_completion(&rcu.completion); | ||
1649 | /* Will wake me after RCU finished. */ | ||
1650 | call_rcu_bh(&rcu.head, wakeme_after_rcu); | ||
1651 | /* Wait for it. */ | ||
1652 | wait_for_completion(&rcu.completion); | ||
1653 | destroy_rcu_head_on_stack(&rcu.head); | ||
1654 | } | 1724 | } |
1655 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); | 1725 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); |
1656 | 1726 | ||
@@ -1671,7 +1741,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1671 | check_cpu_stall(rsp, rdp); | 1741 | check_cpu_stall(rsp, rdp); |
1672 | 1742 | ||
1673 | /* Is the RCU core waiting for a quiescent state from this CPU? */ | 1743 | /* Is the RCU core waiting for a quiescent state from this CPU? */ |
1674 | if (rdp->qs_pending && !rdp->passed_quiesc) { | 1744 | if (rcu_scheduler_fully_active && |
1745 | rdp->qs_pending && !rdp->passed_quiesce) { | ||
1675 | 1746 | ||
1676 | /* | 1747 | /* |
1677 | * If force_quiescent_state() coming soon and this CPU | 1748 | * If force_quiescent_state() coming soon and this CPU |
@@ -1683,7 +1754,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1683 | ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, | 1754 | ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, |
1684 | jiffies)) | 1755 | jiffies)) |
1685 | set_need_resched(); | 1756 | set_need_resched(); |
1686 | } else if (rdp->qs_pending && rdp->passed_quiesc) { | 1757 | } else if (rdp->qs_pending && rdp->passed_quiesce) { |
1687 | rdp->n_rp_report_qs++; | 1758 | rdp->n_rp_report_qs++; |
1688 | return 1; | 1759 | return 1; |
1689 | } | 1760 | } |
@@ -1846,6 +1917,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
1846 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); | 1917 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); |
1847 | #endif /* #ifdef CONFIG_NO_HZ */ | 1918 | #endif /* #ifdef CONFIG_NO_HZ */ |
1848 | rdp->cpu = cpu; | 1919 | rdp->cpu = cpu; |
1920 | rdp->rsp = rsp; | ||
1849 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1921 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1850 | } | 1922 | } |
1851 | 1923 | ||
@@ -1865,8 +1937,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
1865 | 1937 | ||
1866 | /* Set up local state, ensuring consistent view of global state. */ | 1938 | /* Set up local state, ensuring consistent view of global state. */ |
1867 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1939 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1868 | rdp->passed_quiesc = 0; /* We could be racing with new GP, */ | ||
1869 | rdp->qs_pending = 1; /* so set up to respond to current GP. */ | ||
1870 | rdp->beenonline = 1; /* We have now been online. */ | 1940 | rdp->beenonline = 1; /* We have now been online. */ |
1871 | rdp->preemptible = preemptible; | 1941 | rdp->preemptible = preemptible; |
1872 | rdp->qlen_last_fqs_check = 0; | 1942 | rdp->qlen_last_fqs_check = 0; |
@@ -1891,9 +1961,17 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
1891 | rnp->qsmaskinit |= mask; | 1961 | rnp->qsmaskinit |= mask; |
1892 | mask = rnp->grpmask; | 1962 | mask = rnp->grpmask; |
1893 | if (rnp == rdp->mynode) { | 1963 | if (rnp == rdp->mynode) { |
1894 | rdp->gpnum = rnp->completed; /* if GP in progress... */ | 1964 | /* |
1965 | * If there is a grace period in progress, we will | ||
1966 | * set up to wait for it next time we run the | ||
1967 | * RCU core code. | ||
1968 | */ | ||
1969 | rdp->gpnum = rnp->completed; | ||
1895 | rdp->completed = rnp->completed; | 1970 | rdp->completed = rnp->completed; |
1896 | rdp->passed_quiesc_completed = rnp->completed - 1; | 1971 | rdp->passed_quiesce = 0; |
1972 | rdp->qs_pending = 0; | ||
1973 | rdp->passed_quiesce_gpnum = rnp->gpnum - 1; | ||
1974 | trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl"); | ||
1897 | } | 1975 | } |
1898 | raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ | 1976 | raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ |
1899 | rnp = rnp->parent; | 1977 | rnp = rnp->parent; |
@@ -1919,6 +1997,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
1919 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | 1997 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); |
1920 | struct rcu_node *rnp = rdp->mynode; | 1998 | struct rcu_node *rnp = rdp->mynode; |
1921 | 1999 | ||
2000 | trace_rcu_utilization("Start CPU hotplug"); | ||
1922 | switch (action) { | 2001 | switch (action) { |
1923 | case CPU_UP_PREPARE: | 2002 | case CPU_UP_PREPARE: |
1924 | case CPU_UP_PREPARE_FROZEN: | 2003 | case CPU_UP_PREPARE_FROZEN: |
@@ -1954,6 +2033,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
1954 | default: | 2033 | default: |
1955 | break; | 2034 | break; |
1956 | } | 2035 | } |
2036 | trace_rcu_utilization("End CPU hotplug"); | ||
1957 | return NOTIFY_OK; | 2037 | return NOTIFY_OK; |
1958 | } | 2038 | } |
1959 | 2039 | ||
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 01b2ccda26fb..849ce9ec51fe 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -230,9 +230,9 @@ struct rcu_data { | |||
230 | /* in order to detect GP end. */ | 230 | /* in order to detect GP end. */ |
231 | unsigned long gpnum; /* Highest gp number that this CPU */ | 231 | unsigned long gpnum; /* Highest gp number that this CPU */ |
232 | /* is aware of having started. */ | 232 | /* is aware of having started. */ |
233 | unsigned long passed_quiesc_completed; | 233 | unsigned long passed_quiesce_gpnum; |
234 | /* Value of completed at time of qs. */ | 234 | /* gpnum at time of quiescent state. */ |
235 | bool passed_quiesc; /* User-mode/idle loop etc. */ | 235 | bool passed_quiesce; /* User-mode/idle loop etc. */ |
236 | bool qs_pending; /* Core waits for quiesc state. */ | 236 | bool qs_pending; /* Core waits for quiesc state. */ |
237 | bool beenonline; /* CPU online at least once. */ | 237 | bool beenonline; /* CPU online at least once. */ |
238 | bool preemptible; /* Preemptible RCU? */ | 238 | bool preemptible; /* Preemptible RCU? */ |
@@ -299,6 +299,7 @@ struct rcu_data { | |||
299 | unsigned long n_rp_need_nothing; | 299 | unsigned long n_rp_need_nothing; |
300 | 300 | ||
301 | int cpu; | 301 | int cpu; |
302 | struct rcu_state *rsp; | ||
302 | }; | 303 | }; |
303 | 304 | ||
304 | /* Values for signaled field in struct rcu_state. */ | 305 | /* Values for signaled field in struct rcu_state. */ |
@@ -417,6 +418,13 @@ extern struct rcu_state rcu_preempt_state; | |||
417 | DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); | 418 | DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); |
418 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 419 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
419 | 420 | ||
421 | #ifdef CONFIG_RCU_BOOST | ||
422 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | ||
423 | DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu); | ||
424 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | ||
425 | DECLARE_PER_CPU(char, rcu_cpu_has_work); | ||
426 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
427 | |||
420 | #ifndef RCU_TREE_NONCORE | 428 | #ifndef RCU_TREE_NONCORE |
421 | 429 | ||
422 | /* Forward declarations for rcutree_plugin.h */ | 430 | /* Forward declarations for rcutree_plugin.h */ |
@@ -430,7 +438,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, | |||
430 | static void rcu_stop_cpu_kthread(int cpu); | 438 | static void rcu_stop_cpu_kthread(int cpu); |
431 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 439 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
432 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); | 440 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); |
433 | static void rcu_print_task_stall(struct rcu_node *rnp); | 441 | static int rcu_print_task_stall(struct rcu_node *rnp); |
434 | static void rcu_preempt_stall_reset(void); | 442 | static void rcu_preempt_stall_reset(void); |
435 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); | 443 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); |
436 | #ifdef CONFIG_HOTPLUG_CPU | 444 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -450,7 +458,6 @@ static int rcu_preempt_needs_cpu(int cpu); | |||
450 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu); | 458 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu); |
451 | static void rcu_preempt_send_cbs_to_online(void); | 459 | static void rcu_preempt_send_cbs_to_online(void); |
452 | static void __init __rcu_init_preempt(void); | 460 | static void __init __rcu_init_preempt(void); |
453 | static void rcu_needs_cpu_flush(void); | ||
454 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); | 461 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); |
455 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | 462 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); |
456 | static void invoke_rcu_callbacks_kthread(void); | 463 | static void invoke_rcu_callbacks_kthread(void); |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 8aafbb80b8b0..4b9b9f8a4184 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -27,6 +27,14 @@ | |||
27 | #include <linux/delay.h> | 27 | #include <linux/delay.h> |
28 | #include <linux/stop_machine.h> | 28 | #include <linux/stop_machine.h> |
29 | 29 | ||
30 | #define RCU_KTHREAD_PRIO 1 | ||
31 | |||
32 | #ifdef CONFIG_RCU_BOOST | ||
33 | #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO | ||
34 | #else | ||
35 | #define RCU_BOOST_PRIO RCU_KTHREAD_PRIO | ||
36 | #endif | ||
37 | |||
30 | /* | 38 | /* |
31 | * Check the RCU kernel configuration parameters and print informative | 39 | * Check the RCU kernel configuration parameters and print informative |
32 | * messages about anything out of the ordinary. If you like #ifdef, you | 40 | * messages about anything out of the ordinary. If you like #ifdef, you |
@@ -64,7 +72,7 @@ static void __init rcu_bootup_announce_oddness(void) | |||
64 | 72 | ||
65 | #ifdef CONFIG_TREE_PREEMPT_RCU | 73 | #ifdef CONFIG_TREE_PREEMPT_RCU |
66 | 74 | ||
67 | struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); | 75 | struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt); |
68 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); | 76 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); |
69 | static struct rcu_state *rcu_state = &rcu_preempt_state; | 77 | static struct rcu_state *rcu_state = &rcu_preempt_state; |
70 | 78 | ||
@@ -122,9 +130,11 @@ static void rcu_preempt_qs(int cpu) | |||
122 | { | 130 | { |
123 | struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); | 131 | struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); |
124 | 132 | ||
125 | rdp->passed_quiesc_completed = rdp->gpnum - 1; | 133 | rdp->passed_quiesce_gpnum = rdp->gpnum; |
126 | barrier(); | 134 | barrier(); |
127 | rdp->passed_quiesc = 1; | 135 | if (rdp->passed_quiesce == 0) |
136 | trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs"); | ||
137 | rdp->passed_quiesce = 1; | ||
128 | current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; | 138 | current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; |
129 | } | 139 | } |
130 | 140 | ||
@@ -190,6 +200,11 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
190 | if (rnp->qsmask & rdp->grpmask) | 200 | if (rnp->qsmask & rdp->grpmask) |
191 | rnp->gp_tasks = &t->rcu_node_entry; | 201 | rnp->gp_tasks = &t->rcu_node_entry; |
192 | } | 202 | } |
203 | trace_rcu_preempt_task(rdp->rsp->name, | ||
204 | t->pid, | ||
205 | (rnp->qsmask & rdp->grpmask) | ||
206 | ? rnp->gpnum | ||
207 | : rnp->gpnum + 1); | ||
193 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 208 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
194 | } else if (t->rcu_read_lock_nesting < 0 && | 209 | } else if (t->rcu_read_lock_nesting < 0 && |
195 | t->rcu_read_unlock_special) { | 210 | t->rcu_read_unlock_special) { |
@@ -299,6 +314,9 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
299 | int empty_exp; | 314 | int empty_exp; |
300 | unsigned long flags; | 315 | unsigned long flags; |
301 | struct list_head *np; | 316 | struct list_head *np; |
317 | #ifdef CONFIG_RCU_BOOST | ||
318 | struct rt_mutex *rbmp = NULL; | ||
319 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
302 | struct rcu_node *rnp; | 320 | struct rcu_node *rnp; |
303 | int special; | 321 | int special; |
304 | 322 | ||
@@ -344,6 +362,9 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
344 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ | 362 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ |
345 | np = rcu_next_node_entry(t, rnp); | 363 | np = rcu_next_node_entry(t, rnp); |
346 | list_del_init(&t->rcu_node_entry); | 364 | list_del_init(&t->rcu_node_entry); |
365 | t->rcu_blocked_node = NULL; | ||
366 | trace_rcu_unlock_preempted_task("rcu_preempt", | ||
367 | rnp->gpnum, t->pid); | ||
347 | if (&t->rcu_node_entry == rnp->gp_tasks) | 368 | if (&t->rcu_node_entry == rnp->gp_tasks) |
348 | rnp->gp_tasks = np; | 369 | rnp->gp_tasks = np; |
349 | if (&t->rcu_node_entry == rnp->exp_tasks) | 370 | if (&t->rcu_node_entry == rnp->exp_tasks) |
@@ -351,30 +372,34 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
351 | #ifdef CONFIG_RCU_BOOST | 372 | #ifdef CONFIG_RCU_BOOST |
352 | if (&t->rcu_node_entry == rnp->boost_tasks) | 373 | if (&t->rcu_node_entry == rnp->boost_tasks) |
353 | rnp->boost_tasks = np; | 374 | rnp->boost_tasks = np; |
354 | /* Snapshot and clear ->rcu_boosted with rcu_node lock held. */ | 375 | /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */ |
355 | if (t->rcu_boosted) { | 376 | if (t->rcu_boost_mutex) { |
356 | special |= RCU_READ_UNLOCK_BOOSTED; | 377 | rbmp = t->rcu_boost_mutex; |
357 | t->rcu_boosted = 0; | 378 | t->rcu_boost_mutex = NULL; |
358 | } | 379 | } |
359 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 380 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
360 | t->rcu_blocked_node = NULL; | ||
361 | 381 | ||
362 | /* | 382 | /* |
363 | * If this was the last task on the current list, and if | 383 | * If this was the last task on the current list, and if |
364 | * we aren't waiting on any CPUs, report the quiescent state. | 384 | * we aren't waiting on any CPUs, report the quiescent state. |
365 | * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. | 385 | * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. |
366 | */ | 386 | */ |
367 | if (empty) | 387 | if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { |
368 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 388 | trace_rcu_quiescent_state_report("preempt_rcu", |
369 | else | 389 | rnp->gpnum, |
390 | 0, rnp->qsmask, | ||
391 | rnp->level, | ||
392 | rnp->grplo, | ||
393 | rnp->grphi, | ||
394 | !!rnp->gp_tasks); | ||
370 | rcu_report_unblock_qs_rnp(rnp, flags); | 395 | rcu_report_unblock_qs_rnp(rnp, flags); |
396 | } else | ||
397 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
371 | 398 | ||
372 | #ifdef CONFIG_RCU_BOOST | 399 | #ifdef CONFIG_RCU_BOOST |
373 | /* Unboost if we were boosted. */ | 400 | /* Unboost if we were boosted. */ |
374 | if (special & RCU_READ_UNLOCK_BOOSTED) { | 401 | if (rbmp) |
375 | rt_mutex_unlock(t->rcu_boost_mutex); | 402 | rt_mutex_unlock(rbmp); |
376 | t->rcu_boost_mutex = NULL; | ||
377 | } | ||
378 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 403 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
379 | 404 | ||
380 | /* | 405 | /* |
@@ -399,10 +424,10 @@ void __rcu_read_unlock(void) | |||
399 | { | 424 | { |
400 | struct task_struct *t = current; | 425 | struct task_struct *t = current; |
401 | 426 | ||
402 | barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */ | ||
403 | if (t->rcu_read_lock_nesting != 1) | 427 | if (t->rcu_read_lock_nesting != 1) |
404 | --t->rcu_read_lock_nesting; | 428 | --t->rcu_read_lock_nesting; |
405 | else { | 429 | else { |
430 | barrier(); /* critical section before exit code. */ | ||
406 | t->rcu_read_lock_nesting = INT_MIN; | 431 | t->rcu_read_lock_nesting = INT_MIN; |
407 | barrier(); /* assign before ->rcu_read_unlock_special load */ | 432 | barrier(); /* assign before ->rcu_read_unlock_special load */ |
408 | if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) | 433 | if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) |
@@ -466,16 +491,20 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp) | |||
466 | * Scan the current list of tasks blocked within RCU read-side critical | 491 | * Scan the current list of tasks blocked within RCU read-side critical |
467 | * sections, printing out the tid of each. | 492 | * sections, printing out the tid of each. |
468 | */ | 493 | */ |
469 | static void rcu_print_task_stall(struct rcu_node *rnp) | 494 | static int rcu_print_task_stall(struct rcu_node *rnp) |
470 | { | 495 | { |
471 | struct task_struct *t; | 496 | struct task_struct *t; |
497 | int ndetected = 0; | ||
472 | 498 | ||
473 | if (!rcu_preempt_blocked_readers_cgp(rnp)) | 499 | if (!rcu_preempt_blocked_readers_cgp(rnp)) |
474 | return; | 500 | return 0; |
475 | t = list_entry(rnp->gp_tasks, | 501 | t = list_entry(rnp->gp_tasks, |
476 | struct task_struct, rcu_node_entry); | 502 | struct task_struct, rcu_node_entry); |
477 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) | 503 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { |
478 | printk(" P%d", t->pid); | 504 | printk(" P%d", t->pid); |
505 | ndetected++; | ||
506 | } | ||
507 | return ndetected; | ||
479 | } | 508 | } |
480 | 509 | ||
481 | /* | 510 | /* |
@@ -656,18 +685,9 @@ EXPORT_SYMBOL_GPL(call_rcu); | |||
656 | */ | 685 | */ |
657 | void synchronize_rcu(void) | 686 | void synchronize_rcu(void) |
658 | { | 687 | { |
659 | struct rcu_synchronize rcu; | ||
660 | |||
661 | if (!rcu_scheduler_active) | 688 | if (!rcu_scheduler_active) |
662 | return; | 689 | return; |
663 | 690 | wait_rcu_gp(call_rcu); | |
664 | init_rcu_head_on_stack(&rcu.head); | ||
665 | init_completion(&rcu.completion); | ||
666 | /* Will wake me after RCU finished. */ | ||
667 | call_rcu(&rcu.head, wakeme_after_rcu); | ||
668 | /* Wait for it. */ | ||
669 | wait_for_completion(&rcu.completion); | ||
670 | destroy_rcu_head_on_stack(&rcu.head); | ||
671 | } | 691 | } |
672 | EXPORT_SYMBOL_GPL(synchronize_rcu); | 692 | EXPORT_SYMBOL_GPL(synchronize_rcu); |
673 | 693 | ||
@@ -968,8 +988,9 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp) | |||
968 | * Because preemptible RCU does not exist, we never have to check for | 988 | * Because preemptible RCU does not exist, we never have to check for |
969 | * tasks blocked within RCU read-side critical sections. | 989 | * tasks blocked within RCU read-side critical sections. |
970 | */ | 990 | */ |
971 | static void rcu_print_task_stall(struct rcu_node *rnp) | 991 | static int rcu_print_task_stall(struct rcu_node *rnp) |
972 | { | 992 | { |
993 | return 0; | ||
973 | } | 994 | } |
974 | 995 | ||
975 | /* | 996 | /* |
@@ -1136,6 +1157,8 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp) | |||
1136 | 1157 | ||
1137 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | 1158 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ |
1138 | 1159 | ||
1160 | static struct lock_class_key rcu_boost_class; | ||
1161 | |||
1139 | /* | 1162 | /* |
1140 | * Carry out RCU priority boosting on the task indicated by ->exp_tasks | 1163 | * Carry out RCU priority boosting on the task indicated by ->exp_tasks |
1141 | * or ->boost_tasks, advancing the pointer to the next task in the | 1164 | * or ->boost_tasks, advancing the pointer to the next task in the |
@@ -1198,8 +1221,10 @@ static int rcu_boost(struct rcu_node *rnp) | |||
1198 | */ | 1221 | */ |
1199 | t = container_of(tb, struct task_struct, rcu_node_entry); | 1222 | t = container_of(tb, struct task_struct, rcu_node_entry); |
1200 | rt_mutex_init_proxy_locked(&mtx, t); | 1223 | rt_mutex_init_proxy_locked(&mtx, t); |
1224 | /* Avoid lockdep false positives. This rt_mutex is its own thing. */ | ||
1225 | lockdep_set_class_and_name(&mtx.wait_lock, &rcu_boost_class, | ||
1226 | "rcu_boost_mutex"); | ||
1201 | t->rcu_boost_mutex = &mtx; | 1227 | t->rcu_boost_mutex = &mtx; |
1202 | t->rcu_boosted = 1; | ||
1203 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1228 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1204 | rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ | 1229 | rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ |
1205 | rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ | 1230 | rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ |
@@ -1228,9 +1253,12 @@ static int rcu_boost_kthread(void *arg) | |||
1228 | int spincnt = 0; | 1253 | int spincnt = 0; |
1229 | int more2boost; | 1254 | int more2boost; |
1230 | 1255 | ||
1256 | trace_rcu_utilization("Start boost kthread@init"); | ||
1231 | for (;;) { | 1257 | for (;;) { |
1232 | rnp->boost_kthread_status = RCU_KTHREAD_WAITING; | 1258 | rnp->boost_kthread_status = RCU_KTHREAD_WAITING; |
1259 | trace_rcu_utilization("End boost kthread@rcu_wait"); | ||
1233 | rcu_wait(rnp->boost_tasks || rnp->exp_tasks); | 1260 | rcu_wait(rnp->boost_tasks || rnp->exp_tasks); |
1261 | trace_rcu_utilization("Start boost kthread@rcu_wait"); | ||
1234 | rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; | 1262 | rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; |
1235 | more2boost = rcu_boost(rnp); | 1263 | more2boost = rcu_boost(rnp); |
1236 | if (more2boost) | 1264 | if (more2boost) |
@@ -1238,11 +1266,14 @@ static int rcu_boost_kthread(void *arg) | |||
1238 | else | 1266 | else |
1239 | spincnt = 0; | 1267 | spincnt = 0; |
1240 | if (spincnt > 10) { | 1268 | if (spincnt > 10) { |
1269 | trace_rcu_utilization("End boost kthread@rcu_yield"); | ||
1241 | rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp); | 1270 | rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp); |
1271 | trace_rcu_utilization("Start boost kthread@rcu_yield"); | ||
1242 | spincnt = 0; | 1272 | spincnt = 0; |
1243 | } | 1273 | } |
1244 | } | 1274 | } |
1245 | /* NOTREACHED */ | 1275 | /* NOTREACHED */ |
1276 | trace_rcu_utilization("End boost kthread@notreached"); | ||
1246 | return 0; | 1277 | return 0; |
1247 | } | 1278 | } |
1248 | 1279 | ||
@@ -1291,11 +1322,9 @@ static void invoke_rcu_callbacks_kthread(void) | |||
1291 | 1322 | ||
1292 | local_irq_save(flags); | 1323 | local_irq_save(flags); |
1293 | __this_cpu_write(rcu_cpu_has_work, 1); | 1324 | __this_cpu_write(rcu_cpu_has_work, 1); |
1294 | if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) { | 1325 | if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && |
1295 | local_irq_restore(flags); | 1326 | current != __this_cpu_read(rcu_cpu_kthread_task)) |
1296 | return; | 1327 | wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); |
1297 | } | ||
1298 | wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); | ||
1299 | local_irq_restore(flags); | 1328 | local_irq_restore(flags); |
1300 | } | 1329 | } |
1301 | 1330 | ||
@@ -1343,13 +1372,13 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |||
1343 | if (rnp->boost_kthread_task != NULL) | 1372 | if (rnp->boost_kthread_task != NULL) |
1344 | return 0; | 1373 | return 0; |
1345 | t = kthread_create(rcu_boost_kthread, (void *)rnp, | 1374 | t = kthread_create(rcu_boost_kthread, (void *)rnp, |
1346 | "rcub%d", rnp_index); | 1375 | "rcub/%d", rnp_index); |
1347 | if (IS_ERR(t)) | 1376 | if (IS_ERR(t)) |
1348 | return PTR_ERR(t); | 1377 | return PTR_ERR(t); |
1349 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1378 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1350 | rnp->boost_kthread_task = t; | 1379 | rnp->boost_kthread_task = t; |
1351 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1380 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1352 | sp.sched_priority = RCU_KTHREAD_PRIO; | 1381 | sp.sched_priority = RCU_BOOST_PRIO; |
1353 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | 1382 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
1354 | wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ | 1383 | wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ |
1355 | return 0; | 1384 | return 0; |
@@ -1444,6 +1473,7 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg) | |||
1444 | { | 1473 | { |
1445 | struct sched_param sp; | 1474 | struct sched_param sp; |
1446 | struct timer_list yield_timer; | 1475 | struct timer_list yield_timer; |
1476 | int prio = current->rt_priority; | ||
1447 | 1477 | ||
1448 | setup_timer_on_stack(&yield_timer, f, arg); | 1478 | setup_timer_on_stack(&yield_timer, f, arg); |
1449 | mod_timer(&yield_timer, jiffies + 2); | 1479 | mod_timer(&yield_timer, jiffies + 2); |
@@ -1451,7 +1481,8 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg) | |||
1451 | sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); | 1481 | sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); |
1452 | set_user_nice(current, 19); | 1482 | set_user_nice(current, 19); |
1453 | schedule(); | 1483 | schedule(); |
1454 | sp.sched_priority = RCU_KTHREAD_PRIO; | 1484 | set_user_nice(current, 0); |
1485 | sp.sched_priority = prio; | ||
1455 | sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | 1486 | sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); |
1456 | del_timer(&yield_timer); | 1487 | del_timer(&yield_timer); |
1457 | } | 1488 | } |
@@ -1489,7 +1520,8 @@ static int rcu_cpu_kthread_should_stop(int cpu) | |||
1489 | 1520 | ||
1490 | /* | 1521 | /* |
1491 | * Per-CPU kernel thread that invokes RCU callbacks. This replaces the | 1522 | * Per-CPU kernel thread that invokes RCU callbacks. This replaces the |
1492 | * earlier RCU softirq. | 1523 | * RCU softirq used in flavors and configurations of RCU that do not |
1524 | * support RCU priority boosting. | ||
1493 | */ | 1525 | */ |
1494 | static int rcu_cpu_kthread(void *arg) | 1526 | static int rcu_cpu_kthread(void *arg) |
1495 | { | 1527 | { |
@@ -1500,9 +1532,12 @@ static int rcu_cpu_kthread(void *arg) | |||
1500 | char work; | 1532 | char work; |
1501 | char *workp = &per_cpu(rcu_cpu_has_work, cpu); | 1533 | char *workp = &per_cpu(rcu_cpu_has_work, cpu); |
1502 | 1534 | ||
1535 | trace_rcu_utilization("Start CPU kthread@init"); | ||
1503 | for (;;) { | 1536 | for (;;) { |
1504 | *statusp = RCU_KTHREAD_WAITING; | 1537 | *statusp = RCU_KTHREAD_WAITING; |
1538 | trace_rcu_utilization("End CPU kthread@rcu_wait"); | ||
1505 | rcu_wait(*workp != 0 || kthread_should_stop()); | 1539 | rcu_wait(*workp != 0 || kthread_should_stop()); |
1540 | trace_rcu_utilization("Start CPU kthread@rcu_wait"); | ||
1506 | local_bh_disable(); | 1541 | local_bh_disable(); |
1507 | if (rcu_cpu_kthread_should_stop(cpu)) { | 1542 | if (rcu_cpu_kthread_should_stop(cpu)) { |
1508 | local_bh_enable(); | 1543 | local_bh_enable(); |
@@ -1523,11 +1558,14 @@ static int rcu_cpu_kthread(void *arg) | |||
1523 | spincnt = 0; | 1558 | spincnt = 0; |
1524 | if (spincnt > 10) { | 1559 | if (spincnt > 10) { |
1525 | *statusp = RCU_KTHREAD_YIELDING; | 1560 | *statusp = RCU_KTHREAD_YIELDING; |
1561 | trace_rcu_utilization("End CPU kthread@rcu_yield"); | ||
1526 | rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); | 1562 | rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); |
1563 | trace_rcu_utilization("Start CPU kthread@rcu_yield"); | ||
1527 | spincnt = 0; | 1564 | spincnt = 0; |
1528 | } | 1565 | } |
1529 | } | 1566 | } |
1530 | *statusp = RCU_KTHREAD_STOPPED; | 1567 | *statusp = RCU_KTHREAD_STOPPED; |
1568 | trace_rcu_utilization("End CPU kthread@term"); | ||
1531 | return 0; | 1569 | return 0; |
1532 | } | 1570 | } |
1533 | 1571 | ||
@@ -1560,7 +1598,10 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) | |||
1560 | if (!rcu_scheduler_fully_active || | 1598 | if (!rcu_scheduler_fully_active || |
1561 | per_cpu(rcu_cpu_kthread_task, cpu) != NULL) | 1599 | per_cpu(rcu_cpu_kthread_task, cpu) != NULL) |
1562 | return 0; | 1600 | return 0; |
1563 | t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); | 1601 | t = kthread_create_on_node(rcu_cpu_kthread, |
1602 | (void *)(long)cpu, | ||
1603 | cpu_to_node(cpu), | ||
1604 | "rcuc/%d", cpu); | ||
1564 | if (IS_ERR(t)) | 1605 | if (IS_ERR(t)) |
1565 | return PTR_ERR(t); | 1606 | return PTR_ERR(t); |
1566 | if (cpu_online(cpu)) | 1607 | if (cpu_online(cpu)) |
@@ -1669,7 +1710,7 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, | |||
1669 | return 0; | 1710 | return 0; |
1670 | if (rnp->node_kthread_task == NULL) { | 1711 | if (rnp->node_kthread_task == NULL) { |
1671 | t = kthread_create(rcu_node_kthread, (void *)rnp, | 1712 | t = kthread_create(rcu_node_kthread, (void *)rnp, |
1672 | "rcun%d", rnp_index); | 1713 | "rcun/%d", rnp_index); |
1673 | if (IS_ERR(t)) | 1714 | if (IS_ERR(t)) |
1674 | return PTR_ERR(t); | 1715 | return PTR_ERR(t); |
1675 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1716 | raw_spin_lock_irqsave(&rnp->lock, flags); |
@@ -1907,15 +1948,6 @@ int rcu_needs_cpu(int cpu) | |||
1907 | return rcu_needs_cpu_quick_check(cpu); | 1948 | return rcu_needs_cpu_quick_check(cpu); |
1908 | } | 1949 | } |
1909 | 1950 | ||
1910 | /* | ||
1911 | * Check to see if we need to continue a callback-flush operations to | ||
1912 | * allow the last CPU to enter dyntick-idle mode. But fast dyntick-idle | ||
1913 | * entry is not configured, so we never do need to. | ||
1914 | */ | ||
1915 | static void rcu_needs_cpu_flush(void) | ||
1916 | { | ||
1917 | } | ||
1918 | |||
1919 | #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | 1951 | #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ |
1920 | 1952 | ||
1921 | #define RCU_NEEDS_CPU_FLUSHES 5 | 1953 | #define RCU_NEEDS_CPU_FLUSHES 5 |
@@ -1991,20 +2023,4 @@ int rcu_needs_cpu(int cpu) | |||
1991 | return c; | 2023 | return c; |
1992 | } | 2024 | } |
1993 | 2025 | ||
1994 | /* | ||
1995 | * Check to see if we need to continue a callback-flush operations to | ||
1996 | * allow the last CPU to enter dyntick-idle mode. | ||
1997 | */ | ||
1998 | static void rcu_needs_cpu_flush(void) | ||
1999 | { | ||
2000 | int cpu = smp_processor_id(); | ||
2001 | unsigned long flags; | ||
2002 | |||
2003 | if (per_cpu(rcu_dyntick_drain, cpu) <= 0) | ||
2004 | return; | ||
2005 | local_irq_save(flags); | ||
2006 | (void)rcu_needs_cpu(cpu); | ||
2007 | local_irq_restore(flags); | ||
2008 | } | ||
2009 | |||
2010 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | 2026 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ |
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 3b0c0986afc0..9feffa4c0695 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
@@ -48,11 +48,6 @@ | |||
48 | 48 | ||
49 | #ifdef CONFIG_RCU_BOOST | 49 | #ifdef CONFIG_RCU_BOOST |
50 | 50 | ||
51 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | ||
52 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_cpu); | ||
53 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | ||
54 | DECLARE_PER_CPU(char, rcu_cpu_has_work); | ||
55 | |||
56 | static char convert_kthread_status(unsigned int kthread_status) | 51 | static char convert_kthread_status(unsigned int kthread_status) |
57 | { | 52 | { |
58 | if (kthread_status > RCU_KTHREAD_MAX) | 53 | if (kthread_status > RCU_KTHREAD_MAX) |
@@ -66,11 +61,11 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | |||
66 | { | 61 | { |
67 | if (!rdp->beenonline) | 62 | if (!rdp->beenonline) |
68 | return; | 63 | return; |
69 | seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pqc=%lu qp=%d", | 64 | seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pgp=%lu qp=%d", |
70 | rdp->cpu, | 65 | rdp->cpu, |
71 | cpu_is_offline(rdp->cpu) ? '!' : ' ', | 66 | cpu_is_offline(rdp->cpu) ? '!' : ' ', |
72 | rdp->completed, rdp->gpnum, | 67 | rdp->completed, rdp->gpnum, |
73 | rdp->passed_quiesc, rdp->passed_quiesc_completed, | 68 | rdp->passed_quiesce, rdp->passed_quiesce_gpnum, |
74 | rdp->qs_pending); | 69 | rdp->qs_pending); |
75 | #ifdef CONFIG_NO_HZ | 70 | #ifdef CONFIG_NO_HZ |
76 | seq_printf(m, " dt=%d/%d/%d df=%lu", | 71 | seq_printf(m, " dt=%d/%d/%d df=%lu", |
@@ -144,7 +139,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | |||
144 | rdp->cpu, | 139 | rdp->cpu, |
145 | cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"", | 140 | cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"", |
146 | rdp->completed, rdp->gpnum, | 141 | rdp->completed, rdp->gpnum, |
147 | rdp->passed_quiesc, rdp->passed_quiesc_completed, | 142 | rdp->passed_quiesce, rdp->passed_quiesce_gpnum, |
148 | rdp->qs_pending); | 143 | rdp->qs_pending); |
149 | #ifdef CONFIG_NO_HZ | 144 | #ifdef CONFIG_NO_HZ |
150 | seq_printf(m, ",%d,%d,%d,%lu", | 145 | seq_printf(m, ",%d,%d,%d,%lu", |
@@ -175,7 +170,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | |||
175 | 170 | ||
176 | static int show_rcudata_csv(struct seq_file *m, void *unused) | 171 | static int show_rcudata_csv(struct seq_file *m, void *unused) |
177 | { | 172 | { |
178 | seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\","); | 173 | seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); |
179 | #ifdef CONFIG_NO_HZ | 174 | #ifdef CONFIG_NO_HZ |
180 | seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); | 175 | seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); |
181 | #endif /* #ifdef CONFIG_NO_HZ */ | 176 | #endif /* #ifdef CONFIG_NO_HZ */ |
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 255e1662acdb..5e8d9cce7470 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c | |||
@@ -579,6 +579,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
579 | struct rt_mutex_waiter *waiter) | 579 | struct rt_mutex_waiter *waiter) |
580 | { | 580 | { |
581 | int ret = 0; | 581 | int ret = 0; |
582 | int was_disabled; | ||
582 | 583 | ||
583 | for (;;) { | 584 | for (;;) { |
584 | /* Try to acquire the lock: */ | 585 | /* Try to acquire the lock: */ |
@@ -601,10 +602,17 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
601 | 602 | ||
602 | raw_spin_unlock(&lock->wait_lock); | 603 | raw_spin_unlock(&lock->wait_lock); |
603 | 604 | ||
605 | was_disabled = irqs_disabled(); | ||
606 | if (was_disabled) | ||
607 | local_irq_enable(); | ||
608 | |||
604 | debug_rt_mutex_print_deadlock(waiter); | 609 | debug_rt_mutex_print_deadlock(waiter); |
605 | 610 | ||
606 | schedule_rt_mutex(lock); | 611 | schedule_rt_mutex(lock); |
607 | 612 | ||
613 | if (was_disabled) | ||
614 | local_irq_disable(); | ||
615 | |||
608 | raw_spin_lock(&lock->wait_lock); | 616 | raw_spin_lock(&lock->wait_lock); |
609 | set_current_state(state); | 617 | set_current_state(state); |
610 | } | 618 | } |
diff --git a/kernel/sched.c b/kernel/sched.c index 8aa00803c1ec..03ad0113801a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -4213,6 +4213,7 @@ static inline void schedule_debug(struct task_struct *prev) | |||
4213 | */ | 4213 | */ |
4214 | if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) | 4214 | if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) |
4215 | __schedule_bug(prev); | 4215 | __schedule_bug(prev); |
4216 | rcu_sleep_check(); | ||
4216 | 4217 | ||
4217 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); | 4218 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); |
4218 | 4219 | ||
@@ -5955,15 +5956,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
5955 | } | 5956 | } |
5956 | 5957 | ||
5957 | /* | 5958 | /* |
5958 | * In a system that switches off the HZ timer nohz_cpu_mask | ||
5959 | * indicates which cpus entered this state. This is used | ||
5960 | * in the rcu update to wait only for active cpus. For system | ||
5961 | * which do not switch off the HZ timer nohz_cpu_mask should | ||
5962 | * always be CPU_BITS_NONE. | ||
5963 | */ | ||
5964 | cpumask_var_t nohz_cpu_mask; | ||
5965 | |||
5966 | /* | ||
5967 | * Increase the granularity value when there are more CPUs, | 5959 | * Increase the granularity value when there are more CPUs, |
5968 | * because with more CPUs the 'effective latency' as visible | 5960 | * because with more CPUs the 'effective latency' as visible |
5969 | * to users decreases. But the relationship is not linear, | 5961 | * to users decreases. But the relationship is not linear, |
@@ -8175,8 +8167,6 @@ void __init sched_init(void) | |||
8175 | */ | 8167 | */ |
8176 | current->sched_class = &fair_sched_class; | 8168 | current->sched_class = &fair_sched_class; |
8177 | 8169 | ||
8178 | /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ | ||
8179 | zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); | ||
8180 | #ifdef CONFIG_SMP | 8170 | #ifdef CONFIG_SMP |
8181 | zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); | 8171 | zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); |
8182 | #ifdef CONFIG_NO_HZ | 8172 | #ifdef CONFIG_NO_HZ |
@@ -8206,6 +8196,7 @@ void __might_sleep(const char *file, int line, int preempt_offset) | |||
8206 | { | 8196 | { |
8207 | static unsigned long prev_jiffy; /* ratelimiting */ | 8197 | static unsigned long prev_jiffy; /* ratelimiting */ |
8208 | 8198 | ||
8199 | rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ | ||
8209 | if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || | 8200 | if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || |
8210 | system_state != SYSTEM_RUNNING || oops_in_progress) | 8201 | system_state != SYSTEM_RUNNING || oops_in_progress) |
8211 | return; | 8202 | return; |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d5097c44b407..eb98e55196b9 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -139,7 +139,6 @@ static void tick_nohz_update_jiffies(ktime_t now) | |||
139 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 139 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
140 | unsigned long flags; | 140 | unsigned long flags; |
141 | 141 | ||
142 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | ||
143 | ts->idle_waketime = now; | 142 | ts->idle_waketime = now; |
144 | 143 | ||
145 | local_irq_save(flags); | 144 | local_irq_save(flags); |
@@ -389,9 +388,6 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
389 | else | 388 | else |
390 | expires.tv64 = KTIME_MAX; | 389 | expires.tv64 = KTIME_MAX; |
391 | 390 | ||
392 | if (delta_jiffies > 1) | ||
393 | cpumask_set_cpu(cpu, nohz_cpu_mask); | ||
394 | |||
395 | /* Skip reprogram of event if its not changed */ | 391 | /* Skip reprogram of event if its not changed */ |
396 | if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) | 392 | if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) |
397 | goto out; | 393 | goto out; |
@@ -441,7 +437,6 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
441 | * softirq. | 437 | * softirq. |
442 | */ | 438 | */ |
443 | tick_do_update_jiffies64(ktime_get()); | 439 | tick_do_update_jiffies64(ktime_get()); |
444 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | ||
445 | } | 440 | } |
446 | raise_softirq_irqoff(TIMER_SOFTIRQ); | 441 | raise_softirq_irqoff(TIMER_SOFTIRQ); |
447 | out: | 442 | out: |
@@ -524,7 +519,6 @@ void tick_nohz_restart_sched_tick(void) | |||
524 | /* Update jiffies first */ | 519 | /* Update jiffies first */ |
525 | select_nohz_load_balancer(0); | 520 | select_nohz_load_balancer(0); |
526 | tick_do_update_jiffies64(now); | 521 | tick_do_update_jiffies64(now); |
527 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | ||
528 | 522 | ||
529 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | 523 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING |
530 | /* | 524 | /* |