-rw-r--r--  Documentation/RCU/NMI-RCU.txt          2
-rw-r--r--  Documentation/RCU/lockdep-splat.txt    110
-rw-r--r--  Documentation/RCU/lockdep.txt          34
-rw-r--r--  Documentation/RCU/torture.txt          137
-rw-r--r--  Documentation/RCU/trace.txt            38
-rw-r--r--  include/linux/lockdep.h                2
-rw-r--r--  include/linux/rcupdate.h               300
-rw-r--r--  include/linux/rcutiny.h                20
-rw-r--r--  include/linux/rcutree.h                2
-rw-r--r--  include/linux/sched.h                  4
-rw-r--r--  include/linux/types.h                  10
-rw-r--r--  include/trace/events/rcu.h             459
-rw-r--r--  init/Kconfig                           6
-rw-r--r--  kernel/lockdep.c                       84
-rw-r--r--  kernel/pid.c                           4
-rw-r--r--  kernel/rcu.h                           85
-rw-r--r--  kernel/rcupdate.c                      26
-rw-r--r--  kernel/rcutiny.c                       117
-rw-r--r--  kernel/rcutiny_plugin.h                134
-rw-r--r--  kernel/rcutorture.c                    77
-rw-r--r--  kernel/rcutree.c                       290
-rw-r--r--  kernel/rcutree.h                       17
-rw-r--r--  kernel/rcutree_plugin.h                150
-rw-r--r--  kernel/rcutree_trace.c                 13
-rw-r--r--  kernel/rtmutex.c                       8
-rw-r--r--  kernel/sched.c                         13
-rw-r--r--  kernel/time/tick-sched.c               6
27 files changed, 1489 insertions, 659 deletions
diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt index bf82851a0e57..687777f83b23 100644 --- a/Documentation/RCU/NMI-RCU.txt +++ b/Documentation/RCU/NMI-RCU.txt | |||
| @@ -95,7 +95,7 @@ not to return until all ongoing NMI handlers exit. It is therefore safe | |||
| 95 | to free up the handler's data as soon as synchronize_sched() returns. | 95 | to free up the handler's data as soon as synchronize_sched() returns. |
| 96 | 96 | ||
| 97 | Important note: for this to work, the architecture in question must | 97 | Important note: for this to work, the architecture in question must |
| 98 | invoke irq_enter() and irq_exit() on NMI entry and exit, respectively. | 98 | invoke nmi_enter() and nmi_exit() on NMI entry and exit, respectively. |
| 99 | 99 | ||
| 100 | 100 | ||
| 101 | Answer to Quick Quiz | 101 | Answer to Quick Quiz |
diff --git a/Documentation/RCU/lockdep-splat.txt b/Documentation/RCU/lockdep-splat.txt new file mode 100644 index 000000000000..bf9061142827 --- /dev/null +++ b/Documentation/RCU/lockdep-splat.txt | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | Lockdep-RCU was added to the Linux kernel in early 2010 | ||
| 2 | (http://lwn.net/Articles/371986/). This facility checks for some common | ||
| 3 | misuses of the RCU API, most notably using one of the rcu_dereference() | ||
| 4 | family to access an RCU-protected pointer without the proper protection. | ||
| 5 | When such misuse is detected, a lockdep-RCU splat is emitted. | |
| 6 | |||
| 7 | The usual cause of a lockdep-RCU splat is someone accessing an | |
| 8 | RCU-protected data structure without either (1) being in the right kind of | ||
| 9 | RCU read-side critical section or (2) holding the right update-side lock. | ||
| 10 | This problem can therefore be serious: it might result in random memory | ||
| 11 | overwriting or worse. There can of course be false positives, this | ||
| 12 | being the real world and all that. | ||
| 13 | |||
| 14 | So let's look at an example RCU lockdep splat from 3.0-rc5, one that | ||
| 15 | has long since been fixed: | ||
| 16 | |||
| 17 | =============================== | ||
| 18 | [ INFO: suspicious RCU usage. ] | ||
| 19 | ------------------------------- | ||
| 20 | block/cfq-iosched.c:2776 suspicious rcu_dereference_protected() usage! | ||
| 21 | |||
| 22 | other info that might help us debug this: | ||
| 23 | |||
| 24 | |||
| 25 | rcu_scheduler_active = 1, debug_locks = 0 | ||
| 26 | 3 locks held by scsi_scan_6/1552: | ||
| 27 | #0: (&shost->scan_mutex){+.+.+.}, at: [<ffffffff8145efca>] | ||
| 28 | scsi_scan_host_selected+0x5a/0x150 | ||
| 29 | #1: (&eq->sysfs_lock){+.+...}, at: [<ffffffff812a5032>] | ||
| 30 | elevator_exit+0x22/0x60 | ||
| 31 | #2: (&(&q->__queue_lock)->rlock){-.-...}, at: [<ffffffff812b6233>] | ||
| 32 | cfq_exit_queue+0x43/0x190 | ||
| 33 | |||
| 34 | stack backtrace: | ||
| 35 | Pid: 1552, comm: scsi_scan_6 Not tainted 3.0.0-rc5 #17 | ||
| 36 | Call Trace: | ||
| 37 | [<ffffffff810abb9b>] lockdep_rcu_dereference+0xbb/0xc0 | ||
| 38 | [<ffffffff812b6139>] __cfq_exit_single_io_context+0xe9/0x120 | ||
| 39 | [<ffffffff812b626c>] cfq_exit_queue+0x7c/0x190 | ||
| 40 | [<ffffffff812a5046>] elevator_exit+0x36/0x60 | ||
| 41 | [<ffffffff812a802a>] blk_cleanup_queue+0x4a/0x60 | ||
| 42 | [<ffffffff8145cc09>] scsi_free_queue+0x9/0x10 | ||
| 43 | [<ffffffff81460944>] __scsi_remove_device+0x84/0xd0 | ||
| 44 | [<ffffffff8145dca3>] scsi_probe_and_add_lun+0x353/0xb10 | ||
| 45 | [<ffffffff817da069>] ? error_exit+0x29/0xb0 | ||
| 46 | [<ffffffff817d98ed>] ? _raw_spin_unlock_irqrestore+0x3d/0x80 | ||
| 47 | [<ffffffff8145e722>] __scsi_scan_target+0x112/0x680 | ||
| 48 | [<ffffffff812c690d>] ? trace_hardirqs_off_thunk+0x3a/0x3c | ||
| 49 | [<ffffffff817da069>] ? error_exit+0x29/0xb0 | ||
| 50 | [<ffffffff812bcc60>] ? kobject_del+0x40/0x40 | ||
| 51 | [<ffffffff8145ed16>] scsi_scan_channel+0x86/0xb0 | ||
| 52 | [<ffffffff8145f0b0>] scsi_scan_host_selected+0x140/0x150 | ||
| 53 | [<ffffffff8145f149>] do_scsi_scan_host+0x89/0x90 | ||
| 54 | [<ffffffff8145f170>] do_scan_async+0x20/0x160 | ||
| 55 | [<ffffffff8145f150>] ? do_scsi_scan_host+0x90/0x90 | ||
| 56 | [<ffffffff810975b6>] kthread+0xa6/0xb0 | ||
| 57 | [<ffffffff817db154>] kernel_thread_helper+0x4/0x10 | ||
| 58 | [<ffffffff81066430>] ? finish_task_switch+0x80/0x110 | ||
| 59 | [<ffffffff817d9c04>] ? retint_restore_args+0xe/0xe | ||
| 60 | [<ffffffff81097510>] ? __init_kthread_worker+0x70/0x70 | ||
| 61 | [<ffffffff817db150>] ? gs_change+0xb/0xb | ||
| 62 | |||
| 63 | Line 2776 of block/cfq-iosched.c in v3.0-rc5 is as follows: | ||
| 64 | |||
| 65 | if (rcu_dereference(ioc->ioc_data) == cic) { | ||
| 66 | |||
| 67 | This form says that it must be in a plain vanilla RCU read-side critical | ||
| 68 | section, but the "other info" list above shows that this is not the | ||
| 69 | case. Instead, we hold three locks, one of which might be RCU related. | ||
| 70 | And maybe that lock really does protect this reference. If so, the fix | ||
| 71 | is to inform RCU, perhaps by changing __cfq_exit_single_io_context() to | ||
| 72 | take the struct request_queue "q" from cfq_exit_queue() as an argument, | ||
| 73 | which would permit us to invoke rcu_dereference_protected() as follows: | |
| 74 | |||
| 75 | if (rcu_dereference_protected(ioc->ioc_data, | ||
| 76 | lockdep_is_held(&q->queue_lock)) == cic) { | ||
| 77 | |||
| 78 | With this change, there would be no lockdep-RCU splat emitted if this | ||
| 79 | code was invoked either from within an RCU read-side critical section | ||
| 80 | or with the ->queue_lock held. In particular, this would have suppressed | ||
| 81 | the above lockdep-RCU splat because ->queue_lock is held (see #2 in the | ||
| 82 | list above). | ||
| 83 | |||
| 84 | On the other hand, perhaps we really do need an RCU read-side critical | ||
| 85 | section. In this case, the critical section must span all uses of | |
| 86 | the value returned from rcu_dereference(), or at least extend until | |
| 87 | some reference count has been incremented or some such. One way is to | |
| 88 | add rcu_read_lock() and rcu_read_unlock() as follows: | ||
| 89 | |||
| 90 | rcu_read_lock(); | ||
| 91 | if (rcu_dereference(ioc->ioc_data) == cic) { | ||
| 92 | spin_lock(&ioc->lock); | ||
| 93 | rcu_assign_pointer(ioc->ioc_data, NULL); | ||
| 94 | spin_unlock(&ioc->lock); | ||
| 95 | } | ||
| 96 | rcu_read_unlock(); | ||
| 97 | |||
| 98 | With this change, the rcu_dereference() is always within an RCU | ||
| 99 | read-side critical section, which again would have suppressed the | ||
| 100 | above lockdep-RCU splat. | ||
| 101 | |||
| 102 | But in this particular case, we don't actually dereference the pointer | |
| 103 | returned from rcu_dereference(). Instead, that pointer is just compared | ||
| 104 | to the cic pointer, which means that the rcu_dereference() can be replaced | ||
| 105 | by rcu_access_pointer() as follows: | ||
| 106 | |||
| 107 | if (rcu_access_pointer(ioc->ioc_data) == cic) { | ||
| 108 | |||
| 109 | Because it is legal to invoke rcu_access_pointer() without protection, | ||
| 110 | this change would also suppress the above lockdep-RCU splat. | ||
diff --git a/Documentation/RCU/lockdep.txt b/Documentation/RCU/lockdep.txt index d7a49b2f6994..a102d4b3724b 100644 --- a/Documentation/RCU/lockdep.txt +++ b/Documentation/RCU/lockdep.txt | |||
| @@ -32,9 +32,27 @@ checking of rcu_dereference() primitives: | |||
| 32 | srcu_dereference(p, sp): | 32 | srcu_dereference(p, sp): |
| 33 | Check for SRCU read-side critical section. | 33 | Check for SRCU read-side critical section. |
| 34 | rcu_dereference_check(p, c): | 34 | rcu_dereference_check(p, c): |
| 35 | Use explicit check expression "c". This is useful in | 35 | Use explicit check expression "c" along with |
| 36 | code that is invoked by both readers and updaters. | 36 | rcu_read_lock_held(). This is useful in code that is |
| 37 | rcu_dereference_raw(p) | 37 | invoked by both RCU readers and updaters. |
| 38 | rcu_dereference_bh_check(p, c): | ||
| 39 | Use explicit check expression "c" along with | ||
| 40 | rcu_read_lock_bh_held(). This is useful in code that | ||
| 41 | is invoked by both RCU-bh readers and updaters. | ||
| 42 | rcu_dereference_sched_check(p, c): | ||
| 43 | Use explicit check expression "c" along with | ||
| 44 | rcu_read_lock_sched_held(). This is useful in code that | ||
| 45 | is invoked by both RCU-sched readers and updaters. | ||
| 46 | srcu_dereference_check(p, c): | ||
| 47 | Use explicit check expression "c" along with | ||
| 48 | srcu_read_lock_held(). This is useful in code that | |
| 49 | is invoked by both SRCU readers and updaters. | ||
| 50 | rcu_dereference_index_check(p, c): | ||
| 51 | Use explicit check expression "c", but the caller | ||
| 52 | must supply one of the rcu_read_lock_held() functions. | ||
| 53 | This is useful in code that uses RCU-protected arrays | ||
| 54 | and that is invoked by both RCU readers and updaters. | |
| 55 | rcu_dereference_raw(p): | ||
| 38 | Don't check. (Use sparingly, if at all.) | 56 | Don't check. (Use sparingly, if at all.) |
| 39 | rcu_dereference_protected(p, c): | 57 | rcu_dereference_protected(p, c): |
| 40 | Use explicit check expression "c", and omit all barriers | 58 | Use explicit check expression "c", and omit all barriers |
| @@ -48,13 +66,11 @@ checking of rcu_dereference() primitives: | |||
| 48 | value of the pointer itself, for example, against NULL. | 66 | value of the pointer itself, for example, against NULL. |
| 49 | 67 | ||
| 50 | The rcu_dereference_check() check expression can be any boolean | 68 | The rcu_dereference_check() check expression can be any boolean |
| 51 | expression, but would normally include one of the rcu_read_lock_held() | 69 | expression, but would normally include a lockdep expression. However, |
| 52 | family of functions and a lockdep expression. However, any boolean | 70 | any boolean expression can be used. For a moderately ornate example, |
| 53 | expression can be used. For a moderately ornate example, consider | 71 | consider the following: |
| 54 | the following: | ||
| 55 | 72 | ||
| 56 | file = rcu_dereference_check(fdt->fd[fd], | 73 | file = rcu_dereference_check(fdt->fd[fd], |
| 57 | rcu_read_lock_held() || | ||
| 58 | lockdep_is_held(&files->file_lock) || | 74 | lockdep_is_held(&files->file_lock) || |
| 59 | atomic_read(&files->count) == 1); | 75 | atomic_read(&files->count) == 1); |
| 60 | 76 | ||
| @@ -62,7 +78,7 @@ This expression picks up the pointer "fdt->fd[fd]" in an RCU-safe manner, | |||
| 62 | and, if CONFIG_PROVE_RCU is configured, verifies that this expression | 78 | and, if CONFIG_PROVE_RCU is configured, verifies that this expression |
| 63 | is used in: | 79 | is used in: |
| 64 | 80 | ||
| 65 | 1. An RCU read-side critical section, or | 81 | 1. An RCU read-side critical section (implicit), or |
| 66 | 2. with files->file_lock held, or | 82 | 2. with files->file_lock held, or |
| 67 | 3. on an unshared files_struct. | 83 | 3. on an unshared files_struct. |
| 68 | 84 | ||
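As an illustration of the flavor-specific variants listed above, here is a minimal sketch of rcu_dereference_bh_check() in code shared between RCU-bh readers and updaters. The structure, pointer, and lock names (foo, gp, gp_lock) are hypothetical, not taken from any existing subsystem:

	struct foo {
		int a;
	};
	struct foo __rcu *gp;		/* hypothetical RCU-protected pointer */
	DEFINE_SPINLOCK(gp_lock);	/* hypothetical update-side lock */

	/* Called by readers under rcu_read_lock_bh() and by updaters holding gp_lock. */
	static int foo_get_a(void)
	{
		struct foo *p;

		p = rcu_dereference_bh_check(gp, lockdep_is_held(&gp_lock));
		return p ? p->a : -1;
	}

The srcu_dereference_check() and rcu_dereference_sched_check() variants follow the same pattern, differing only in which flavor of read-side critical section satisfies the implicit check.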
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt index 5d9016795fd8..783d6c134d3f 100644 --- a/Documentation/RCU/torture.txt +++ b/Documentation/RCU/torture.txt | |||
| @@ -42,7 +42,7 @@ fqs_holdoff Holdoff time (in microseconds) between consecutive calls | |||
| 42 | fqs_stutter Wait time (in seconds) between consecutive bursts | 42 | fqs_stutter Wait time (in seconds) between consecutive bursts |
| 43 | of calls to force_quiescent_state(). | 43 | of calls to force_quiescent_state(). |
| 44 | 44 | ||
| 45 | irqreaders Says to invoke RCU readers from irq level. This is currently | 45 | irqreader Says to invoke RCU readers from irq level. This is currently |
| 46 | done via timers. Defaults to "1" for variants of RCU that | 46 | done via timers. Defaults to "1" for variants of RCU that |
| 47 | permit this. (Or, more accurately, variants of RCU that do | 47 | permit this. (Or, more accurately, variants of RCU that do |
| 48 | -not- permit this know to ignore this variable.) | 48 | -not- permit this know to ignore this variable.) |
| @@ -79,19 +79,68 @@ stutter The length of time to run the test before pausing for this | |||
| 79 | Specifying "stutter=0" causes the test to run continuously | 79 | Specifying "stutter=0" causes the test to run continuously |
| 80 | without pausing, which is the old default behavior. | 80 | without pausing, which is the old default behavior. |
| 81 | 81 | ||
| 82 | test_boost Whether or not to test the ability of RCU to do priority | ||
| 83 | boosting. Defaults to "test_boost=1", which performs | ||
| 84 | RCU priority-inversion testing only if the selected | ||
| 85 | RCU implementation supports priority boosting. Specifying | ||
| 86 | "test_boost=0" never performs RCU priority-inversion | ||
| 87 | testing. Specifying "test_boost=2" performs RCU | ||
| 88 | priority-inversion testing even if the selected RCU | ||
| 89 | implementation does not support RCU priority boosting, | ||
| 90 | which can be used to test rcutorture's ability to | ||
| 91 | carry out RCU priority-inversion testing. | ||
| 92 | |||
| 93 | test_boost_interval | ||
| 94 | The number of seconds in an RCU priority-inversion test | ||
| 95 | cycle. Defaults to "test_boost_interval=7". It is | ||
| 96 | usually wise for this value to be relatively prime to | ||
| 97 | the value selected for "stutter". | ||
| 98 | |||
| 99 | test_boost_duration | ||
| 100 | The number of seconds to do RCU priority-inversion testing | ||
| 101 | within any given "test_boost_interval". Defaults to | ||
| 102 | "test_boost_duration=4". | ||
| 103 | |||
| 82 | test_no_idle_hz Whether or not to test the ability of RCU to operate in | 104 | test_no_idle_hz Whether or not to test the ability of RCU to operate in |
| 83 | a kernel that disables the scheduling-clock interrupt to | 105 | a kernel that disables the scheduling-clock interrupt to |
| 84 | idle CPUs. Boolean parameter, "1" to test, "0" otherwise. | 106 | idle CPUs. Boolean parameter, "1" to test, "0" otherwise. |
| 85 | Defaults to omitting this test. | 107 | Defaults to omitting this test. |
| 86 | 108 | ||
| 87 | torture_type The type of RCU to test: "rcu" for the rcu_read_lock() API, | 109 | torture_type The type of RCU to test, with string values as follows: |
| 88 | "rcu_sync" for rcu_read_lock() with synchronous reclamation, | 110 | |
| 89 | "rcu_bh" for the rcu_read_lock_bh() API, "rcu_bh_sync" for | 111 | "rcu": rcu_read_lock(), rcu_read_unlock() and call_rcu(). |
| 90 | rcu_read_lock_bh() with synchronous reclamation, "srcu" for | 112 | |
| 91 | the "srcu_read_lock()" API, "sched" for the use of | 113 | "rcu_sync": rcu_read_lock(), rcu_read_unlock(), and |
| 92 | preempt_disable() together with synchronize_sched(), | 114 | synchronize_rcu(). |
| 93 | and "sched_expedited" for the use of preempt_disable() | 115 | |
| 94 | with synchronize_sched_expedited(). | 116 | "rcu_expedited": rcu_read_lock(), rcu_read_unlock(), and |
| 117 | synchronize_rcu_expedited(). | ||
| 118 | |||
| 119 | "rcu_bh": rcu_read_lock_bh(), rcu_read_unlock_bh(), and | ||
| 120 | call_rcu_bh(). | ||
| 121 | |||
| 122 | "rcu_bh_sync": rcu_read_lock_bh(), rcu_read_unlock_bh(), | ||
| 123 | and synchronize_rcu_bh(). | ||
| 124 | |||
| 125 | "rcu_bh_expedited": rcu_read_lock_bh(), rcu_read_unlock_bh(), | ||
| 126 | and synchronize_rcu_bh_expedited(). | ||
| 127 | |||
| 128 | "srcu": srcu_read_lock(), srcu_read_unlock() and | ||
| 129 | synchronize_srcu(). | ||
| 130 | |||
| 131 | "srcu_expedited": srcu_read_lock(), srcu_read_unlock() and | ||
| 132 | synchronize_srcu_expedited(). | ||
| 133 | |||
| 134 | "sched": preempt_disable(), preempt_enable(), and | ||
| 135 | call_rcu_sched(). | ||
| 136 | |||
| 137 | "sched_sync": preempt_disable(), preempt_enable(), and | ||
| 138 | synchronize_sched(). | ||
| 139 | |||
| 140 | "sched_expedited": preempt_disable(), preempt_enable(), and | ||
| 141 | synchronize_sched_expedited(). | ||
| 142 | |||
| 143 | Defaults to "rcu". | ||
| 95 | 144 | ||
| 96 | verbose Enable debug printk()s. Default is disabled. | 145 | verbose Enable debug printk()s. Default is disabled. |
| 97 | 146 | ||
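To make the parameter descriptions above concrete, a hypothetical invocation (assuming rcutorture is built as a module rather than built into the kernel) might combine torture_type with the test_boost family as follows:

	modprobe rcutorture torture_type=rcu_bh nreaders=4 stat_interval=30 test_boost=2 test_boost_interval=5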
| @@ -100,12 +149,12 @@ OUTPUT | |||
| 100 | 149 | ||
| 101 | The statistics output is as follows: | 150 | The statistics output is as follows: |
| 102 | 151 | ||
| 103 | rcu-torture: --- Start of test: nreaders=16 stat_interval=0 verbose=0 | 152 | rcu-torture:--- Start of test: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4 |
| 104 | rcu-torture: rtc: 0000000000000000 ver: 1916 tfle: 0 rta: 1916 rtaf: 0 rtf: 1915 | 153 | rcu-torture: rtc: (null) ver: 155441 tfle: 0 rta: 155441 rtaf: 8884 rtf: 155440 rtmbe: 0 rtbke: 0 rtbre: 0 rtbf: 0 rtb: 0 nt: 3055767 |
| 105 | rcu-torture: Reader Pipe: 1466408 9747 0 0 0 0 0 0 0 0 0 | 154 | rcu-torture: Reader Pipe: 727860534 34213 0 0 0 0 0 0 0 0 0 |
| 106 | rcu-torture: Reader Batch: 1464477 11678 0 0 0 0 0 0 0 0 | 155 | rcu-torture: Reader Batch: 727877838 17003 0 0 0 0 0 0 0 0 0 |
| 107 | rcu-torture: Free-Block Circulation: 1915 1915 1915 1915 1915 1915 1915 1915 1915 1915 0 | 156 | rcu-torture: Free-Block Circulation: 155440 155440 155440 155440 155440 155440 155440 155440 155440 155440 0 |
| 108 | rcu-torture: --- End of test | 157 | rcu-torture:--- End of test: SUCCESS: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4 |
| 109 | 158 | ||
| 110 | The command "dmesg | grep torture:" will extract this information on | 159 | The command "dmesg | grep torture:" will extract this information on |
| 111 | most systems. On more esoteric configurations, it may be necessary to | 160 | most systems. On more esoteric configurations, it may be necessary to |
| @@ -113,26 +162,55 @@ use other commands to access the output of the printk()s used by | |||
| 113 | the RCU torture test. The printk()s use KERN_ALERT, so they should | 162 | the RCU torture test. The printk()s use KERN_ALERT, so they should |
| 114 | be evident. ;-) | 163 | be evident. ;-) |
| 115 | 164 | ||
| 165 | The first and last lines show the rcutorture module parameters, and the | ||
| 166 | last line shows either "SUCCESS" or "FAILURE", based on rcutorture's | ||
| 167 | automatic determination as to whether RCU operated correctly. | ||
| 168 | |||
| 116 | The entries are as follows: | 169 | The entries are as follows: |
| 117 | 170 | ||
| 118 | o "rtc": The hexadecimal address of the structure currently visible | 171 | o "rtc": The hexadecimal address of the structure currently visible |
| 119 | to readers. | 172 | to readers. |
| 120 | 173 | ||
| 121 | o "ver": The number of times since boot that the rcutw writer task | 174 | o "ver": The number of times since boot that the RCU writer task |
| 122 | has changed the structure visible to readers. | 175 | has changed the structure visible to readers. |
| 123 | 176 | ||
| 124 | o "tfle": If non-zero, indicates that the "torture freelist" | 177 | o "tfle": If non-zero, indicates that the "torture freelist" |
| 125 | containing structure to be placed into the "rtc" area is empty. | 178 | containing structures to be placed into the "rtc" area is empty. |
| 126 | This condition is important, since it can fool you into thinking | 179 | This condition is important, since it can fool you into thinking |
| 127 | that RCU is working when it is not. :-/ | 180 | that RCU is working when it is not. :-/ |
| 128 | 181 | ||
| 129 | o "rta": Number of structures allocated from the torture freelist. | 182 | o "rta": Number of structures allocated from the torture freelist. |
| 130 | 183 | ||
| 131 | o "rtaf": Number of allocations from the torture freelist that have | 184 | o "rtaf": Number of allocations from the torture freelist that have |
| 132 | failed due to the list being empty. | 185 | failed due to the list being empty. It is not unusual for this |
| 186 | to be non-zero, but it is bad for it to be a large fraction of | ||
| 187 | the value indicated by "rta". | ||
| 133 | 188 | ||
| 134 | o "rtf": Number of frees into the torture freelist. | 189 | o "rtf": Number of frees into the torture freelist. |
| 135 | 190 | ||
| 191 | o "rtmbe": A non-zero value indicates that rcutorture believes that | ||
| 192 | rcu_assign_pointer() and rcu_dereference() are not working | ||
| 193 | correctly. This value should be zero. | ||
| 194 | |||
| 195 | o "rtbke": rcutorture was unable to create the real-time kthreads | ||
| 196 | used to force RCU priority inversion. This value should be zero. | ||
| 197 | |||
| 198 | o "rtbre": Although rcutorture successfully created the kthreads | ||
| 199 | used to force RCU priority inversion, it was unable to set them | ||
| 200 | to the real-time priority level of 1. This value should be zero. | ||
| 201 | |||
| 202 | o "rtbf": The number of times that RCU priority boosting failed | ||
| 203 | to resolve RCU priority inversion. | ||
| 204 | |||
| 205 | o "rtb": The number of times that rcutorture attempted to force | ||
| 206 | an RCU priority inversion condition. If you are testing RCU | ||
| 207 | priority boosting via the "test_boost" module parameter, this | ||
| 208 | value should be non-zero. | ||
| 209 | |||
| 210 | o "nt": The number of times rcutorture ran RCU read-side code from | ||
| 211 | within a timer handler. This value should be non-zero only | ||
| 212 | if you specified the "irqreader" module parameter. | ||
| 213 | |||
| 136 | o "Reader Pipe": Histogram of "ages" of structures seen by readers. | 214 | o "Reader Pipe": Histogram of "ages" of structures seen by readers. |
| 137 | If any entries past the first two are non-zero, RCU is broken. | 215 | If any entries past the first two are non-zero, RCU is broken. |
| 138 | And rcutorture prints the error flag string "!!!" to make sure | 216 | And rcutorture prints the error flag string "!!!" to make sure |
| @@ -162,26 +240,15 @@ o "Free-Block Circulation": Shows the number of torture structures | |||
| 162 | somehow gets incremented farther than it should. | 240 | somehow gets incremented farther than it should. |
| 163 | 241 | ||
| 164 | Different implementations of RCU can provide implementation-specific | 242 | Different implementations of RCU can provide implementation-specific |
| 165 | additional information. For example, SRCU provides the following: | 243 | additional information. For example, SRCU provides the following |
| 244 | additional line: | ||
| 166 | 245 | ||
| 167 | srcu-torture: rtc: f8cf46a8 ver: 355 tfle: 0 rta: 356 rtaf: 0 rtf: 346 rtmbe: 0 | ||
| 168 | srcu-torture: Reader Pipe: 559738 939 0 0 0 0 0 0 0 0 0 | ||
| 169 | srcu-torture: Reader Batch: 560434 243 0 0 0 0 0 0 0 0 | ||
| 170 | srcu-torture: Free-Block Circulation: 355 354 353 352 351 350 349 348 347 346 0 | ||
| 171 | srcu-torture: per-CPU(idx=1): 0(0,1) 1(0,1) 2(0,0) 3(0,1) | 246 | srcu-torture: per-CPU(idx=1): 0(0,1) 1(0,1) 2(0,0) 3(0,1) |
| 172 | 247 | ||
| 173 | The first four lines are similar to those for RCU. The last line shows | 248 | This line shows the per-CPU counter state. The numbers in parentheses are |
| 174 | the per-CPU counter state. The numbers in parentheses are the values | 249 | the values of the "old" and "current" counters for the corresponding CPU. |
| 175 | of the "old" and "current" counters for the corresponding CPU. The | 250 | The "idx" value maps the "old" and "current" values to the underlying |
| 176 | "idx" value maps the "old" and "current" values to the underlying array, | 251 | array, and is useful for debugging. |
| 177 | and is useful for debugging. | ||
| 178 | |||
| 179 | Similarly, sched_expedited RCU provides the following: | ||
| 180 | |||
| 181 | sched_expedited-torture: rtc: d0000000016c1880 ver: 1090796 tfle: 0 rta: 1090796 rtaf: 0 rtf: 1090787 rtmbe: 0 nt: 27713319 | ||
| 182 | sched_expedited-torture: Reader Pipe: 12660320201 95875 0 0 0 0 0 0 0 0 0 | ||
| 183 | sched_expedited-torture: Reader Batch: 12660424885 0 0 0 0 0 0 0 0 0 0 | ||
| 184 | sched_expedited-torture: Free-Block Circulation: 1090795 1090795 1090794 1090793 1090792 1090791 1090790 1090789 1090788 1090787 0 | ||
| 185 | 252 | ||
| 186 | 253 | ||
| 187 | USAGE | 254 | USAGE |
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index 8173cec473aa..aaf65f6c6cd7 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt | |||
| @@ -33,23 +33,23 @@ rcu/rcuboost: | |||
| 33 | The output of "cat rcu/rcudata" looks as follows: | 33 | The output of "cat rcu/rcudata" looks as follows: |
| 34 | 34 | ||
| 35 | rcu_sched: | 35 | rcu_sched: |
| 36 | 0 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=545/1/0 df=50 of=0 ri=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0 | 36 | 0 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=545/1/0 df=50 of=0 ri=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0 |
| 37 | 1 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=967/1/0 df=58 of=0 ri=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0 | 37 | 1 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=967/1/0 df=58 of=0 ri=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0 |
| 38 | 2 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1081/1/0 df=175 of=0 ri=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0 | 38 | 2 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1081/1/0 df=175 of=0 ri=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0 |
| 39 | 3 c=20942 g=20943 pq=1 pqc=20942 qp=1 dt=1846/0/0 df=404 of=0 ri=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0 | 39 | 3 c=20942 g=20943 pq=1 pgp=20942 qp=1 dt=1846/0/0 df=404 of=0 ri=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0 |
| 40 | 4 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=369/1/0 df=83 of=0 ri=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0 | 40 | 4 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=369/1/0 df=83 of=0 ri=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0 |
| 41 | 5 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=381/1/0 df=64 of=0 ri=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0 | 41 | 5 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=381/1/0 df=64 of=0 ri=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0 |
| 42 | 6 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1037/1/0 df=183 of=0 ri=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0 | 42 | 6 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1037/1/0 df=183 of=0 ri=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0 |
| 43 | 7 c=20897 g=20897 pq=1 pqc=20896 qp=0 dt=1572/0/0 df=382 of=0 ri=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0 | 43 | 7 c=20897 g=20897 pq=1 pgp=20896 qp=0 dt=1572/0/0 df=382 of=0 ri=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0 |
| 44 | rcu_bh: | 44 | rcu_bh: |
| 45 | 0 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=545/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0 | 45 | 0 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=545/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0 |
| 46 | 1 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=967/1/0 df=3 of=0 ri=1 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0 | 46 | 1 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=967/1/0 df=3 of=0 ri=1 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0 |
| 47 | 2 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1081/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0 | 47 | 2 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1081/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0 |
| 48 | 3 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1846/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0 | 48 | 3 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1846/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0 |
| 49 | 4 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=369/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0 | 49 | 4 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=369/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0 |
| 50 | 5 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=381/1/0 df=4 of=0 ri=1 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0 | 50 | 5 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=381/1/0 df=4 of=0 ri=1 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0 |
| 51 | 6 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1037/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0 | 51 | 6 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1037/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0 |
| 52 | 7 c=1474 g=1474 pq=1 pqc=1473 qp=0 dt=1572/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0 | 52 | 7 c=1474 g=1474 pq=1 pgp=1473 qp=0 dt=1572/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0 |
| 53 | 53 | ||
| 54 | The first section lists the rcu_data structures for rcu_sched, the second | 54 | The first section lists the rcu_data structures for rcu_sched, the second |
| 55 | for rcu_bh. Note that CONFIG_TREE_PREEMPT_RCU kernels will have an | 55 | for rcu_bh. Note that CONFIG_TREE_PREEMPT_RCU kernels will have an |
| @@ -84,7 +84,7 @@ o "pq" indicates that this CPU has passed through a quiescent state | |||
| 84 | CPU has not yet reported that fact, (2) some other CPU has not | 84 | CPU has not yet reported that fact, (2) some other CPU has not |
| 85 | yet reported for this grace period, or (3) both. | 85 | yet reported for this grace period, or (3) both. |
| 86 | 86 | ||
| 87 | o "pqc" indicates which grace period the last-observed quiescent | 87 | o "pgp" indicates which grace period the last-observed quiescent |
| 88 | state for this CPU corresponds to. This is important for handling | 88 | state for this CPU corresponds to. This is important for handling |
| 89 | the race between CPU 0 reporting an extended dynticks-idle | 89 | the race between CPU 0 reporting an extended dynticks-idle |
| 90 | quiescent state for CPU 1 and CPU 1 suddenly waking up and | 90 | quiescent state for CPU 1 and CPU 1 suddenly waking up and |
| @@ -184,10 +184,14 @@ o "kt" is the per-CPU kernel-thread state. The digit preceding | |||
| 184 | The number after the final slash is the CPU that the kthread | 184 | The number after the final slash is the CPU that the kthread |
| 185 | is actually running on. | 185 | is actually running on. |
| 186 | 186 | ||
| 187 | This field is displayed only for CONFIG_RCU_BOOST kernels. | ||
| 188 | |||
| 187 | o "ktl" is the low-order 16 bits (in hexadecimal) of the count of | 189 | o "ktl" is the low-order 16 bits (in hexadecimal) of the count of |
| 188 | the number of times that this CPU's per-CPU kthread has gone | 190 | the number of times that this CPU's per-CPU kthread has gone |
| 189 | through its loop servicing invoke_rcu_cpu_kthread() requests. | 191 | through its loop servicing invoke_rcu_cpu_kthread() requests. |
| 190 | 192 | ||
| 193 | This field is displayed only for CONFIG_RCU_BOOST kernels. | ||
| 194 | |||
| 191 | o "b" is the batch limit for this CPU. If more than this number | 195 | o "b" is the batch limit for this CPU. If more than this number |
| 192 | of RCU callbacks is ready to invoke, then the remainder will | 196 | of RCU callbacks is ready to invoke, then the remainder will |
| 193 | be deferred. | 197 | be deferred. |
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index ef820a3c378b..b6a56e37284c 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h | |||
| @@ -548,7 +548,7 @@ do { \ | |||
| 548 | #endif | 548 | #endif |
| 549 | 549 | ||
| 550 | #ifdef CONFIG_PROVE_RCU | 550 | #ifdef CONFIG_PROVE_RCU |
| 551 | extern void lockdep_rcu_dereference(const char *file, const int line); | 551 | void lockdep_rcu_suspicious(const char *file, const int line, const char *s); |
| 552 | #endif | 552 | #endif |
| 553 | 553 | ||
| 554 | #endif /* __LINUX_LOCKDEP_H */ | 554 | #endif /* __LINUX_LOCKDEP_H */ |
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 8f4f881a0ad8..2cf4226ade7e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h | |||
| @@ -33,6 +33,7 @@ | |||
| 33 | #ifndef __LINUX_RCUPDATE_H | 33 | #ifndef __LINUX_RCUPDATE_H |
| 34 | #define __LINUX_RCUPDATE_H | 34 | #define __LINUX_RCUPDATE_H |
| 35 | 35 | ||
| 36 | #include <linux/types.h> | ||
| 36 | #include <linux/cache.h> | 37 | #include <linux/cache.h> |
| 37 | #include <linux/spinlock.h> | 38 | #include <linux/spinlock.h> |
| 38 | #include <linux/threads.h> | 39 | #include <linux/threads.h> |
| @@ -64,32 +65,74 @@ static inline void rcutorture_record_progress(unsigned long vernum) | |||
| 64 | #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) | 65 | #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) |
| 65 | #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) | 66 | #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) |
| 66 | 67 | ||
| 68 | /* Exported common interfaces */ | ||
| 69 | |||
| 70 | #ifdef CONFIG_PREEMPT_RCU | ||
| 71 | |||
| 67 | /** | 72 | /** |
| 68 | * struct rcu_head - callback structure for use with RCU | 73 | * call_rcu() - Queue an RCU callback for invocation after a grace period. |
| 69 | * @next: next update requests in a list | 74 | * @head: structure to be used for queueing the RCU updates. |
| 70 | * @func: actual update function to call after the grace period. | 75 | * @func: actual callback function to be invoked after the grace period |
| 76 | * | ||
| 77 | * The callback function will be invoked some time after a full grace | ||
| 78 | * period elapses, in other words after all pre-existing RCU read-side | ||
| 79 | * critical sections have completed. However, the callback function | ||
| 80 | * might well execute concurrently with RCU read-side critical sections | ||
| 81 | * that started after call_rcu() was invoked. RCU read-side critical | ||
| 82 | * sections are delimited by rcu_read_lock() and rcu_read_unlock(), | ||
| 83 | * and may be nested. | ||
| 71 | */ | 84 | */ |
| 72 | struct rcu_head { | 85 | extern void call_rcu(struct rcu_head *head, |
| 73 | struct rcu_head *next; | 86 | void (*func)(struct rcu_head *head)); |
| 74 | void (*func)(struct rcu_head *head); | ||
| 75 | }; | ||
| 76 | 87 | ||
| 77 | /* Exported common interfaces */ | 88 | #else /* #ifdef CONFIG_PREEMPT_RCU */ |
| 89 | |||
| 90 | /* In classic RCU, call_rcu() is just call_rcu_sched(). */ | ||
| 91 | #define call_rcu call_rcu_sched | ||
| 92 | |||
| 93 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | ||
| 94 | |||
| 95 | /** | ||
| 96 | * call_rcu_bh() - Queue an RCU callback for invocation after a quicker grace period. | |
| 97 | * @head: structure to be used for queueing the RCU updates. | ||
| 98 | * @func: actual callback function to be invoked after the grace period | ||
| 99 | * | ||
| 100 | * The callback function will be invoked some time after a full grace | ||
| 101 | * period elapses, in other words after all currently executing RCU | ||
| 102 | * read-side critical sections have completed. call_rcu_bh() assumes | ||
| 103 | * that the read-side critical sections end on completion of a softirq | ||
| 104 | * handler. This means that read-side critical sections in process | ||
| 105 | * context must not be interrupted by softirqs. This interface is to be | ||
| 106 | * used when most of the read-side critical sections are in softirq context. | ||
| 107 | * RCU read-side critical sections are delimited by : | ||
| 108 | * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context. | ||
| 109 | * OR | ||
| 110 | * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. | ||
| 111 | * These may be nested. | ||
| 112 | */ | ||
| 113 | extern void call_rcu_bh(struct rcu_head *head, | ||
| 114 | void (*func)(struct rcu_head *head)); | ||
| 115 | |||
| 116 | /** | ||
| 117 | * call_rcu_sched() - Queue an RCU callback for invocation after a sched grace period. | |
| 118 | * @head: structure to be used for queueing the RCU updates. | ||
| 119 | * @func: actual callback function to be invoked after the grace period | ||
| 120 | * | ||
| 121 | * The callback function will be invoked some time after a full grace | ||
| 122 | * period elapses, in other words after all currently executing RCU | ||
| 123 | * read-side critical sections have completed. call_rcu_sched() assumes | ||
| 124 | * that the read-side critical sections end on enabling of preemption | ||
| 125 | * or on voluntary preemption. | ||
| 126 | * RCU read-side critical sections are delimited by : | ||
| 127 | * - rcu_read_lock_sched() and rcu_read_unlock_sched(), | ||
| 128 | * OR | ||
| 129 | * anything that disables preemption. | ||
| 130 | * These may be nested. | ||
| 131 | */ | ||
| 78 | extern void call_rcu_sched(struct rcu_head *head, | 132 | extern void call_rcu_sched(struct rcu_head *head, |
| 79 | void (*func)(struct rcu_head *rcu)); | 133 | void (*func)(struct rcu_head *rcu)); |
| 80 | extern void synchronize_sched(void); | ||
| 81 | extern void rcu_barrier_bh(void); | ||
| 82 | extern void rcu_barrier_sched(void); | ||
| 83 | |||
| 84 | static inline void __rcu_read_lock_bh(void) | ||
| 85 | { | ||
| 86 | local_bh_disable(); | ||
| 87 | } | ||
| 88 | 134 | ||
| 89 | static inline void __rcu_read_unlock_bh(void) | 135 | extern void synchronize_sched(void); |
| 90 | { | ||
| 91 | local_bh_enable(); | ||
| 92 | } | ||
| 93 | 136 | ||
| 94 | #ifdef CONFIG_PREEMPT_RCU | 137 | #ifdef CONFIG_PREEMPT_RCU |
| 95 | 138 | ||
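As a usage sketch for the call_rcu() family whose kernel-doc appears in the hunk above, the typical pattern embeds an rcu_head in the protected structure and frees the structure from the callback. The foo structure and function names here are purely illustrative:

	struct foo {
		struct list_head list;
		int data;
		struct rcu_head rcu;
	};

	static void foo_reclaim(struct rcu_head *head)
	{
		struct foo *fp = container_of(head, struct foo, rcu);

		kfree(fp);
	}

	static void foo_remove(struct foo *fp)
	{
		/* Caller holds whatever lock serializes updates to the list. */
		list_del_rcu(&fp->list);
		call_rcu(&fp->rcu, foo_reclaim);
	}

The same shape applies to call_rcu_bh() and call_rcu_sched(); only the flavor of read-side critical section that the grace period waits for differs.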
| @@ -152,6 +195,15 @@ static inline void rcu_exit_nohz(void) | |||
| 152 | 195 | ||
| 153 | #endif /* #else #ifdef CONFIG_NO_HZ */ | 196 | #endif /* #else #ifdef CONFIG_NO_HZ */ |
| 154 | 197 | ||
| 198 | /* | ||
| 199 | * Infrastructure to implement the synchronize_() primitives in | ||
| 200 | * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. | ||
| 201 | */ | ||
| 202 | |||
| 203 | typedef void call_rcu_func_t(struct rcu_head *head, | ||
| 204 | void (*func)(struct rcu_head *head)); | ||
| 205 | void wait_rcu_gp(call_rcu_func_t crf); | ||
| 206 | |||
| 155 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) | 207 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) |
| 156 | #include <linux/rcutree.h> | 208 | #include <linux/rcutree.h> |
| 157 | #elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) | 209 | #elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) |
| @@ -297,19 +349,31 @@ extern int rcu_my_thread_group_empty(void); | |||
| 297 | /** | 349 | /** |
| 298 | * rcu_lockdep_assert - emit lockdep splat if specified condition not met | 350 | * rcu_lockdep_assert - emit lockdep splat if specified condition not met |
| 299 | * @c: condition to check | 351 | * @c: condition to check |
| 352 | * @s: informative message | ||
| 300 | */ | 353 | */ |
| 301 | #define rcu_lockdep_assert(c) \ | 354 | #define rcu_lockdep_assert(c, s) \ |
| 302 | do { \ | 355 | do { \ |
| 303 | static bool __warned; \ | 356 | static bool __warned; \ |
| 304 | if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ | 357 | if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ |
| 305 | __warned = true; \ | 358 | __warned = true; \ |
| 306 | lockdep_rcu_dereference(__FILE__, __LINE__); \ | 359 | lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ |
| 307 | } \ | 360 | } \ |
| 308 | } while (0) | 361 | } while (0) |
| 309 | 362 | ||
| 363 | #define rcu_sleep_check() \ | ||
| 364 | do { \ | ||
| 365 | rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \ | ||
| 366 | "Illegal context switch in RCU-bh" \ | ||
| 367 | " read-side critical section"); \ | ||
| 368 | rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), \ | ||
| 369 | "Illegal context switch in RCU-sched"\ | ||
| 370 | " read-side critical section"); \ | ||
| 371 | } while (0) | ||
| 372 | |||
| 310 | #else /* #ifdef CONFIG_PROVE_RCU */ | 373 | #else /* #ifdef CONFIG_PROVE_RCU */ |
| 311 | 374 | ||
| 312 | #define rcu_lockdep_assert(c) do { } while (0) | 375 | #define rcu_lockdep_assert(c, s) do { } while (0) |
| 376 | #define rcu_sleep_check() do { } while (0) | ||
| 313 | 377 | ||
| 314 | #endif /* #else #ifdef CONFIG_PROVE_RCU */ | 378 | #endif /* #else #ifdef CONFIG_PROVE_RCU */ |
| 315 | 379 | ||
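For reference, a hypothetical use of the two-argument rcu_lockdep_assert() shown above might look like the following, with foo_lock and the message text purely illustrative; the string is what lockdep_rcu_suspicious() will print if the condition fails:

	rcu_lockdep_assert(lockdep_is_held(&foo_lock),
			   "foo_update() invoked without holding foo_lock");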
| @@ -338,14 +402,16 @@ extern int rcu_my_thread_group_empty(void); | |||
| 338 | #define __rcu_dereference_check(p, c, space) \ | 402 | #define __rcu_dereference_check(p, c, space) \ |
| 339 | ({ \ | 403 | ({ \ |
| 340 | typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ | 404 | typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ |
| 341 | rcu_lockdep_assert(c); \ | 405 | rcu_lockdep_assert(c, "suspicious rcu_dereference_check()" \ |
| 406 | " usage"); \ | ||
| 342 | rcu_dereference_sparse(p, space); \ | 407 | rcu_dereference_sparse(p, space); \ |
| 343 | smp_read_barrier_depends(); \ | 408 | smp_read_barrier_depends(); \ |
| 344 | ((typeof(*p) __force __kernel *)(_________p1)); \ | 409 | ((typeof(*p) __force __kernel *)(_________p1)); \ |
| 345 | }) | 410 | }) |
| 346 | #define __rcu_dereference_protected(p, c, space) \ | 411 | #define __rcu_dereference_protected(p, c, space) \ |
| 347 | ({ \ | 412 | ({ \ |
| 348 | rcu_lockdep_assert(c); \ | 413 | rcu_lockdep_assert(c, "suspicious rcu_dereference_protected()" \ |
| 414 | " usage"); \ | ||
| 349 | rcu_dereference_sparse(p, space); \ | 415 | rcu_dereference_sparse(p, space); \ |
| 350 | ((typeof(*p) __force __kernel *)(p)); \ | 416 | ((typeof(*p) __force __kernel *)(p)); \ |
| 351 | }) | 417 | }) |
| @@ -359,15 +425,15 @@ extern int rcu_my_thread_group_empty(void); | |||
| 359 | #define __rcu_dereference_index_check(p, c) \ | 425 | #define __rcu_dereference_index_check(p, c) \ |
| 360 | ({ \ | 426 | ({ \ |
| 361 | typeof(p) _________p1 = ACCESS_ONCE(p); \ | 427 | typeof(p) _________p1 = ACCESS_ONCE(p); \ |
| 362 | rcu_lockdep_assert(c); \ | 428 | rcu_lockdep_assert(c, \ |
| 429 | "suspicious rcu_dereference_index_check()" \ | ||
| 430 | " usage"); \ | ||
| 363 | smp_read_barrier_depends(); \ | 431 | smp_read_barrier_depends(); \ |
| 364 | (_________p1); \ | 432 | (_________p1); \ |
| 365 | }) | 433 | }) |
| 366 | #define __rcu_assign_pointer(p, v, space) \ | 434 | #define __rcu_assign_pointer(p, v, space) \ |
| 367 | ({ \ | 435 | ({ \ |
| 368 | if (!__builtin_constant_p(v) || \ | 436 | smp_wmb(); \ |
| 369 | ((v) != NULL)) \ | ||
| 370 | smp_wmb(); \ | ||
| 371 | (p) = (typeof(*v) __force space *)(v); \ | 437 | (p) = (typeof(*v) __force space *)(v); \ |
| 372 | }) | 438 | }) |
| 373 | 439 | ||
| @@ -500,26 +566,6 @@ extern int rcu_my_thread_group_empty(void); | |||
| 500 | #define rcu_dereference_protected(p, c) \ | 566 | #define rcu_dereference_protected(p, c) \ |
| 501 | __rcu_dereference_protected((p), (c), __rcu) | 567 | __rcu_dereference_protected((p), (c), __rcu) |
| 502 | 568 | ||
| 503 | /** | ||
| 504 | * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented | ||
| 505 | * @p: The pointer to read, prior to dereferencing | ||
| 506 | * @c: The conditions under which the dereference will take place | ||
| 507 | * | ||
| 508 | * This is the RCU-bh counterpart to rcu_dereference_protected(). | ||
| 509 | */ | ||
| 510 | #define rcu_dereference_bh_protected(p, c) \ | ||
| 511 | __rcu_dereference_protected((p), (c), __rcu) | ||
| 512 | |||
| 513 | /** | ||
| 514 | * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented | ||
| 515 | * @p: The pointer to read, prior to dereferencing | ||
| 516 | * @c: The conditions under which the dereference will take place | ||
| 517 | * | ||
| 518 | * This is the RCU-sched counterpart to rcu_dereference_protected(). | ||
| 519 | */ | ||
| 520 | #define rcu_dereference_sched_protected(p, c) \ | ||
| 521 | __rcu_dereference_protected((p), (c), __rcu) | ||
| 522 | |||
| 523 | 569 | ||
| 524 | /** | 570 | /** |
| 525 | * rcu_dereference() - fetch RCU-protected pointer for dereferencing | 571 | * rcu_dereference() - fetch RCU-protected pointer for dereferencing |
| @@ -630,7 +676,7 @@ static inline void rcu_read_unlock(void) | |||
| 630 | */ | 676 | */ |
| 631 | static inline void rcu_read_lock_bh(void) | 677 | static inline void rcu_read_lock_bh(void) |
| 632 | { | 678 | { |
| 633 | __rcu_read_lock_bh(); | 679 | local_bh_disable(); |
| 634 | __acquire(RCU_BH); | 680 | __acquire(RCU_BH); |
| 635 | rcu_read_acquire_bh(); | 681 | rcu_read_acquire_bh(); |
| 636 | } | 682 | } |
| @@ -644,7 +690,7 @@ static inline void rcu_read_unlock_bh(void) | |||
| 644 | { | 690 | { |
| 645 | rcu_read_release_bh(); | 691 | rcu_read_release_bh(); |
| 646 | __release(RCU_BH); | 692 | __release(RCU_BH); |
| 647 | __rcu_read_unlock_bh(); | 693 | local_bh_enable(); |
| 648 | } | 694 | } |
| 649 | 695 | ||
| 650 | /** | 696 | /** |
| @@ -698,11 +744,18 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) | |||
| 698 | * any prior initialization. Returns the value assigned. | 744 | * any prior initialization. Returns the value assigned. |
| 699 | * | 745 | * |
| 700 | * Inserts memory barriers on architectures that require them | 746 | * Inserts memory barriers on architectures that require them |
| 701 | * (pretty much all of them other than x86), and also prevents | 747 | * (which is most of them), and also prevents the compiler from |
| 702 | * the compiler from reordering the code that initializes the | 748 | * reordering the code that initializes the structure after the pointer |
| 703 | * structure after the pointer assignment. More importantly, this | 749 | * assignment. More importantly, this call documents which pointers |
| 704 | * call documents which pointers will be dereferenced by RCU read-side | 750 | * will be dereferenced by RCU read-side code. |
| 705 | * code. | 751 | * |
| 752 | * In some special cases, you may use RCU_INIT_POINTER() instead | ||
| 753 | * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due | ||
| 754 | * to the fact that it does not constrain either the CPU or the compiler. | ||
| 755 | * That said, using RCU_INIT_POINTER() when you should have used | ||
| 756 | * rcu_assign_pointer() is a very bad thing that results in | ||
| 757 | * impossible-to-diagnose memory corruption. So please be careful. | ||
| 758 | * See the RCU_INIT_POINTER() comment header for details. | ||
| 706 | */ | 759 | */ |
| 707 | #define rcu_assign_pointer(p, v) \ | 760 | #define rcu_assign_pointer(p, v) \ |
| 708 | __rcu_assign_pointer((p), (v), __rcu) | 761 | __rcu_assign_pointer((p), (v), __rcu) |
| @@ -710,105 +763,38 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) | |||
| 710 | /** | 763 | /** |
| 711 | * RCU_INIT_POINTER() - initialize an RCU protected pointer | 764 | * RCU_INIT_POINTER() - initialize an RCU protected pointer |
| 712 | * | 765 | * |
| 713 | * Initialize an RCU-protected pointer in such a way to avoid RCU-lockdep | 766 | * Initialize an RCU-protected pointer in special cases where readers |
| 714 | * splats. | 767 | * do not need ordering constraints on the CPU or the compiler. These |
| 768 | * special cases are: | ||
| 769 | * | ||
| 770 | * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer -or- | ||
| 771 | * 2. The caller has taken whatever steps are required to prevent | ||
| 772 | * RCU readers from concurrently accessing this pointer -or- | ||
| 773 | * 3. The referenced data structure has already been exposed to | ||
| 774 | * readers either at compile time or via rcu_assign_pointer() -and- | ||
| 775 | * a. You have not made -any- reader-visible changes to | ||
| 776 | * this structure since then -or- | ||
| 777 | * b. It is OK for readers accessing this structure from its | ||
| 778 | * new location to see the old state of the structure. (For | ||
| 779 | * example, the changes were to statistical counters or to | ||
| 780 | * other state where exact synchronization is not required.) | ||
| 781 | * | ||
| 782 | * Failure to follow these rules governing use of RCU_INIT_POINTER() will | ||
| 783 | * result in impossible-to-diagnose memory corruption. As in the structures | ||
| 784 | * will look OK in crash dumps, but any concurrent RCU readers might | ||
| 785 | * see pre-initialized values of the referenced data structure. So | ||
| 786 | * please be very careful how you use RCU_INIT_POINTER()!!! | ||
| 787 | * | ||
| 788 | * If you are creating an RCU-protected linked structure that is accessed | ||
| 789 | * by a single external-to-structure RCU-protected pointer, then you may | ||
| 790 | * use RCU_INIT_POINTER() to initialize the internal RCU-protected | ||
| 791 | * pointers, but you must use rcu_assign_pointer() to initialize the | ||
| 792 | * external-to-structure pointer -after- you have completely initialized | ||
| 793 | * the reader-accessible portions of the linked structure. | ||
| 715 | */ | 794 | */ |
| 716 | #define RCU_INIT_POINTER(p, v) \ | 795 | #define RCU_INIT_POINTER(p, v) \ |
| 717 | p = (typeof(*v) __force __rcu *)(v) | 796 | p = (typeof(*v) __force __rcu *)(v) |
| 718 | 797 | ||
| 719 | /* Infrastructure to implement the synchronize_() primitives. */ | ||
| 720 | |||
| 721 | struct rcu_synchronize { | ||
| 722 | struct rcu_head head; | ||
| 723 | struct completion completion; | ||
| 724 | }; | ||
| 725 | |||
| 726 | extern void wakeme_after_rcu(struct rcu_head *head); | ||
| 727 | |||
| 728 | #ifdef CONFIG_PREEMPT_RCU | ||
| 729 | |||
| 730 | /** | ||
| 731 | * call_rcu() - Queue an RCU callback for invocation after a grace period. | ||
| 732 | * @head: structure to be used for queueing the RCU updates. | ||
| 733 | * @func: actual callback function to be invoked after the grace period | ||
| 734 | * | ||
| 735 | * The callback function will be invoked some time after a full grace | ||
| 736 | * period elapses, in other words after all pre-existing RCU read-side | ||
| 737 | * critical sections have completed. However, the callback function | ||
| 738 | * might well execute concurrently with RCU read-side critical sections | ||
| 739 | * that started after call_rcu() was invoked. RCU read-side critical | ||
| 740 | * sections are delimited by rcu_read_lock() and rcu_read_unlock(), | ||
| 741 | * and may be nested. | ||
| 742 | */ | ||
| 743 | extern void call_rcu(struct rcu_head *head, | ||
| 744 | void (*func)(struct rcu_head *head)); | ||
| 745 | |||
| 746 | #else /* #ifdef CONFIG_PREEMPT_RCU */ | ||
| 747 | |||
| 748 | /* In classic RCU, call_rcu() is just call_rcu_sched(). */ | ||
| 749 | #define call_rcu call_rcu_sched | ||
| 750 | |||
| 751 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | ||
| 752 | |||
| 753 | /** | ||
| 754 | * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. | ||
| 755 | * @head: structure to be used for queueing the RCU updates. | ||
| 756 | * @func: actual callback function to be invoked after the grace period | ||
| 757 | * | ||
| 758 | * The callback function will be invoked some time after a full grace | ||
| 759 | * period elapses, in other words after all currently executing RCU | ||
| 760 | * read-side critical sections have completed. call_rcu_bh() assumes | ||
| 761 | * that the read-side critical sections end on completion of a softirq | ||
| 762 | * handler. This means that read-side critical sections in process | ||
| 763 | * context must not be interrupted by softirqs. This interface is to be | ||
| 764 | * used when most of the read-side critical sections are in softirq context. | ||
| 765 | * RCU read-side critical sections are delimited by : | ||
| 766 | * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context. | ||
| 767 | * OR | ||
| 768 | * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. | ||
| 769 | * These may be nested. | ||
| 770 | */ | ||
| 771 | extern void call_rcu_bh(struct rcu_head *head, | ||
| 772 | void (*func)(struct rcu_head *head)); | ||
| 773 | |||
| 774 | /* | ||
| 775 | * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally | ||
| 776 | * by call_rcu() and rcu callback execution, and are therefore not part of the | ||
| 777 | * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors. | ||
| 778 | */ | ||
| 779 | |||
| 780 | #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD | ||
| 781 | # define STATE_RCU_HEAD_READY 0 | ||
| 782 | # define STATE_RCU_HEAD_QUEUED 1 | ||
| 783 | |||
| 784 | extern struct debug_obj_descr rcuhead_debug_descr; | ||
| 785 | |||
| 786 | static inline void debug_rcu_head_queue(struct rcu_head *head) | ||
| 787 | { | ||
| 788 | WARN_ON_ONCE((unsigned long)head & 0x3); | ||
| 789 | debug_object_activate(head, &rcuhead_debug_descr); | ||
| 790 | debug_object_active_state(head, &rcuhead_debug_descr, | ||
| 791 | STATE_RCU_HEAD_READY, | ||
| 792 | STATE_RCU_HEAD_QUEUED); | ||
| 793 | } | ||
| 794 | |||
| 795 | static inline void debug_rcu_head_unqueue(struct rcu_head *head) | ||
| 796 | { | ||
| 797 | debug_object_active_state(head, &rcuhead_debug_descr, | ||
| 798 | STATE_RCU_HEAD_QUEUED, | ||
| 799 | STATE_RCU_HEAD_READY); | ||
| 800 | debug_object_deactivate(head, &rcuhead_debug_descr); | ||
| 801 | } | ||
| 802 | #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ | ||
| 803 | static inline void debug_rcu_head_queue(struct rcu_head *head) | ||
| 804 | { | ||
| 805 | } | ||
| 806 | |||
| 807 | static inline void debug_rcu_head_unqueue(struct rcu_head *head) | ||
| 808 | { | ||
| 809 | } | ||
| 810 | #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ | ||
| 811 | |||
| 812 | static __always_inline bool __is_kfree_rcu_offset(unsigned long offset) | 798 | static __always_inline bool __is_kfree_rcu_offset(unsigned long offset) |
| 813 | { | 799 | { |
| 814 | return offset < 4096; | 800 | return offset < 4096; |
| @@ -827,18 +813,6 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset) | |||
| 827 | call_rcu(head, (rcu_callback)offset); | 813 | call_rcu(head, (rcu_callback)offset); |
| 828 | } | 814 | } |
| 829 | 815 | ||
| 830 | extern void kfree(const void *); | ||
| 831 | |||
| 832 | static inline void __rcu_reclaim(struct rcu_head *head) | ||
| 833 | { | ||
| 834 | unsigned long offset = (unsigned long)head->func; | ||
| 835 | |||
| 836 | if (__is_kfree_rcu_offset(offset)) | ||
| 837 | kfree((void *)head - offset); | ||
| 838 | else | ||
| 839 | head->func(head); | ||
| 840 | } | ||
| 841 | |||
| 842 | /** | 816 | /** |
| 843 | * kfree_rcu() - kfree an object after a grace period. | 817 | * kfree_rcu() - kfree an object after a grace period. |
| 844 | * @ptr: pointer to kfree | 818 | * @ptr: pointer to kfree |
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 52b3e0281fd0..00b7a5e493d2 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h | |||
| @@ -27,9 +27,23 @@ | |||
| 27 | 27 | ||
| 28 | #include <linux/cache.h> | 28 | #include <linux/cache.h> |
| 29 | 29 | ||
| 30 | #ifdef CONFIG_RCU_BOOST | ||
| 30 | static inline void rcu_init(void) | 31 | static inline void rcu_init(void) |
| 31 | { | 32 | { |
| 32 | } | 33 | } |
| 34 | #else /* #ifdef CONFIG_RCU_BOOST */ | ||
| 35 | void rcu_init(void); | ||
| 36 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | ||
| 37 | |||
| 38 | static inline void rcu_barrier_bh(void) | ||
| 39 | { | ||
| 40 | wait_rcu_gp(call_rcu_bh); | ||
| 41 | } | ||
| 42 | |||
| 43 | static inline void rcu_barrier_sched(void) | ||
| 44 | { | ||
| 45 | wait_rcu_gp(call_rcu_sched); | ||
| 46 | } | ||
| 33 | 47 | ||
| 34 | #ifdef CONFIG_TINY_RCU | 48 | #ifdef CONFIG_TINY_RCU |
| 35 | 49 | ||
| @@ -45,9 +59,13 @@ static inline void rcu_barrier(void) | |||
| 45 | 59 | ||
| 46 | #else /* #ifdef CONFIG_TINY_RCU */ | 60 | #else /* #ifdef CONFIG_TINY_RCU */ |
| 47 | 61 | ||
| 48 | void rcu_barrier(void); | ||
| 49 | void synchronize_rcu_expedited(void); | 62 | void synchronize_rcu_expedited(void); |
| 50 | 63 | ||
| 64 | static inline void rcu_barrier(void) | ||
| 65 | { | ||
| 66 | wait_rcu_gp(call_rcu); | ||
| 67 | } | ||
| 68 | |||
| 51 | #endif /* #else #ifdef CONFIG_TINY_RCU */ | 69 | #endif /* #else #ifdef CONFIG_TINY_RCU */ |
| 52 | 70 | ||
| 53 | static inline void synchronize_rcu_bh(void) | 71 | static inline void synchronize_rcu_bh(void) |
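
With wait_rcu_gp() factored out (see the kernel/rcupdate.c hunk below), Tiny RCU's rcu_barrier(), rcu_barrier_bh() and rcu_barrier_sched() each reduce to posting one callback and waiting for it; on a single CPU with in-order callback invocation that is enough to guarantee that all earlier callbacks have run. A minimal sketch of the caller pattern these primitives serve, using hypothetical names (my_table, my_table_lock, struct my_entry, my_entry_free_cb):

	/*
	 * Illustration only: a teardown path must not free the backing
	 * store for objects whose call_rcu() callbacks may still be
	 * pending.  rcu_barrier() waits for every queued callback.
	 */
	static void my_table_teardown(void)
	{
		struct my_entry *e, *tmp;

		spin_lock(&my_table_lock);
		list_for_each_entry_safe(e, tmp, &my_table, list) {
			list_del_rcu(&e->list);
			call_rcu(&e->rcu, my_entry_free_cb);	/* queued, not yet run */
		}
		spin_unlock(&my_table_lock);

		rcu_barrier();	/* all previously queued callbacks have now run */
		/* e.g. destroying the kmem_cache backing my_entry is safe here */
	}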
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index e65d06634dd8..67458468f1a8 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h | |||
| @@ -67,6 +67,8 @@ static inline void synchronize_rcu_bh_expedited(void) | |||
| 67 | } | 67 | } |
| 68 | 68 | ||
| 69 | extern void rcu_barrier(void); | 69 | extern void rcu_barrier(void); |
| 70 | extern void rcu_barrier_bh(void); | ||
| 71 | extern void rcu_barrier_sched(void); | ||
| 70 | 72 | ||
| 71 | extern unsigned long rcutorture_testseq; | 73 | extern unsigned long rcutorture_testseq; |
| 72 | extern unsigned long rcutorture_vernum; | 74 | extern unsigned long rcutorture_vernum; |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ac2c0578e0f..acca43560805 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
| @@ -270,7 +270,6 @@ extern void init_idle_bootup_task(struct task_struct *idle); | |||
| 270 | 270 | ||
| 271 | extern int runqueue_is_locked(int cpu); | 271 | extern int runqueue_is_locked(int cpu); |
| 272 | 272 | ||
| 273 | extern cpumask_var_t nohz_cpu_mask; | ||
| 274 | #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) | 273 | #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) |
| 275 | extern void select_nohz_load_balancer(int stop_tick); | 274 | extern void select_nohz_load_balancer(int stop_tick); |
| 276 | extern int get_nohz_timer_target(void); | 275 | extern int get_nohz_timer_target(void); |
| @@ -1260,9 +1259,6 @@ struct task_struct { | |||
| 1260 | #ifdef CONFIG_PREEMPT_RCU | 1259 | #ifdef CONFIG_PREEMPT_RCU |
| 1261 | int rcu_read_lock_nesting; | 1260 | int rcu_read_lock_nesting; |
| 1262 | char rcu_read_unlock_special; | 1261 | char rcu_read_unlock_special; |
| 1263 | #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) | ||
| 1264 | int rcu_boosted; | ||
| 1265 | #endif /* #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) */ | ||
| 1266 | struct list_head rcu_node_entry; | 1262 | struct list_head rcu_node_entry; |
| 1267 | #endif /* #ifdef CONFIG_PREEMPT_RCU */ | 1263 | #endif /* #ifdef CONFIG_PREEMPT_RCU */ |
| 1268 | #ifdef CONFIG_TREE_PREEMPT_RCU | 1264 | #ifdef CONFIG_TREE_PREEMPT_RCU |
diff --git a/include/linux/types.h b/include/linux/types.h index 176da8c1fbb1..57a97234bec1 100644 --- a/include/linux/types.h +++ b/include/linux/types.h | |||
| @@ -238,6 +238,16 @@ struct ustat { | |||
| 238 | char f_fpack[6]; | 238 | char f_fpack[6]; |
| 239 | }; | 239 | }; |
| 240 | 240 | ||
| 241 | /** | ||
| 242 | * struct rcu_head - callback structure for use with RCU | ||
| 243 | * @next: next update requests in a list | ||
| 244 | * @func: actual update function to call after the grace period. | ||
| 245 | */ | ||
| 246 | struct rcu_head { | ||
| 247 | struct rcu_head *next; | ||
| 248 | void (*func)(struct rcu_head *head); | ||
| 249 | }; | ||
| 250 | |||
| 241 | #endif /* __KERNEL__ */ | 251 | #endif /* __KERNEL__ */ |
| 242 | #endif /* __ASSEMBLY__ */ | 252 | #endif /* __ASSEMBLY__ */ |
| 243 | #endif /* _LINUX_TYPES_H */ | 253 | #endif /* _LINUX_TYPES_H */ |
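
Because struct rcu_head now lives in linux/types.h, any structure definition can embed it without pulling in rcupdate.h. The usual embedding pattern, sketched with a hypothetical struct foo (the callback recovers the enclosing object with container_of() and frees it):

	struct foo {
		int key;
		struct rcu_head rcu;	/* reclamation hook */
	};

	static void foo_free_rcu(struct rcu_head *head)
	{
		/* Map the rcu_head back to the object that embeds it. */
		struct foo *fp = container_of(head, struct foo, rcu);

		kfree(fp);
	}

	static void foo_release(struct foo *fp)
	{
		/* fp must already be unreachable by new RCU readers. */
		call_rcu(&fp->rcu, foo_free_rcu);
	}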
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h new file mode 100644 index 000000000000..669fbd62ec25 --- /dev/null +++ b/include/trace/events/rcu.h | |||
| @@ -0,0 +1,459 @@ | |||
| 1 | #undef TRACE_SYSTEM | ||
| 2 | #define TRACE_SYSTEM rcu | ||
| 3 | |||
| 4 | #if !defined(_TRACE_RCU_H) || defined(TRACE_HEADER_MULTI_READ) | ||
| 5 | #define _TRACE_RCU_H | ||
| 6 | |||
| 7 | #include <linux/tracepoint.h> | ||
| 8 | |||
| 9 | /* | ||
| 10 | * Tracepoint for start/end markers used for utilization calculations. | ||
| 11 | * By convention, the string is of the following forms: | ||
| 12 | * | ||
| 13 | * "Start <activity>" -- Mark the start of the specified activity, | ||
| 14 | * such as "context switch". Nesting is permitted. | ||
| 15 | * "End <activity>" -- Mark the end of the specified activity. | ||
| 16 | * | ||
| 17 | * An "@" character within "<activity>" is a comment character: Data | ||
| 18 | * reduction scripts will ignore the "@" and the remainder of the line. | ||
| 19 | */ | ||
| 20 | TRACE_EVENT(rcu_utilization, | ||
| 21 | |||
| 22 | TP_PROTO(char *s), | ||
| 23 | |||
| 24 | TP_ARGS(s), | ||
| 25 | |||
| 26 | TP_STRUCT__entry( | ||
| 27 | __field(char *, s) | ||
| 28 | ), | ||
| 29 | |||
| 30 | TP_fast_assign( | ||
| 31 | __entry->s = s; | ||
| 32 | ), | ||
| 33 | |||
| 34 | TP_printk("%s", __entry->s) | ||
| 35 | ); | ||
| 36 | |||
| 37 | #ifdef CONFIG_RCU_TRACE | ||
| 38 | |||
| 39 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) | ||
| 40 | |||
| 41 | /* | ||
| 42 | * Tracepoint for grace-period events: starting and ending a grace | ||
| 43 | * period ("start" and "end", respectively), a CPU noting the start | ||
| 44 | * of a new grace period or the end of an old grace period ("cpustart" | ||
| 45 | * and "cpuend", respectively), a CPU passing through a quiescent | ||
| 46 | * state ("cpuqs"), a CPU coming online or going offline ("cpuonl" | ||
| 47 | * and "cpuofl", respectively), and a CPU being kicked for being too | ||
| 48 | * long in dyntick-idle mode ("kick"). | ||
| 49 | */ | ||
| 50 | TRACE_EVENT(rcu_grace_period, | ||
| 51 | |||
| 52 | TP_PROTO(char *rcuname, unsigned long gpnum, char *gpevent), | ||
| 53 | |||
| 54 | TP_ARGS(rcuname, gpnum, gpevent), | ||
| 55 | |||
| 56 | TP_STRUCT__entry( | ||
| 57 | __field(char *, rcuname) | ||
| 58 | __field(unsigned long, gpnum) | ||
| 59 | __field(char *, gpevent) | ||
| 60 | ), | ||
| 61 | |||
| 62 | TP_fast_assign( | ||
| 63 | __entry->rcuname = rcuname; | ||
| 64 | __entry->gpnum = gpnum; | ||
| 65 | __entry->gpevent = gpevent; | ||
| 66 | ), | ||
| 67 | |||
| 68 | TP_printk("%s %lu %s", | ||
| 69 | __entry->rcuname, __entry->gpnum, __entry->gpevent) | ||
| 70 | ); | ||
| 71 | |||
| 72 | /* | ||
| 73 | * Tracepoint for grace-period-initialization events. These are | ||
| 74 | * distinguished by the type of RCU, the new grace-period number, the | ||
| 75 | * rcu_node structure level, the starting and ending CPU covered by the | ||
| 76 | * rcu_node structure, and the mask of CPUs that will be waited for. | ||
| 77 | * All but the type of RCU are extracted from the rcu_node structure. | ||
| 78 | */ | ||
| 79 | TRACE_EVENT(rcu_grace_period_init, | ||
| 80 | |||
| 81 | TP_PROTO(char *rcuname, unsigned long gpnum, u8 level, | ||
| 82 | int grplo, int grphi, unsigned long qsmask), | ||
| 83 | |||
| 84 | TP_ARGS(rcuname, gpnum, level, grplo, grphi, qsmask), | ||
| 85 | |||
| 86 | TP_STRUCT__entry( | ||
| 87 | __field(char *, rcuname) | ||
| 88 | __field(unsigned long, gpnum) | ||
| 89 | __field(u8, level) | ||
| 90 | __field(int, grplo) | ||
| 91 | __field(int, grphi) | ||
| 92 | __field(unsigned long, qsmask) | ||
| 93 | ), | ||
| 94 | |||
| 95 | TP_fast_assign( | ||
| 96 | __entry->rcuname = rcuname; | ||
| 97 | __entry->gpnum = gpnum; | ||
| 98 | __entry->level = level; | ||
| 99 | __entry->grplo = grplo; | ||
| 100 | __entry->grphi = grphi; | ||
| 101 | __entry->qsmask = qsmask; | ||
| 102 | ), | ||
| 103 | |||
| 104 | TP_printk("%s %lu %u %d %d %lx", | ||
| 105 | __entry->rcuname, __entry->gpnum, __entry->level, | ||
| 106 | __entry->grplo, __entry->grphi, __entry->qsmask) | ||
| 107 | ); | ||
| 108 | |||
| 109 | /* | ||
| 110 | * Tracepoint for tasks blocking within preemptible-RCU read-side | ||
| 111 | * critical sections. Track the type of RCU (which one day might | ||
| 112 | * include SRCU), the grace-period number that the task is blocking | ||
| 113 | * (the current or the next), and the task's PID. | ||
| 114 | */ | ||
| 115 | TRACE_EVENT(rcu_preempt_task, | ||
| 116 | |||
| 117 | TP_PROTO(char *rcuname, int pid, unsigned long gpnum), | ||
| 118 | |||
| 119 | TP_ARGS(rcuname, pid, gpnum), | ||
| 120 | |||
| 121 | TP_STRUCT__entry( | ||
| 122 | __field(char *, rcuname) | ||
| 123 | __field(unsigned long, gpnum) | ||
| 124 | __field(int, pid) | ||
| 125 | ), | ||
| 126 | |||
| 127 | TP_fast_assign( | ||
| 128 | __entry->rcuname = rcuname; | ||
| 129 | __entry->gpnum = gpnum; | ||
| 130 | __entry->pid = pid; | ||
| 131 | ), | ||
| 132 | |||
| 133 | TP_printk("%s %lu %d", | ||
| 134 | __entry->rcuname, __entry->gpnum, __entry->pid) | ||
| 135 | ); | ||
| 136 | |||
| 137 | /* | ||
| 138 | * Tracepoint for tasks that blocked within a given preemptible-RCU | ||
| 139 | * read-side critical section exiting that critical section. Track the | ||
| 140 | * type of RCU (which one day might include SRCU) and the task's PID. | ||
| 141 | */ | ||
| 142 | TRACE_EVENT(rcu_unlock_preempted_task, | ||
| 143 | |||
| 144 | TP_PROTO(char *rcuname, unsigned long gpnum, int pid), | ||
| 145 | |||
| 146 | TP_ARGS(rcuname, gpnum, pid), | ||
| 147 | |||
| 148 | TP_STRUCT__entry( | ||
| 149 | __field(char *, rcuname) | ||
| 150 | __field(unsigned long, gpnum) | ||
| 151 | __field(int, pid) | ||
| 152 | ), | ||
| 153 | |||
| 154 | TP_fast_assign( | ||
| 155 | __entry->rcuname = rcuname; | ||
| 156 | __entry->gpnum = gpnum; | ||
| 157 | __entry->pid = pid; | ||
| 158 | ), | ||
| 159 | |||
| 160 | TP_printk("%s %lu %d", __entry->rcuname, __entry->gpnum, __entry->pid) | ||
| 161 | ); | ||
| 162 | |||
| 163 | /* | ||
| 164 | * Tracepoint for quiescent-state-reporting events. These are | ||
| 165 | * distinguished by the type of RCU, the grace-period number, the | ||
| 166 | * mask of quiescent lower-level entities, the rcu_node structure level, | ||
| 167 | * the starting and ending CPU covered by the rcu_node structure, and | ||
| 168 | * whether there are any blocked tasks blocking the current grace period. | ||
| 169 | * All but the type of RCU are extracted from the rcu_node structure. | ||
| 170 | */ | ||
| 171 | TRACE_EVENT(rcu_quiescent_state_report, | ||
| 172 | |||
| 173 | TP_PROTO(char *rcuname, unsigned long gpnum, | ||
| 174 | unsigned long mask, unsigned long qsmask, | ||
| 175 | u8 level, int grplo, int grphi, int gp_tasks), | ||
| 176 | |||
| 177 | TP_ARGS(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks), | ||
| 178 | |||
| 179 | TP_STRUCT__entry( | ||
| 180 | __field(char *, rcuname) | ||
| 181 | __field(unsigned long, gpnum) | ||
| 182 | __field(unsigned long, mask) | ||
| 183 | __field(unsigned long, qsmask) | ||
| 184 | __field(u8, level) | ||
| 185 | __field(int, grplo) | ||
| 186 | __field(int, grphi) | ||
| 187 | __field(u8, gp_tasks) | ||
| 188 | ), | ||
| 189 | |||
| 190 | TP_fast_assign( | ||
| 191 | __entry->rcuname = rcuname; | ||
| 192 | __entry->gpnum = gpnum; | ||
| 193 | __entry->mask = mask; | ||
| 194 | __entry->qsmask = qsmask; | ||
| 195 | __entry->level = level; | ||
| 196 | __entry->grplo = grplo; | ||
| 197 | __entry->grphi = grphi; | ||
| 198 | __entry->gp_tasks = gp_tasks; | ||
| 199 | ), | ||
| 200 | |||
| 201 | TP_printk("%s %lu %lx>%lx %u %d %d %u", | ||
| 202 | __entry->rcuname, __entry->gpnum, | ||
| 203 | __entry->mask, __entry->qsmask, __entry->level, | ||
| 204 | __entry->grplo, __entry->grphi, __entry->gp_tasks) | ||
| 205 | ); | ||
| 206 | |||
| 207 | /* | ||
| 208 | * Tracepoint for quiescent states detected by force_quiescent_state(). | ||
| 209 | * These trace events include the type of RCU, the grace-period number | ||
| 210 | * that was blocked by the CPU, the CPU itself, and the type of quiescent | ||
| 211 | * state, which can be "dti" for dyntick-idle mode, "ofl" for CPU offline, | ||
| 212 | * or "kick" when kicking a CPU that has been in dyntick-idle mode for | ||
| 213 | * too long. | ||
| 214 | */ | ||
| 215 | TRACE_EVENT(rcu_fqs, | ||
| 216 | |||
| 217 | TP_PROTO(char *rcuname, unsigned long gpnum, int cpu, char *qsevent), | ||
| 218 | |||
| 219 | TP_ARGS(rcuname, gpnum, cpu, qsevent), | ||
| 220 | |||
| 221 | TP_STRUCT__entry( | ||
| 222 | __field(char *, rcuname) | ||
| 223 | __field(unsigned long, gpnum) | ||
| 224 | __field(int, cpu) | ||
| 225 | __field(char *, qsevent) | ||
| 226 | ), | ||
| 227 | |||
| 228 | TP_fast_assign( | ||
| 229 | __entry->rcuname = rcuname; | ||
| 230 | __entry->gpnum = gpnum; | ||
| 231 | __entry->cpu = cpu; | ||
| 232 | __entry->qsevent = qsevent; | ||
| 233 | ), | ||
| 234 | |||
| 235 | TP_printk("%s %lu %d %s", | ||
| 236 | __entry->rcuname, __entry->gpnum, | ||
| 237 | __entry->cpu, __entry->qsevent) | ||
| 238 | ); | ||
| 239 | |||
| 240 | #endif /* #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) */ | ||
| 241 | |||
| 242 | /* | ||
| 243 | * Tracepoint for dyntick-idle entry/exit events. These take a string | ||
| 244 | * as argument: "Start" for entering dyntick-idle mode and "End" for | ||
| 245 | * leaving it. | ||
| 246 | */ | ||
| 247 | TRACE_EVENT(rcu_dyntick, | ||
| 248 | |||
| 249 | TP_PROTO(char *polarity), | ||
| 250 | |||
| 251 | TP_ARGS(polarity), | ||
| 252 | |||
| 253 | TP_STRUCT__entry( | ||
| 254 | __field(char *, polarity) | ||
| 255 | ), | ||
| 256 | |||
| 257 | TP_fast_assign( | ||
| 258 | __entry->polarity = polarity; | ||
| 259 | ), | ||
| 260 | |||
| 261 | TP_printk("%s", __entry->polarity) | ||
| 262 | ); | ||
| 263 | |||
| 264 | /* | ||
| 265 | * Tracepoint for the registration of a single RCU callback function. | ||
| 266 | * The first argument is the type of RCU, the second argument is | ||
| 267 | * a pointer to the RCU callback itself, and the third element is the | ||
| 268 | * new RCU callback queue length for the current CPU. | ||
| 269 | */ | ||
| 270 | TRACE_EVENT(rcu_callback, | ||
| 271 | |||
| 272 | TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen), | ||
| 273 | |||
| 274 | TP_ARGS(rcuname, rhp, qlen), | ||
| 275 | |||
| 276 | TP_STRUCT__entry( | ||
| 277 | __field(char *, rcuname) | ||
| 278 | __field(void *, rhp) | ||
| 279 | __field(void *, func) | ||
| 280 | __field(long, qlen) | ||
| 281 | ), | ||
| 282 | |||
| 283 | TP_fast_assign( | ||
| 284 | __entry->rcuname = rcuname; | ||
| 285 | __entry->rhp = rhp; | ||
| 286 | __entry->func = rhp->func; | ||
| 287 | __entry->qlen = qlen; | ||
| 288 | ), | ||
| 289 | |||
| 290 | TP_printk("%s rhp=%p func=%pf %ld", | ||
| 291 | __entry->rcuname, __entry->rhp, __entry->func, __entry->qlen) | ||
| 292 | ); | ||
| 293 | |||
| 294 | /* | ||
| 295 | * Tracepoint for the registration of a single RCU callback of the special | ||
| 296 | * kfree() form. The first argument is the RCU type, the second argument | ||
| 297 | * is a pointer to the RCU callback, the third argument is the offset | ||
| 298 | * of the callback within the enclosing RCU-protected data structure, | ||
| 299 | * and the fourth argument is the new RCU callback queue length for the | ||
| 300 | * current CPU. | ||
| 301 | */ | ||
| 302 | TRACE_EVENT(rcu_kfree_callback, | ||
| 303 | |||
| 304 | TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset, | ||
| 305 | long qlen), | ||
| 306 | |||
| 307 | TP_ARGS(rcuname, rhp, offset, qlen), | ||
| 308 | |||
| 309 | TP_STRUCT__entry( | ||
| 310 | __field(char *, rcuname) | ||
| 311 | __field(void *, rhp) | ||
| 312 | __field(unsigned long, offset) | ||
| 313 | __field(long, qlen) | ||
| 314 | ), | ||
| 315 | |||
| 316 | TP_fast_assign( | ||
| 317 | __entry->rcuname = rcuname; | ||
| 318 | __entry->rhp = rhp; | ||
| 319 | __entry->offset = offset; | ||
| 320 | __entry->qlen = qlen; | ||
| 321 | ), | ||
| 322 | |||
| 323 | TP_printk("%s rhp=%p func=%ld %ld", | ||
| 324 | __entry->rcuname, __entry->rhp, __entry->offset, | ||
| 325 | __entry->qlen) | ||
| 326 | ); | ||
| 327 | |||
| 328 | /* | ||
| 329 | * Tracepoint for marking the beginning of rcu_do_batch, performed to start | ||
| 330 | * RCU callback invocation. The first argument is the RCU flavor, | ||
| 331 | * the second is the total number of callbacks (including those that | ||
| 332 | * are not yet ready to be invoked), and the third argument is the | ||
| 333 | * current RCU-callback batch limit. | ||
| 334 | */ | ||
| 335 | TRACE_EVENT(rcu_batch_start, | ||
| 336 | |||
| 337 | TP_PROTO(char *rcuname, long qlen, int blimit), | ||
| 338 | |||
| 339 | TP_ARGS(rcuname, qlen, blimit), | ||
| 340 | |||
| 341 | TP_STRUCT__entry( | ||
| 342 | __field(char *, rcuname) | ||
| 343 | __field(long, qlen) | ||
| 344 | __field(int, blimit) | ||
| 345 | ), | ||
| 346 | |||
| 347 | TP_fast_assign( | ||
| 348 | __entry->rcuname = rcuname; | ||
| 349 | __entry->qlen = qlen; | ||
| 350 | __entry->blimit = blimit; | ||
| 351 | ), | ||
| 352 | |||
| 353 | TP_printk("%s CBs=%ld bl=%d", | ||
| 354 | __entry->rcuname, __entry->qlen, __entry->blimit) | ||
| 355 | ); | ||
| 356 | |||
| 357 | /* | ||
| 358 | * Tracepoint for the invocation of a single RCU callback function. | ||
| 359 | * The first argument is the type of RCU, and the second argument is | ||
| 360 | * a pointer to the RCU callback itself. | ||
| 361 | */ | ||
| 362 | TRACE_EVENT(rcu_invoke_callback, | ||
| 363 | |||
| 364 | TP_PROTO(char *rcuname, struct rcu_head *rhp), | ||
| 365 | |||
| 366 | TP_ARGS(rcuname, rhp), | ||
| 367 | |||
| 368 | TP_STRUCT__entry( | ||
| 369 | __field(char *, rcuname) | ||
| 370 | __field(void *, rhp) | ||
| 371 | __field(void *, func) | ||
| 372 | ), | ||
| 373 | |||
| 374 | TP_fast_assign( | ||
| 375 | __entry->rcuname = rcuname; | ||
| 376 | __entry->rhp = rhp; | ||
| 377 | __entry->func = rhp->func; | ||
| 378 | ), | ||
| 379 | |||
| 380 | TP_printk("%s rhp=%p func=%pf", | ||
| 381 | __entry->rcuname, __entry->rhp, __entry->func) | ||
| 382 | ); | ||
| 383 | |||
| 384 | /* | ||
| 385 | * Tracepoint for the invocation of a single RCU callback of the special | ||
| 386 | * kfree() form. The first argument is the RCU flavor, the second | ||
| 387 | * argument is a pointer to the RCU callback, and the third argument | ||
| 388 | * is the offset of the callback within the enclosing RCU-protected | ||
| 389 | * data structure. | ||
| 390 | */ | ||
| 391 | TRACE_EVENT(rcu_invoke_kfree_callback, | ||
| 392 | |||
| 393 | TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset), | ||
| 394 | |||
| 395 | TP_ARGS(rcuname, rhp, offset), | ||
| 396 | |||
| 397 | TP_STRUCT__entry( | ||
| 398 | __field(char *, rcuname) | ||
| 399 | __field(void *, rhp) | ||
| 400 | __field(unsigned long, offset) | ||
| 401 | ), | ||
| 402 | |||
| 403 | TP_fast_assign( | ||
| 404 | __entry->rcuname = rcuname; | ||
| 405 | __entry->rhp = rhp; | ||
| 406 | __entry->offset = offset; | ||
| 407 | ), | ||
| 408 | |||
| 409 | TP_printk("%s rhp=%p func=%ld", | ||
| 410 | __entry->rcuname, __entry->rhp, __entry->offset) | ||
| 411 | ); | ||
| 412 | |||
| 413 | /* | ||
| 414 | * Tracepoint for exiting rcu_do_batch after RCU callbacks have been | ||
| 415 | * invoked. The first argument is the name of the RCU flavor and | ||
| 416 | * the second argument is the number of callbacks actually invoked. | ||
| 417 | */ | ||
| 418 | TRACE_EVENT(rcu_batch_end, | ||
| 419 | |||
| 420 | TP_PROTO(char *rcuname, int callbacks_invoked), | ||
| 421 | |||
| 422 | TP_ARGS(rcuname, callbacks_invoked), | ||
| 423 | |||
| 424 | TP_STRUCT__entry( | ||
| 425 | __field(char *, rcuname) | ||
| 426 | __field(int, callbacks_invoked) | ||
| 427 | ), | ||
| 428 | |||
| 429 | TP_fast_assign( | ||
| 430 | __entry->rcuname = rcuname; | ||
| 431 | __entry->callbacks_invoked = callbacks_invoked; | ||
| 432 | ), | ||
| 433 | |||
| 434 | TP_printk("%s CBs-invoked=%d", | ||
| 435 | __entry->rcuname, __entry->callbacks_invoked) | ||
| 436 | ); | ||
| 437 | |||
| 438 | #else /* #ifdef CONFIG_RCU_TRACE */ | ||
| 439 | |||
| 440 | #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) | ||
| 441 | #define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, qsmask) do { } while (0) | ||
| 442 | #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) | ||
| 443 | #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) | ||
| 444 | #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) | ||
| 445 | #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) | ||
| 446 | #define trace_rcu_dyntick(polarity) do { } while (0) | ||
| 447 | #define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) | ||
| 448 | #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) | ||
| 449 | #define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) | ||
| 450 | #define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) | ||
| 451 | #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) | ||
| 452 | #define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0) | ||
| 453 | |||
| 454 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | ||
| 455 | |||
| 456 | #endif /* _TRACE_RCU_H */ | ||
| 457 | |||
| 458 | /* This part must be outside protection */ | ||
| 459 | #include <trace/define_trace.h> | ||
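
These tracepoints are emitted from the flavor-specific callback machinery (the kernel/rcu.h and kernel/rcutiny.c hunks below show the real call sites). A hedged sketch of how a batch of callbacks would be bracketed by them, ignoring the kfree_rcu() special case that __rcu_reclaim() handles:

	/* Illustration only; rcuname, list, qlen and blimit come from the flavor. */
	static void example_invoke_batch(char *rcuname, struct rcu_head *list,
					 long qlen, int blimit)
	{
		struct rcu_head *next;
		int invoked = 0;

		trace_rcu_batch_start(rcuname, qlen, blimit);
		while (list != NULL) {
			next = list->next;
			trace_rcu_invoke_callback(rcuname, list);
			list->func(list);	/* __rcu_reclaim() in the real code */
			list = next;
			invoked++;
		}
		trace_rcu_batch_end(rcuname, invoked);
	}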
diff --git a/init/Kconfig b/init/Kconfig index d62778390e55..dc7e27bf89a8 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
| @@ -391,7 +391,7 @@ config TREE_RCU | |||
| 391 | 391 | ||
| 392 | config TREE_PREEMPT_RCU | 392 | config TREE_PREEMPT_RCU |
| 393 | bool "Preemptible tree-based hierarchical RCU" | 393 | bool "Preemptible tree-based hierarchical RCU" |
| 394 | depends on PREEMPT | 394 | depends on PREEMPT && SMP |
| 395 | help | 395 | help |
| 396 | This option selects the RCU implementation that is | 396 | This option selects the RCU implementation that is |
| 397 | designed for very large SMP systems with hundreds or | 397 | designed for very large SMP systems with hundreds or |
| @@ -401,7 +401,7 @@ config TREE_PREEMPT_RCU | |||
| 401 | 401 | ||
| 402 | config TINY_RCU | 402 | config TINY_RCU |
| 403 | bool "UP-only small-memory-footprint RCU" | 403 | bool "UP-only small-memory-footprint RCU" |
| 404 | depends on !SMP | 404 | depends on !PREEMPT && !SMP |
| 405 | help | 405 | help |
| 406 | This option selects the RCU implementation that is | 406 | This option selects the RCU implementation that is |
| 407 | designed for UP systems from which real-time response | 407 | designed for UP systems from which real-time response |
| @@ -410,7 +410,7 @@ config TINY_RCU | |||
| 410 | 410 | ||
| 411 | config TINY_PREEMPT_RCU | 411 | config TINY_PREEMPT_RCU |
| 412 | bool "Preemptible UP-only small-memory-footprint RCU" | 412 | bool "Preemptible UP-only small-memory-footprint RCU" |
| 413 | depends on !SMP && PREEMPT | 413 | depends on PREEMPT && !SMP |
| 414 | help | 414 | help |
| 415 | This option selects the RCU implementation that is designed | 415 | This option selects the RCU implementation that is designed |
| 416 | for real-time UP systems. This option greatly reduces the | 416 | for real-time UP systems. This option greatly reduces the |
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 91d67ce3a8d5..1e48f1c3ea70 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
| @@ -1129,10 +1129,11 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth, | |||
| 1129 | if (debug_locks_silent) | 1129 | if (debug_locks_silent) |
| 1130 | return 0; | 1130 | return 0; |
| 1131 | 1131 | ||
| 1132 | printk("\n=======================================================\n"); | 1132 | printk("\n"); |
| 1133 | printk( "[ INFO: possible circular locking dependency detected ]\n"); | 1133 | printk("======================================================\n"); |
| 1134 | printk("[ INFO: possible circular locking dependency detected ]\n"); | ||
| 1134 | print_kernel_version(); | 1135 | print_kernel_version(); |
| 1135 | printk( "-------------------------------------------------------\n"); | 1136 | printk("-------------------------------------------------------\n"); |
| 1136 | printk("%s/%d is trying to acquire lock:\n", | 1137 | printk("%s/%d is trying to acquire lock:\n", |
| 1137 | curr->comm, task_pid_nr(curr)); | 1138 | curr->comm, task_pid_nr(curr)); |
| 1138 | print_lock(check_src); | 1139 | print_lock(check_src); |
| @@ -1463,11 +1464,12 @@ print_bad_irq_dependency(struct task_struct *curr, | |||
| 1463 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | 1464 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) |
| 1464 | return 0; | 1465 | return 0; |
| 1465 | 1466 | ||
| 1466 | printk("\n======================================================\n"); | 1467 | printk("\n"); |
| 1467 | printk( "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", | 1468 | printk("======================================================\n"); |
| 1469 | printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", | ||
| 1468 | irqclass, irqclass); | 1470 | irqclass, irqclass); |
| 1469 | print_kernel_version(); | 1471 | print_kernel_version(); |
| 1470 | printk( "------------------------------------------------------\n"); | 1472 | printk("------------------------------------------------------\n"); |
| 1471 | printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", | 1473 | printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", |
| 1472 | curr->comm, task_pid_nr(curr), | 1474 | curr->comm, task_pid_nr(curr), |
| 1473 | curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, | 1475 | curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, |
| @@ -1692,10 +1694,11 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, | |||
| 1692 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | 1694 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) |
| 1693 | return 0; | 1695 | return 0; |
| 1694 | 1696 | ||
| 1695 | printk("\n=============================================\n"); | 1697 | printk("\n"); |
| 1696 | printk( "[ INFO: possible recursive locking detected ]\n"); | 1698 | printk("=============================================\n"); |
| 1699 | printk("[ INFO: possible recursive locking detected ]\n"); | ||
| 1697 | print_kernel_version(); | 1700 | print_kernel_version(); |
| 1698 | printk( "---------------------------------------------\n"); | 1701 | printk("---------------------------------------------\n"); |
| 1699 | printk("%s/%d is trying to acquire lock:\n", | 1702 | printk("%s/%d is trying to acquire lock:\n", |
| 1700 | curr->comm, task_pid_nr(curr)); | 1703 | curr->comm, task_pid_nr(curr)); |
| 1701 | print_lock(next); | 1704 | print_lock(next); |
| @@ -2177,10 +2180,11 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, | |||
| 2177 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | 2180 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) |
| 2178 | return 0; | 2181 | return 0; |
| 2179 | 2182 | ||
| 2180 | printk("\n=================================\n"); | 2183 | printk("\n"); |
| 2181 | printk( "[ INFO: inconsistent lock state ]\n"); | 2184 | printk("=================================\n"); |
| 2185 | printk("[ INFO: inconsistent lock state ]\n"); | ||
| 2182 | print_kernel_version(); | 2186 | print_kernel_version(); |
| 2183 | printk( "---------------------------------\n"); | 2187 | printk("---------------------------------\n"); |
| 2184 | 2188 | ||
| 2185 | printk("inconsistent {%s} -> {%s} usage.\n", | 2189 | printk("inconsistent {%s} -> {%s} usage.\n", |
| 2186 | usage_str[prev_bit], usage_str[new_bit]); | 2190 | usage_str[prev_bit], usage_str[new_bit]); |
| @@ -2241,10 +2245,11 @@ print_irq_inversion_bug(struct task_struct *curr, | |||
| 2241 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | 2245 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) |
| 2242 | return 0; | 2246 | return 0; |
| 2243 | 2247 | ||
| 2244 | printk("\n=========================================================\n"); | 2248 | printk("\n"); |
| 2245 | printk( "[ INFO: possible irq lock inversion dependency detected ]\n"); | 2249 | printk("=========================================================\n"); |
| 2250 | printk("[ INFO: possible irq lock inversion dependency detected ]\n"); | ||
| 2246 | print_kernel_version(); | 2251 | print_kernel_version(); |
| 2247 | printk( "---------------------------------------------------------\n"); | 2252 | printk("---------------------------------------------------------\n"); |
| 2248 | printk("%s/%d just changed the state of lock:\n", | 2253 | printk("%s/%d just changed the state of lock:\n", |
| 2249 | curr->comm, task_pid_nr(curr)); | 2254 | curr->comm, task_pid_nr(curr)); |
| 2250 | print_lock(this); | 2255 | print_lock(this); |
| @@ -3065,9 +3070,10 @@ print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock, | |||
| 3065 | if (debug_locks_silent) | 3070 | if (debug_locks_silent) |
| 3066 | return 0; | 3071 | return 0; |
| 3067 | 3072 | ||
| 3068 | printk("\n=====================================\n"); | 3073 | printk("\n"); |
| 3069 | printk( "[ BUG: bad unlock balance detected! ]\n"); | 3074 | printk("=====================================\n"); |
| 3070 | printk( "-------------------------------------\n"); | 3075 | printk("[ BUG: bad unlock balance detected! ]\n"); |
| 3076 | printk("-------------------------------------\n"); | ||
| 3071 | printk("%s/%d is trying to release lock (", | 3077 | printk("%s/%d is trying to release lock (", |
| 3072 | curr->comm, task_pid_nr(curr)); | 3078 | curr->comm, task_pid_nr(curr)); |
| 3073 | print_lockdep_cache(lock); | 3079 | print_lockdep_cache(lock); |
| @@ -3478,9 +3484,10 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, | |||
| 3478 | if (debug_locks_silent) | 3484 | if (debug_locks_silent) |
| 3479 | return 0; | 3485 | return 0; |
| 3480 | 3486 | ||
| 3481 | printk("\n=================================\n"); | 3487 | printk("\n"); |
| 3482 | printk( "[ BUG: bad contention detected! ]\n"); | 3488 | printk("=================================\n"); |
| 3483 | printk( "---------------------------------\n"); | 3489 | printk("[ BUG: bad contention detected! ]\n"); |
| 3490 | printk("---------------------------------\n"); | ||
| 3484 | printk("%s/%d is trying to contend lock (", | 3491 | printk("%s/%d is trying to contend lock (", |
| 3485 | curr->comm, task_pid_nr(curr)); | 3492 | curr->comm, task_pid_nr(curr)); |
| 3486 | print_lockdep_cache(lock); | 3493 | print_lockdep_cache(lock); |
| @@ -3839,9 +3846,10 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from, | |||
| 3839 | if (debug_locks_silent) | 3846 | if (debug_locks_silent) |
| 3840 | return; | 3847 | return; |
| 3841 | 3848 | ||
| 3842 | printk("\n=========================\n"); | 3849 | printk("\n"); |
| 3843 | printk( "[ BUG: held lock freed! ]\n"); | 3850 | printk("=========================\n"); |
| 3844 | printk( "-------------------------\n"); | 3851 | printk("[ BUG: held lock freed! ]\n"); |
| 3852 | printk("-------------------------\n"); | ||
| 3845 | printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", | 3853 | printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", |
| 3846 | curr->comm, task_pid_nr(curr), mem_from, mem_to-1); | 3854 | curr->comm, task_pid_nr(curr), mem_from, mem_to-1); |
| 3847 | print_lock(hlock); | 3855 | print_lock(hlock); |
| @@ -3895,9 +3903,10 @@ static void print_held_locks_bug(struct task_struct *curr) | |||
| 3895 | if (debug_locks_silent) | 3903 | if (debug_locks_silent) |
| 3896 | return; | 3904 | return; |
| 3897 | 3905 | ||
| 3898 | printk("\n=====================================\n"); | 3906 | printk("\n"); |
| 3899 | printk( "[ BUG: lock held at task exit time! ]\n"); | 3907 | printk("=====================================\n"); |
| 3900 | printk( "-------------------------------------\n"); | 3908 | printk("[ BUG: lock held at task exit time! ]\n"); |
| 3909 | printk("-------------------------------------\n"); | ||
| 3901 | printk("%s/%d is exiting with locks still held!\n", | 3910 | printk("%s/%d is exiting with locks still held!\n", |
| 3902 | curr->comm, task_pid_nr(curr)); | 3911 | curr->comm, task_pid_nr(curr)); |
| 3903 | lockdep_print_held_locks(curr); | 3912 | lockdep_print_held_locks(curr); |
| @@ -3991,16 +4000,17 @@ void lockdep_sys_exit(void) | |||
| 3991 | if (unlikely(curr->lockdep_depth)) { | 4000 | if (unlikely(curr->lockdep_depth)) { |
| 3992 | if (!debug_locks_off()) | 4001 | if (!debug_locks_off()) |
| 3993 | return; | 4002 | return; |
| 3994 | printk("\n================================================\n"); | 4003 | printk("\n"); |
| 3995 | printk( "[ BUG: lock held when returning to user space! ]\n"); | 4004 | printk("================================================\n"); |
| 3996 | printk( "------------------------------------------------\n"); | 4005 | printk("[ BUG: lock held when returning to user space! ]\n"); |
| 4006 | printk("------------------------------------------------\n"); | ||
| 3997 | printk("%s/%d is leaving the kernel with locks still held!\n", | 4007 | printk("%s/%d is leaving the kernel with locks still held!\n", |
| 3998 | curr->comm, curr->pid); | 4008 | curr->comm, curr->pid); |
| 3999 | lockdep_print_held_locks(curr); | 4009 | lockdep_print_held_locks(curr); |
| 4000 | } | 4010 | } |
| 4001 | } | 4011 | } |
| 4002 | 4012 | ||
| 4003 | void lockdep_rcu_dereference(const char *file, const int line) | 4013 | void lockdep_rcu_suspicious(const char *file, const int line, const char *s) |
| 4004 | { | 4014 | { |
| 4005 | struct task_struct *curr = current; | 4015 | struct task_struct *curr = current; |
| 4006 | 4016 | ||
| @@ -4009,15 +4019,15 @@ void lockdep_rcu_dereference(const char *file, const int line) | |||
| 4009 | return; | 4019 | return; |
| 4010 | #endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ | 4020 | #endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ |
| 4011 | /* Note: the following can be executed concurrently, so be careful. */ | 4021 | /* Note: the following can be executed concurrently, so be careful. */ |
| 4012 | printk("\n===================================================\n"); | 4022 | printk("\n"); |
| 4013 | printk( "[ INFO: suspicious rcu_dereference_check() usage. ]\n"); | 4023 | printk("===============================\n"); |
| 4014 | printk( "---------------------------------------------------\n"); | 4024 | printk("[ INFO: suspicious RCU usage. ]\n"); |
| 4015 | printk("%s:%d invoked rcu_dereference_check() without protection!\n", | 4025 | printk("-------------------------------\n"); |
| 4016 | file, line); | 4026 | printk("%s:%d %s!\n", file, line, s); |
| 4017 | printk("\nother info that might help us debug this:\n\n"); | 4027 | printk("\nother info that might help us debug this:\n\n"); |
| 4018 | printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks); | 4028 | printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks); |
| 4019 | lockdep_print_held_locks(curr); | 4029 | lockdep_print_held_locks(curr); |
| 4020 | printk("\nstack backtrace:\n"); | 4030 | printk("\nstack backtrace:\n"); |
| 4021 | dump_stack(); | 4031 | dump_stack(); |
| 4022 | } | 4032 | } |
| 4023 | EXPORT_SYMBOL_GPL(lockdep_rcu_dereference); | 4033 | EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); |
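
Renaming lockdep_rcu_dereference() to lockdep_rcu_suspicious() and adding a message argument lets each RCU debug check report what was suspicious rather than always blaming rcu_dereference_check(). A condensed sketch of how a checker such as rcu_lockdep_assert() (used in the kernel/pid.c hunk below) can sit on top of this entry point; the macro name here is made up and the real body in rcupdate.h may differ:

	#define my_rcu_lockdep_assert(c, s)					\
		do {								\
			if (debug_lockdep_rcu_enabled() && !(c))		\
				lockdep_rcu_suspicious(__FILE__, __LINE__, s);	\
		} while (0)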
diff --git a/kernel/pid.c b/kernel/pid.c index e432057f3b21..8cafe7e72ad2 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
| @@ -418,7 +418,9 @@ EXPORT_SYMBOL(pid_task); | |||
| 418 | */ | 418 | */ |
| 419 | struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) | 419 | struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) |
| 420 | { | 420 | { |
| 421 | rcu_lockdep_assert(rcu_read_lock_held()); | 421 | rcu_lockdep_assert(rcu_read_lock_held(), |
| 422 | "find_task_by_pid_ns() needs rcu_read_lock()" | ||
| 423 | " protection"); | ||
| 422 | return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); | 424 | return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); |
| 423 | } | 425 | } |
| 424 | 426 | ||
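
find_task_by_pid_ns() is only safe inside an RCU read-side critical section, which is exactly what the expanded assertion message now spells out. An illustrative caller (get_task_by_pid() is a made-up name; the lock, get, unlock pattern is the standard one):

	static struct task_struct *get_task_by_pid(pid_t nr, struct pid_namespace *ns)
	{
		struct task_struct *tsk;

		rcu_read_lock();		/* satisfies the rcu_lockdep_assert() */
		tsk = find_task_by_pid_ns(nr, ns);
		if (tsk)
			get_task_struct(tsk);	/* pin before leaving the critical section */
		rcu_read_unlock();
		return tsk;
	}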
diff --git a/kernel/rcu.h b/kernel/rcu.h new file mode 100644 index 000000000000..f600868d550d --- /dev/null +++ b/kernel/rcu.h | |||
| @@ -0,0 +1,85 @@ | |||
| 1 | /* | ||
| 2 | * Read-Copy Update definitions shared among RCU implementations. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License as published by | ||
| 6 | * the Free Software Foundation; either version 2 of the License, or | ||
| 7 | * (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, write to the Free Software | ||
| 16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
| 17 | * | ||
| 18 | * Copyright IBM Corporation, 2011 | ||
| 19 | * | ||
| 20 | * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> | ||
| 21 | */ | ||
| 22 | |||
| 23 | #ifndef __LINUX_RCU_H | ||
| 24 | #define __LINUX_RCU_H | ||
| 25 | |||
| 26 | #ifdef CONFIG_RCU_TRACE | ||
| 27 | #define RCU_TRACE(stmt) stmt | ||
| 28 | #else /* #ifdef CONFIG_RCU_TRACE */ | ||
| 29 | #define RCU_TRACE(stmt) | ||
| 30 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | ||
| 31 | |||
| 32 | /* | ||
| 33 | * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally | ||
| 34 | * by call_rcu() and rcu callback execution, and are therefore not part of the | ||
| 35 | * RCU API. They are kept here in kernel/rcu.h because they are used by all RCU flavors. | ||
| 36 | */ | ||
| 37 | |||
| 38 | #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD | ||
| 39 | # define STATE_RCU_HEAD_READY 0 | ||
| 40 | # define STATE_RCU_HEAD_QUEUED 1 | ||
| 41 | |||
| 42 | extern struct debug_obj_descr rcuhead_debug_descr; | ||
| 43 | |||
| 44 | static inline void debug_rcu_head_queue(struct rcu_head *head) | ||
| 45 | { | ||
| 46 | WARN_ON_ONCE((unsigned long)head & 0x3); | ||
| 47 | debug_object_activate(head, &rcuhead_debug_descr); | ||
| 48 | debug_object_active_state(head, &rcuhead_debug_descr, | ||
| 49 | STATE_RCU_HEAD_READY, | ||
| 50 | STATE_RCU_HEAD_QUEUED); | ||
| 51 | } | ||
| 52 | |||
| 53 | static inline void debug_rcu_head_unqueue(struct rcu_head *head) | ||
| 54 | { | ||
| 55 | debug_object_active_state(head, &rcuhead_debug_descr, | ||
| 56 | STATE_RCU_HEAD_QUEUED, | ||
| 57 | STATE_RCU_HEAD_READY); | ||
| 58 | debug_object_deactivate(head, &rcuhead_debug_descr); | ||
| 59 | } | ||
| 60 | #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ | ||
| 61 | static inline void debug_rcu_head_queue(struct rcu_head *head) | ||
| 62 | { | ||
| 63 | } | ||
| 64 | |||
| 65 | static inline void debug_rcu_head_unqueue(struct rcu_head *head) | ||
| 66 | { | ||
| 67 | } | ||
| 68 | #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ | ||
| 69 | |||
| 70 | extern void kfree(const void *); | ||
| 71 | |||
| 72 | static inline void __rcu_reclaim(char *rn, struct rcu_head *head) | ||
| 73 | { | ||
| 74 | unsigned long offset = (unsigned long)head->func; | ||
| 75 | |||
| 76 | if (__is_kfree_rcu_offset(offset)) { | ||
| 77 | RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset)); | ||
| 78 | kfree((void *)head - offset); | ||
| 79 | } else { | ||
| 80 | RCU_TRACE(trace_rcu_invoke_callback(rn, head)); | ||
| 81 | head->func(head); | ||
| 82 | } | ||
| 83 | } | ||
| 84 | |||
| 85 | #endif /* __LINUX_RCU_H */ | ||
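
__rcu_reclaim() is the consumer side of the kfree_rcu() trick: the caller stores the byte offset of the rcu_head within the enclosing structure where a callback function pointer would normally go, and on the assumption that no callback function lives in the first 4096 bytes of the address space, __is_kfree_rcu_offset() can tell the two apart. A hand-expanded sketch of the producer side, using a hypothetical struct my_obj and the __kfree_rcu() helper from the rcupdate.h hunk above:

	struct my_obj {
		int data;
		struct rcu_head rcu;
	};

	static void my_obj_delete(struct my_obj *p)
	{
		/*
		 * Roughly what kfree_rcu(p, rcu) expands to: the offset of
		 * ->rcu within struct my_obj rides in the callback slot, and
		 * __rcu_reclaim() later undoes it with
		 * kfree((void *)head - offset).
		 */
		__kfree_rcu(&p->rcu, offsetof(struct my_obj, rcu));
	}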
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index ddddb320be61..ca0d23b6b3e8 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
| @@ -46,6 +46,11 @@ | |||
| 46 | #include <linux/module.h> | 46 | #include <linux/module.h> |
| 47 | #include <linux/hardirq.h> | 47 | #include <linux/hardirq.h> |
| 48 | 48 | ||
| 49 | #define CREATE_TRACE_POINTS | ||
| 50 | #include <trace/events/rcu.h> | ||
| 51 | |||
| 52 | #include "rcu.h" | ||
| 53 | |||
| 49 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 54 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| 50 | static struct lock_class_key rcu_lock_key; | 55 | static struct lock_class_key rcu_lock_key; |
| 51 | struct lockdep_map rcu_lock_map = | 56 | struct lockdep_map rcu_lock_map = |
| @@ -94,11 +99,16 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); | |||
| 94 | 99 | ||
| 95 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 100 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
| 96 | 101 | ||
| 102 | struct rcu_synchronize { | ||
| 103 | struct rcu_head head; | ||
| 104 | struct completion completion; | ||
| 105 | }; | ||
| 106 | |||
| 97 | /* | 107 | /* |
| 98 | * Awaken the corresponding synchronize_rcu() instance now that a | 108 | * Awaken the corresponding synchronize_rcu() instance now that a |
| 99 | * grace period has elapsed. | 109 | * grace period has elapsed. |
| 100 | */ | 110 | */ |
| 101 | void wakeme_after_rcu(struct rcu_head *head) | 111 | static void wakeme_after_rcu(struct rcu_head *head) |
| 102 | { | 112 | { |
| 103 | struct rcu_synchronize *rcu; | 113 | struct rcu_synchronize *rcu; |
| 104 | 114 | ||
| @@ -106,6 +116,20 @@ void wakeme_after_rcu(struct rcu_head *head) | |||
| 106 | complete(&rcu->completion); | 116 | complete(&rcu->completion); |
| 107 | } | 117 | } |
| 108 | 118 | ||
| 119 | void wait_rcu_gp(call_rcu_func_t crf) | ||
| 120 | { | ||
| 121 | struct rcu_synchronize rcu; | ||
| 122 | |||
| 123 | init_rcu_head_on_stack(&rcu.head); | ||
| 124 | init_completion(&rcu.completion); | ||
| 125 | /* Will wake me after RCU finished. */ | ||
| 126 | crf(&rcu.head, wakeme_after_rcu); | ||
| 127 | /* Wait for it. */ | ||
| 128 | wait_for_completion(&rcu.completion); | ||
| 129 | destroy_rcu_head_on_stack(&rcu.head); | ||
| 130 | } | ||
| 131 | EXPORT_SYMBOL_GPL(wait_rcu_gp); | ||
| 132 | |||
| 109 | #ifdef CONFIG_PROVE_RCU | 133 | #ifdef CONFIG_PROVE_RCU |
| 110 | /* | 134 | /* |
| 111 | * wrapper function to avoid #include problems. | 135 | * wrapper function to avoid #include problems. |
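
wait_rcu_gp() captures the recurring "post a callback, block on a completion" idiom in one place, so any flavor that provides a call_rcu()-style primitive gets a blocking grace-period wait for free; call_rcu_func_t is the type of such a primitive. A hypothetical flavor would plug in like this (call_rcu_myflavor() and synchronize_myflavor() are illustration only):

	/* call_rcu_myflavor() stands in for call_rcu(), call_rcu_bh() or call_rcu_sched(). */
	static void synchronize_myflavor(void)
	{
		wait_rcu_gp(call_rcu_myflavor);
	}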
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 7bbac7d0f5ab..da775c87f27f 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c | |||
| @@ -37,16 +37,17 @@ | |||
| 37 | #include <linux/cpu.h> | 37 | #include <linux/cpu.h> |
| 38 | #include <linux/prefetch.h> | 38 | #include <linux/prefetch.h> |
| 39 | 39 | ||
| 40 | /* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */ | 40 | #ifdef CONFIG_RCU_TRACE |
| 41 | static struct task_struct *rcu_kthread_task; | 41 | #include <trace/events/rcu.h> |
| 42 | static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); | 42 | #endif /* #ifdef CONFIG_RCU_TRACE */ |
| 43 | static unsigned long have_rcu_kthread_work; | 43 | |
| 44 | #include "rcu.h" | ||
| 44 | 45 | ||
| 45 | /* Forward declarations for rcutiny_plugin.h. */ | 46 | /* Forward declarations for rcutiny_plugin.h. */ |
| 46 | struct rcu_ctrlblk; | 47 | struct rcu_ctrlblk; |
| 47 | static void invoke_rcu_kthread(void); | 48 | static void invoke_rcu_callbacks(void); |
| 48 | static void rcu_process_callbacks(struct rcu_ctrlblk *rcp); | 49 | static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); |
| 49 | static int rcu_kthread(void *arg); | 50 | static void rcu_process_callbacks(struct softirq_action *unused); |
| 50 | static void __call_rcu(struct rcu_head *head, | 51 | static void __call_rcu(struct rcu_head *head, |
| 51 | void (*func)(struct rcu_head *rcu), | 52 | void (*func)(struct rcu_head *rcu), |
| 52 | struct rcu_ctrlblk *rcp); | 53 | struct rcu_ctrlblk *rcp); |
| @@ -96,16 +97,6 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) | |||
| 96 | } | 97 | } |
| 97 | 98 | ||
| 98 | /* | 99 | /* |
| 99 | * Wake up rcu_kthread() to process callbacks now eligible for invocation | ||
| 100 | * or to boost readers. | ||
| 101 | */ | ||
| 102 | static void invoke_rcu_kthread(void) | ||
| 103 | { | ||
| 104 | have_rcu_kthread_work = 1; | ||
| 105 | wake_up(&rcu_kthread_wq); | ||
| 106 | } | ||
| 107 | |||
| 108 | /* | ||
| 109 | * Record an rcu quiescent state. And an rcu_bh quiescent state while we | 100 | * Record an rcu quiescent state. And an rcu_bh quiescent state while we |
| 110 | * are at it, given that any rcu quiescent state is also an rcu_bh | 101 | * are at it, given that any rcu quiescent state is also an rcu_bh |
| 111 | * quiescent state. Use "+" instead of "||" to defeat short circuiting. | 102 | * quiescent state. Use "+" instead of "||" to defeat short circuiting. |
| @@ -117,7 +108,7 @@ void rcu_sched_qs(int cpu) | |||
| 117 | local_irq_save(flags); | 108 | local_irq_save(flags); |
| 118 | if (rcu_qsctr_help(&rcu_sched_ctrlblk) + | 109 | if (rcu_qsctr_help(&rcu_sched_ctrlblk) + |
| 119 | rcu_qsctr_help(&rcu_bh_ctrlblk)) | 110 | rcu_qsctr_help(&rcu_bh_ctrlblk)) |
| 120 | invoke_rcu_kthread(); | 111 | invoke_rcu_callbacks(); |
| 121 | local_irq_restore(flags); | 112 | local_irq_restore(flags); |
| 122 | } | 113 | } |
| 123 | 114 | ||
| @@ -130,7 +121,7 @@ void rcu_bh_qs(int cpu) | |||
| 130 | 121 | ||
| 131 | local_irq_save(flags); | 122 | local_irq_save(flags); |
| 132 | if (rcu_qsctr_help(&rcu_bh_ctrlblk)) | 123 | if (rcu_qsctr_help(&rcu_bh_ctrlblk)) |
| 133 | invoke_rcu_kthread(); | 124 | invoke_rcu_callbacks(); |
| 134 | local_irq_restore(flags); | 125 | local_irq_restore(flags); |
| 135 | } | 126 | } |
| 136 | 127 | ||
| @@ -154,18 +145,23 @@ void rcu_check_callbacks(int cpu, int user) | |||
| 154 | * Invoke the RCU callbacks on the specified rcu_ctrlblk structure | 145 | * Invoke the RCU callbacks on the specified rcu_ctrlblk structure |
| 155 | * whose grace period has elapsed. | 146 | * whose grace period has elapsed. |
| 156 | */ | 147 | */ |
| 157 | static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) | 148 | static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) |
| 158 | { | 149 | { |
| 150 | char *rn = NULL; | ||
| 159 | struct rcu_head *next, *list; | 151 | struct rcu_head *next, *list; |
| 160 | unsigned long flags; | 152 | unsigned long flags; |
| 161 | RCU_TRACE(int cb_count = 0); | 153 | RCU_TRACE(int cb_count = 0); |
| 162 | 154 | ||
| 163 | /* If no RCU callbacks ready to invoke, just return. */ | 155 | /* If no RCU callbacks ready to invoke, just return. */ |
| 164 | if (&rcp->rcucblist == rcp->donetail) | 156 | if (&rcp->rcucblist == rcp->donetail) { |
| 157 | RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); | ||
| 158 | RCU_TRACE(trace_rcu_batch_end(rcp->name, 0)); | ||
| 165 | return; | 159 | return; |
| 160 | } | ||
| 166 | 161 | ||
| 167 | /* Move the ready-to-invoke callbacks to a local list. */ | 162 | /* Move the ready-to-invoke callbacks to a local list. */ |
| 168 | local_irq_save(flags); | 163 | local_irq_save(flags); |
| 164 | RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); | ||
| 169 | list = rcp->rcucblist; | 165 | list = rcp->rcucblist; |
| 170 | rcp->rcucblist = *rcp->donetail; | 166 | rcp->rcucblist = *rcp->donetail; |
| 171 | *rcp->donetail = NULL; | 167 | *rcp->donetail = NULL; |
| @@ -176,49 +172,26 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) | |||
| 176 | local_irq_restore(flags); | 172 | local_irq_restore(flags); |
| 177 | 173 | ||
| 178 | /* Invoke the callbacks on the local list. */ | 174 | /* Invoke the callbacks on the local list. */ |
| 175 | RCU_TRACE(rn = rcp->name); | ||
| 179 | while (list) { | 176 | while (list) { |
| 180 | next = list->next; | 177 | next = list->next; |
| 181 | prefetch(next); | 178 | prefetch(next); |
| 182 | debug_rcu_head_unqueue(list); | 179 | debug_rcu_head_unqueue(list); |
| 183 | local_bh_disable(); | 180 | local_bh_disable(); |
| 184 | __rcu_reclaim(list); | 181 | __rcu_reclaim(rn, list); |
| 185 | local_bh_enable(); | 182 | local_bh_enable(); |
| 186 | list = next; | 183 | list = next; |
| 187 | RCU_TRACE(cb_count++); | 184 | RCU_TRACE(cb_count++); |
| 188 | } | 185 | } |
| 189 | RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); | 186 | RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); |
| 187 | RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count)); | ||
| 190 | } | 188 | } |
| 191 | 189 | ||
| 192 | /* | 190 | static void rcu_process_callbacks(struct softirq_action *unused) |
| 193 | * This kthread invokes RCU callbacks whose grace periods have | ||
| 194 | * elapsed. It is awakened as needed, and takes the place of the | ||
| 195 | * RCU_SOFTIRQ that was used previously for this purpose. | ||
| 196 | * This is a kthread, but it is never stopped, at least not until | ||
| 197 | * the system goes down. | ||
| 198 | */ | ||
| 199 | static int rcu_kthread(void *arg) | ||
| 200 | { | 191 | { |
| 201 | unsigned long work; | 192 | __rcu_process_callbacks(&rcu_sched_ctrlblk); |
| 202 | unsigned long morework; | 193 | __rcu_process_callbacks(&rcu_bh_ctrlblk); |
| 203 | unsigned long flags; | 194 | rcu_preempt_process_callbacks(); |
| 204 | |||
| 205 | for (;;) { | ||
| 206 | wait_event_interruptible(rcu_kthread_wq, | ||
| 207 | have_rcu_kthread_work != 0); | ||
| 208 | morework = rcu_boost(); | ||
| 209 | local_irq_save(flags); | ||
| 210 | work = have_rcu_kthread_work; | ||
| 211 | have_rcu_kthread_work = morework; | ||
| 212 | local_irq_restore(flags); | ||
| 213 | if (work) { | ||
| 214 | rcu_process_callbacks(&rcu_sched_ctrlblk); | ||
| 215 | rcu_process_callbacks(&rcu_bh_ctrlblk); | ||
| 216 | rcu_preempt_process_callbacks(); | ||
| 217 | } | ||
| 218 | schedule_timeout_interruptible(1); /* Leave CPU for others. */ | ||
| 219 | } | ||
| 220 | |||
| 221 | return 0; /* Not reached, but needed to shut gcc up. */ | ||
| 222 | } | 195 | } |
| 223 | 196 | ||
| 224 | /* | 197 | /* |
| @@ -280,45 +253,3 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |||
| 280 | __call_rcu(head, func, &rcu_bh_ctrlblk); | 253 | __call_rcu(head, func, &rcu_bh_ctrlblk); |
| 281 | } | 254 | } |
| 282 | EXPORT_SYMBOL_GPL(call_rcu_bh); | 255 | EXPORT_SYMBOL_GPL(call_rcu_bh); |
| 283 | |||
| 284 | void rcu_barrier_bh(void) | ||
| 285 | { | ||
| 286 | struct rcu_synchronize rcu; | ||
| 287 | |||
| 288 | init_rcu_head_on_stack(&rcu.head); | ||
| 289 | init_completion(&rcu.completion); | ||
| 290 | /* Will wake me after RCU finished. */ | ||
| 291 | call_rcu_bh(&rcu.head, wakeme_after_rcu); | ||
| 292 | /* Wait for it. */ | ||
| 293 | wait_for_completion(&rcu.completion); | ||
| 294 | destroy_rcu_head_on_stack(&rcu.head); | ||
| 295 | } | ||
| 296 | EXPORT_SYMBOL_GPL(rcu_barrier_bh); | ||
| 297 | |||
| 298 | void rcu_barrier_sched(void) | ||
| 299 | { | ||
| 300 | struct rcu_synchronize rcu; | ||
| 301 | |||
| 302 | init_rcu_head_on_stack(&rcu.head); | ||
| 303 | init_completion(&rcu.completion); | ||
| 304 | /* Will wake me after RCU finished. */ | ||
| 305 | call_rcu_sched(&rcu.head, wakeme_after_rcu); | ||
| 306 | /* Wait for it. */ | ||
| 307 | wait_for_completion(&rcu.completion); | ||
| 308 | destroy_rcu_head_on_stack(&rcu.head); | ||
| 309 | } | ||
| 310 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); | ||
| 311 | |||
| 312 | /* | ||
| 313 | * Spawn the kthread that invokes RCU callbacks. | ||
| 314 | */ | ||
| 315 | static int __init rcu_spawn_kthreads(void) | ||
| 316 | { | ||
| 317 | struct sched_param sp; | ||
| 318 | |||
| 319 | rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread"); | ||
| 320 | sp.sched_priority = RCU_BOOST_PRIO; | ||
| 321 | sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp); | ||
| 322 | return 0; | ||
| 323 | } | ||
| 324 | early_initcall(rcu_spawn_kthreads); | ||
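
Taken together, these rcutiny.c changes retire the per-flavor rcu_kthread in favor of the RCU_SOFTIRQ serviced by the forward-declared rcu_process_callbacks(struct softirq_action *) handler. The body of invoke_rcu_callbacks() is not shown in the hunks above, but given those declarations the non-boost case presumably amounts to something like the following (sketch, not the actual hunk):

	static void invoke_rcu_callbacks(void)
	{
		/* Defer callback invocation to softirq context. */
		raise_softirq(RCU_SOFTIRQ);
	}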
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index f259c676195f..02aa7139861c 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h | |||
| @@ -26,29 +26,26 @@ | |||
| 26 | #include <linux/debugfs.h> | 26 | #include <linux/debugfs.h> |
| 27 | #include <linux/seq_file.h> | 27 | #include <linux/seq_file.h> |
| 28 | 28 | ||
| 29 | #ifdef CONFIG_RCU_TRACE | ||
| 30 | #define RCU_TRACE(stmt) stmt | ||
| 31 | #else /* #ifdef CONFIG_RCU_TRACE */ | ||
| 32 | #define RCU_TRACE(stmt) | ||
| 33 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | ||
| 34 | |||
| 35 | /* Global control variables for rcupdate callback mechanism. */ | 29 | /* Global control variables for rcupdate callback mechanism. */ |
| 36 | struct rcu_ctrlblk { | 30 | struct rcu_ctrlblk { |
| 37 | struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ | 31 | struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ |
| 38 | struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ | 32 | struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ |
| 39 | struct rcu_head **curtail; /* ->next pointer of last CB. */ | 33 | struct rcu_head **curtail; /* ->next pointer of last CB. */ |
| 40 | RCU_TRACE(long qlen); /* Number of pending CBs. */ | 34 | RCU_TRACE(long qlen); /* Number of pending CBs. */ |
| 35 | RCU_TRACE(char *name); /* Name of RCU type. */ | ||
| 41 | }; | 36 | }; |
| 42 | 37 | ||
| 43 | /* Definition for rcupdate control block. */ | 38 | /* Definition for rcupdate control block. */ |
| 44 | static struct rcu_ctrlblk rcu_sched_ctrlblk = { | 39 | static struct rcu_ctrlblk rcu_sched_ctrlblk = { |
| 45 | .donetail = &rcu_sched_ctrlblk.rcucblist, | 40 | .donetail = &rcu_sched_ctrlblk.rcucblist, |
| 46 | .curtail = &rcu_sched_ctrlblk.rcucblist, | 41 | .curtail = &rcu_sched_ctrlblk.rcucblist, |
| 42 | RCU_TRACE(.name = "rcu_sched") | ||
| 47 | }; | 43 | }; |
| 48 | 44 | ||
| 49 | static struct rcu_ctrlblk rcu_bh_ctrlblk = { | 45 | static struct rcu_ctrlblk rcu_bh_ctrlblk = { |
| 50 | .donetail = &rcu_bh_ctrlblk.rcucblist, | 46 | .donetail = &rcu_bh_ctrlblk.rcucblist, |
| 51 | .curtail = &rcu_bh_ctrlblk.rcucblist, | 47 | .curtail = &rcu_bh_ctrlblk.rcucblist, |
| 48 | RCU_TRACE(.name = "rcu_bh") | ||
| 52 | }; | 49 | }; |
| 53 | 50 | ||
| 54 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 51 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| @@ -131,6 +128,7 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { | |||
| 131 | .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist, | 128 | .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist, |
| 132 | .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist, | 129 | .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist, |
| 133 | .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks), | 130 | .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks), |
| 131 | RCU_TRACE(.rcb.name = "rcu_preempt") | ||
| 134 | }; | 132 | }; |
| 135 | 133 | ||
| 136 | static int rcu_preempted_readers_exp(void); | 134 | static int rcu_preempted_readers_exp(void); |
| @@ -247,6 +245,13 @@ static void show_tiny_preempt_stats(struct seq_file *m) | |||
| 247 | 245 | ||
| 248 | #include "rtmutex_common.h" | 246 | #include "rtmutex_common.h" |
| 249 | 247 | ||
| 248 | #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO | ||
| 249 | |||
| 250 | /* Controls for rcu_kthread() kthread. */ | ||
| 251 | static struct task_struct *rcu_kthread_task; | ||
| 252 | static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); | ||
| 253 | static unsigned long have_rcu_kthread_work; | ||
| 254 | |||
| 250 | /* | 255 | /* |
| 251 | * Carry out RCU priority boosting on the task indicated by ->boost_tasks, | 256 | * Carry out RCU priority boosting on the task indicated by ->boost_tasks, |
| 252 | * and advance ->boost_tasks to the next task in the ->blkd_tasks list. | 257 | * and advance ->boost_tasks to the next task in the ->blkd_tasks list. |
| @@ -334,7 +339,7 @@ static int rcu_initiate_boost(void) | |||
| 334 | if (rcu_preempt_ctrlblk.exp_tasks == NULL) | 339 | if (rcu_preempt_ctrlblk.exp_tasks == NULL) |
| 335 | rcu_preempt_ctrlblk.boost_tasks = | 340 | rcu_preempt_ctrlblk.boost_tasks = |
| 336 | rcu_preempt_ctrlblk.gp_tasks; | 341 | rcu_preempt_ctrlblk.gp_tasks; |
| 337 | invoke_rcu_kthread(); | 342 | invoke_rcu_callbacks(); |
| 338 | } else | 343 | } else |
| 339 | RCU_TRACE(rcu_initiate_boost_trace()); | 344 | RCU_TRACE(rcu_initiate_boost_trace()); |
| 340 | return 1; | 345 | return 1; |
| @@ -353,14 +358,6 @@ static void rcu_preempt_boost_start_gp(void) | |||
| 353 | #else /* #ifdef CONFIG_RCU_BOOST */ | 358 | #else /* #ifdef CONFIG_RCU_BOOST */ |
| 354 | 359 | ||
| 355 | /* | 360 | /* |
| 356 | * If there is no RCU priority boosting, we don't boost. | ||
| 357 | */ | ||
| 358 | static int rcu_boost(void) | ||
| 359 | { | ||
| 360 | return 0; | ||
| 361 | } | ||
| 362 | |||
| 363 | /* | ||
| 364 | * If there is no RCU priority boosting, we don't initiate boosting, | 361 | * If there is no RCU priority boosting, we don't initiate boosting, |
| 365 | * but we do indicate whether there are blocked readers blocking the | 362 | * but we do indicate whether there are blocked readers blocking the |
| 366 | * current grace period. | 363 | * current grace period. |
| @@ -427,7 +424,7 @@ static void rcu_preempt_cpu_qs(void) | |||
| 427 | 424 | ||
| 428 | /* If there are done callbacks, cause them to be invoked. */ | 425 | /* If there are done callbacks, cause them to be invoked. */ |
| 429 | if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) | 426 | if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) |
| 430 | invoke_rcu_kthread(); | 427 | invoke_rcu_callbacks(); |
| 431 | } | 428 | } |
| 432 | 429 | ||
| 433 | /* | 430 | /* |
| @@ -648,7 +645,7 @@ static void rcu_preempt_check_callbacks(void) | |||
| 648 | rcu_preempt_cpu_qs(); | 645 | rcu_preempt_cpu_qs(); |
| 649 | if (&rcu_preempt_ctrlblk.rcb.rcucblist != | 646 | if (&rcu_preempt_ctrlblk.rcb.rcucblist != |
| 650 | rcu_preempt_ctrlblk.rcb.donetail) | 647 | rcu_preempt_ctrlblk.rcb.donetail) |
| 651 | invoke_rcu_kthread(); | 648 | invoke_rcu_callbacks(); |
| 652 | if (rcu_preempt_gp_in_progress() && | 649 | if (rcu_preempt_gp_in_progress() && |
| 653 | rcu_cpu_blocking_cur_gp() && | 650 | rcu_cpu_blocking_cur_gp() && |
| 654 | rcu_preempt_running_reader()) | 651 | rcu_preempt_running_reader()) |
| @@ -674,7 +671,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) | |||
| 674 | */ | 671 | */ |
| 675 | static void rcu_preempt_process_callbacks(void) | 672 | static void rcu_preempt_process_callbacks(void) |
| 676 | { | 673 | { |
| 677 | rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); | 674 | __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); |
| 678 | } | 675 | } |
| 679 | 676 | ||
| 680 | /* | 677 | /* |
| @@ -697,20 +694,6 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |||
| 697 | } | 694 | } |
| 698 | EXPORT_SYMBOL_GPL(call_rcu); | 695 | EXPORT_SYMBOL_GPL(call_rcu); |
| 699 | 696 | ||
| 700 | void rcu_barrier(void) | ||
| 701 | { | ||
| 702 | struct rcu_synchronize rcu; | ||
| 703 | |||
| 704 | init_rcu_head_on_stack(&rcu.head); | ||
| 705 | init_completion(&rcu.completion); | ||
| 706 | /* Will wake me after RCU finished. */ | ||
| 707 | call_rcu(&rcu.head, wakeme_after_rcu); | ||
| 708 | /* Wait for it. */ | ||
| 709 | wait_for_completion(&rcu.completion); | ||
| 710 | destroy_rcu_head_on_stack(&rcu.head); | ||
| 711 | } | ||
| 712 | EXPORT_SYMBOL_GPL(rcu_barrier); | ||
| 713 | |||
| 714 | /* | 697 | /* |
| 715 | * synchronize_rcu - wait until a grace period has elapsed. | 698 | * synchronize_rcu - wait until a grace period has elapsed. |
| 716 | * | 699 | * |
| @@ -864,15 +847,6 @@ static void show_tiny_preempt_stats(struct seq_file *m) | |||
| 864 | #endif /* #ifdef CONFIG_RCU_TRACE */ | 847 | #endif /* #ifdef CONFIG_RCU_TRACE */ |
| 865 | 848 | ||
| 866 | /* | 849 | /* |
| 867 | * Because preemptible RCU does not exist, it is never necessary to | ||
| 868 | * boost preempted RCU readers. | ||
| 869 | */ | ||
| 870 | static int rcu_boost(void) | ||
| 871 | { | ||
| 872 | return 0; | ||
| 873 | } | ||
| 874 | |||
| 875 | /* | ||
| 876 | * Because preemptible RCU does not exist, it never has any callbacks | 850 | * Because preemptible RCU does not exist, it never has any callbacks |
| 877 | * to check. | 851 | * to check. |
| 878 | */ | 852 | */ |
| @@ -898,6 +872,78 @@ static void rcu_preempt_process_callbacks(void) | |||
| 898 | 872 | ||
| 899 | #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ | 873 | #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ |
| 900 | 874 | ||
| 875 | #ifdef CONFIG_RCU_BOOST | ||
| 876 | |||
| 877 | /* | ||
| 878 | * Wake up rcu_kthread() to process callbacks now eligible for invocation | ||
| 879 | * or to boost readers. | ||
| 880 | */ | ||
| 881 | static void invoke_rcu_callbacks(void) | ||
| 882 | { | ||
| 883 | have_rcu_kthread_work = 1; | ||
| 884 | wake_up(&rcu_kthread_wq); | ||
| 885 | } | ||
| 886 | |||
| 887 | /* | ||
| 888 | * This kthread invokes RCU callbacks whose grace periods have | ||
| 889 | * elapsed. It is awakened as needed, and takes the place of the | ||
| 890 | * RCU_SOFTIRQ that is used for this purpose when boosting is disabled. | ||
| 891 | * This is a kthread, but it is never stopped, at least not until | ||
| 892 | * the system goes down. | ||
| 893 | */ | ||
| 894 | static int rcu_kthread(void *arg) | ||
| 895 | { | ||
| 896 | unsigned long work; | ||
| 897 | unsigned long morework; | ||
| 898 | unsigned long flags; | ||
| 899 | |||
| 900 | for (;;) { | ||
| 901 | wait_event_interruptible(rcu_kthread_wq, | ||
| 902 | have_rcu_kthread_work != 0); | ||
| 903 | morework = rcu_boost(); | ||
| 904 | local_irq_save(flags); | ||
| 905 | work = have_rcu_kthread_work; | ||
| 906 | have_rcu_kthread_work = morework; | ||
| 907 | local_irq_restore(flags); | ||
| 908 | if (work) | ||
| 909 | rcu_process_callbacks(NULL); | ||
| 910 | schedule_timeout_interruptible(1); /* Leave CPU for others. */ | ||
| 911 | } | ||
| 912 | |||
| 913 | return 0; /* Not reached, but needed to shut gcc up. */ | ||
| 914 | } | ||
| 915 | |||
| 916 | /* | ||
| 917 | * Spawn the kthread that invokes RCU callbacks. | ||
| 918 | */ | ||
| 919 | static int __init rcu_spawn_kthreads(void) | ||
| 920 | { | ||
| 921 | struct sched_param sp; | ||
| 922 | |||
| 923 | rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread"); | ||
| 924 | sp.sched_priority = RCU_BOOST_PRIO; | ||
| 925 | sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp); | ||
| 926 | return 0; | ||
| 927 | } | ||
| 928 | early_initcall(rcu_spawn_kthreads); | ||
| 929 | |||
| 930 | #else /* #ifdef CONFIG_RCU_BOOST */ | ||
| 931 | |||
| 932 | /* | ||
| 933 | * Start up softirq processing of callbacks. | ||
| 934 | */ | ||
| 935 | void invoke_rcu_callbacks(void) | ||
| 936 | { | ||
| 937 | raise_softirq(RCU_SOFTIRQ); | ||
| 938 | } | ||
| 939 | |||
| 940 | void rcu_init(void) | ||
| 941 | { | ||
| 942 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | ||
| 943 | } | ||
| 944 | |||
| 945 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | ||
| 946 | |||
| 901 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 947 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| 902 | #include <linux/kernel_stat.h> | 948 | #include <linux/kernel_stat.h> |
| 903 | 949 | ||
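
Under CONFIG_RCU_BOOST, Tiny RCU now hands callback invocation and reader boosting to rcu_kthread() instead of RCU_SOFTIRQ: invoke_rcu_callbacks() merely sets a flag and wakes a wait queue. Below is a minimal sketch of that flag-plus-waitqueue handoff using hypothetical demo_* names, not the real rcu_kthread machinery; note that the real code samples and updates its flag with interrupts disabled because the wakeup side can run from interrupt context.

    #include <linux/kthread.h>
    #include <linux/wait.h>
    #include <linux/sched.h>
    #include <linux/err.h>
    #include <linux/init.h>

    static DECLARE_WAIT_QUEUE_HEAD(demo_wq);
    static unsigned long demo_have_work;
    static struct task_struct *demo_task;

    /* Producer side (the stand-in for invoke_rcu_callbacks()). */
    static void demo_kick(void)
    {
        demo_have_work = 1;
        wake_up(&demo_wq);
    }

    /* Consumer side: sleep until kicked, then run the deferred work. */
    static int demo_kthread(void *unused)
    {
        for (;;) {
            wait_event_interruptible(demo_wq, demo_have_work != 0);
            demo_have_work = 0;
            /* ... invoke ready callbacks and/or boost readers here ... */
            schedule_timeout_interruptible(1);  /* leave the CPU for others */
        }
        return 0;  /* not reached */
    }

    static int __init demo_spawn(void)
    {
        demo_task = kthread_run(demo_kthread, NULL, "demo_kthread");
        return IS_ERR(demo_task) ? PTR_ERR(demo_task) : 0;
    }
    early_initcall(demo_spawn);
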
| @@ -913,12 +959,6 @@ void __init rcu_scheduler_starting(void) | |||
| 913 | 959 | ||
| 914 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 960 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
| 915 | 961 | ||
| 916 | #ifdef CONFIG_RCU_BOOST | ||
| 917 | #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO | ||
| 918 | #else /* #ifdef CONFIG_RCU_BOOST */ | ||
| 919 | #define RCU_BOOST_PRIO 1 | ||
| 920 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | ||
| 921 | |||
| 922 | #ifdef CONFIG_RCU_TRACE | 962 | #ifdef CONFIG_RCU_TRACE |
| 923 | 963 | ||
| 924 | #ifdef CONFIG_RCU_BOOST | 964 | #ifdef CONFIG_RCU_BOOST |
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 98f51b13bb7e..764825c2685c 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
| @@ -73,7 +73,7 @@ module_param(nreaders, int, 0444); | |||
| 73 | MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); | 73 | MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); |
| 74 | module_param(nfakewriters, int, 0444); | 74 | module_param(nfakewriters, int, 0444); |
| 75 | MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads"); | 75 | MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads"); |
| 76 | module_param(stat_interval, int, 0444); | 76 | module_param(stat_interval, int, 0644); |
| 77 | MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); | 77 | MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); |
| 78 | module_param(verbose, bool, 0444); | 78 | module_param(verbose, bool, 0444); |
| 79 | MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s"); | 79 | MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s"); |
| @@ -480,30 +480,6 @@ static void rcu_bh_torture_deferred_free(struct rcu_torture *p) | |||
| 480 | call_rcu_bh(&p->rtort_rcu, rcu_torture_cb); | 480 | call_rcu_bh(&p->rtort_rcu, rcu_torture_cb); |
| 481 | } | 481 | } |
| 482 | 482 | ||
| 483 | struct rcu_bh_torture_synchronize { | ||
| 484 | struct rcu_head head; | ||
| 485 | struct completion completion; | ||
| 486 | }; | ||
| 487 | |||
| 488 | static void rcu_bh_torture_wakeme_after_cb(struct rcu_head *head) | ||
| 489 | { | ||
| 490 | struct rcu_bh_torture_synchronize *rcu; | ||
| 491 | |||
| 492 | rcu = container_of(head, struct rcu_bh_torture_synchronize, head); | ||
| 493 | complete(&rcu->completion); | ||
| 494 | } | ||
| 495 | |||
| 496 | static void rcu_bh_torture_synchronize(void) | ||
| 497 | { | ||
| 498 | struct rcu_bh_torture_synchronize rcu; | ||
| 499 | |||
| 500 | init_rcu_head_on_stack(&rcu.head); | ||
| 501 | init_completion(&rcu.completion); | ||
| 502 | call_rcu_bh(&rcu.head, rcu_bh_torture_wakeme_after_cb); | ||
| 503 | wait_for_completion(&rcu.completion); | ||
| 504 | destroy_rcu_head_on_stack(&rcu.head); | ||
| 505 | } | ||
| 506 | |||
| 507 | static struct rcu_torture_ops rcu_bh_ops = { | 483 | static struct rcu_torture_ops rcu_bh_ops = { |
| 508 | .init = NULL, | 484 | .init = NULL, |
| 509 | .cleanup = NULL, | 485 | .cleanup = NULL, |
| @@ -512,7 +488,7 @@ static struct rcu_torture_ops rcu_bh_ops = { | |||
| 512 | .readunlock = rcu_bh_torture_read_unlock, | 488 | .readunlock = rcu_bh_torture_read_unlock, |
| 513 | .completed = rcu_bh_torture_completed, | 489 | .completed = rcu_bh_torture_completed, |
| 514 | .deferred_free = rcu_bh_torture_deferred_free, | 490 | .deferred_free = rcu_bh_torture_deferred_free, |
| 515 | .sync = rcu_bh_torture_synchronize, | 491 | .sync = synchronize_rcu_bh, |
| 516 | .cb_barrier = rcu_barrier_bh, | 492 | .cb_barrier = rcu_barrier_bh, |
| 517 | .fqs = rcu_bh_force_quiescent_state, | 493 | .fqs = rcu_bh_force_quiescent_state, |
| 518 | .stats = NULL, | 494 | .stats = NULL, |
| @@ -528,7 +504,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = { | |||
| 528 | .readunlock = rcu_bh_torture_read_unlock, | 504 | .readunlock = rcu_bh_torture_read_unlock, |
| 529 | .completed = rcu_bh_torture_completed, | 505 | .completed = rcu_bh_torture_completed, |
| 530 | .deferred_free = rcu_sync_torture_deferred_free, | 506 | .deferred_free = rcu_sync_torture_deferred_free, |
| 531 | .sync = rcu_bh_torture_synchronize, | 507 | .sync = synchronize_rcu_bh, |
| 532 | .cb_barrier = NULL, | 508 | .cb_barrier = NULL, |
| 533 | .fqs = rcu_bh_force_quiescent_state, | 509 | .fqs = rcu_bh_force_quiescent_state, |
| 534 | .stats = NULL, | 510 | .stats = NULL, |
| @@ -536,6 +512,22 @@ static struct rcu_torture_ops rcu_bh_sync_ops = { | |||
| 536 | .name = "rcu_bh_sync" | 512 | .name = "rcu_bh_sync" |
| 537 | }; | 513 | }; |
| 538 | 514 | ||
| 515 | static struct rcu_torture_ops rcu_bh_expedited_ops = { | ||
| 516 | .init = rcu_sync_torture_init, | ||
| 517 | .cleanup = NULL, | ||
| 518 | .readlock = rcu_bh_torture_read_lock, | ||
| 519 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ | ||
| 520 | .readunlock = rcu_bh_torture_read_unlock, | ||
| 521 | .completed = rcu_bh_torture_completed, | ||
| 522 | .deferred_free = rcu_sync_torture_deferred_free, | ||
| 523 | .sync = synchronize_rcu_bh_expedited, | ||
| 524 | .cb_barrier = NULL, | ||
| 525 | .fqs = rcu_bh_force_quiescent_state, | ||
| 526 | .stats = NULL, | ||
| 527 | .irq_capable = 1, | ||
| 528 | .name = "rcu_bh_expedited" | ||
| 529 | }; | ||
| 530 | |||
| 539 | /* | 531 | /* |
| 540 | * Definitions for srcu torture testing. | 532 | * Definitions for srcu torture testing. |
| 541 | */ | 533 | */ |
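
The new rcu_bh_expedited_ops entry (registered in the torture_ops[] array near the end of this file) exercises synchronize_rcu_bh_expedited() under the usual reader and fake-writer load. Like the other flavors it is selected by name through rcutorture's torture_type module parameter, for example torture_type=rcu_bh_expedited on the modprobe command line, or rcutorture.torture_type=rcu_bh_expedited on the kernel command line when rcutorture is built in.
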
| @@ -659,11 +651,6 @@ static void rcu_sched_torture_deferred_free(struct rcu_torture *p) | |||
| 659 | call_rcu_sched(&p->rtort_rcu, rcu_torture_cb); | 651 | call_rcu_sched(&p->rtort_rcu, rcu_torture_cb); |
| 660 | } | 652 | } |
| 661 | 653 | ||
| 662 | static void sched_torture_synchronize(void) | ||
| 663 | { | ||
| 664 | synchronize_sched(); | ||
| 665 | } | ||
| 666 | |||
| 667 | static struct rcu_torture_ops sched_ops = { | 654 | static struct rcu_torture_ops sched_ops = { |
| 668 | .init = rcu_sync_torture_init, | 655 | .init = rcu_sync_torture_init, |
| 669 | .cleanup = NULL, | 656 | .cleanup = NULL, |
| @@ -672,7 +659,7 @@ static struct rcu_torture_ops sched_ops = { | |||
| 672 | .readunlock = sched_torture_read_unlock, | 659 | .readunlock = sched_torture_read_unlock, |
| 673 | .completed = rcu_no_completed, | 660 | .completed = rcu_no_completed, |
| 674 | .deferred_free = rcu_sched_torture_deferred_free, | 661 | .deferred_free = rcu_sched_torture_deferred_free, |
| 675 | .sync = sched_torture_synchronize, | 662 | .sync = synchronize_sched, |
| 676 | .cb_barrier = rcu_barrier_sched, | 663 | .cb_barrier = rcu_barrier_sched, |
| 677 | .fqs = rcu_sched_force_quiescent_state, | 664 | .fqs = rcu_sched_force_quiescent_state, |
| 678 | .stats = NULL, | 665 | .stats = NULL, |
| @@ -688,7 +675,7 @@ static struct rcu_torture_ops sched_sync_ops = { | |||
| 688 | .readunlock = sched_torture_read_unlock, | 675 | .readunlock = sched_torture_read_unlock, |
| 689 | .completed = rcu_no_completed, | 676 | .completed = rcu_no_completed, |
| 690 | .deferred_free = rcu_sync_torture_deferred_free, | 677 | .deferred_free = rcu_sync_torture_deferred_free, |
| 691 | .sync = sched_torture_synchronize, | 678 | .sync = synchronize_sched, |
| 692 | .cb_barrier = NULL, | 679 | .cb_barrier = NULL, |
| 693 | .fqs = rcu_sched_force_quiescent_state, | 680 | .fqs = rcu_sched_force_quiescent_state, |
| 694 | .stats = NULL, | 681 | .stats = NULL, |
| @@ -754,7 +741,7 @@ static int rcu_torture_boost(void *arg) | |||
| 754 | do { | 741 | do { |
| 755 | /* Wait for the next test interval. */ | 742 | /* Wait for the next test interval. */ |
| 756 | oldstarttime = boost_starttime; | 743 | oldstarttime = boost_starttime; |
| 757 | while (jiffies - oldstarttime > ULONG_MAX / 2) { | 744 | while (ULONG_CMP_LT(jiffies, oldstarttime)) { |
| 758 | schedule_timeout_uninterruptible(1); | 745 | schedule_timeout_uninterruptible(1); |
| 759 | rcu_stutter_wait("rcu_torture_boost"); | 746 | rcu_stutter_wait("rcu_torture_boost"); |
| 760 | if (kthread_should_stop() || | 747 | if (kthread_should_stop() || |
| @@ -765,7 +752,7 @@ static int rcu_torture_boost(void *arg) | |||
| 765 | /* Do one boost-test interval. */ | 752 | /* Do one boost-test interval. */ |
| 766 | endtime = oldstarttime + test_boost_duration * HZ; | 753 | endtime = oldstarttime + test_boost_duration * HZ; |
| 767 | call_rcu_time = jiffies; | 754 | call_rcu_time = jiffies; |
| 768 | while (jiffies - endtime > ULONG_MAX / 2) { | 755 | while (ULONG_CMP_LT(jiffies, endtime)) { |
| 769 | /* If we don't have a callback in flight, post one. */ | 756 | /* If we don't have a callback in flight, post one. */ |
| 770 | if (!rbi.inflight) { | 757 | if (!rbi.inflight) { |
| 771 | smp_mb(); /* RCU core before ->inflight = 1. */ | 758 | smp_mb(); /* RCU core before ->inflight = 1. */ |
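
These boost-interval loops (and the similar fqs-wait loop further down) drop the open-coded "subtract and compare against half the counter range" tests in favor of ULONG_CMP_LT(), which reads as "jiffies is still before the target time" and stays correct across counter wraparound. A standalone sketch follows, with the macro copied in so the example compiles on its own; it mirrors the rcupdate.h idiom rather than including the kernel header.

    #include <assert.h>
    #include <limits.h>

    /* "a is before b", even if the counter wrapped between the two samples. */
    #define ULONG_CMP_LT(a, b)  (ULONG_MAX / 2 < (a) - (b))

    int main(void)
    {
        unsigned long now = 100, deadline = 200;

        assert(ULONG_CMP_LT(now, deadline));    /* plainly before */

        /* Five ticks before a deadline that lies just past the wrap point. */
        now = ULONG_MAX - 5;
        deadline = 10;
        assert(ULONG_CMP_LT(now, deadline));

        /* No longer "before" once the deadline has passed. */
        now = 20;
        assert(!ULONG_CMP_LT(now, deadline));
        return 0;
    }
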
| @@ -792,7 +779,8 @@ static int rcu_torture_boost(void *arg) | |||
| 792 | * interval. Besides, we are running at RT priority, | 779 | * interval. Besides, we are running at RT priority, |
| 793 | * so delays should be relatively rare. | 780 | * so delays should be relatively rare. |
| 794 | */ | 781 | */ |
| 795 | while (oldstarttime == boost_starttime) { | 782 | while (oldstarttime == boost_starttime && |
| 783 | !kthread_should_stop()) { | ||
| 796 | if (mutex_trylock(&boost_mutex)) { | 784 | if (mutex_trylock(&boost_mutex)) { |
| 797 | boost_starttime = jiffies + | 785 | boost_starttime = jiffies + |
| 798 | test_boost_interval * HZ; | 786 | test_boost_interval * HZ; |
| @@ -809,11 +797,11 @@ checkwait: rcu_stutter_wait("rcu_torture_boost"); | |||
| 809 | 797 | ||
| 810 | /* Clean up and exit. */ | 798 | /* Clean up and exit. */ |
| 811 | VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); | 799 | VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); |
| 812 | destroy_rcu_head_on_stack(&rbi.rcu); | ||
| 813 | rcutorture_shutdown_absorb("rcu_torture_boost"); | 800 | rcutorture_shutdown_absorb("rcu_torture_boost"); |
| 814 | while (!kthread_should_stop() || rbi.inflight) | 801 | while (!kthread_should_stop() || rbi.inflight) |
| 815 | schedule_timeout_uninterruptible(1); | 802 | schedule_timeout_uninterruptible(1); |
| 816 | smp_mb(); /* order accesses to ->inflight before stack-frame death. */ | 803 | smp_mb(); /* order accesses to ->inflight before stack-frame death. */ |
| 804 | destroy_rcu_head_on_stack(&rbi.rcu); | ||
| 817 | return 0; | 805 | return 0; |
| 818 | } | 806 | } |
| 819 | 807 | ||
| @@ -831,11 +819,13 @@ rcu_torture_fqs(void *arg) | |||
| 831 | VERBOSE_PRINTK_STRING("rcu_torture_fqs task started"); | 819 | VERBOSE_PRINTK_STRING("rcu_torture_fqs task started"); |
| 832 | do { | 820 | do { |
| 833 | fqs_resume_time = jiffies + fqs_stutter * HZ; | 821 | fqs_resume_time = jiffies + fqs_stutter * HZ; |
| 834 | while (jiffies - fqs_resume_time > LONG_MAX) { | 822 | while (ULONG_CMP_LT(jiffies, fqs_resume_time) && |
| 823 | !kthread_should_stop()) { | ||
| 835 | schedule_timeout_interruptible(1); | 824 | schedule_timeout_interruptible(1); |
| 836 | } | 825 | } |
| 837 | fqs_burst_remaining = fqs_duration; | 826 | fqs_burst_remaining = fqs_duration; |
| 838 | while (fqs_burst_remaining > 0) { | 827 | while (fqs_burst_remaining > 0 && |
| 828 | !kthread_should_stop()) { | ||
| 839 | cur_ops->fqs(); | 829 | cur_ops->fqs(); |
| 840 | udelay(fqs_holdoff); | 830 | udelay(fqs_holdoff); |
| 841 | fqs_burst_remaining -= fqs_holdoff; | 831 | fqs_burst_remaining -= fqs_holdoff; |
| @@ -1280,8 +1270,9 @@ static int rcutorture_booster_init(int cpu) | |||
| 1280 | /* Don't allow time recalculation while creating a new task. */ | 1270 | /* Don't allow time recalculation while creating a new task. */ |
| 1281 | mutex_lock(&boost_mutex); | 1271 | mutex_lock(&boost_mutex); |
| 1282 | VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task"); | 1272 | VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task"); |
| 1283 | boost_tasks[cpu] = kthread_create(rcu_torture_boost, NULL, | 1273 | boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL, |
| 1284 | "rcu_torture_boost"); | 1274 | cpu_to_node(cpu), |
| 1275 | "rcu_torture_boost"); | ||
| 1285 | if (IS_ERR(boost_tasks[cpu])) { | 1276 | if (IS_ERR(boost_tasks[cpu])) { |
| 1286 | retval = PTR_ERR(boost_tasks[cpu]); | 1277 | retval = PTR_ERR(boost_tasks[cpu]); |
| 1287 | VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed"); | 1278 | VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed"); |
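
Switching to kthread_create_on_node() lets the booster's task_struct and stack be allocated on the memory node of the CPU it will exercise; the existing code then binds it to that CPU as before. A hedged sketch of the create/bind/wake pattern, where demo_fn() and the thread name are illustrative only:

    #include <linux/kthread.h>
    #include <linux/sched.h>
    #include <linux/topology.h>
    #include <linux/err.h>

    static int demo_fn(void *unused)
    {
        while (!kthread_should_stop())
            schedule_timeout_interruptible(1);
        return 0;
    }

    static int demo_spawn_on(int cpu)
    {
        struct task_struct *t;

        /* Allocate the kthread on the CPU's home memory node... */
        t = kthread_create_on_node(demo_fn, NULL, cpu_to_node(cpu),
                                   "demo/%d", cpu);
        if (IS_ERR(t))
            return PTR_ERR(t);
        /* ...then pin it to that CPU and let it run. */
        kthread_bind(t, cpu);
        wake_up_process(t);
        return 0;
    }
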
| @@ -1424,7 +1415,7 @@ rcu_torture_init(void) | |||
| 1424 | int firsterr = 0; | 1415 | int firsterr = 0; |
| 1425 | static struct rcu_torture_ops *torture_ops[] = | 1416 | static struct rcu_torture_ops *torture_ops[] = |
| 1426 | { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, | 1417 | { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, |
| 1427 | &rcu_bh_ops, &rcu_bh_sync_ops, | 1418 | &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, |
| 1428 | &srcu_ops, &srcu_expedited_ops, | 1419 | &srcu_ops, &srcu_expedited_ops, |
| 1429 | &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; | 1420 | &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; |
| 1430 | 1421 | ||
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ba06207b1dd3..e234eb92a177 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
| @@ -52,13 +52,16 @@ | |||
| 52 | #include <linux/prefetch.h> | 52 | #include <linux/prefetch.h> |
| 53 | 53 | ||
| 54 | #include "rcutree.h" | 54 | #include "rcutree.h" |
| 55 | #include <trace/events/rcu.h> | ||
| 56 | |||
| 57 | #include "rcu.h" | ||
| 55 | 58 | ||
| 56 | /* Data structures. */ | 59 | /* Data structures. */ |
| 57 | 60 | ||
| 58 | static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | 61 | static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; |
| 59 | 62 | ||
| 60 | #define RCU_STATE_INITIALIZER(structname) { \ | 63 | #define RCU_STATE_INITIALIZER(structname) { \ |
| 61 | .level = { &structname.node[0] }, \ | 64 | .level = { &structname##_state.node[0] }, \ |
| 62 | .levelcnt = { \ | 65 | .levelcnt = { \ |
| 63 | NUM_RCU_LVL_0, /* root of hierarchy. */ \ | 66 | NUM_RCU_LVL_0, /* root of hierarchy. */ \ |
| 64 | NUM_RCU_LVL_1, \ | 67 | NUM_RCU_LVL_1, \ |
| @@ -69,17 +72,17 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | |||
| 69 | .signaled = RCU_GP_IDLE, \ | 72 | .signaled = RCU_GP_IDLE, \ |
| 70 | .gpnum = -300, \ | 73 | .gpnum = -300, \ |
| 71 | .completed = -300, \ | 74 | .completed = -300, \ |
| 72 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \ | 75 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ |
| 73 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \ | 76 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ |
| 74 | .n_force_qs = 0, \ | 77 | .n_force_qs = 0, \ |
| 75 | .n_force_qs_ngp = 0, \ | 78 | .n_force_qs_ngp = 0, \ |
| 76 | .name = #structname, \ | 79 | .name = #structname, \ |
| 77 | } | 80 | } |
| 78 | 81 | ||
| 79 | struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state); | 82 | struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched); |
| 80 | DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); | 83 | DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); |
| 81 | 84 | ||
| 82 | struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); | 85 | struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh); |
| 83 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); | 86 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); |
| 84 | 87 | ||
| 85 | static struct rcu_state *rcu_state; | 88 | static struct rcu_state *rcu_state; |
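
RCU_STATE_INITIALIZER() now takes the short flavor name (rcu_sched, rcu_bh) and pastes "_state" onto it for the self-referencing fields, so .name = #structname stringizes to "rcu_sched" rather than "rcu_sched_state" in trace output. A tiny standalone illustration of that ## and # interplay, using demo types rather than the real rcu_state:

    #include <stdio.h>

    struct demo_state {
        int node[1];
        int *level;         /* points into our own ->node[] */
        const char *name;   /* short, human-readable flavor name */
    };

    /* ## pastes "_state" for self-references; # stringizes the short name. */
    #define DEMO_STATE_INITIALIZER(structname) {        \
        .level = &structname##_state.node[0],           \
        .name  = #structname,                           \
    }

    struct demo_state rcu_sched_state = DEMO_STATE_INITIALIZER(rcu_sched);

    int main(void)
    {
        /* Prints "rcu_sched 1": short name, level pointing at our own node. */
        printf("%s %d\n", rcu_sched_state.name,
               rcu_sched_state.level == &rcu_sched_state.node[0]);
        return 0;
    }
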
| @@ -128,8 +131,6 @@ static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); | |||
| 128 | static void invoke_rcu_core(void); | 131 | static void invoke_rcu_core(void); |
| 129 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); | 132 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); |
| 130 | 133 | ||
| 131 | #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ | ||
| 132 | |||
| 133 | /* | 134 | /* |
| 134 | * Track the rcutorture test sequence number and the update version | 135 | * Track the rcutorture test sequence number and the update version |
| 135 | * number within a given test. The rcutorture_testseq is incremented | 136 | * number within a given test. The rcutorture_testseq is incremented |
| @@ -156,33 +157,41 @@ static int rcu_gp_in_progress(struct rcu_state *rsp) | |||
| 156 | * Note a quiescent state. Because we do not need to know | 157 | * Note a quiescent state. Because we do not need to know |
| 157 | * how many quiescent states passed, just if there was at least | 158 | * how many quiescent states passed, just if there was at least |
| 158 | * one since the start of the grace period, this just sets a flag. | 159 | * one since the start of the grace period, this just sets a flag. |
| 160 | * The caller must have disabled preemption. | ||
| 159 | */ | 161 | */ |
| 160 | void rcu_sched_qs(int cpu) | 162 | void rcu_sched_qs(int cpu) |
| 161 | { | 163 | { |
| 162 | struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); | 164 | struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); |
| 163 | 165 | ||
| 164 | rdp->passed_quiesc_completed = rdp->gpnum - 1; | 166 | rdp->passed_quiesce_gpnum = rdp->gpnum; |
| 165 | barrier(); | 167 | barrier(); |
| 166 | rdp->passed_quiesc = 1; | 168 | if (rdp->passed_quiesce == 0) |
| 169 | trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs"); | ||
| 170 | rdp->passed_quiesce = 1; | ||
| 167 | } | 171 | } |
| 168 | 172 | ||
| 169 | void rcu_bh_qs(int cpu) | 173 | void rcu_bh_qs(int cpu) |
| 170 | { | 174 | { |
| 171 | struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); | 175 | struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); |
| 172 | 176 | ||
| 173 | rdp->passed_quiesc_completed = rdp->gpnum - 1; | 177 | rdp->passed_quiesce_gpnum = rdp->gpnum; |
| 174 | barrier(); | 178 | barrier(); |
| 175 | rdp->passed_quiesc = 1; | 179 | if (rdp->passed_quiesce == 0) |
| 180 | trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs"); | ||
| 181 | rdp->passed_quiesce = 1; | ||
| 176 | } | 182 | } |
| 177 | 183 | ||
| 178 | /* | 184 | /* |
| 179 | * Note a context switch. This is a quiescent state for RCU-sched, | 185 | * Note a context switch. This is a quiescent state for RCU-sched, |
| 180 | * and requires special handling for preemptible RCU. | 186 | * and requires special handling for preemptible RCU. |
| 187 | * The caller must have disabled preemption. | ||
| 181 | */ | 188 | */ |
| 182 | void rcu_note_context_switch(int cpu) | 189 | void rcu_note_context_switch(int cpu) |
| 183 | { | 190 | { |
| 191 | trace_rcu_utilization("Start context switch"); | ||
| 184 | rcu_sched_qs(cpu); | 192 | rcu_sched_qs(cpu); |
| 185 | rcu_preempt_note_context_switch(cpu); | 193 | rcu_preempt_note_context_switch(cpu); |
| 194 | trace_rcu_utilization("End context switch"); | ||
| 186 | } | 195 | } |
| 187 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); | 196 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); |
| 188 | 197 | ||
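
The trace_rcu_grace_period() and trace_rcu_utilization() calls above are ordinary tracepoints whose definitions live in the new include/trace/events/rcu.h, which is not part of this hunk. Purely as a hypothetical sketch of an event with this call shape; the name, fields and format string below are illustrative and are not the real definitions:

    /* demo_trace.h: hypothetical tracepoint header, not include/trace/events/rcu.h */
    #undef TRACE_SYSTEM
    #define TRACE_SYSTEM demo

    #if !defined(_DEMO_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
    #define _DEMO_TRACE_H

    #include <linux/tracepoint.h>

    TRACE_EVENT(demo_grace_period,

        TP_PROTO(char *rcuname, unsigned long gpnum, char *gpevent),

        TP_ARGS(rcuname, gpnum, gpevent),

        TP_STRUCT__entry(
            __field(char *, rcuname)
            __field(unsigned long, gpnum)
            __field(char *, gpevent)
        ),

        TP_fast_assign(
            __entry->rcuname = rcuname;
            __entry->gpnum = gpnum;
            __entry->gpevent = gpevent;
        ),

        TP_printk("%s %lu %s",
                  __entry->rcuname, __entry->gpnum, __entry->gpevent)
    );

    #endif /* _DEMO_TRACE_H */

    /* Outside the multi-read protection; a header living outside
     * include/trace/events/ also needs TRACE_INCLUDE_PATH/TRACE_INCLUDE_FILE. */
    #include <trace/define_trace.h>

Once the real events are compiled in, they show up under /sys/kernel/debug/tracing/events/rcu/ and can be enabled like any other ftrace event.
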
| @@ -193,7 +202,7 @@ DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { | |||
| 193 | }; | 202 | }; |
| 194 | #endif /* #ifdef CONFIG_NO_HZ */ | 203 | #endif /* #ifdef CONFIG_NO_HZ */ |
| 195 | 204 | ||
| 196 | static int blimit = 10; /* Maximum callbacks per softirq. */ | 205 | static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */ |
| 197 | static int qhimark = 10000; /* If this many pending, ignore blimit. */ | 206 | static int qhimark = 10000; /* If this many pending, ignore blimit. */ |
| 198 | static int qlowmark = 100; /* Once only this many pending, use blimit. */ | 207 | static int qlowmark = 100; /* Once only this many pending, use blimit. */ |
| 199 | 208 | ||
| @@ -314,6 +323,7 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp) | |||
| 314 | * trust its state not to change because interrupts are disabled. | 323 | * trust its state not to change because interrupts are disabled. |
| 315 | */ | 324 | */ |
| 316 | if (cpu_is_offline(rdp->cpu)) { | 325 | if (cpu_is_offline(rdp->cpu)) { |
| 326 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); | ||
| 317 | rdp->offline_fqs++; | 327 | rdp->offline_fqs++; |
| 318 | return 1; | 328 | return 1; |
| 319 | } | 329 | } |
| @@ -354,19 +364,13 @@ void rcu_enter_nohz(void) | |||
| 354 | local_irq_restore(flags); | 364 | local_irq_restore(flags); |
| 355 | return; | 365 | return; |
| 356 | } | 366 | } |
| 367 | trace_rcu_dyntick("Start"); | ||
| 357 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ | 368 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ |
| 358 | smp_mb__before_atomic_inc(); /* See above. */ | 369 | smp_mb__before_atomic_inc(); /* See above. */ |
| 359 | atomic_inc(&rdtp->dynticks); | 370 | atomic_inc(&rdtp->dynticks); |
| 360 | smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ | 371 | smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ |
| 361 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | 372 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); |
| 362 | local_irq_restore(flags); | 373 | local_irq_restore(flags); |
| 363 | |||
| 364 | /* If the interrupt queued a callback, get out of dyntick mode. */ | ||
| 365 | if (in_irq() && | ||
| 366 | (__get_cpu_var(rcu_sched_data).nxtlist || | ||
| 367 | __get_cpu_var(rcu_bh_data).nxtlist || | ||
| 368 | rcu_preempt_needs_cpu(smp_processor_id()))) | ||
| 369 | set_need_resched(); | ||
| 370 | } | 374 | } |
| 371 | 375 | ||
| 372 | /* | 376 | /* |
| @@ -391,6 +395,7 @@ void rcu_exit_nohz(void) | |||
| 391 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ | 395 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ |
| 392 | smp_mb__after_atomic_inc(); /* See above. */ | 396 | smp_mb__after_atomic_inc(); /* See above. */ |
| 393 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); | 397 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); |
| 398 | trace_rcu_dyntick("End"); | ||
| 394 | local_irq_restore(flags); | 399 | local_irq_restore(flags); |
| 395 | } | 400 | } |
| 396 | 401 | ||
| @@ -481,11 +486,11 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) | |||
| 481 | */ | 486 | */ |
| 482 | static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | 487 | static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) |
| 483 | { | 488 | { |
| 484 | unsigned long curr; | 489 | unsigned int curr; |
| 485 | unsigned long snap; | 490 | unsigned int snap; |
| 486 | 491 | ||
| 487 | curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks); | 492 | curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks); |
| 488 | snap = (unsigned long)rdp->dynticks_snap; | 493 | snap = (unsigned int)rdp->dynticks_snap; |
| 489 | 494 | ||
| 490 | /* | 495 | /* |
| 491 | * If the CPU passed through or entered a dynticks idle phase with | 496 | * If the CPU passed through or entered a dynticks idle phase with |
| @@ -495,7 +500,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | |||
| 495 | * read-side critical section that started before the beginning | 500 | * read-side critical section that started before the beginning |
| 496 | * of the current RCU grace period. | 501 | * of the current RCU grace period. |
| 497 | */ | 502 | */ |
| 498 | if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) { | 503 | if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) { |
| 504 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti"); | ||
| 499 | rdp->dynticks_fqs++; | 505 | rdp->dynticks_fqs++; |
| 500 | return 1; | 506 | return 1; |
| 501 | } | 507 | } |
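
Because the dynticks counter is incremented on every idle entry and exit, an even value means the CPU is sitting in dynticks idle right now, and an advance of at least two since the snapshot means it passed through idle, and therefore through a quiescent state, at some point. A standalone model of the test, with the 32-bit comparison macro copied in the style of rcupdate.h so the example is self-contained:

    #include <assert.h>
    #include <limits.h>

    #define UINT_CMP_GE(a, b)   (UINT_MAX / 2 >= (a) - (b))

    /* Odd counter: CPU busy.  Even counter: CPU in dynticks idle. */
    static int saw_quiescent_state(unsigned int curr, unsigned int snap)
    {
        return (curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2);
    }

    int main(void)
    {
        assert(saw_quiescent_state(44, 43));    /* idle right now (even) */
        assert(saw_quiescent_state(45, 43));    /* idled and resumed since snap */
        assert(!saw_quiescent_state(43, 43));   /* busy the whole time */
        return 0;
    }
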
| @@ -537,6 +543,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
| 537 | int cpu; | 543 | int cpu; |
| 538 | long delta; | 544 | long delta; |
| 539 | unsigned long flags; | 545 | unsigned long flags; |
| 546 | int ndetected; | ||
| 540 | struct rcu_node *rnp = rcu_get_root(rsp); | 547 | struct rcu_node *rnp = rcu_get_root(rsp); |
| 541 | 548 | ||
| 542 | /* Only let one CPU complain about others per time interval. */ | 549 | /* Only let one CPU complain about others per time interval. */ |
| @@ -553,7 +560,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
| 553 | * Now rat on any tasks that got kicked up to the root rcu_node | 560 | * Now rat on any tasks that got kicked up to the root rcu_node |
| 554 | * due to CPU offlining. | 561 | * due to CPU offlining. |
| 555 | */ | 562 | */ |
| 556 | rcu_print_task_stall(rnp); | 563 | ndetected = rcu_print_task_stall(rnp); |
| 557 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 564 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 558 | 565 | ||
| 559 | /* | 566 | /* |
| @@ -565,17 +572,22 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
| 565 | rsp->name); | 572 | rsp->name); |
| 566 | rcu_for_each_leaf_node(rsp, rnp) { | 573 | rcu_for_each_leaf_node(rsp, rnp) { |
| 567 | raw_spin_lock_irqsave(&rnp->lock, flags); | 574 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| 568 | rcu_print_task_stall(rnp); | 575 | ndetected += rcu_print_task_stall(rnp); |
| 569 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 576 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 570 | if (rnp->qsmask == 0) | 577 | if (rnp->qsmask == 0) |
| 571 | continue; | 578 | continue; |
| 572 | for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) | 579 | for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) |
| 573 | if (rnp->qsmask & (1UL << cpu)) | 580 | if (rnp->qsmask & (1UL << cpu)) { |
| 574 | printk(" %d", rnp->grplo + cpu); | 581 | printk(" %d", rnp->grplo + cpu); |
| 582 | ndetected++; | ||
| 583 | } | ||
| 575 | } | 584 | } |
| 576 | printk("} (detected by %d, t=%ld jiffies)\n", | 585 | printk("} (detected by %d, t=%ld jiffies)\n", |
| 577 | smp_processor_id(), (long)(jiffies - rsp->gp_start)); | 586 | smp_processor_id(), (long)(jiffies - rsp->gp_start)); |
| 578 | trigger_all_cpu_backtrace(); | 587 | if (ndetected == 0) |
| 588 | printk(KERN_ERR "INFO: Stall ended before state dump start\n"); | ||
| 589 | else if (!trigger_all_cpu_backtrace()) | ||
| 590 | dump_stack(); | ||
| 579 | 591 | ||
| 580 | /* If so configured, complain about tasks blocking the grace period. */ | 592 | /* If so configured, complain about tasks blocking the grace period. */ |
| 581 | 593 | ||
| @@ -596,7 +608,8 @@ static void print_cpu_stall(struct rcu_state *rsp) | |||
| 596 | */ | 608 | */ |
| 597 | printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", | 609 | printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", |
| 598 | rsp->name, smp_processor_id(), jiffies - rsp->gp_start); | 610 | rsp->name, smp_processor_id(), jiffies - rsp->gp_start); |
| 599 | trigger_all_cpu_backtrace(); | 611 | if (!trigger_all_cpu_backtrace()) |
| 612 | dump_stack(); | ||
| 600 | 613 | ||
| 601 | raw_spin_lock_irqsave(&rnp->lock, flags); | 614 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| 602 | if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) | 615 | if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) |
| @@ -678,9 +691,10 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct | |||
| 678 | * go looking for one. | 691 | * go looking for one. |
| 679 | */ | 692 | */ |
| 680 | rdp->gpnum = rnp->gpnum; | 693 | rdp->gpnum = rnp->gpnum; |
| 694 | trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart"); | ||
| 681 | if (rnp->qsmask & rdp->grpmask) { | 695 | if (rnp->qsmask & rdp->grpmask) { |
| 682 | rdp->qs_pending = 1; | 696 | rdp->qs_pending = 1; |
| 683 | rdp->passed_quiesc = 0; | 697 | rdp->passed_quiesce = 0; |
| 684 | } else | 698 | } else |
| 685 | rdp->qs_pending = 0; | 699 | rdp->qs_pending = 0; |
| 686 | } | 700 | } |
| @@ -741,6 +755,7 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat | |||
| 741 | 755 | ||
| 742 | /* Remember that we saw this grace-period completion. */ | 756 | /* Remember that we saw this grace-period completion. */ |
| 743 | rdp->completed = rnp->completed; | 757 | rdp->completed = rnp->completed; |
| 758 | trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend"); | ||
| 744 | 759 | ||
| 745 | /* | 760 | /* |
| 746 | * If we were in an extended quiescent state, we may have | 761 | * If we were in an extended quiescent state, we may have |
| @@ -826,31 +841,31 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
| 826 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | 841 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); |
| 827 | struct rcu_node *rnp = rcu_get_root(rsp); | 842 | struct rcu_node *rnp = rcu_get_root(rsp); |
| 828 | 843 | ||
| 829 | if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { | 844 | if (!rcu_scheduler_fully_active || |
| 830 | if (cpu_needs_another_gp(rsp, rdp)) | 845 | !cpu_needs_another_gp(rsp, rdp)) { |
| 831 | rsp->fqs_need_gp = 1; | 846 | /* |
| 832 | if (rnp->completed == rsp->completed) { | 847 | * Either the scheduler hasn't yet spawned the first |
| 833 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 848 | * non-idle task or this CPU does not need another |
| 834 | return; | 849 | * grace period. Either way, don't start a new grace |
| 835 | } | 850 | * period. |
| 836 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 851 | */ |
| 852 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
| 853 | return; | ||
| 854 | } | ||
| 837 | 855 | ||
| 856 | if (rsp->fqs_active) { | ||
| 838 | /* | 857 | /* |
| 839 | * Propagate new ->completed value to rcu_node structures | 858 | * This CPU needs a grace period, but force_quiescent_state() |
| 840 | * so that other CPUs don't have to wait until the start | 859 | * is running. Tell it to start one on this CPU's behalf. |
| 841 | * of the next grace period to process their callbacks. | ||
| 842 | */ | 860 | */ |
| 843 | rcu_for_each_node_breadth_first(rsp, rnp) { | 861 | rsp->fqs_need_gp = 1; |
| 844 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 862 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 845 | rnp->completed = rsp->completed; | ||
| 846 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
| 847 | } | ||
| 848 | local_irq_restore(flags); | ||
| 849 | return; | 863 | return; |
| 850 | } | 864 | } |
| 851 | 865 | ||
| 852 | /* Advance to a new grace period and initialize state. */ | 866 | /* Advance to a new grace period and initialize state. */ |
| 853 | rsp->gpnum++; | 867 | rsp->gpnum++; |
| 868 | trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); | ||
| 854 | WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT); | 869 | WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT); |
| 855 | rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ | 870 | rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ |
| 856 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; | 871 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; |
| @@ -865,6 +880,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
| 865 | rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ | 880 | rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ |
| 866 | rcu_start_gp_per_cpu(rsp, rnp, rdp); | 881 | rcu_start_gp_per_cpu(rsp, rnp, rdp); |
| 867 | rcu_preempt_boost_start_gp(rnp); | 882 | rcu_preempt_boost_start_gp(rnp); |
| 883 | trace_rcu_grace_period_init(rsp->name, rnp->gpnum, | ||
| 884 | rnp->level, rnp->grplo, | ||
| 885 | rnp->grphi, rnp->qsmask); | ||
| 868 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 886 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 869 | return; | 887 | return; |
| 870 | } | 888 | } |
| @@ -901,6 +919,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
| 901 | if (rnp == rdp->mynode) | 919 | if (rnp == rdp->mynode) |
| 902 | rcu_start_gp_per_cpu(rsp, rnp, rdp); | 920 | rcu_start_gp_per_cpu(rsp, rnp, rdp); |
| 903 | rcu_preempt_boost_start_gp(rnp); | 921 | rcu_preempt_boost_start_gp(rnp); |
| 922 | trace_rcu_grace_period_init(rsp->name, rnp->gpnum, | ||
| 923 | rnp->level, rnp->grplo, | ||
| 924 | rnp->grphi, rnp->qsmask); | ||
| 904 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 925 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
| 905 | } | 926 | } |
| 906 | 927 | ||
| @@ -922,6 +943,8 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) | |||
| 922 | __releases(rcu_get_root(rsp)->lock) | 943 | __releases(rcu_get_root(rsp)->lock) |
| 923 | { | 944 | { |
| 924 | unsigned long gp_duration; | 945 | unsigned long gp_duration; |
| 946 | struct rcu_node *rnp = rcu_get_root(rsp); | ||
| 947 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | ||
| 925 | 948 | ||
| 926 | WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); | 949 | WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); |
| 927 | 950 | ||
| @@ -933,7 +956,41 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) | |||
| 933 | gp_duration = jiffies - rsp->gp_start; | 956 | gp_duration = jiffies - rsp->gp_start; |
| 934 | if (gp_duration > rsp->gp_max) | 957 | if (gp_duration > rsp->gp_max) |
| 935 | rsp->gp_max = gp_duration; | 958 | rsp->gp_max = gp_duration; |
| 936 | rsp->completed = rsp->gpnum; | 959 | |
| 960 | /* | ||
| 961 | * We know the grace period is complete, but to everyone else | ||
| 962 | * it appears to still be ongoing. But it is also the case | ||
| 963 | * that to everyone else it looks like there is nothing that | ||
| 964 | * they can do to advance the grace period. It is therefore | ||
| 965 | * safe for us to drop the lock in order to mark the grace | ||
| 966 | * period as completed in all of the rcu_node structures. | ||
| 967 | * | ||
| 968 | * But if this CPU needs another grace period, it will take | ||
| 969 | * care of this while initializing the next grace period. | ||
| 970 | * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL | ||
| 971 | * because the callbacks have not yet been advanced: Those | ||
| 972 | * callbacks are waiting on the grace period that just now | ||
| 973 | * completed. | ||
| 974 | */ | ||
| 975 | if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) { | ||
| 976 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
| 977 | |||
| 978 | /* | ||
| 979 | * Propagate new ->completed value to rcu_node structures | ||
| 980 | * so that other CPUs don't have to wait until the start | ||
| 981 | * of the next grace period to process their callbacks. | ||
| 982 | */ | ||
| 983 | rcu_for_each_node_breadth_first(rsp, rnp) { | ||
| 984 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | ||
| 985 | rnp->completed = rsp->gpnum; | ||
| 986 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
| 987 | } | ||
| 988 | rnp = rcu_get_root(rsp); | ||
| 989 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | ||
| 990 | } | ||
| 991 | |||
| 992 | rsp->completed = rsp->gpnum; /* Declare the grace period complete. */ | ||
| 993 | trace_rcu_grace_period(rsp->name, rsp->completed, "end"); | ||
| 937 | rsp->signaled = RCU_GP_IDLE; | 994 | rsp->signaled = RCU_GP_IDLE; |
| 938 | rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ | 995 | rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ |
| 939 | } | 996 | } |
| @@ -962,6 +1019,10 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, | |||
| 962 | return; | 1019 | return; |
| 963 | } | 1020 | } |
| 964 | rnp->qsmask &= ~mask; | 1021 | rnp->qsmask &= ~mask; |
| 1022 | trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, | ||
| 1023 | mask, rnp->qsmask, rnp->level, | ||
| 1024 | rnp->grplo, rnp->grphi, | ||
| 1025 | !!rnp->gp_tasks); | ||
| 965 | if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { | 1026 | if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { |
| 966 | 1027 | ||
| 967 | /* Other bits still set at this level, so done. */ | 1028 | /* Other bits still set at this level, so done. */ |
| @@ -1000,7 +1061,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, | |||
| 1000 | * based on quiescent states detected in an earlier grace period! | 1061 | * based on quiescent states detected in an earlier grace period! |
| 1001 | */ | 1062 | */ |
| 1002 | static void | 1063 | static void |
| 1003 | rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) | 1064 | rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastgp) |
| 1004 | { | 1065 | { |
| 1005 | unsigned long flags; | 1066 | unsigned long flags; |
| 1006 | unsigned long mask; | 1067 | unsigned long mask; |
| @@ -1008,17 +1069,15 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long las | |||
| 1008 | 1069 | ||
| 1009 | rnp = rdp->mynode; | 1070 | rnp = rdp->mynode; |
| 1010 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1071 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| 1011 | if (lastcomp != rnp->completed) { | 1072 | if (lastgp != rnp->gpnum || rnp->completed == rnp->gpnum) { |
| 1012 | 1073 | ||
| 1013 | /* | 1074 | /* |
| 1014 | * Someone beat us to it for this grace period, so leave. | 1075 | * The grace period in which this quiescent state was |
| 1015 | * The race with GP start is resolved by the fact that we | 1076 | * recorded has ended, so don't report it upwards. |
| 1016 | * hold the leaf rcu_node lock, so that the per-CPU bits | 1077 | * We will instead need a new quiescent state that lies |
| 1017 | * cannot yet be initialized -- so we would simply find our | 1078 | * within the current grace period. |
| 1018 | * CPU's bit already cleared in rcu_report_qs_rnp() if this | ||
| 1019 | * race occurred. | ||
| 1020 | */ | 1079 | */ |
| 1021 | rdp->passed_quiesc = 0; /* try again later! */ | 1080 | rdp->passed_quiesce = 0; /* need qs for new gp. */ |
| 1022 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1081 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 1023 | return; | 1082 | return; |
| 1024 | } | 1083 | } |
| @@ -1062,14 +1121,14 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1062 | * Was there a quiescent state since the beginning of the grace | 1121 | * Was there a quiescent state since the beginning of the grace |
| 1063 | * period? If no, then exit and wait for the next call. | 1122 | * period? If no, then exit and wait for the next call. |
| 1064 | */ | 1123 | */ |
| 1065 | if (!rdp->passed_quiesc) | 1124 | if (!rdp->passed_quiesce) |
| 1066 | return; | 1125 | return; |
| 1067 | 1126 | ||
| 1068 | /* | 1127 | /* |
| 1069 | * Tell RCU we are done (but rcu_report_qs_rdp() will be the | 1128 | * Tell RCU we are done (but rcu_report_qs_rdp() will be the |
| 1070 | * judge of that). | 1129 | * judge of that). |
| 1071 | */ | 1130 | */ |
| 1072 | rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed); | 1131 | rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesce_gpnum); |
| 1073 | } | 1132 | } |
| 1074 | 1133 | ||
| 1075 | #ifdef CONFIG_HOTPLUG_CPU | 1134 | #ifdef CONFIG_HOTPLUG_CPU |
| @@ -1130,11 +1189,20 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
| 1130 | if (rnp->qsmaskinit != 0) { | 1189 | if (rnp->qsmaskinit != 0) { |
| 1131 | if (rnp != rdp->mynode) | 1190 | if (rnp != rdp->mynode) |
| 1132 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1191 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
| 1192 | else | ||
| 1193 | trace_rcu_grace_period(rsp->name, | ||
| 1194 | rnp->gpnum + 1 - | ||
| 1195 | !!(rnp->qsmask & mask), | ||
| 1196 | "cpuofl"); | ||
| 1133 | break; | 1197 | break; |
| 1134 | } | 1198 | } |
| 1135 | if (rnp == rdp->mynode) | 1199 | if (rnp == rdp->mynode) { |
| 1200 | trace_rcu_grace_period(rsp->name, | ||
| 1201 | rnp->gpnum + 1 - | ||
| 1202 | !!(rnp->qsmask & mask), | ||
| 1203 | "cpuofl"); | ||
| 1136 | need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); | 1204 | need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); |
| 1137 | else | 1205 | } else |
| 1138 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1206 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
| 1139 | mask = rnp->grpmask; | 1207 | mask = rnp->grpmask; |
| 1140 | rnp = rnp->parent; | 1208 | rnp = rnp->parent; |
| @@ -1190,17 +1258,22 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1190 | { | 1258 | { |
| 1191 | unsigned long flags; | 1259 | unsigned long flags; |
| 1192 | struct rcu_head *next, *list, **tail; | 1260 | struct rcu_head *next, *list, **tail; |
| 1193 | int count; | 1261 | int bl, count; |
| 1194 | 1262 | ||
| 1195 | /* If no callbacks are ready, just return.*/ | 1263 | /* If no callbacks are ready, just return.*/ |
| 1196 | if (!cpu_has_callbacks_ready_to_invoke(rdp)) | 1264 | if (!cpu_has_callbacks_ready_to_invoke(rdp)) { |
| 1265 | trace_rcu_batch_start(rsp->name, 0, 0); | ||
| 1266 | trace_rcu_batch_end(rsp->name, 0); | ||
| 1197 | return; | 1267 | return; |
| 1268 | } | ||
| 1198 | 1269 | ||
| 1199 | /* | 1270 | /* |
| 1200 | * Extract the list of ready callbacks, disabling to prevent | 1271 | * Extract the list of ready callbacks, disabling to prevent |
| 1201 | * races with call_rcu() from interrupt handlers. | 1272 | * races with call_rcu() from interrupt handlers. |
| 1202 | */ | 1273 | */ |
| 1203 | local_irq_save(flags); | 1274 | local_irq_save(flags); |
| 1275 | bl = rdp->blimit; | ||
| 1276 | trace_rcu_batch_start(rsp->name, rdp->qlen, bl); | ||
| 1204 | list = rdp->nxtlist; | 1277 | list = rdp->nxtlist; |
| 1205 | rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; | 1278 | rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; |
| 1206 | *rdp->nxttail[RCU_DONE_TAIL] = NULL; | 1279 | *rdp->nxttail[RCU_DONE_TAIL] = NULL; |
| @@ -1216,13 +1289,14 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1216 | next = list->next; | 1289 | next = list->next; |
| 1217 | prefetch(next); | 1290 | prefetch(next); |
| 1218 | debug_rcu_head_unqueue(list); | 1291 | debug_rcu_head_unqueue(list); |
| 1219 | __rcu_reclaim(list); | 1292 | __rcu_reclaim(rsp->name, list); |
| 1220 | list = next; | 1293 | list = next; |
| 1221 | if (++count >= rdp->blimit) | 1294 | if (++count >= bl) |
| 1222 | break; | 1295 | break; |
| 1223 | } | 1296 | } |
| 1224 | 1297 | ||
| 1225 | local_irq_save(flags); | 1298 | local_irq_save(flags); |
| 1299 | trace_rcu_batch_end(rsp->name, count); | ||
| 1226 | 1300 | ||
| 1227 | /* Update count, and requeue any remaining callbacks. */ | 1301 | /* Update count, and requeue any remaining callbacks. */ |
| 1228 | rdp->qlen -= count; | 1302 | rdp->qlen -= count; |
| @@ -1250,7 +1324,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1250 | 1324 | ||
| 1251 | local_irq_restore(flags); | 1325 | local_irq_restore(flags); |
| 1252 | 1326 | ||
| 1253 | /* Re-raise the RCU softirq if there are callbacks remaining. */ | 1327 | /* Re-invoke RCU core processing if there are callbacks remaining. */ |
| 1254 | if (cpu_has_callbacks_ready_to_invoke(rdp)) | 1328 | if (cpu_has_callbacks_ready_to_invoke(rdp)) |
| 1255 | invoke_rcu_core(); | 1329 | invoke_rcu_core(); |
| 1256 | } | 1330 | } |
| @@ -1258,7 +1332,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1258 | /* | 1332 | /* |
| 1259 | * Check to see if this CPU is in a non-context-switch quiescent state | 1333 | * Check to see if this CPU is in a non-context-switch quiescent state |
| 1260 | * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). | 1334 | * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). |
| 1261 | * Also schedule the RCU softirq handler. | 1335 | * Also schedule RCU core processing. |
| 1262 | * | 1336 | * |
| 1263 | * This function must be called with hardirqs disabled. It is normally | 1337 | * This function must be called with hardirqs disabled. It is normally |
| 1264 | * invoked from the scheduling-clock interrupt. If rcu_pending returns | 1338 | * invoked from the scheduling-clock interrupt. If rcu_pending returns |
| @@ -1266,6 +1340,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1266 | */ | 1340 | */ |
| 1267 | void rcu_check_callbacks(int cpu, int user) | 1341 | void rcu_check_callbacks(int cpu, int user) |
| 1268 | { | 1342 | { |
| 1343 | trace_rcu_utilization("Start scheduler-tick"); | ||
| 1269 | if (user || | 1344 | if (user || |
| 1270 | (idle_cpu(cpu) && rcu_scheduler_active && | 1345 | (idle_cpu(cpu) && rcu_scheduler_active && |
| 1271 | !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { | 1346 | !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { |
| @@ -1299,6 +1374,7 @@ void rcu_check_callbacks(int cpu, int user) | |||
| 1299 | rcu_preempt_check_callbacks(cpu); | 1374 | rcu_preempt_check_callbacks(cpu); |
| 1300 | if (rcu_pending(cpu)) | 1375 | if (rcu_pending(cpu)) |
| 1301 | invoke_rcu_core(); | 1376 | invoke_rcu_core(); |
| 1377 | trace_rcu_utilization("End scheduler-tick"); | ||
| 1302 | } | 1378 | } |
| 1303 | 1379 | ||
| 1304 | #ifdef CONFIG_SMP | 1380 | #ifdef CONFIG_SMP |
| @@ -1360,10 +1436,14 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | |||
| 1360 | unsigned long flags; | 1436 | unsigned long flags; |
| 1361 | struct rcu_node *rnp = rcu_get_root(rsp); | 1437 | struct rcu_node *rnp = rcu_get_root(rsp); |
| 1362 | 1438 | ||
| 1363 | if (!rcu_gp_in_progress(rsp)) | 1439 | trace_rcu_utilization("Start fqs"); |
| 1440 | if (!rcu_gp_in_progress(rsp)) { | ||
| 1441 | trace_rcu_utilization("End fqs"); | ||
| 1364 | return; /* No grace period in progress, nothing to force. */ | 1442 | return; /* No grace period in progress, nothing to force. */ |
| 1443 | } | ||
| 1365 | if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { | 1444 | if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { |
| 1366 | rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ | 1445 | rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ |
| 1446 | trace_rcu_utilization("End fqs"); | ||
| 1367 | return; /* Someone else is already on the job. */ | 1447 | return; /* Someone else is already on the job. */ |
| 1368 | } | 1448 | } |
| 1369 | if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) | 1449 | if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) |
| @@ -1412,11 +1492,13 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | |||
| 1412 | raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ | 1492 | raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ |
| 1413 | rsp->fqs_need_gp = 0; | 1493 | rsp->fqs_need_gp = 0; |
| 1414 | rcu_start_gp(rsp, flags); /* releases rnp->lock */ | 1494 | rcu_start_gp(rsp, flags); /* releases rnp->lock */ |
| 1495 | trace_rcu_utilization("End fqs"); | ||
| 1415 | return; | 1496 | return; |
| 1416 | } | 1497 | } |
| 1417 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ | 1498 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ |
| 1418 | unlock_fqs_ret: | 1499 | unlock_fqs_ret: |
| 1419 | raw_spin_unlock_irqrestore(&rsp->fqslock, flags); | 1500 | raw_spin_unlock_irqrestore(&rsp->fqslock, flags); |
| 1501 | trace_rcu_utilization("End fqs"); | ||
| 1420 | } | 1502 | } |
| 1421 | 1503 | ||
| 1422 | #else /* #ifdef CONFIG_SMP */ | 1504 | #else /* #ifdef CONFIG_SMP */ |
| @@ -1429,9 +1511,9 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | |||
| 1429 | #endif /* #else #ifdef CONFIG_SMP */ | 1511 | #endif /* #else #ifdef CONFIG_SMP */ |
| 1430 | 1512 | ||
| 1431 | /* | 1513 | /* |
| 1432 | * This does the RCU processing work from softirq context for the | 1514 | * This does the RCU core processing work for the specified rcu_state |
| 1433 | * specified rcu_state and rcu_data structures. This may be called | 1515 | * and rcu_data structures. This may be called only from the CPU to |
| 1434 | * only from the CPU to whom the rdp belongs. | 1516 | * whom the rdp belongs. |
| 1435 | */ | 1517 | */ |
| 1436 | static void | 1518 | static void |
| 1437 | __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | 1519 | __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) |
| @@ -1468,24 +1550,24 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1468 | } | 1550 | } |
| 1469 | 1551 | ||
| 1470 | /* | 1552 | /* |
| 1471 | * Do softirq processing for the current CPU. | 1553 | * Do RCU core processing for the current CPU. |
| 1472 | */ | 1554 | */ |
| 1473 | static void rcu_process_callbacks(struct softirq_action *unused) | 1555 | static void rcu_process_callbacks(struct softirq_action *unused) |
| 1474 | { | 1556 | { |
| 1557 | trace_rcu_utilization("Start RCU core"); | ||
| 1475 | __rcu_process_callbacks(&rcu_sched_state, | 1558 | __rcu_process_callbacks(&rcu_sched_state, |
| 1476 | &__get_cpu_var(rcu_sched_data)); | 1559 | &__get_cpu_var(rcu_sched_data)); |
| 1477 | __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); | 1560 | __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); |
| 1478 | rcu_preempt_process_callbacks(); | 1561 | rcu_preempt_process_callbacks(); |
| 1479 | 1562 | trace_rcu_utilization("End RCU core"); | |
| 1480 | /* If we are last CPU on way to dyntick-idle mode, accelerate it. */ | ||
| 1481 | rcu_needs_cpu_flush(); | ||
| 1482 | } | 1563 | } |
| 1483 | 1564 | ||
| 1484 | /* | 1565 | /* |
| 1485 | * Wake up the current CPU's kthread. This replaces raise_softirq() | 1566 | * Schedule RCU callback invocation. If the specified type of RCU |
| 1486 | * in earlier versions of RCU. Note that because we are running on | 1567 | * does not support RCU priority boosting, just do a direct call, |
| 1487 | * the current CPU with interrupts disabled, the rcu_cpu_kthread_task | 1568 | * otherwise wake up the per-CPU kernel kthread. Note that because we |
| 1488 | * cannot disappear out from under us. | 1569 | * are running on the current CPU with interrupts disabled, the |
| 1570 | * rcu_cpu_kthread_task cannot disappear out from under us. | ||
| 1489 | */ | 1571 | */ |
| 1490 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | 1572 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) |
| 1491 | { | 1573 | { |
| @@ -1530,6 +1612,12 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
| 1530 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; | 1612 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; |
| 1531 | rdp->qlen++; | 1613 | rdp->qlen++; |
| 1532 | 1614 | ||
| 1615 | if (__is_kfree_rcu_offset((unsigned long)func)) | ||
| 1616 | trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, | ||
| 1617 | rdp->qlen); | ||
| 1618 | else | ||
| 1619 | trace_rcu_callback(rsp->name, head, rdp->qlen); | ||
| 1620 | |||
| 1533 | /* If interrupts were disabled, don't dive into RCU core. */ | 1621 | /* If interrupts were disabled, don't dive into RCU core. */ |
| 1534 | if (irqs_disabled_flags(flags)) { | 1622 | if (irqs_disabled_flags(flags)) { |
| 1535 | local_irq_restore(flags); | 1623 | local_irq_restore(flags); |
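The new tracing branch tells kfree_rcu()-style callbacks apart from ordinary ones by inspecting the callback pointer itself: kfree_rcu() passes the byte offset of the rcu_head within its enclosing structure where a function address would normally go, so small values mean "free this object" rather than "call this function". A sketch of the predicate, assuming the below-one-page cutoff that __is_kfree_rcu_offset() is understood to use:

	/* Hedged sketch, not the kernel's verbatim helper. */
	static inline bool is_kfree_rcu_offset(unsigned long offset)
	{
		return offset < 4096;	/* too small to be a function address */
	}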
| @@ -1613,18 +1701,9 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); | |||
| 1613 | */ | 1701 | */ |
| 1614 | void synchronize_sched(void) | 1702 | void synchronize_sched(void) |
| 1615 | { | 1703 | { |
| 1616 | struct rcu_synchronize rcu; | ||
| 1617 | |||
| 1618 | if (rcu_blocking_is_gp()) | 1704 | if (rcu_blocking_is_gp()) |
| 1619 | return; | 1705 | return; |
| 1620 | 1706 | wait_rcu_gp(call_rcu_sched); | |
| 1621 | init_rcu_head_on_stack(&rcu.head); | ||
| 1622 | init_completion(&rcu.completion); | ||
| 1623 | /* Will wake me after RCU finished. */ | ||
| 1624 | call_rcu_sched(&rcu.head, wakeme_after_rcu); | ||
| 1625 | /* Wait for it. */ | ||
| 1626 | wait_for_completion(&rcu.completion); | ||
| 1627 | destroy_rcu_head_on_stack(&rcu.head); | ||
| 1628 | } | 1707 | } |
| 1629 | EXPORT_SYMBOL_GPL(synchronize_sched); | 1708 | EXPORT_SYMBOL_GPL(synchronize_sched); |
| 1630 | 1709 | ||
| @@ -1639,18 +1718,9 @@ EXPORT_SYMBOL_GPL(synchronize_sched); | |||
| 1639 | */ | 1718 | */ |
| 1640 | void synchronize_rcu_bh(void) | 1719 | void synchronize_rcu_bh(void) |
| 1641 | { | 1720 | { |
| 1642 | struct rcu_synchronize rcu; | ||
| 1643 | |||
| 1644 | if (rcu_blocking_is_gp()) | 1721 | if (rcu_blocking_is_gp()) |
| 1645 | return; | 1722 | return; |
| 1646 | 1723 | wait_rcu_gp(call_rcu_bh); | |
| 1647 | init_rcu_head_on_stack(&rcu.head); | ||
| 1648 | init_completion(&rcu.completion); | ||
| 1649 | /* Will wake me after RCU finished. */ | ||
| 1650 | call_rcu_bh(&rcu.head, wakeme_after_rcu); | ||
| 1651 | /* Wait for it. */ | ||
| 1652 | wait_for_completion(&rcu.completion); | ||
| 1653 | destroy_rcu_head_on_stack(&rcu.head); | ||
| 1654 | } | 1724 | } |
| 1655 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); | 1725 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); |
| 1656 | 1726 | ||
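Both synchronize_sched() and synchronize_rcu_bh() above lose their open-coded "post a callback, then block on a completion" sequence in favor of the new wait_rcu_gp() helper, parameterized by which call_rcu() variant to use. Reconstructed from the lines being removed, the helper (added elsewhere in this series, in kernel/rcupdate.c) presumably looks roughly like:

	/* Sketch pieced together from the removed open-coded sequence above. */
	void wait_rcu_gp(void (*crf)(struct rcu_head *head,
				     void (*func)(struct rcu_head *head)))
	{
		struct rcu_synchronize rcu;

		init_rcu_head_on_stack(&rcu.head);
		init_completion(&rcu.completion);
		crf(&rcu.head, wakeme_after_rcu);	/* wake us after a grace period */
		wait_for_completion(&rcu.completion);
		destroy_rcu_head_on_stack(&rcu.head);
	}

synchronize_rcu() in kernel/rcutree_plugin.h receives the same treatment further down.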
| @@ -1671,7 +1741,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1671 | check_cpu_stall(rsp, rdp); | 1741 | check_cpu_stall(rsp, rdp); |
| 1672 | 1742 | ||
| 1673 | /* Is the RCU core waiting for a quiescent state from this CPU? */ | 1743 | /* Is the RCU core waiting for a quiescent state from this CPU? */ |
| 1674 | if (rdp->qs_pending && !rdp->passed_quiesc) { | 1744 | if (rcu_scheduler_fully_active && |
| 1745 | rdp->qs_pending && !rdp->passed_quiesce) { | ||
| 1675 | 1746 | ||
| 1676 | /* | 1747 | /* |
| 1677 | * If force_quiescent_state() coming soon and this CPU | 1748 | * If force_quiescent_state() coming soon and this CPU |
| @@ -1683,7 +1754,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1683 | ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, | 1754 | ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, |
| 1684 | jiffies)) | 1755 | jiffies)) |
| 1685 | set_need_resched(); | 1756 | set_need_resched(); |
| 1686 | } else if (rdp->qs_pending && rdp->passed_quiesc) { | 1757 | } else if (rdp->qs_pending && rdp->passed_quiesce) { |
| 1687 | rdp->n_rp_report_qs++; | 1758 | rdp->n_rp_report_qs++; |
| 1688 | return 1; | 1759 | return 1; |
| 1689 | } | 1760 | } |
| @@ -1846,6 +1917,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
| 1846 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); | 1917 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); |
| 1847 | #endif /* #ifdef CONFIG_NO_HZ */ | 1918 | #endif /* #ifdef CONFIG_NO_HZ */ |
| 1848 | rdp->cpu = cpu; | 1919 | rdp->cpu = cpu; |
| 1920 | rdp->rsp = rsp; | ||
| 1849 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1921 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 1850 | } | 1922 | } |
| 1851 | 1923 | ||
| @@ -1865,8 +1937,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
| 1865 | 1937 | ||
| 1866 | /* Set up local state, ensuring consistent view of global state. */ | 1938 | /* Set up local state, ensuring consistent view of global state. */ |
| 1867 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1939 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| 1868 | rdp->passed_quiesc = 0; /* We could be racing with new GP, */ | ||
| 1869 | rdp->qs_pending = 1; /* so set up to respond to current GP. */ | ||
| 1870 | rdp->beenonline = 1; /* We have now been online. */ | 1940 | rdp->beenonline = 1; /* We have now been online. */ |
| 1871 | rdp->preemptible = preemptible; | 1941 | rdp->preemptible = preemptible; |
| 1872 | rdp->qlen_last_fqs_check = 0; | 1942 | rdp->qlen_last_fqs_check = 0; |
| @@ -1891,9 +1961,17 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
| 1891 | rnp->qsmaskinit |= mask; | 1961 | rnp->qsmaskinit |= mask; |
| 1892 | mask = rnp->grpmask; | 1962 | mask = rnp->grpmask; |
| 1893 | if (rnp == rdp->mynode) { | 1963 | if (rnp == rdp->mynode) { |
| 1894 | rdp->gpnum = rnp->completed; /* if GP in progress... */ | 1964 | /* |
| 1965 | * If there is a grace period in progress, we will | ||
| 1966 | * set up to wait for it next time we run the | ||
| 1967 | * RCU core code. | ||
| 1968 | */ | ||
| 1969 | rdp->gpnum = rnp->completed; | ||
| 1895 | rdp->completed = rnp->completed; | 1970 | rdp->completed = rnp->completed; |
| 1896 | rdp->passed_quiesc_completed = rnp->completed - 1; | 1971 | rdp->passed_quiesce = 0; |
| 1972 | rdp->qs_pending = 0; | ||
| 1973 | rdp->passed_quiesce_gpnum = rnp->gpnum - 1; | ||
| 1974 | trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl"); | ||
| 1897 | } | 1975 | } |
| 1898 | raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ | 1976 | raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ |
| 1899 | rnp = rnp->parent; | 1977 | rnp = rnp->parent; |
| @@ -1919,6 +1997,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
| 1919 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | 1997 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); |
| 1920 | struct rcu_node *rnp = rdp->mynode; | 1998 | struct rcu_node *rnp = rdp->mynode; |
| 1921 | 1999 | ||
| 2000 | trace_rcu_utilization("Start CPU hotplug"); | ||
| 1922 | switch (action) { | 2001 | switch (action) { |
| 1923 | case CPU_UP_PREPARE: | 2002 | case CPU_UP_PREPARE: |
| 1924 | case CPU_UP_PREPARE_FROZEN: | 2003 | case CPU_UP_PREPARE_FROZEN: |
| @@ -1954,6 +2033,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
| 1954 | default: | 2033 | default: |
| 1955 | break; | 2034 | break; |
| 1956 | } | 2035 | } |
| 2036 | trace_rcu_utilization("End CPU hotplug"); | ||
| 1957 | return NOTIFY_OK; | 2037 | return NOTIFY_OK; |
| 1958 | } | 2038 | } |
| 1959 | 2039 | ||
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 01b2ccda26fb..849ce9ec51fe 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
| @@ -230,9 +230,9 @@ struct rcu_data { | |||
| 230 | /* in order to detect GP end. */ | 230 | /* in order to detect GP end. */ |
| 231 | unsigned long gpnum; /* Highest gp number that this CPU */ | 231 | unsigned long gpnum; /* Highest gp number that this CPU */ |
| 232 | /* is aware of having started. */ | 232 | /* is aware of having started. */ |
| 233 | unsigned long passed_quiesc_completed; | 233 | unsigned long passed_quiesce_gpnum; |
| 234 | /* Value of completed at time of qs. */ | 234 | /* gpnum at time of quiescent state. */ |
| 235 | bool passed_quiesc; /* User-mode/idle loop etc. */ | 235 | bool passed_quiesce; /* User-mode/idle loop etc. */ |
| 236 | bool qs_pending; /* Core waits for quiesc state. */ | 236 | bool qs_pending; /* Core waits for quiesc state. */ |
| 237 | bool beenonline; /* CPU online at least once. */ | 237 | bool beenonline; /* CPU online at least once. */ |
| 238 | bool preemptible; /* Preemptible RCU? */ | 238 | bool preemptible; /* Preemptible RCU? */ |
| @@ -299,6 +299,7 @@ struct rcu_data { | |||
| 299 | unsigned long n_rp_need_nothing; | 299 | unsigned long n_rp_need_nothing; |
| 300 | 300 | ||
| 301 | int cpu; | 301 | int cpu; |
| 302 | struct rcu_state *rsp; | ||
| 302 | }; | 303 | }; |
| 303 | 304 | ||
| 304 | /* Values for signaled field in struct rcu_state. */ | 305 | /* Values for signaled field in struct rcu_state. */ |
| @@ -417,6 +418,13 @@ extern struct rcu_state rcu_preempt_state; | |||
| 417 | DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); | 418 | DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); |
| 418 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 419 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
| 419 | 420 | ||
| 421 | #ifdef CONFIG_RCU_BOOST | ||
| 422 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | ||
| 423 | DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu); | ||
| 424 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | ||
| 425 | DECLARE_PER_CPU(char, rcu_cpu_has_work); | ||
| 426 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
| 427 | |||
| 420 | #ifndef RCU_TREE_NONCORE | 428 | #ifndef RCU_TREE_NONCORE |
| 421 | 429 | ||
| 422 | /* Forward declarations for rcutree_plugin.h */ | 430 | /* Forward declarations for rcutree_plugin.h */ |
| @@ -430,7 +438,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, | |||
| 430 | static void rcu_stop_cpu_kthread(int cpu); | 438 | static void rcu_stop_cpu_kthread(int cpu); |
| 431 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 439 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
| 432 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); | 440 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); |
| 433 | static void rcu_print_task_stall(struct rcu_node *rnp); | 441 | static int rcu_print_task_stall(struct rcu_node *rnp); |
| 434 | static void rcu_preempt_stall_reset(void); | 442 | static void rcu_preempt_stall_reset(void); |
| 435 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); | 443 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); |
| 436 | #ifdef CONFIG_HOTPLUG_CPU | 444 | #ifdef CONFIG_HOTPLUG_CPU |
| @@ -450,7 +458,6 @@ static int rcu_preempt_needs_cpu(int cpu); | |||
| 450 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu); | 458 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu); |
| 451 | static void rcu_preempt_send_cbs_to_online(void); | 459 | static void rcu_preempt_send_cbs_to_online(void); |
| 452 | static void __init __rcu_init_preempt(void); | 460 | static void __init __rcu_init_preempt(void); |
| 453 | static void rcu_needs_cpu_flush(void); | ||
| 454 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); | 461 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); |
| 455 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | 462 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); |
| 456 | static void invoke_rcu_callbacks_kthread(void); | 463 | static void invoke_rcu_callbacks_kthread(void); |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 8aafbb80b8b0..4b9b9f8a4184 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
| @@ -27,6 +27,14 @@ | |||
| 27 | #include <linux/delay.h> | 27 | #include <linux/delay.h> |
| 28 | #include <linux/stop_machine.h> | 28 | #include <linux/stop_machine.h> |
| 29 | 29 | ||
| 30 | #define RCU_KTHREAD_PRIO 1 | ||
| 31 | |||
| 32 | #ifdef CONFIG_RCU_BOOST | ||
| 33 | #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO | ||
| 34 | #else | ||
| 35 | #define RCU_BOOST_PRIO RCU_KTHREAD_PRIO | ||
| 36 | #endif | ||
| 37 | |||
| 30 | /* | 38 | /* |
| 31 | * Check the RCU kernel configuration parameters and print informative | 39 | * Check the RCU kernel configuration parameters and print informative |
| 32 | * messages about anything out of the ordinary. If you like #ifdef, you | 40 | * messages about anything out of the ordinary. If you like #ifdef, you |
| @@ -64,7 +72,7 @@ static void __init rcu_bootup_announce_oddness(void) | |||
| 64 | 72 | ||
| 65 | #ifdef CONFIG_TREE_PREEMPT_RCU | 73 | #ifdef CONFIG_TREE_PREEMPT_RCU |
| 66 | 74 | ||
| 67 | struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); | 75 | struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt); |
| 68 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); | 76 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); |
| 69 | static struct rcu_state *rcu_state = &rcu_preempt_state; | 77 | static struct rcu_state *rcu_state = &rcu_preempt_state; |
| 70 | 78 | ||
| @@ -122,9 +130,11 @@ static void rcu_preempt_qs(int cpu) | |||
| 122 | { | 130 | { |
| 123 | struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); | 131 | struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); |
| 124 | 132 | ||
| 125 | rdp->passed_quiesc_completed = rdp->gpnum - 1; | 133 | rdp->passed_quiesce_gpnum = rdp->gpnum; |
| 126 | barrier(); | 134 | barrier(); |
| 127 | rdp->passed_quiesc = 1; | 135 | if (rdp->passed_quiesce == 0) |
| 136 | trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs"); | ||
| 137 | rdp->passed_quiesce = 1; | ||
| 128 | current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; | 138 | current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; |
| 129 | } | 139 | } |
| 130 | 140 | ||
| @@ -190,6 +200,11 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
| 190 | if (rnp->qsmask & rdp->grpmask) | 200 | if (rnp->qsmask & rdp->grpmask) |
| 191 | rnp->gp_tasks = &t->rcu_node_entry; | 201 | rnp->gp_tasks = &t->rcu_node_entry; |
| 192 | } | 202 | } |
| 203 | trace_rcu_preempt_task(rdp->rsp->name, | ||
| 204 | t->pid, | ||
| 205 | (rnp->qsmask & rdp->grpmask) | ||
| 206 | ? rnp->gpnum | ||
| 207 | : rnp->gpnum + 1); | ||
| 193 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 208 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 194 | } else if (t->rcu_read_lock_nesting < 0 && | 209 | } else if (t->rcu_read_lock_nesting < 0 && |
| 195 | t->rcu_read_unlock_special) { | 210 | t->rcu_read_unlock_special) { |
| @@ -299,6 +314,9 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
| 299 | int empty_exp; | 314 | int empty_exp; |
| 300 | unsigned long flags; | 315 | unsigned long flags; |
| 301 | struct list_head *np; | 316 | struct list_head *np; |
| 317 | #ifdef CONFIG_RCU_BOOST | ||
| 318 | struct rt_mutex *rbmp = NULL; | ||
| 319 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
| 302 | struct rcu_node *rnp; | 320 | struct rcu_node *rnp; |
| 303 | int special; | 321 | int special; |
| 304 | 322 | ||
| @@ -344,6 +362,9 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
| 344 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ | 362 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ |
| 345 | np = rcu_next_node_entry(t, rnp); | 363 | np = rcu_next_node_entry(t, rnp); |
| 346 | list_del_init(&t->rcu_node_entry); | 364 | list_del_init(&t->rcu_node_entry); |
| 365 | t->rcu_blocked_node = NULL; | ||
| 366 | trace_rcu_unlock_preempted_task("rcu_preempt", | ||
| 367 | rnp->gpnum, t->pid); | ||
| 347 | if (&t->rcu_node_entry == rnp->gp_tasks) | 368 | if (&t->rcu_node_entry == rnp->gp_tasks) |
| 348 | rnp->gp_tasks = np; | 369 | rnp->gp_tasks = np; |
| 349 | if (&t->rcu_node_entry == rnp->exp_tasks) | 370 | if (&t->rcu_node_entry == rnp->exp_tasks) |
| @@ -351,30 +372,34 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) | |||
| 351 | #ifdef CONFIG_RCU_BOOST | 372 | #ifdef CONFIG_RCU_BOOST |
| 352 | if (&t->rcu_node_entry == rnp->boost_tasks) | 373 | if (&t->rcu_node_entry == rnp->boost_tasks) |
| 353 | rnp->boost_tasks = np; | 374 | rnp->boost_tasks = np; |
| 354 | /* Snapshot and clear ->rcu_boosted with rcu_node lock held. */ | 375 | /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */ |
| 355 | if (t->rcu_boosted) { | 376 | if (t->rcu_boost_mutex) { |
| 356 | special |= RCU_READ_UNLOCK_BOOSTED; | 377 | rbmp = t->rcu_boost_mutex; |
| 357 | t->rcu_boosted = 0; | 378 | t->rcu_boost_mutex = NULL; |
| 358 | } | 379 | } |
| 359 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 380 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
| 360 | t->rcu_blocked_node = NULL; | ||
| 361 | 381 | ||
| 362 | /* | 382 | /* |
| 363 | * If this was the last task on the current list, and if | 383 | * If this was the last task on the current list, and if |
| 364 | * we aren't waiting on any CPUs, report the quiescent state. | 384 | * we aren't waiting on any CPUs, report the quiescent state. |
| 365 | * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. | 385 | * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. |
| 366 | */ | 386 | */ |
| 367 | if (empty) | 387 | if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { |
| 368 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 388 | trace_rcu_quiescent_state_report("preempt_rcu", |
| 369 | else | 389 | rnp->gpnum, |
| 390 | 0, rnp->qsmask, | ||
| 391 | rnp->level, | ||
| 392 | rnp->grplo, | ||
| 393 | rnp->grphi, | ||
| 394 | !!rnp->gp_tasks); | ||
| 370 | rcu_report_unblock_qs_rnp(rnp, flags); | 395 | rcu_report_unblock_qs_rnp(rnp, flags); |
| 396 | } else | ||
| 397 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
| 371 | 398 | ||
| 372 | #ifdef CONFIG_RCU_BOOST | 399 | #ifdef CONFIG_RCU_BOOST |
| 373 | /* Unboost if we were boosted. */ | 400 | /* Unboost if we were boosted. */ |
| 374 | if (special & RCU_READ_UNLOCK_BOOSTED) { | 401 | if (rbmp) |
| 375 | rt_mutex_unlock(t->rcu_boost_mutex); | 402 | rt_mutex_unlock(rbmp); |
| 376 | t->rcu_boost_mutex = NULL; | ||
| 377 | } | ||
| 378 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 403 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
| 379 | 404 | ||
| 380 | /* | 405 | /* |
| @@ -399,10 +424,10 @@ void __rcu_read_unlock(void) | |||
| 399 | { | 424 | { |
| 400 | struct task_struct *t = current; | 425 | struct task_struct *t = current; |
| 401 | 426 | ||
| 402 | barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */ | ||
| 403 | if (t->rcu_read_lock_nesting != 1) | 427 | if (t->rcu_read_lock_nesting != 1) |
| 404 | --t->rcu_read_lock_nesting; | 428 | --t->rcu_read_lock_nesting; |
| 405 | else { | 429 | else { |
| 430 | barrier(); /* critical section before exit code. */ | ||
| 406 | t->rcu_read_lock_nesting = INT_MIN; | 431 | t->rcu_read_lock_nesting = INT_MIN; |
| 407 | barrier(); /* assign before ->rcu_read_unlock_special load */ | 432 | barrier(); /* assign before ->rcu_read_unlock_special load */ |
| 408 | if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) | 433 | if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) |
| @@ -466,16 +491,20 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp) | |||
| 466 | * Scan the current list of tasks blocked within RCU read-side critical | 491 | * Scan the current list of tasks blocked within RCU read-side critical |
| 467 | * sections, printing out the tid of each. | 492 | * sections, printing out the tid of each. |
| 468 | */ | 493 | */ |
| 469 | static void rcu_print_task_stall(struct rcu_node *rnp) | 494 | static int rcu_print_task_stall(struct rcu_node *rnp) |
| 470 | { | 495 | { |
| 471 | struct task_struct *t; | 496 | struct task_struct *t; |
| 497 | int ndetected = 0; | ||
| 472 | 498 | ||
| 473 | if (!rcu_preempt_blocked_readers_cgp(rnp)) | 499 | if (!rcu_preempt_blocked_readers_cgp(rnp)) |
| 474 | return; | 500 | return 0; |
| 475 | t = list_entry(rnp->gp_tasks, | 501 | t = list_entry(rnp->gp_tasks, |
| 476 | struct task_struct, rcu_node_entry); | 502 | struct task_struct, rcu_node_entry); |
| 477 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) | 503 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { |
| 478 | printk(" P%d", t->pid); | 504 | printk(" P%d", t->pid); |
| 505 | ndetected++; | ||
| 506 | } | ||
| 507 | return ndetected; | ||
| 479 | } | 508 | } |
| 480 | 509 | ||
| 481 | /* | 510 | /* |
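rcu_print_task_stall() now returns the number of stalled tasks it printed instead of void. The caller lives in the stall-warning code outside this diff; a hedged sketch of how such a count might be consumed is to total it over the leaf rcu_node structures and note when a stall was declared but nothing could be pinned down:

	/* Illustrative fragment only; ndetected and the message are made up. */
	int ndetected = 0;
	struct rcu_node *rnp;

	rcu_for_each_leaf_node(rsp, rnp)
		ndetected += rcu_print_task_stall(rnp);
	if (ndetected == 0)
		printk(KERN_ERR "INFO: stall detected, but no stalled tasks found\n");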
| @@ -656,18 +685,9 @@ EXPORT_SYMBOL_GPL(call_rcu); | |||
| 656 | */ | 685 | */ |
| 657 | void synchronize_rcu(void) | 686 | void synchronize_rcu(void) |
| 658 | { | 687 | { |
| 659 | struct rcu_synchronize rcu; | ||
| 660 | |||
| 661 | if (!rcu_scheduler_active) | 688 | if (!rcu_scheduler_active) |
| 662 | return; | 689 | return; |
| 663 | 690 | wait_rcu_gp(call_rcu); | |
| 664 | init_rcu_head_on_stack(&rcu.head); | ||
| 665 | init_completion(&rcu.completion); | ||
| 666 | /* Will wake me after RCU finished. */ | ||
| 667 | call_rcu(&rcu.head, wakeme_after_rcu); | ||
| 668 | /* Wait for it. */ | ||
| 669 | wait_for_completion(&rcu.completion); | ||
| 670 | destroy_rcu_head_on_stack(&rcu.head); | ||
| 671 | } | 691 | } |
| 672 | EXPORT_SYMBOL_GPL(synchronize_rcu); | 692 | EXPORT_SYMBOL_GPL(synchronize_rcu); |
| 673 | 693 | ||
| @@ -968,8 +988,9 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp) | |||
| 968 | * Because preemptible RCU does not exist, we never have to check for | 988 | * Because preemptible RCU does not exist, we never have to check for |
| 969 | * tasks blocked within RCU read-side critical sections. | 989 | * tasks blocked within RCU read-side critical sections. |
| 970 | */ | 990 | */ |
| 971 | static void rcu_print_task_stall(struct rcu_node *rnp) | 991 | static int rcu_print_task_stall(struct rcu_node *rnp) |
| 972 | { | 992 | { |
| 993 | return 0; | ||
| 973 | } | 994 | } |
| 974 | 995 | ||
| 975 | /* | 996 | /* |
| @@ -1136,6 +1157,8 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp) | |||
| 1136 | 1157 | ||
| 1137 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | 1158 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ |
| 1138 | 1159 | ||
| 1160 | static struct lock_class_key rcu_boost_class; | ||
| 1161 | |||
| 1139 | /* | 1162 | /* |
| 1140 | * Carry out RCU priority boosting on the task indicated by ->exp_tasks | 1163 | * Carry out RCU priority boosting on the task indicated by ->exp_tasks |
| 1141 | * or ->boost_tasks, advancing the pointer to the next task in the | 1164 | * or ->boost_tasks, advancing the pointer to the next task in the |
| @@ -1198,8 +1221,10 @@ static int rcu_boost(struct rcu_node *rnp) | |||
| 1198 | */ | 1221 | */ |
| 1199 | t = container_of(tb, struct task_struct, rcu_node_entry); | 1222 | t = container_of(tb, struct task_struct, rcu_node_entry); |
| 1200 | rt_mutex_init_proxy_locked(&mtx, t); | 1223 | rt_mutex_init_proxy_locked(&mtx, t); |
| 1224 | /* Avoid lockdep false positives. This rt_mutex is its own thing. */ | ||
| 1225 | lockdep_set_class_and_name(&mtx.wait_lock, &rcu_boost_class, | ||
| 1226 | "rcu_boost_mutex"); | ||
| 1201 | t->rcu_boost_mutex = &mtx; | 1227 | t->rcu_boost_mutex = &mtx; |
| 1202 | t->rcu_boosted = 1; | ||
| 1203 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1228 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 1204 | rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ | 1229 | rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ |
| 1205 | rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ | 1230 | rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ |
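The boost mutex is an on-stack rt_mutex that is re-initialized on every boost operation, so lockdep would otherwise fold its wait_lock into the same class as every other rt_mutex wait_lock; the added lockdep_set_class_and_name() call gives it a class of its own, as the in-diff comment says. The same idiom applies to any dynamically initialized lock that must not share a class with its siblings; an illustrative (made-up) example:

	#include <linux/spinlock.h>
	#include <linux/lockdep.h>

	struct my_object {
		spinlock_t lock;
	};

	static struct lock_class_key my_object_lock_key;

	static void init_my_object(struct my_object *obj)
	{
		spin_lock_init(&obj->lock);
		/* Separate lockdep class so this lock is "its own thing". */
		lockdep_set_class_and_name(&obj->lock, &my_object_lock_key,
					   "my_object_lock");
	}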
| @@ -1228,9 +1253,12 @@ static int rcu_boost_kthread(void *arg) | |||
| 1228 | int spincnt = 0; | 1253 | int spincnt = 0; |
| 1229 | int more2boost; | 1254 | int more2boost; |
| 1230 | 1255 | ||
| 1256 | trace_rcu_utilization("Start boost kthread@init"); | ||
| 1231 | for (;;) { | 1257 | for (;;) { |
| 1232 | rnp->boost_kthread_status = RCU_KTHREAD_WAITING; | 1258 | rnp->boost_kthread_status = RCU_KTHREAD_WAITING; |
| 1259 | trace_rcu_utilization("End boost kthread@rcu_wait"); | ||
| 1233 | rcu_wait(rnp->boost_tasks || rnp->exp_tasks); | 1260 | rcu_wait(rnp->boost_tasks || rnp->exp_tasks); |
| 1261 | trace_rcu_utilization("Start boost kthread@rcu_wait"); | ||
| 1234 | rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; | 1262 | rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; |
| 1235 | more2boost = rcu_boost(rnp); | 1263 | more2boost = rcu_boost(rnp); |
| 1236 | if (more2boost) | 1264 | if (more2boost) |
| @@ -1238,11 +1266,14 @@ static int rcu_boost_kthread(void *arg) | |||
| 1238 | else | 1266 | else |
| 1239 | spincnt = 0; | 1267 | spincnt = 0; |
| 1240 | if (spincnt > 10) { | 1268 | if (spincnt > 10) { |
| 1269 | trace_rcu_utilization("End boost kthread@rcu_yield"); | ||
| 1241 | rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp); | 1270 | rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp); |
| 1271 | trace_rcu_utilization("Start boost kthread@rcu_yield"); | ||
| 1242 | spincnt = 0; | 1272 | spincnt = 0; |
| 1243 | } | 1273 | } |
| 1244 | } | 1274 | } |
| 1245 | /* NOTREACHED */ | 1275 | /* NOTREACHED */ |
| 1276 | trace_rcu_utilization("End boost kthread@notreached"); | ||
| 1246 | return 0; | 1277 | return 0; |
| 1247 | } | 1278 | } |
| 1248 | 1279 | ||
| @@ -1291,11 +1322,9 @@ static void invoke_rcu_callbacks_kthread(void) | |||
| 1291 | 1322 | ||
| 1292 | local_irq_save(flags); | 1323 | local_irq_save(flags); |
| 1293 | __this_cpu_write(rcu_cpu_has_work, 1); | 1324 | __this_cpu_write(rcu_cpu_has_work, 1); |
| 1294 | if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) { | 1325 | if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && |
| 1295 | local_irq_restore(flags); | 1326 | current != __this_cpu_read(rcu_cpu_kthread_task)) |
| 1296 | return; | 1327 | wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); |
| 1297 | } | ||
| 1298 | wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); | ||
| 1299 | local_irq_restore(flags); | 1328 | local_irq_restore(flags); |
| 1300 | } | 1329 | } |
| 1301 | 1330 | ||
| @@ -1343,13 +1372,13 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |||
| 1343 | if (rnp->boost_kthread_task != NULL) | 1372 | if (rnp->boost_kthread_task != NULL) |
| 1344 | return 0; | 1373 | return 0; |
| 1345 | t = kthread_create(rcu_boost_kthread, (void *)rnp, | 1374 | t = kthread_create(rcu_boost_kthread, (void *)rnp, |
| 1346 | "rcub%d", rnp_index); | 1375 | "rcub/%d", rnp_index); |
| 1347 | if (IS_ERR(t)) | 1376 | if (IS_ERR(t)) |
| 1348 | return PTR_ERR(t); | 1377 | return PTR_ERR(t); |
| 1349 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1378 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| 1350 | rnp->boost_kthread_task = t; | 1379 | rnp->boost_kthread_task = t; |
| 1351 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1380 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 1352 | sp.sched_priority = RCU_KTHREAD_PRIO; | 1381 | sp.sched_priority = RCU_BOOST_PRIO; |
| 1353 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | 1382 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
| 1354 | wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ | 1383 | wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ |
| 1355 | return 0; | 1384 | return 0; |
| @@ -1444,6 +1473,7 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg) | |||
| 1444 | { | 1473 | { |
| 1445 | struct sched_param sp; | 1474 | struct sched_param sp; |
| 1446 | struct timer_list yield_timer; | 1475 | struct timer_list yield_timer; |
| 1476 | int prio = current->rt_priority; | ||
| 1447 | 1477 | ||
| 1448 | setup_timer_on_stack(&yield_timer, f, arg); | 1478 | setup_timer_on_stack(&yield_timer, f, arg); |
| 1449 | mod_timer(&yield_timer, jiffies + 2); | 1479 | mod_timer(&yield_timer, jiffies + 2); |
| @@ -1451,7 +1481,8 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg) | |||
| 1451 | sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); | 1481 | sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); |
| 1452 | set_user_nice(current, 19); | 1482 | set_user_nice(current, 19); |
| 1453 | schedule(); | 1483 | schedule(); |
| 1454 | sp.sched_priority = RCU_KTHREAD_PRIO; | 1484 | set_user_nice(current, 0); |
| 1485 | sp.sched_priority = prio; | ||
| 1455 | sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | 1486 | sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); |
| 1456 | del_timer(&yield_timer); | 1487 | del_timer(&yield_timer); |
| 1457 | } | 1488 | } |
| @@ -1489,7 +1520,8 @@ static int rcu_cpu_kthread_should_stop(int cpu) | |||
| 1489 | 1520 | ||
| 1490 | /* | 1521 | /* |
| 1491 | * Per-CPU kernel thread that invokes RCU callbacks. This replaces the | 1522 | * Per-CPU kernel thread that invokes RCU callbacks. This replaces the |
| 1492 | * earlier RCU softirq. | 1523 | * RCU softirq used in flavors and configurations of RCU that do not |
| 1524 | * support RCU priority boosting. | ||
| 1493 | */ | 1525 | */ |
| 1494 | static int rcu_cpu_kthread(void *arg) | 1526 | static int rcu_cpu_kthread(void *arg) |
| 1495 | { | 1527 | { |
| @@ -1500,9 +1532,12 @@ static int rcu_cpu_kthread(void *arg) | |||
| 1500 | char work; | 1532 | char work; |
| 1501 | char *workp = &per_cpu(rcu_cpu_has_work, cpu); | 1533 | char *workp = &per_cpu(rcu_cpu_has_work, cpu); |
| 1502 | 1534 | ||
| 1535 | trace_rcu_utilization("Start CPU kthread@init"); | ||
| 1503 | for (;;) { | 1536 | for (;;) { |
| 1504 | *statusp = RCU_KTHREAD_WAITING; | 1537 | *statusp = RCU_KTHREAD_WAITING; |
| 1538 | trace_rcu_utilization("End CPU kthread@rcu_wait"); | ||
| 1505 | rcu_wait(*workp != 0 || kthread_should_stop()); | 1539 | rcu_wait(*workp != 0 || kthread_should_stop()); |
| 1540 | trace_rcu_utilization("Start CPU kthread@rcu_wait"); | ||
| 1506 | local_bh_disable(); | 1541 | local_bh_disable(); |
| 1507 | if (rcu_cpu_kthread_should_stop(cpu)) { | 1542 | if (rcu_cpu_kthread_should_stop(cpu)) { |
| 1508 | local_bh_enable(); | 1543 | local_bh_enable(); |
| @@ -1523,11 +1558,14 @@ static int rcu_cpu_kthread(void *arg) | |||
| 1523 | spincnt = 0; | 1558 | spincnt = 0; |
| 1524 | if (spincnt > 10) { | 1559 | if (spincnt > 10) { |
| 1525 | *statusp = RCU_KTHREAD_YIELDING; | 1560 | *statusp = RCU_KTHREAD_YIELDING; |
| 1561 | trace_rcu_utilization("End CPU kthread@rcu_yield"); | ||
| 1526 | rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); | 1562 | rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); |
| 1563 | trace_rcu_utilization("Start CPU kthread@rcu_yield"); | ||
| 1527 | spincnt = 0; | 1564 | spincnt = 0; |
| 1528 | } | 1565 | } |
| 1529 | } | 1566 | } |
| 1530 | *statusp = RCU_KTHREAD_STOPPED; | 1567 | *statusp = RCU_KTHREAD_STOPPED; |
| 1568 | trace_rcu_utilization("End CPU kthread@term"); | ||
| 1531 | return 0; | 1569 | return 0; |
| 1532 | } | 1570 | } |
| 1533 | 1571 | ||
| @@ -1560,7 +1598,10 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) | |||
| 1560 | if (!rcu_scheduler_fully_active || | 1598 | if (!rcu_scheduler_fully_active || |
| 1561 | per_cpu(rcu_cpu_kthread_task, cpu) != NULL) | 1599 | per_cpu(rcu_cpu_kthread_task, cpu) != NULL) |
| 1562 | return 0; | 1600 | return 0; |
| 1563 | t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); | 1601 | t = kthread_create_on_node(rcu_cpu_kthread, |
| 1602 | (void *)(long)cpu, | ||
| 1603 | cpu_to_node(cpu), | ||
| 1604 | "rcuc/%d", cpu); | ||
| 1564 | if (IS_ERR(t)) | 1605 | if (IS_ERR(t)) |
| 1565 | return PTR_ERR(t); | 1606 | return PTR_ERR(t); |
| 1566 | if (cpu_online(cpu)) | 1607 | if (cpu_online(cpu)) |
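kthread_create_on_node() lets the new per-CPU kthread's task_struct and stack be allocated on the NUMA node that owns the target CPU, and "rcuc/%d" switches to the slash-separated naming used by other per-CPU kthreads. The binding and wakeup steps fall outside this hunk; the overall pattern, with made-up names, is roughly:

	#include <linux/kthread.h>
	#include <linux/sched.h>
	#include <linux/topology.h>
	#include <linux/err.h>

	/* Illustrative only: my_worker_fn and "mywork/%d" are not from the patch. */
	static int spawn_per_cpu_worker(int (*my_worker_fn)(void *), int cpu)
	{
		struct task_struct *t;

		t = kthread_create_on_node(my_worker_fn, (void *)(long)cpu,
					   cpu_to_node(cpu), "mywork/%d", cpu);
		if (IS_ERR(t))
			return PTR_ERR(t);
		kthread_bind(t, cpu);	/* run only on its designated CPU */
		wake_up_process(t);
		return 0;
	}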
| @@ -1669,7 +1710,7 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, | |||
| 1669 | return 0; | 1710 | return 0; |
| 1670 | if (rnp->node_kthread_task == NULL) { | 1711 | if (rnp->node_kthread_task == NULL) { |
| 1671 | t = kthread_create(rcu_node_kthread, (void *)rnp, | 1712 | t = kthread_create(rcu_node_kthread, (void *)rnp, |
| 1672 | "rcun%d", rnp_index); | 1713 | "rcun/%d", rnp_index); |
| 1673 | if (IS_ERR(t)) | 1714 | if (IS_ERR(t)) |
| 1674 | return PTR_ERR(t); | 1715 | return PTR_ERR(t); |
| 1675 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1716 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| @@ -1907,15 +1948,6 @@ int rcu_needs_cpu(int cpu) | |||
| 1907 | return rcu_needs_cpu_quick_check(cpu); | 1948 | return rcu_needs_cpu_quick_check(cpu); |
| 1908 | } | 1949 | } |
| 1909 | 1950 | ||
| 1910 | /* | ||
| 1911 | * Check to see if we need to continue a callback-flush operations to | ||
| 1912 | * allow the last CPU to enter dyntick-idle mode. But fast dyntick-idle | ||
| 1913 | * entry is not configured, so we never do need to. | ||
| 1914 | */ | ||
| 1915 | static void rcu_needs_cpu_flush(void) | ||
| 1916 | { | ||
| 1917 | } | ||
| 1918 | |||
| 1919 | #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | 1951 | #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ |
| 1920 | 1952 | ||
| 1921 | #define RCU_NEEDS_CPU_FLUSHES 5 | 1953 | #define RCU_NEEDS_CPU_FLUSHES 5 |
| @@ -1991,20 +2023,4 @@ int rcu_needs_cpu(int cpu) | |||
| 1991 | return c; | 2023 | return c; |
| 1992 | } | 2024 | } |
| 1993 | 2025 | ||
| 1994 | /* | ||
| 1995 | * Check to see if we need to continue a callback-flush operations to | ||
| 1996 | * allow the last CPU to enter dyntick-idle mode. | ||
| 1997 | */ | ||
| 1998 | static void rcu_needs_cpu_flush(void) | ||
| 1999 | { | ||
| 2000 | int cpu = smp_processor_id(); | ||
| 2001 | unsigned long flags; | ||
| 2002 | |||
| 2003 | if (per_cpu(rcu_dyntick_drain, cpu) <= 0) | ||
| 2004 | return; | ||
| 2005 | local_irq_save(flags); | ||
| 2006 | (void)rcu_needs_cpu(cpu); | ||
| 2007 | local_irq_restore(flags); | ||
| 2008 | } | ||
| 2009 | |||
| 2010 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | 2026 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ |
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 3b0c0986afc0..9feffa4c0695 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
| @@ -48,11 +48,6 @@ | |||
| 48 | 48 | ||
| 49 | #ifdef CONFIG_RCU_BOOST | 49 | #ifdef CONFIG_RCU_BOOST |
| 50 | 50 | ||
| 51 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | ||
| 52 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_cpu); | ||
| 53 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | ||
| 54 | DECLARE_PER_CPU(char, rcu_cpu_has_work); | ||
| 55 | |||
| 56 | static char convert_kthread_status(unsigned int kthread_status) | 51 | static char convert_kthread_status(unsigned int kthread_status) |
| 57 | { | 52 | { |
| 58 | if (kthread_status > RCU_KTHREAD_MAX) | 53 | if (kthread_status > RCU_KTHREAD_MAX) |
| @@ -66,11 +61,11 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | |||
| 66 | { | 61 | { |
| 67 | if (!rdp->beenonline) | 62 | if (!rdp->beenonline) |
| 68 | return; | 63 | return; |
| 69 | seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pqc=%lu qp=%d", | 64 | seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pgp=%lu qp=%d", |
| 70 | rdp->cpu, | 65 | rdp->cpu, |
| 71 | cpu_is_offline(rdp->cpu) ? '!' : ' ', | 66 | cpu_is_offline(rdp->cpu) ? '!' : ' ', |
| 72 | rdp->completed, rdp->gpnum, | 67 | rdp->completed, rdp->gpnum, |
| 73 | rdp->passed_quiesc, rdp->passed_quiesc_completed, | 68 | rdp->passed_quiesce, rdp->passed_quiesce_gpnum, |
| 74 | rdp->qs_pending); | 69 | rdp->qs_pending); |
| 75 | #ifdef CONFIG_NO_HZ | 70 | #ifdef CONFIG_NO_HZ |
| 76 | seq_printf(m, " dt=%d/%d/%d df=%lu", | 71 | seq_printf(m, " dt=%d/%d/%d df=%lu", |
| @@ -144,7 +139,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | |||
| 144 | rdp->cpu, | 139 | rdp->cpu, |
| 145 | cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"", | 140 | cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"", |
| 146 | rdp->completed, rdp->gpnum, | 141 | rdp->completed, rdp->gpnum, |
| 147 | rdp->passed_quiesc, rdp->passed_quiesc_completed, | 142 | rdp->passed_quiesce, rdp->passed_quiesce_gpnum, |
| 148 | rdp->qs_pending); | 143 | rdp->qs_pending); |
| 149 | #ifdef CONFIG_NO_HZ | 144 | #ifdef CONFIG_NO_HZ |
| 150 | seq_printf(m, ",%d,%d,%d,%lu", | 145 | seq_printf(m, ",%d,%d,%d,%lu", |
| @@ -175,7 +170,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | |||
| 175 | 170 | ||
| 176 | static int show_rcudata_csv(struct seq_file *m, void *unused) | 171 | static int show_rcudata_csv(struct seq_file *m, void *unused) |
| 177 | { | 172 | { |
| 178 | seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\","); | 173 | seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); |
| 179 | #ifdef CONFIG_NO_HZ | 174 | #ifdef CONFIG_NO_HZ |
| 180 | seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); | 175 | seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); |
| 181 | #endif /* #ifdef CONFIG_NO_HZ */ | 176 | #endif /* #ifdef CONFIG_NO_HZ */ |
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 255e1662acdb..5e8d9cce7470 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c | |||
| @@ -579,6 +579,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
| 579 | struct rt_mutex_waiter *waiter) | 579 | struct rt_mutex_waiter *waiter) |
| 580 | { | 580 | { |
| 581 | int ret = 0; | 581 | int ret = 0; |
| 582 | int was_disabled; | ||
| 582 | 583 | ||
| 583 | for (;;) { | 584 | for (;;) { |
| 584 | /* Try to acquire the lock: */ | 585 | /* Try to acquire the lock: */ |
| @@ -601,10 +602,17 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
| 601 | 602 | ||
| 602 | raw_spin_unlock(&lock->wait_lock); | 603 | raw_spin_unlock(&lock->wait_lock); |
| 603 | 604 | ||
| 605 | was_disabled = irqs_disabled(); | ||
| 606 | if (was_disabled) | ||
| 607 | local_irq_enable(); | ||
| 608 | |||
| 604 | debug_rt_mutex_print_deadlock(waiter); | 609 | debug_rt_mutex_print_deadlock(waiter); |
| 605 | 610 | ||
| 606 | schedule_rt_mutex(lock); | 611 | schedule_rt_mutex(lock); |
| 607 | 612 | ||
| 613 | if (was_disabled) | ||
| 614 | local_irq_disable(); | ||
| 615 | |||
| 608 | raw_spin_lock(&lock->wait_lock); | 616 | raw_spin_lock(&lock->wait_lock); |
| 609 | set_current_state(state); | 617 | set_current_state(state); |
| 610 | } | 618 | } |
diff --git a/kernel/sched.c b/kernel/sched.c index ec5f472bc5b9..3e5525630459 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -4237,6 +4237,7 @@ static inline void schedule_debug(struct task_struct *prev) | |||
| 4237 | */ | 4237 | */ |
| 4238 | if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) | 4238 | if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) |
| 4239 | __schedule_bug(prev); | 4239 | __schedule_bug(prev); |
| 4240 | rcu_sleep_check(); | ||
| 4240 | 4241 | ||
| 4241 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); | 4242 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); |
| 4242 | 4243 | ||
| @@ -5979,15 +5980,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
| 5979 | } | 5980 | } |
| 5980 | 5981 | ||
| 5981 | /* | 5982 | /* |
| 5982 | * In a system that switches off the HZ timer nohz_cpu_mask | ||
| 5983 | * indicates which cpus entered this state. This is used | ||
| 5984 | * in the rcu update to wait only for active cpus. For system | ||
| 5985 | * which do not switch off the HZ timer nohz_cpu_mask should | ||
| 5986 | * always be CPU_BITS_NONE. | ||
| 5987 | */ | ||
| 5988 | cpumask_var_t nohz_cpu_mask; | ||
| 5989 | |||
| 5990 | /* | ||
| 5991 | * Increase the granularity value when there are more CPUs, | 5983 | * Increase the granularity value when there are more CPUs, |
| 5992 | * because with more CPUs the 'effective latency' as visible | 5984 | * because with more CPUs the 'effective latency' as visible |
| 5993 | * to users decreases. But the relationship is not linear, | 5985 | * to users decreases. But the relationship is not linear, |
| @@ -8199,8 +8191,6 @@ void __init sched_init(void) | |||
| 8199 | */ | 8191 | */ |
| 8200 | current->sched_class = &fair_sched_class; | 8192 | current->sched_class = &fair_sched_class; |
| 8201 | 8193 | ||
| 8202 | /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ | ||
| 8203 | zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); | ||
| 8204 | #ifdef CONFIG_SMP | 8194 | #ifdef CONFIG_SMP |
| 8205 | zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); | 8195 | zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); |
| 8206 | #ifdef CONFIG_NO_HZ | 8196 | #ifdef CONFIG_NO_HZ |
| @@ -8230,6 +8220,7 @@ void __might_sleep(const char *file, int line, int preempt_offset) | |||
| 8230 | { | 8220 | { |
| 8231 | static unsigned long prev_jiffy; /* ratelimiting */ | 8221 | static unsigned long prev_jiffy; /* ratelimiting */ |
| 8232 | 8222 | ||
| 8223 | rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ | ||
| 8233 | if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || | 8224 | if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || |
| 8234 | system_state != SYSTEM_RUNNING || oops_in_progress) | 8225 | system_state != SYSTEM_RUNNING || oops_in_progress) |
| 8235 | return; | 8226 | return; |
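rcu_sleep_check() (added to include/linux/rcupdate.h elsewhere in this series) makes both schedule_debug() and __might_sleep() warn when a potentially sleeping call is made from inside an RCU read-side critical section, provided RCU debug checking is enabled. A deliberately buggy fragment of the kind this check is meant to flag, with illustrative names:

	/* Buggy-on-purpose sketch; gp and struct foo are made-up names. */
	struct foo *p;

	rcu_read_lock();
	p = rcu_dereference(gp);
	if (p)
		msleep(10);	/* sleeps, so schedule_debug() now complains */
	rcu_read_unlock();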
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d5097c44b407..eb98e55196b9 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -139,7 +139,6 @@ static void tick_nohz_update_jiffies(ktime_t now) | |||
| 139 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 139 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
| 140 | unsigned long flags; | 140 | unsigned long flags; |
| 141 | 141 | ||
| 142 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | ||
| 143 | ts->idle_waketime = now; | 142 | ts->idle_waketime = now; |
| 144 | 143 | ||
| 145 | local_irq_save(flags); | 144 | local_irq_save(flags); |
| @@ -389,9 +388,6 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
| 389 | else | 388 | else |
| 390 | expires.tv64 = KTIME_MAX; | 389 | expires.tv64 = KTIME_MAX; |
| 391 | 390 | ||
| 392 | if (delta_jiffies > 1) | ||
| 393 | cpumask_set_cpu(cpu, nohz_cpu_mask); | ||
| 394 | |||
| 395 | /* Skip reprogram of event if its not changed */ | 391 | /* Skip reprogram of event if its not changed */ |
| 396 | if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) | 392 | if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) |
| 397 | goto out; | 393 | goto out; |
| @@ -441,7 +437,6 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
| 441 | * softirq. | 437 | * softirq. |
| 442 | */ | 438 | */ |
| 443 | tick_do_update_jiffies64(ktime_get()); | 439 | tick_do_update_jiffies64(ktime_get()); |
| 444 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | ||
| 445 | } | 440 | } |
| 446 | raise_softirq_irqoff(TIMER_SOFTIRQ); | 441 | raise_softirq_irqoff(TIMER_SOFTIRQ); |
| 447 | out: | 442 | out: |
| @@ -524,7 +519,6 @@ void tick_nohz_restart_sched_tick(void) | |||
| 524 | /* Update jiffies first */ | 519 | /* Update jiffies first */ |
| 525 | select_nohz_load_balancer(0); | 520 | select_nohz_load_balancer(0); |
| 526 | tick_do_update_jiffies64(now); | 521 | tick_do_update_jiffies64(now); |
| 527 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | ||
| 528 | 522 | ||
| 529 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | 523 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING |
| 530 | /* | 524 | /* |
