Diffstat (limited to 'kernel')
 kernel/cgroup.c                    |   4
 kernel/debug/debug_core.c          |   4
 kernel/events/core.c               | 287
 kernel/events/uprobes.c            |  31
 kernel/futex.c                     |  56
 kernel/hrtimer.c                   |   1
 kernel/kexec.c                     |   8
 kernel/kmod.c                      |   2
 kernel/ksysfs.c                    |   5
 kernel/locking/lockdep_internals.h |   6
 kernel/locking/locktorture.c       |  10
 kernel/locking/rtmutex.c           |  32
 kernel/locking/rwsem-xadd.c        |  49
 kernel/printk/printk.c             |   1
 kernel/rcu/rcutorture.c            | 217
 kernel/rcu/tiny_plugin.h           |   8
 kernel/rcu/tree.c                  | 331
 kernel/rcu/tree.h                  |  11
 kernel/rcu/tree_plugin.h           | 144
 kernel/rcu/update.c                |  30
 kernel/resource.c                  |   7
 kernel/sched/core.c                |  23
 kernel/sched/cpupri.c              |   6
 kernel/sched/wait.c                |   2
 kernel/softirq.c                   |   4
 kernel/sysctl.c                    |   4
 kernel/torture.c                   |  40
 27 files changed, 855 insertions(+), 468 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3f1ca934a237..ceee0c54c6a4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -33,6 +33,7 @@
33#include <linux/init_task.h> 33#include <linux/init_task.h>
34#include <linux/kernel.h> 34#include <linux/kernel.h>
35#include <linux/list.h> 35#include <linux/list.h>
36#include <linux/magic.h>
36#include <linux/mm.h> 37#include <linux/mm.h>
37#include <linux/mutex.h> 38#include <linux/mutex.h>
38#include <linux/mount.h> 39#include <linux/mount.h>
@@ -1604,7 +1605,8 @@ out_unlock:
1604 if (ret) 1605 if (ret)
1605 return ERR_PTR(ret); 1606 return ERR_PTR(ret);
1606 1607
1607 dentry = kernfs_mount(fs_type, flags, root->kf_root, &new_sb); 1608 dentry = kernfs_mount(fs_type, flags, root->kf_root,
1609 CGROUP_SUPER_MAGIC, &new_sb);
1608 if (IS_ERR(dentry) || !new_sb) 1610 if (IS_ERR(dentry) || !new_sb)
1609 cgroup_put(&root->cgrp); 1611 cgroup_put(&root->cgrp);
1610 return dentry; 1612 return dentry;
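
With the cgroup filesystem now passing CGROUP_SUPER_MAGIC to kernfs_mount(), a cgroup mount reports that magic through statfs(). A minimal userspace sketch (not part of the patch; the mount point is an assumption, adjust it to a hierarchy mounted on your system):

/*
 * Check that a cgroup mount reports CGROUP_SUPER_MAGIC via statfs().
 * The path below is a placeholder for whatever hierarchy is mounted.
 */
#include <stdio.h>
#include <sys/vfs.h>
#include <linux/magic.h>

int main(void)
{
	struct statfs st;

	if (statfs("/sys/fs/cgroup/cpu", &st) != 0) {
		perror("statfs");
		return 1;
	}
	printf("f_type = 0x%lx (CGROUP_SUPER_MAGIC = 0x%x)\n",
	       (unsigned long)st.f_type, CGROUP_SUPER_MAGIC);
	return 0;
}
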
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 2956c8da1605..1adf62b39b96 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -534,7 +534,7 @@ return_normal:
534 kgdb_info[cpu].exception_state &= 534 kgdb_info[cpu].exception_state &=
535 ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE); 535 ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE);
536 kgdb_info[cpu].enter_kgdb--; 536 kgdb_info[cpu].enter_kgdb--;
537 smp_mb__before_atomic_dec(); 537 smp_mb__before_atomic();
538 atomic_dec(&slaves_in_kgdb); 538 atomic_dec(&slaves_in_kgdb);
539 dbg_touch_watchdogs(); 539 dbg_touch_watchdogs();
540 local_irq_restore(flags); 540 local_irq_restore(flags);
@@ -662,7 +662,7 @@ kgdb_restore:
662 kgdb_info[cpu].exception_state &= 662 kgdb_info[cpu].exception_state &=
663 ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE); 663 ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE);
664 kgdb_info[cpu].enter_kgdb--; 664 kgdb_info[cpu].enter_kgdb--;
665 smp_mb__before_atomic_dec(); 665 smp_mb__before_atomic();
666 atomic_dec(&masters_in_kgdb); 666 atomic_dec(&masters_in_kgdb);
667 /* Free kgdb_active */ 667 /* Free kgdb_active */
668 atomic_set(&kgdb_active, -1); 668 atomic_set(&kgdb_active, -1);
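
These hunks are part of the tree-wide rename of smp_mb__before_atomic_dec() and friends to the single smp_mb__before_atomic(), which orders prior accesses before a following atomic read-modify-write such as atomic_dec(). A loose userspace analog only (C11 atomics, not the kernel API; the counter and flag names are illustrative):

/*
 * Userspace analog of a full barrier placed before an atomic decrement,
 * the role smp_mb__before_atomic() plays ahead of atomic_dec() above.
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int slaves_in_dbg = ATOMIC_VAR_INIT(3);	/* hypothetical counter */
static int exception_state;				/* data published before the dec */

int main(void)
{
	exception_state = 0;
	/* full fence: the plain store above is ordered before the decrement */
	atomic_thread_fence(memory_order_seq_cst);
	atomic_fetch_sub_explicit(&slaves_in_dbg, 1, memory_order_relaxed);

	printf("state=%d counter=%d\n", exception_state,
	       atomic_load(&slaves_in_dbg));
	return 0;
}
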
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f83a71a3e46d..689237a0c5e8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -39,6 +39,7 @@
39#include <linux/hw_breakpoint.h> 39#include <linux/hw_breakpoint.h>
40#include <linux/mm_types.h> 40#include <linux/mm_types.h>
41#include <linux/cgroup.h> 41#include <linux/cgroup.h>
42#include <linux/module.h>
42 43
43#include "internal.h" 44#include "internal.h"
44 45
@@ -1443,6 +1444,11 @@ group_sched_out(struct perf_event *group_event,
1443 cpuctx->exclusive = 0; 1444 cpuctx->exclusive = 0;
1444} 1445}
1445 1446
1447struct remove_event {
1448 struct perf_event *event;
1449 bool detach_group;
1450};
1451
1446/* 1452/*
1447 * Cross CPU call to remove a performance event 1453 * Cross CPU call to remove a performance event
1448 * 1454 *
@@ -1451,12 +1457,15 @@ group_sched_out(struct perf_event *group_event,
1451 */ 1457 */
1452static int __perf_remove_from_context(void *info) 1458static int __perf_remove_from_context(void *info)
1453{ 1459{
1454 struct perf_event *event = info; 1460 struct remove_event *re = info;
1461 struct perf_event *event = re->event;
1455 struct perf_event_context *ctx = event->ctx; 1462 struct perf_event_context *ctx = event->ctx;
1456 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); 1463 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
1457 1464
1458 raw_spin_lock(&ctx->lock); 1465 raw_spin_lock(&ctx->lock);
1459 event_sched_out(event, cpuctx, ctx); 1466 event_sched_out(event, cpuctx, ctx);
1467 if (re->detach_group)
1468 perf_group_detach(event);
1460 list_del_event(event, ctx); 1469 list_del_event(event, ctx);
1461 if (!ctx->nr_events && cpuctx->task_ctx == ctx) { 1470 if (!ctx->nr_events && cpuctx->task_ctx == ctx) {
1462 ctx->is_active = 0; 1471 ctx->is_active = 0;
@@ -1481,10 +1490,14 @@ static int __perf_remove_from_context(void *info)
1481 * When called from perf_event_exit_task, it's OK because the 1490 * When called from perf_event_exit_task, it's OK because the
1482 * context has been detached from its task. 1491 * context has been detached from its task.
1483 */ 1492 */
1484static void perf_remove_from_context(struct perf_event *event) 1493static void perf_remove_from_context(struct perf_event *event, bool detach_group)
1485{ 1494{
1486 struct perf_event_context *ctx = event->ctx; 1495 struct perf_event_context *ctx = event->ctx;
1487 struct task_struct *task = ctx->task; 1496 struct task_struct *task = ctx->task;
1497 struct remove_event re = {
1498 .event = event,
1499 .detach_group = detach_group,
1500 };
1488 1501
1489 lockdep_assert_held(&ctx->mutex); 1502 lockdep_assert_held(&ctx->mutex);
1490 1503
@@ -1493,12 +1506,12 @@ static void perf_remove_from_context(struct perf_event *event)
1493 * Per cpu events are removed via an smp call and 1506 * Per cpu events are removed via an smp call and
1494 * the removal is always successful. 1507 * the removal is always successful.
1495 */ 1508 */
1496 cpu_function_call(event->cpu, __perf_remove_from_context, event); 1509 cpu_function_call(event->cpu, __perf_remove_from_context, &re);
1497 return; 1510 return;
1498 } 1511 }
1499 1512
1500retry: 1513retry:
1501 if (!task_function_call(task, __perf_remove_from_context, event)) 1514 if (!task_function_call(task, __perf_remove_from_context, &re))
1502 return; 1515 return;
1503 1516
1504 raw_spin_lock_irq(&ctx->lock); 1517 raw_spin_lock_irq(&ctx->lock);
@@ -1515,6 +1528,8 @@ retry:
1515 * Since the task isn't running, its safe to remove the event, us 1528 * Since the task isn't running, its safe to remove the event, us
1516 * holding the ctx->lock ensures the task won't get scheduled in. 1529 * holding the ctx->lock ensures the task won't get scheduled in.
1517 */ 1530 */
1531 if (detach_group)
1532 perf_group_detach(event);
1518 list_del_event(event, ctx); 1533 list_del_event(event, ctx);
1519 raw_spin_unlock_irq(&ctx->lock); 1534 raw_spin_unlock_irq(&ctx->lock);
1520} 1535}
@@ -1663,6 +1678,8 @@ event_sched_in(struct perf_event *event,
1663 u64 tstamp = perf_event_time(event); 1678 u64 tstamp = perf_event_time(event);
1664 int ret = 0; 1679 int ret = 0;
1665 1680
1681 lockdep_assert_held(&ctx->lock);
1682
1666 if (event->state <= PERF_EVENT_STATE_OFF) 1683 if (event->state <= PERF_EVENT_STATE_OFF)
1667 return 0; 1684 return 0;
1668 1685
@@ -3178,7 +3195,8 @@ static void free_event_rcu(struct rcu_head *head)
3178} 3195}
3179 3196
3180static void ring_buffer_put(struct ring_buffer *rb); 3197static void ring_buffer_put(struct ring_buffer *rb);
3181static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb); 3198static void ring_buffer_attach(struct perf_event *event,
3199 struct ring_buffer *rb);
3182 3200
3183static void unaccount_event_cpu(struct perf_event *event, int cpu) 3201static void unaccount_event_cpu(struct perf_event *event, int cpu)
3184{ 3202{
@@ -3229,17 +3247,19 @@ static void __free_event(struct perf_event *event)
3229 if (event->ctx) 3247 if (event->ctx)
3230 put_ctx(event->ctx); 3248 put_ctx(event->ctx);
3231 3249
3250 if (event->pmu)
3251 module_put(event->pmu->module);
3252
3232 call_rcu(&event->rcu_head, free_event_rcu); 3253 call_rcu(&event->rcu_head, free_event_rcu);
3233} 3254}
3234static void free_event(struct perf_event *event) 3255
3256static void _free_event(struct perf_event *event)
3235{ 3257{
3236 irq_work_sync(&event->pending); 3258 irq_work_sync(&event->pending);
3237 3259
3238 unaccount_event(event); 3260 unaccount_event(event);
3239 3261
3240 if (event->rb) { 3262 if (event->rb) {
3241 struct ring_buffer *rb;
3242
3243 /* 3263 /*
3244 * Can happen when we close an event with re-directed output. 3264 * Can happen when we close an event with re-directed output.
3245 * 3265 *
@@ -3247,57 +3267,38 @@ static void free_event(struct perf_event *event)
3247 * over us; possibly making our ring_buffer_put() the last. 3267 * over us; possibly making our ring_buffer_put() the last.
3248 */ 3268 */
3249 mutex_lock(&event->mmap_mutex); 3269 mutex_lock(&event->mmap_mutex);
3250 rb = event->rb; 3270 ring_buffer_attach(event, NULL);
3251 if (rb) {
3252 rcu_assign_pointer(event->rb, NULL);
3253 ring_buffer_detach(event, rb);
3254 ring_buffer_put(rb); /* could be last */
3255 }
3256 mutex_unlock(&event->mmap_mutex); 3271 mutex_unlock(&event->mmap_mutex);
3257 } 3272 }
3258 3273
3259 if (is_cgroup_event(event)) 3274 if (is_cgroup_event(event))
3260 perf_detach_cgroup(event); 3275 perf_detach_cgroup(event);
3261 3276
3262
3263 __free_event(event); 3277 __free_event(event);
3264} 3278}
3265 3279
3266int perf_event_release_kernel(struct perf_event *event) 3280/*
3281 * Used to free events which have a known refcount of 1, such as in error paths
3282 * where the event isn't exposed yet and inherited events.
3283 */
3284static void free_event(struct perf_event *event)
3267{ 3285{
3268 struct perf_event_context *ctx = event->ctx; 3286 if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1,
3269 3287 "unexpected event refcount: %ld; ptr=%p\n",
3270 WARN_ON_ONCE(ctx->parent_ctx); 3288 atomic_long_read(&event->refcount), event)) {
3271 /* 3289 /* leak to avoid use-after-free */
3272 * There are two ways this annotation is useful: 3290 return;
3273 * 3291 }
3274 * 1) there is a lock recursion from perf_event_exit_task
3275 * see the comment there.
3276 *
3277 * 2) there is a lock-inversion with mmap_sem through
3278 * perf_event_read_group(), which takes faults while
3279 * holding ctx->mutex, however this is called after
3280 * the last filedesc died, so there is no possibility
3281 * to trigger the AB-BA case.
3282 */
3283 mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
3284 raw_spin_lock_irq(&ctx->lock);
3285 perf_group_detach(event);
3286 raw_spin_unlock_irq(&ctx->lock);
3287 perf_remove_from_context(event);
3288 mutex_unlock(&ctx->mutex);
3289
3290 free_event(event);
3291 3292
3292 return 0; 3293 _free_event(event);
3293} 3294}
3294EXPORT_SYMBOL_GPL(perf_event_release_kernel);
3295 3295
3296/* 3296/*
3297 * Called when the last reference to the file is gone. 3297 * Called when the last reference to the file is gone.
3298 */ 3298 */
3299static void put_event(struct perf_event *event) 3299static void put_event(struct perf_event *event)
3300{ 3300{
3301 struct perf_event_context *ctx = event->ctx;
3301 struct task_struct *owner; 3302 struct task_struct *owner;
3302 3303
3303 if (!atomic_long_dec_and_test(&event->refcount)) 3304 if (!atomic_long_dec_and_test(&event->refcount))
@@ -3336,9 +3337,33 @@ static void put_event(struct perf_event *event)
3336 put_task_struct(owner); 3337 put_task_struct(owner);
3337 } 3338 }
3338 3339
3339 perf_event_release_kernel(event); 3340 WARN_ON_ONCE(ctx->parent_ctx);
3341 /*
3342 * There are two ways this annotation is useful:
3343 *
3344 * 1) there is a lock recursion from perf_event_exit_task
3345 * see the comment there.
3346 *
3347 * 2) there is a lock-inversion with mmap_sem through
3348 * perf_event_read_group(), which takes faults while
3349 * holding ctx->mutex, however this is called after
3350 * the last filedesc died, so there is no possibility
3351 * to trigger the AB-BA case.
3352 */
3353 mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
3354 perf_remove_from_context(event, true);
3355 mutex_unlock(&ctx->mutex);
3356
3357 _free_event(event);
3340} 3358}
3341 3359
3360int perf_event_release_kernel(struct perf_event *event)
3361{
3362 put_event(event);
3363 return 0;
3364}
3365EXPORT_SYMBOL_GPL(perf_event_release_kernel);
3366
3342static int perf_release(struct inode *inode, struct file *file) 3367static int perf_release(struct inode *inode, struct file *file)
3343{ 3368{
3344 put_event(file->private_data); 3369 put_event(file->private_data);
@@ -3839,28 +3864,47 @@ unlock:
3839static void ring_buffer_attach(struct perf_event *event, 3864static void ring_buffer_attach(struct perf_event *event,
3840 struct ring_buffer *rb) 3865 struct ring_buffer *rb)
3841{ 3866{
3867 struct ring_buffer *old_rb = NULL;
3842 unsigned long flags; 3868 unsigned long flags;
3843 3869
3844 if (!list_empty(&event->rb_entry)) 3870 if (event->rb) {
3845 return; 3871 /*
3872 * Should be impossible, we set this when removing
3873 * event->rb_entry and wait/clear when adding event->rb_entry.
3874 */
3875 WARN_ON_ONCE(event->rcu_pending);
3846 3876
3847 spin_lock_irqsave(&rb->event_lock, flags); 3877 old_rb = event->rb;
3848 if (list_empty(&event->rb_entry)) 3878 event->rcu_batches = get_state_synchronize_rcu();
3849 list_add(&event->rb_entry, &rb->event_list); 3879 event->rcu_pending = 1;
3850 spin_unlock_irqrestore(&rb->event_lock, flags);
3851}
3852 3880
3853static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb) 3881 spin_lock_irqsave(&old_rb->event_lock, flags);
3854{ 3882 list_del_rcu(&event->rb_entry);
3855 unsigned long flags; 3883 spin_unlock_irqrestore(&old_rb->event_lock, flags);
3884 }
3856 3885
3857 if (list_empty(&event->rb_entry)) 3886 if (event->rcu_pending && rb) {
3858 return; 3887 cond_synchronize_rcu(event->rcu_batches);
3888 event->rcu_pending = 0;
3889 }
3890
3891 if (rb) {
3892 spin_lock_irqsave(&rb->event_lock, flags);
3893 list_add_rcu(&event->rb_entry, &rb->event_list);
3894 spin_unlock_irqrestore(&rb->event_lock, flags);
3895 }
3896
3897 rcu_assign_pointer(event->rb, rb);
3859 3898
3860 spin_lock_irqsave(&rb->event_lock, flags); 3899 if (old_rb) {
3861 list_del_init(&event->rb_entry); 3900 ring_buffer_put(old_rb);
3862 wake_up_all(&event->waitq); 3901 /*
3863 spin_unlock_irqrestore(&rb->event_lock, flags); 3902 * Since we detached before setting the new rb, so that we
3903 * could attach the new rb, we could have missed a wakeup.
3904 * Provide it now.
3905 */
3906 wake_up_all(&event->waitq);
3907 }
3864} 3908}
3865 3909
3866static void ring_buffer_wakeup(struct perf_event *event) 3910static void ring_buffer_wakeup(struct perf_event *event)
@@ -3929,7 +3973,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
3929{ 3973{
3930 struct perf_event *event = vma->vm_file->private_data; 3974 struct perf_event *event = vma->vm_file->private_data;
3931 3975
3932 struct ring_buffer *rb = event->rb; 3976 struct ring_buffer *rb = ring_buffer_get(event);
3933 struct user_struct *mmap_user = rb->mmap_user; 3977 struct user_struct *mmap_user = rb->mmap_user;
3934 int mmap_locked = rb->mmap_locked; 3978 int mmap_locked = rb->mmap_locked;
3935 unsigned long size = perf_data_size(rb); 3979 unsigned long size = perf_data_size(rb);
@@ -3937,18 +3981,14 @@ static void perf_mmap_close(struct vm_area_struct *vma)
3937 atomic_dec(&rb->mmap_count); 3981 atomic_dec(&rb->mmap_count);
3938 3982
3939 if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) 3983 if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
3940 return; 3984 goto out_put;
3941 3985
3942 /* Detach current event from the buffer. */ 3986 ring_buffer_attach(event, NULL);
3943 rcu_assign_pointer(event->rb, NULL);
3944 ring_buffer_detach(event, rb);
3945 mutex_unlock(&event->mmap_mutex); 3987 mutex_unlock(&event->mmap_mutex);
3946 3988
3947 /* If there's still other mmap()s of this buffer, we're done. */ 3989 /* If there's still other mmap()s of this buffer, we're done. */
3948 if (atomic_read(&rb->mmap_count)) { 3990 if (atomic_read(&rb->mmap_count))
3949 ring_buffer_put(rb); /* can't be last */ 3991 goto out_put;
3950 return;
3951 }
3952 3992
3953 /* 3993 /*
3954 * No other mmap()s, detach from all other events that might redirect 3994 * No other mmap()s, detach from all other events that might redirect
@@ -3978,11 +4018,9 @@ again:
3978 * still restart the iteration to make sure we're not now 4018 * still restart the iteration to make sure we're not now
3979 * iterating the wrong list. 4019 * iterating the wrong list.
3980 */ 4020 */
3981 if (event->rb == rb) { 4021 if (event->rb == rb)
3982 rcu_assign_pointer(event->rb, NULL); 4022 ring_buffer_attach(event, NULL);
3983 ring_buffer_detach(event, rb); 4023
3984 ring_buffer_put(rb); /* can't be last, we still have one */
3985 }
3986 mutex_unlock(&event->mmap_mutex); 4024 mutex_unlock(&event->mmap_mutex);
3987 put_event(event); 4025 put_event(event);
3988 4026
@@ -4007,6 +4045,7 @@ again:
4007 vma->vm_mm->pinned_vm -= mmap_locked; 4045 vma->vm_mm->pinned_vm -= mmap_locked;
4008 free_uid(mmap_user); 4046 free_uid(mmap_user);
4009 4047
4048out_put:
4010 ring_buffer_put(rb); /* could be last */ 4049 ring_buffer_put(rb); /* could be last */
4011} 4050}
4012 4051
@@ -4124,7 +4163,6 @@ again:
4124 vma->vm_mm->pinned_vm += extra; 4163 vma->vm_mm->pinned_vm += extra;
4125 4164
4126 ring_buffer_attach(event, rb); 4165 ring_buffer_attach(event, rb);
4127 rcu_assign_pointer(event->rb, rb);
4128 4166
4129 perf_event_init_userpage(event); 4167 perf_event_init_userpage(event);
4130 perf_event_update_userpage(event); 4168 perf_event_update_userpage(event);
@@ -5408,6 +5446,9 @@ struct swevent_htable {
5408 5446
5409 /* Recursion avoidance in each contexts */ 5447 /* Recursion avoidance in each contexts */
5410 int recursion[PERF_NR_CONTEXTS]; 5448 int recursion[PERF_NR_CONTEXTS];
5449
5450 /* Keeps track of cpu being initialized/exited */
5451 bool online;
5411}; 5452};
5412 5453
5413static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); 5454static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
@@ -5654,8 +5695,14 @@ static int perf_swevent_add(struct perf_event *event, int flags)
5654 hwc->state = !(flags & PERF_EF_START); 5695 hwc->state = !(flags & PERF_EF_START);
5655 5696
5656 head = find_swevent_head(swhash, event); 5697 head = find_swevent_head(swhash, event);
5657 if (WARN_ON_ONCE(!head)) 5698 if (!head) {
5699 /*
5700 * We can race with cpu hotplug code. Do not
5701 * WARN if the cpu just got unplugged.
5702 */
5703 WARN_ON_ONCE(swhash->online);
5658 return -EINVAL; 5704 return -EINVAL;
5705 }
5659 5706
5660 hlist_add_head_rcu(&event->hlist_entry, head); 5707 hlist_add_head_rcu(&event->hlist_entry, head);
5661 5708
@@ -6551,6 +6598,7 @@ free_pdc:
6551 free_percpu(pmu->pmu_disable_count); 6598 free_percpu(pmu->pmu_disable_count);
6552 goto unlock; 6599 goto unlock;
6553} 6600}
6601EXPORT_SYMBOL_GPL(perf_pmu_register);
6554 6602
6555void perf_pmu_unregister(struct pmu *pmu) 6603void perf_pmu_unregister(struct pmu *pmu)
6556{ 6604{
@@ -6572,6 +6620,7 @@ void perf_pmu_unregister(struct pmu *pmu)
6572 put_device(pmu->dev); 6620 put_device(pmu->dev);
6573 free_pmu_context(pmu); 6621 free_pmu_context(pmu);
6574} 6622}
6623EXPORT_SYMBOL_GPL(perf_pmu_unregister);
6575 6624
6576struct pmu *perf_init_event(struct perf_event *event) 6625struct pmu *perf_init_event(struct perf_event *event)
6577{ 6626{
@@ -6585,6 +6634,10 @@ struct pmu *perf_init_event(struct perf_event *event)
6585 pmu = idr_find(&pmu_idr, event->attr.type); 6634 pmu = idr_find(&pmu_idr, event->attr.type);
6586 rcu_read_unlock(); 6635 rcu_read_unlock();
6587 if (pmu) { 6636 if (pmu) {
6637 if (!try_module_get(pmu->module)) {
6638 pmu = ERR_PTR(-ENODEV);
6639 goto unlock;
6640 }
6588 event->pmu = pmu; 6641 event->pmu = pmu;
6589 ret = pmu->event_init(event); 6642 ret = pmu->event_init(event);
6590 if (ret) 6643 if (ret)
@@ -6593,6 +6646,10 @@ struct pmu *perf_init_event(struct perf_event *event)
6593 } 6646 }
6594 6647
6595 list_for_each_entry_rcu(pmu, &pmus, entry) { 6648 list_for_each_entry_rcu(pmu, &pmus, entry) {
6649 if (!try_module_get(pmu->module)) {
6650 pmu = ERR_PTR(-ENODEV);
6651 goto unlock;
6652 }
6596 event->pmu = pmu; 6653 event->pmu = pmu;
6597 ret = pmu->event_init(event); 6654 ret = pmu->event_init(event);
6598 if (!ret) 6655 if (!ret)
@@ -6771,6 +6828,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
6771err_pmu: 6828err_pmu:
6772 if (event->destroy) 6829 if (event->destroy)
6773 event->destroy(event); 6830 event->destroy(event);
6831 module_put(pmu->module);
6774err_ns: 6832err_ns:
6775 if (event->ns) 6833 if (event->ns)
6776 put_pid_ns(event->ns); 6834 put_pid_ns(event->ns);
@@ -6914,7 +6972,7 @@ err_size:
6914static int 6972static int
6915perf_event_set_output(struct perf_event *event, struct perf_event *output_event) 6973perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
6916{ 6974{
6917 struct ring_buffer *rb = NULL, *old_rb = NULL; 6975 struct ring_buffer *rb = NULL;
6918 int ret = -EINVAL; 6976 int ret = -EINVAL;
6919 6977
6920 if (!output_event) 6978 if (!output_event)
@@ -6942,8 +7000,6 @@ set:
6942 if (atomic_read(&event->mmap_count)) 7000 if (atomic_read(&event->mmap_count))
6943 goto unlock; 7001 goto unlock;
6944 7002
6945 old_rb = event->rb;
6946
6947 if (output_event) { 7003 if (output_event) {
6948 /* get the rb we want to redirect to */ 7004 /* get the rb we want to redirect to */
6949 rb = ring_buffer_get(output_event); 7005 rb = ring_buffer_get(output_event);
@@ -6951,23 +7007,7 @@ set:
6951 goto unlock; 7007 goto unlock;
6952 } 7008 }
6953 7009
6954 if (old_rb) 7010 ring_buffer_attach(event, rb);
6955 ring_buffer_detach(event, old_rb);
6956
6957 if (rb)
6958 ring_buffer_attach(event, rb);
6959
6960 rcu_assign_pointer(event->rb, rb);
6961
6962 if (old_rb) {
6963 ring_buffer_put(old_rb);
6964 /*
6965 * Since we detached before setting the new rb, so that we
6966 * could attach the new rb, we could have missed a wakeup.
6967 * Provide it now.
6968 */
6969 wake_up_all(&event->waitq);
6970 }
6971 7011
6972 ret = 0; 7012 ret = 0;
6973unlock: 7013unlock:
@@ -7018,6 +7058,9 @@ SYSCALL_DEFINE5(perf_event_open,
7018 if (attr.freq) { 7058 if (attr.freq) {
7019 if (attr.sample_freq > sysctl_perf_event_sample_rate) 7059 if (attr.sample_freq > sysctl_perf_event_sample_rate)
7020 return -EINVAL; 7060 return -EINVAL;
7061 } else {
7062 if (attr.sample_period & (1ULL << 63))
7063 return -EINVAL;
7021 } 7064 }
7022 7065
7023 /* 7066 /*
@@ -7055,20 +7098,26 @@ SYSCALL_DEFINE5(perf_event_open,
7055 } 7098 }
7056 } 7099 }
7057 7100
7101 if (task && group_leader &&
7102 group_leader->attr.inherit != attr.inherit) {
7103 err = -EINVAL;
7104 goto err_task;
7105 }
7106
7058 get_online_cpus(); 7107 get_online_cpus();
7059 7108
7060 event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, 7109 event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
7061 NULL, NULL); 7110 NULL, NULL);
7062 if (IS_ERR(event)) { 7111 if (IS_ERR(event)) {
7063 err = PTR_ERR(event); 7112 err = PTR_ERR(event);
7064 goto err_task; 7113 goto err_cpus;
7065 } 7114 }
7066 7115
7067 if (flags & PERF_FLAG_PID_CGROUP) { 7116 if (flags & PERF_FLAG_PID_CGROUP) {
7068 err = perf_cgroup_connect(pid, event, &attr, group_leader); 7117 err = perf_cgroup_connect(pid, event, &attr, group_leader);
7069 if (err) { 7118 if (err) {
7070 __free_event(event); 7119 __free_event(event);
7071 goto err_task; 7120 goto err_cpus;
7072 } 7121 }
7073 } 7122 }
7074 7123
@@ -7165,7 +7214,7 @@ SYSCALL_DEFINE5(perf_event_open,
7165 struct perf_event_context *gctx = group_leader->ctx; 7214 struct perf_event_context *gctx = group_leader->ctx;
7166 7215
7167 mutex_lock(&gctx->mutex); 7216 mutex_lock(&gctx->mutex);
7168 perf_remove_from_context(group_leader); 7217 perf_remove_from_context(group_leader, false);
7169 7218
7170 /* 7219 /*
7171 * Removing from the context ends up with disabled 7220 * Removing from the context ends up with disabled
@@ -7175,7 +7224,7 @@ SYSCALL_DEFINE5(perf_event_open,
7175 perf_event__state_init(group_leader); 7224 perf_event__state_init(group_leader);
7176 list_for_each_entry(sibling, &group_leader->sibling_list, 7225 list_for_each_entry(sibling, &group_leader->sibling_list,
7177 group_entry) { 7226 group_entry) {
7178 perf_remove_from_context(sibling); 7227 perf_remove_from_context(sibling, false);
7179 perf_event__state_init(sibling); 7228 perf_event__state_init(sibling);
7180 put_ctx(gctx); 7229 put_ctx(gctx);
7181 } 7230 }
@@ -7230,8 +7279,9 @@ err_context:
7230 put_ctx(ctx); 7279 put_ctx(ctx);
7231err_alloc: 7280err_alloc:
7232 free_event(event); 7281 free_event(event);
7233err_task: 7282err_cpus:
7234 put_online_cpus(); 7283 put_online_cpus();
7284err_task:
7235 if (task) 7285 if (task)
7236 put_task_struct(task); 7286 put_task_struct(task);
7237err_group_fd: 7287err_group_fd:
@@ -7305,7 +7355,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
7305 mutex_lock(&src_ctx->mutex); 7355 mutex_lock(&src_ctx->mutex);
7306 list_for_each_entry_safe(event, tmp, &src_ctx->event_list, 7356 list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
7307 event_entry) { 7357 event_entry) {
7308 perf_remove_from_context(event); 7358 perf_remove_from_context(event, false);
7309 unaccount_event_cpu(event, src_cpu); 7359 unaccount_event_cpu(event, src_cpu);
7310 put_ctx(src_ctx); 7360 put_ctx(src_ctx);
7311 list_add(&event->migrate_entry, &events); 7361 list_add(&event->migrate_entry, &events);
@@ -7367,13 +7417,7 @@ __perf_event_exit_task(struct perf_event *child_event,
7367 struct perf_event_context *child_ctx, 7417 struct perf_event_context *child_ctx,
7368 struct task_struct *child) 7418 struct task_struct *child)
7369{ 7419{
7370 if (child_event->parent) { 7420 perf_remove_from_context(child_event, true);
7371 raw_spin_lock_irq(&child_ctx->lock);
7372 perf_group_detach(child_event);
7373 raw_spin_unlock_irq(&child_ctx->lock);
7374 }
7375
7376 perf_remove_from_context(child_event);
7377 7421
7378 /* 7422 /*
7379 * It can happen that the parent exits first, and has events 7423 * It can happen that the parent exits first, and has events
@@ -7388,7 +7432,7 @@ __perf_event_exit_task(struct perf_event *child_event,
7388 7432
7389static void perf_event_exit_task_context(struct task_struct *child, int ctxn) 7433static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
7390{ 7434{
7391 struct perf_event *child_event, *tmp; 7435 struct perf_event *child_event;
7392 struct perf_event_context *child_ctx; 7436 struct perf_event_context *child_ctx;
7393 unsigned long flags; 7437 unsigned long flags;
7394 7438
@@ -7442,24 +7486,9 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
7442 */ 7486 */
7443 mutex_lock(&child_ctx->mutex); 7487 mutex_lock(&child_ctx->mutex);
7444 7488
7445again: 7489 list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry)
7446 list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
7447 group_entry)
7448 __perf_event_exit_task(child_event, child_ctx, child); 7490 __perf_event_exit_task(child_event, child_ctx, child);
7449 7491
7450 list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
7451 group_entry)
7452 __perf_event_exit_task(child_event, child_ctx, child);
7453
7454 /*
7455 * If the last event was a group event, it will have appended all
7456 * its siblings to the list, but we obtained 'tmp' before that which
7457 * will still point to the list head terminating the iteration.
7458 */
7459 if (!list_empty(&child_ctx->pinned_groups) ||
7460 !list_empty(&child_ctx->flexible_groups))
7461 goto again;
7462
7463 mutex_unlock(&child_ctx->mutex); 7492 mutex_unlock(&child_ctx->mutex);
7464 7493
7465 put_ctx(child_ctx); 7494 put_ctx(child_ctx);
@@ -7724,6 +7753,8 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
7724 * swapped under us. 7753 * swapped under us.
7725 */ 7754 */
7726 parent_ctx = perf_pin_task_context(parent, ctxn); 7755 parent_ctx = perf_pin_task_context(parent, ctxn);
7756 if (!parent_ctx)
7757 return 0;
7727 7758
7728 /* 7759 /*
7729 * No need to check if parent_ctx != NULL here; since we saw 7760 * No need to check if parent_ctx != NULL here; since we saw
@@ -7835,6 +7866,7 @@ static void perf_event_init_cpu(int cpu)
7835 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); 7866 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
7836 7867
7837 mutex_lock(&swhash->hlist_mutex); 7868 mutex_lock(&swhash->hlist_mutex);
7869 swhash->online = true;
7838 if (swhash->hlist_refcount > 0) { 7870 if (swhash->hlist_refcount > 0) {
7839 struct swevent_hlist *hlist; 7871 struct swevent_hlist *hlist;
7840 7872
@@ -7857,14 +7889,14 @@ static void perf_pmu_rotate_stop(struct pmu *pmu)
7857 7889
7858static void __perf_event_exit_context(void *__info) 7890static void __perf_event_exit_context(void *__info)
7859{ 7891{
7892 struct remove_event re = { .detach_group = false };
7860 struct perf_event_context *ctx = __info; 7893 struct perf_event_context *ctx = __info;
7861 struct perf_event *event;
7862 7894
7863 perf_pmu_rotate_stop(ctx->pmu); 7895 perf_pmu_rotate_stop(ctx->pmu);
7864 7896
7865 rcu_read_lock(); 7897 rcu_read_lock();
7866 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) 7898 list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry)
7867 __perf_remove_from_context(event); 7899 __perf_remove_from_context(&re);
7868 rcu_read_unlock(); 7900 rcu_read_unlock();
7869} 7901}
7870 7902
@@ -7892,6 +7924,7 @@ static void perf_event_exit_cpu(int cpu)
7892 perf_event_exit_cpu_context(cpu); 7924 perf_event_exit_cpu_context(cpu);
7893 7925
7894 mutex_lock(&swhash->hlist_mutex); 7926 mutex_lock(&swhash->hlist_mutex);
7927 swhash->online = false;
7895 swevent_hlist_release(swhash); 7928 swevent_hlist_release(swhash);
7896 mutex_unlock(&swhash->hlist_mutex); 7929 mutex_unlock(&swhash->hlist_mutex);
7897} 7930}
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 04709b66369d..d1edc5e6fd03 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -60,8 +60,6 @@ static struct percpu_rw_semaphore dup_mmap_sem;
60 60
61/* Have a copy of original instruction */ 61/* Have a copy of original instruction */
62#define UPROBE_COPY_INSN 0 62#define UPROBE_COPY_INSN 0
63/* Can skip singlestep */
64#define UPROBE_SKIP_SSTEP 1
65 63
66struct uprobe { 64struct uprobe {
67 struct rb_node rb_node; /* node in the rb tree */ 65 struct rb_node rb_node; /* node in the rb tree */
@@ -491,12 +489,9 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
491 uprobe->offset = offset; 489 uprobe->offset = offset;
492 init_rwsem(&uprobe->register_rwsem); 490 init_rwsem(&uprobe->register_rwsem);
493 init_rwsem(&uprobe->consumer_rwsem); 491 init_rwsem(&uprobe->consumer_rwsem);
494 /* For now assume that the instruction need not be single-stepped */
495 __set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
496 492
497 /* add to uprobes_tree, sorted on inode:offset */ 493 /* add to uprobes_tree, sorted on inode:offset */
498 cur_uprobe = insert_uprobe(uprobe); 494 cur_uprobe = insert_uprobe(uprobe);
499
500 /* a uprobe exists for this inode:offset combination */ 495 /* a uprobe exists for this inode:offset combination */
501 if (cur_uprobe) { 496 if (cur_uprobe) {
502 kfree(uprobe); 497 kfree(uprobe);
@@ -1628,20 +1623,6 @@ bool uprobe_deny_signal(void)
1628 return true; 1623 return true;
1629} 1624}
1630 1625
1631/*
1632 * Avoid singlestepping the original instruction if the original instruction
1633 * is a NOP or can be emulated.
1634 */
1635static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
1636{
1637 if (test_bit(UPROBE_SKIP_SSTEP, &uprobe->flags)) {
1638 if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
1639 return true;
1640 clear_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
1641 }
1642 return false;
1643}
1644
1645static void mmf_recalc_uprobes(struct mm_struct *mm) 1626static void mmf_recalc_uprobes(struct mm_struct *mm)
1646{ 1627{
1647 struct vm_area_struct *vma; 1628 struct vm_area_struct *vma;
@@ -1868,13 +1849,13 @@ static void handle_swbp(struct pt_regs *regs)
1868 1849
1869 handler_chain(uprobe, regs); 1850 handler_chain(uprobe, regs);
1870 1851
1871 if (can_skip_sstep(uprobe, regs)) 1852 if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
1872 goto out; 1853 goto out;
1873 1854
1874 if (!pre_ssout(uprobe, regs, bp_vaddr)) 1855 if (!pre_ssout(uprobe, regs, bp_vaddr))
1875 return; 1856 return;
1876 1857
1877 /* can_skip_sstep() succeeded, or restart if can't singlestep */ 1858 /* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
1878out: 1859out:
1879 put_uprobe(uprobe); 1860 put_uprobe(uprobe);
1880} 1861}
@@ -1886,10 +1867,11 @@ out:
1886static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs) 1867static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
1887{ 1868{
1888 struct uprobe *uprobe; 1869 struct uprobe *uprobe;
1870 int err = 0;
1889 1871
1890 uprobe = utask->active_uprobe; 1872 uprobe = utask->active_uprobe;
1891 if (utask->state == UTASK_SSTEP_ACK) 1873 if (utask->state == UTASK_SSTEP_ACK)
1892 arch_uprobe_post_xol(&uprobe->arch, regs); 1874 err = arch_uprobe_post_xol(&uprobe->arch, regs);
1893 else if (utask->state == UTASK_SSTEP_TRAPPED) 1875 else if (utask->state == UTASK_SSTEP_TRAPPED)
1894 arch_uprobe_abort_xol(&uprobe->arch, regs); 1876 arch_uprobe_abort_xol(&uprobe->arch, regs);
1895 else 1877 else
@@ -1903,6 +1885,11 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
1903 spin_lock_irq(&current->sighand->siglock); 1885 spin_lock_irq(&current->sighand->siglock);
1904 recalc_sigpending(); /* see uprobe_deny_signal() */ 1886 recalc_sigpending(); /* see uprobe_deny_signal() */
1905 spin_unlock_irq(&current->sighand->siglock); 1887 spin_unlock_irq(&current->sighand->siglock);
1888
1889 if (unlikely(err)) {
1890 uprobe_warn(current, "execute the probed insn, sending SIGILL.");
1891 force_sig_info(SIGILL, SEND_SIG_FORCED, current);
1892 }
1906} 1893}
1907 1894
1908/* 1895/*
diff --git a/kernel/futex.c b/kernel/futex.c
index 5f589279e462..89bc9d59ac65 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -267,7 +267,7 @@ static inline void futex_get_mm(union futex_key *key)
267 * get_futex_key() implies a full barrier. This is relied upon 267 * get_futex_key() implies a full barrier. This is relied upon
268 * as full barrier (B), see the ordering comment above. 268 * as full barrier (B), see the ordering comment above.
269 */ 269 */
270 smp_mb__after_atomic_inc(); 270 smp_mb__after_atomic();
271} 271}
272 272
273/* 273/*
@@ -280,7 +280,7 @@ static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
280 /* 280 /*
281 * Full barrier (A), see the ordering comment above. 281 * Full barrier (A), see the ordering comment above.
282 */ 282 */
283 smp_mb__after_atomic_inc(); 283 smp_mb__after_atomic();
284#endif 284#endif
285} 285}
286 286
@@ -745,7 +745,8 @@ void exit_pi_state_list(struct task_struct *curr)
745 745
746static int 746static int
747lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, 747lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
748 union futex_key *key, struct futex_pi_state **ps) 748 union futex_key *key, struct futex_pi_state **ps,
749 struct task_struct *task)
749{ 750{
750 struct futex_pi_state *pi_state = NULL; 751 struct futex_pi_state *pi_state = NULL;
751 struct futex_q *this, *next; 752 struct futex_q *this, *next;
@@ -786,6 +787,16 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
786 return -EINVAL; 787 return -EINVAL;
787 } 788 }
788 789
790 /*
791 * Protect against a corrupted uval. If uval
792 * is 0x80000000 then pid is 0 and the waiter
793 * bit is set. So the deadlock check in the
794 * calling code has failed and we did not fall
795 * into the check above due to !pid.
796 */
797 if (task && pi_state->owner == task)
798 return -EDEADLK;
799
789 atomic_inc(&pi_state->refcount); 800 atomic_inc(&pi_state->refcount);
790 *ps = pi_state; 801 *ps = pi_state;
791 802
@@ -803,6 +814,11 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
803 if (!p) 814 if (!p)
804 return -ESRCH; 815 return -ESRCH;
805 816
817 if (!p->mm) {
818 put_task_struct(p);
819 return -EPERM;
820 }
821
806 /* 822 /*
807 * We need to look at the task state flags to figure out, 823 * We need to look at the task state flags to figure out,
808 * whether the task is exiting. To protect against the do_exit 824 * whether the task is exiting. To protect against the do_exit
@@ -935,7 +951,7 @@ retry:
935 * We dont have the lock. Look up the PI state (or create it if 951 * We dont have the lock. Look up the PI state (or create it if
936 * we are the first waiter): 952 * we are the first waiter):
937 */ 953 */
938 ret = lookup_pi_state(uval, hb, key, ps); 954 ret = lookup_pi_state(uval, hb, key, ps, task);
939 955
940 if (unlikely(ret)) { 956 if (unlikely(ret)) {
941 switch (ret) { 957 switch (ret) {
@@ -1347,7 +1363,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
1347 * 1363 *
1348 * Return: 1364 * Return:
1349 * 0 - failed to acquire the lock atomically; 1365 * 0 - failed to acquire the lock atomically;
1350 * 1 - acquired the lock; 1366 * >0 - acquired the lock, return value is vpid of the top_waiter
1351 * <0 - error 1367 * <0 - error
1352 */ 1368 */
1353static int futex_proxy_trylock_atomic(u32 __user *pifutex, 1369static int futex_proxy_trylock_atomic(u32 __user *pifutex,
@@ -1358,7 +1374,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
1358{ 1374{
1359 struct futex_q *top_waiter = NULL; 1375 struct futex_q *top_waiter = NULL;
1360 u32 curval; 1376 u32 curval;
1361 int ret; 1377 int ret, vpid;
1362 1378
1363 if (get_futex_value_locked(&curval, pifutex)) 1379 if (get_futex_value_locked(&curval, pifutex))
1364 return -EFAULT; 1380 return -EFAULT;
@@ -1386,11 +1402,13 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
1386 * the contended case or if set_waiters is 1. The pi_state is returned 1402 * the contended case or if set_waiters is 1. The pi_state is returned
1387 * in ps in contended cases. 1403 * in ps in contended cases.
1388 */ 1404 */
1405 vpid = task_pid_vnr(top_waiter->task);
1389 ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, 1406 ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
1390 set_waiters); 1407 set_waiters);
1391 if (ret == 1) 1408 if (ret == 1) {
1392 requeue_pi_wake_futex(top_waiter, key2, hb2); 1409 requeue_pi_wake_futex(top_waiter, key2, hb2);
1393 1410 return vpid;
1411 }
1394 return ret; 1412 return ret;
1395} 1413}
1396 1414
@@ -1421,7 +1439,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
1421 struct futex_pi_state *pi_state = NULL; 1439 struct futex_pi_state *pi_state = NULL;
1422 struct futex_hash_bucket *hb1, *hb2; 1440 struct futex_hash_bucket *hb1, *hb2;
1423 struct futex_q *this, *next; 1441 struct futex_q *this, *next;
1424 u32 curval2;
1425 1442
1426 if (requeue_pi) { 1443 if (requeue_pi) {
1427 /* 1444 /*
@@ -1509,16 +1526,25 @@ retry_private:
1509 * At this point the top_waiter has either taken uaddr2 or is 1526 * At this point the top_waiter has either taken uaddr2 or is
1510 * waiting on it. If the former, then the pi_state will not 1527 * waiting on it. If the former, then the pi_state will not
1511 * exist yet, look it up one more time to ensure we have a 1528 * exist yet, look it up one more time to ensure we have a
1512 * reference to it. 1529 * reference to it. If the lock was taken, ret contains the
1530 * vpid of the top waiter task.
1513 */ 1531 */
1514 if (ret == 1) { 1532 if (ret > 0) {
1515 WARN_ON(pi_state); 1533 WARN_ON(pi_state);
1516 drop_count++; 1534 drop_count++;
1517 task_count++; 1535 task_count++;
1518 ret = get_futex_value_locked(&curval2, uaddr2); 1536 /*
1519 if (!ret) 1537 * If we acquired the lock, then the user
1520 ret = lookup_pi_state(curval2, hb2, &key2, 1538 * space value of uaddr2 should be vpid. It
1521 &pi_state); 1539 * cannot be changed by the top waiter as it
1540 * is blocked on hb2 lock if it tries to do
1541 * so. If something fiddled with it behind our
1542 * back the pi state lookup might unearth
1543 * it. So we rather use the known value than
1544 * rereading and handing potential crap to
1545 * lookup_pi_state.
1546 */
1547 ret = lookup_pi_state(ret, hb2, &key2, &pi_state, NULL);
1522 } 1548 }
1523 1549
1524 switch (ret) { 1550 switch (ret) {
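
The futex changes harden lookup_pi_state() against corrupted user-space values (-EDEADLK when the pi_state owner is the calling task, -EPERM when the presumed owner has no mm). A hedged demo of the kernel's PI-futex deadlock reporting in general, not of the new lookup_pi_state() path specifically: a futex word carrying the caller's own TID claims the caller already owns the lock, so FUTEX_LOCK_PI fails with EDEADLK.

/*
 * Demo (not part of the patch): store our own TID in the futex word and
 * then try to take the PI lock; the kernel's owner-is-caller check
 * returns EDEADLK.
 */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/futex.h>

int main(void)
{
	uint32_t futex_word = (uint32_t)syscall(SYS_gettid);
	long ret;

	ret = syscall(SYS_futex, &futex_word, FUTEX_LOCK_PI, 0, NULL, NULL, 0);
	if (ret == -1)
		printf("FUTEX_LOCK_PI: %s\n", strerror(errno));	/* EDEADLK */
	return 0;
}
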
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index e0501fe7140d..3ab28993f6e0 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1039,6 +1039,7 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
1039 1039
1040 return ret; 1040 return ret;
1041} 1041}
1042EXPORT_SYMBOL_GPL(__hrtimer_start_range_ns);
1042 1043
1043/** 1044/**
1044 * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU 1045 * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
diff --git a/kernel/kexec.c b/kernel/kexec.c
index c8380ad203bc..28c57069ef68 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1683,6 +1683,14 @@ int kernel_kexec(void)
1683 kexec_in_progress = true; 1683 kexec_in_progress = true;
1684 kernel_restart_prepare(NULL); 1684 kernel_restart_prepare(NULL);
1685 migrate_to_reboot_cpu(); 1685 migrate_to_reboot_cpu();
1686
1687 /*
1688 * migrate_to_reboot_cpu() disables CPU hotplug assuming that
1689 * no further code needs to use CPU hotplug (which is true in
1690 * the reboot case). However, the kexec path depends on using
1691 * CPU hotplug again; so re-enable it here.
1692 */
1693 cpu_hotplug_enable();
1686 printk(KERN_EMERG "Starting new kernel\n"); 1694 printk(KERN_EMERG "Starting new kernel\n");
1687 machine_shutdown(); 1695 machine_shutdown();
1688 } 1696 }
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 6b375af4958d..0ac67a5861c5 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -498,7 +498,7 @@ int __usermodehelper_disable(enum umh_disable_depth depth)
498static void helper_lock(void) 498static void helper_lock(void)
499{ 499{
500 atomic_inc(&running_helpers); 500 atomic_inc(&running_helpers);
501 smp_mb__after_atomic_inc(); 501 smp_mb__after_atomic();
502} 502}
503 503
504static void helper_unlock(void) 504static void helper_unlock(void)
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 2495a9b14ac8..6683ccef9fff 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -37,6 +37,7 @@ static ssize_t uevent_seqnum_show(struct kobject *kobj,
37} 37}
38KERNEL_ATTR_RO(uevent_seqnum); 38KERNEL_ATTR_RO(uevent_seqnum);
39 39
40#ifdef CONFIG_UEVENT_HELPER
40/* uevent helper program, used during early boot */ 41/* uevent helper program, used during early boot */
41static ssize_t uevent_helper_show(struct kobject *kobj, 42static ssize_t uevent_helper_show(struct kobject *kobj,
42 struct kobj_attribute *attr, char *buf) 43 struct kobj_attribute *attr, char *buf)
@@ -56,7 +57,7 @@ static ssize_t uevent_helper_store(struct kobject *kobj,
56 return count; 57 return count;
57} 58}
58KERNEL_ATTR_RW(uevent_helper); 59KERNEL_ATTR_RW(uevent_helper);
59 60#endif
60 61
61#ifdef CONFIG_PROFILING 62#ifdef CONFIG_PROFILING
62static ssize_t profiling_show(struct kobject *kobj, 63static ssize_t profiling_show(struct kobject *kobj,
@@ -189,7 +190,9 @@ EXPORT_SYMBOL_GPL(kernel_kobj);
189static struct attribute * kernel_attrs[] = { 190static struct attribute * kernel_attrs[] = {
190 &fscaps_attr.attr, 191 &fscaps_attr.attr,
191 &uevent_seqnum_attr.attr, 192 &uevent_seqnum_attr.attr,
193#ifdef CONFIG_UEVENT_HELPER
192 &uevent_helper_attr.attr, 194 &uevent_helper_attr.attr,
195#endif
193#ifdef CONFIG_PROFILING 196#ifdef CONFIG_PROFILING
194 &profiling_attr.attr, 197 &profiling_attr.attr,
195#endif 198#endif
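
With the uevent helper attribute now conditional on CONFIG_UEVENT_HELPER, userspace readers should treat a missing /sys/kernel/uevent_helper as "not compiled in" rather than as an error. A minimal sketch (not part of the patch):

/* Tolerate the absence of /sys/kernel/uevent_helper on kernels built
 * without CONFIG_UEVENT_HELPER. */
#include <stdio.h>
#include <errno.h>

int main(void)
{
	char buf[256] = "";
	FILE *f = fopen("/sys/kernel/uevent_helper", "r");

	if (!f) {
		if (errno == ENOENT)
			printf("uevent helper support not compiled in\n");
		else
			perror("fopen");
		return 0;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("uevent helper: %s\n", buf);
	fclose(f);
	return 0;
}
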
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index 4f560cfedc8f..51c4b24b6328 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -54,9 +54,9 @@ enum {
54 * table (if it's not there yet), and we check it for lock order 54 * table (if it's not there yet), and we check it for lock order
55 * conflicts and deadlocks. 55 * conflicts and deadlocks.
56 */ 56 */
57#define MAX_LOCKDEP_ENTRIES 16384UL 57#define MAX_LOCKDEP_ENTRIES 32768UL
58 58
59#define MAX_LOCKDEP_CHAINS_BITS 15 59#define MAX_LOCKDEP_CHAINS_BITS 16
60#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) 60#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS)
61 61
62#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) 62#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5)
@@ -65,7 +65,7 @@ enum {
65 * Stack-trace: tightly packed array of stack backtrace 65 * Stack-trace: tightly packed array of stack backtrace
66 * addresses. Protected by the hash_lock. 66 * addresses. Protected by the hash_lock.
67 */ 67 */
68#define MAX_STACK_TRACE_ENTRIES 262144UL 68#define MAX_STACK_TRACE_ENTRIES 524288UL
69 69
70extern struct list_head all_lock_classes; 70extern struct list_head all_lock_classes;
71extern struct lock_chain lock_chains[]; 71extern struct lock_chain lock_chains[];
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 23343be46e91..0955b885d0dc 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -82,14 +82,14 @@ struct lock_writer_stress_stats {
82}; 82};
83static struct lock_writer_stress_stats *lwsa; 83static struct lock_writer_stress_stats *lwsa;
84 84
85#if defined(MODULE) || defined(CONFIG_LOCK_TORTURE_TEST_RUNNABLE) 85#if defined(MODULE)
86#define LOCKTORTURE_RUNNABLE_INIT 1 86#define LOCKTORTURE_RUNNABLE_INIT 1
87#else 87#else
88#define LOCKTORTURE_RUNNABLE_INIT 0 88#define LOCKTORTURE_RUNNABLE_INIT 0
89#endif 89#endif
90int locktorture_runnable = LOCKTORTURE_RUNNABLE_INIT; 90int locktorture_runnable = LOCKTORTURE_RUNNABLE_INIT;
91module_param(locktorture_runnable, int, 0444); 91module_param(locktorture_runnable, int, 0444);
92MODULE_PARM_DESC(locktorture_runnable, "Start locktorture at boot"); 92MODULE_PARM_DESC(locktorture_runnable, "Start locktorture at module init");
93 93
94/* Forward reference. */ 94/* Forward reference. */
95static void lock_torture_cleanup(void); 95static void lock_torture_cleanup(void);
@@ -219,7 +219,8 @@ static int lock_torture_writer(void *arg)
219 set_user_nice(current, MAX_NICE); 219 set_user_nice(current, MAX_NICE);
220 220
221 do { 221 do {
222 schedule_timeout_uninterruptible(1); 222 if ((torture_random(&rand) & 0xfffff) == 0)
223 schedule_timeout_uninterruptible(1);
223 cur_ops->writelock(); 224 cur_ops->writelock();
224 if (WARN_ON_ONCE(lock_is_write_held)) 225 if (WARN_ON_ONCE(lock_is_write_held))
225 lwsp->n_write_lock_fail++; 226 lwsp->n_write_lock_fail++;
@@ -354,7 +355,8 @@ static int __init lock_torture_init(void)
354 &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops, 355 &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops,
355 }; 356 };
356 357
357 torture_init_begin(torture_type, verbose, &locktorture_runnable); 358 if (!torture_init_begin(torture_type, verbose, &locktorture_runnable))
359 return -EBUSY;
358 360
359 /* Process args and tell the world that the torturer is on the job. */ 361 /* Process args and tell the world that the torturer is on the job. */
360 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { 362 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index aa4dff04b594..a620d4d08ca6 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -343,9 +343,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
343 * top_waiter can be NULL, when we are in the deboosting 343 * top_waiter can be NULL, when we are in the deboosting
344 * mode! 344 * mode!
345 */ 345 */
346 if (top_waiter && (!task_has_pi_waiters(task) || 346 if (top_waiter) {
347 top_waiter != task_top_pi_waiter(task))) 347 if (!task_has_pi_waiters(task))
348 goto out_unlock_pi; 348 goto out_unlock_pi;
349 /*
350 * If deadlock detection is off, we stop here if we
351 * are not the top pi waiter of the task.
352 */
353 if (!detect_deadlock && top_waiter != task_top_pi_waiter(task))
354 goto out_unlock_pi;
355 }
349 356
350 /* 357 /*
351 * When deadlock detection is off then we check, if further 358 * When deadlock detection is off then we check, if further
@@ -361,7 +368,12 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
361 goto retry; 368 goto retry;
362 } 369 }
363 370
364 /* Deadlock detection */ 371 /*
372 * Deadlock detection. If the lock is the same as the original
373 * lock which caused us to walk the lock chain or if the
374 * current lock is owned by the task which initiated the chain
375 * walk, we detected a deadlock.
376 */
365 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { 377 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
366 debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); 378 debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
367 raw_spin_unlock(&lock->wait_lock); 379 raw_spin_unlock(&lock->wait_lock);
@@ -527,6 +539,18 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
527 unsigned long flags; 539 unsigned long flags;
528 int chain_walk = 0, res; 540 int chain_walk = 0, res;
529 541
542 /*
543 * Early deadlock detection. We really don't want the task to
544 * enqueue on itself just to untangle the mess later. It's not
545 * only an optimization. We drop the locks, so another waiter
546 * can come in before the chain walk detects the deadlock. So
547 * the other will detect the deadlock and return -EDEADLOCK,
548 * which is wrong, as the other waiter is not in a deadlock
549 * situation.
550 */
551 if (detect_deadlock && owner == task)
552 return -EDEADLK;
553
530 raw_spin_lock_irqsave(&task->pi_lock, flags); 554 raw_spin_lock_irqsave(&task->pi_lock, flags);
531 __rt_mutex_adjust_prio(task); 555 __rt_mutex_adjust_prio(task);
532 waiter->task = task; 556 waiter->task = task;
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 1d66e08e897d..b4219ff87b8c 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -12,6 +12,55 @@
12#include <linux/export.h> 12#include <linux/export.h>
13 13
14/* 14/*
15 * Guide to the rw_semaphore's count field for common values.
16 * (32-bit case illustrated, similar for 64-bit)
17 *
18 * 0x0000000X (1) X readers active or attempting lock, no writer waiting
19 * X = #active_readers + #readers attempting to lock
20 * (X*ACTIVE_BIAS)
21 *
22 * 0x00000000 rwsem is unlocked, and no one is waiting for the lock or
23 * attempting to read lock or write lock.
24 *
25 * 0xffff000X (1) X readers active or attempting lock, with waiters for lock
26 * X = #active readers + # readers attempting lock
27 * (X*ACTIVE_BIAS + WAITING_BIAS)
28 * (2) 1 writer attempting lock, no waiters for lock
29 * X-1 = #active readers + #readers attempting lock
30 * ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
31 * (3) 1 writer active, no waiters for lock
32 * X-1 = #active readers + #readers attempting lock
33 * ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
34 *
35 * 0xffff0001 (1) 1 reader active or attempting lock, waiters for lock
36 * (WAITING_BIAS + ACTIVE_BIAS)
37 * (2) 1 writer active or attempting lock, no waiters for lock
38 * (ACTIVE_WRITE_BIAS)
39 *
40 * 0xffff0000 (1) There are writers or readers queued but none active
41 * or in the process of attempting lock.
42 * (WAITING_BIAS)
43 * Note: writer can attempt to steal lock for this count by adding
44 * ACTIVE_WRITE_BIAS in cmpxchg and checking the old count
45 *
46 * 0xfffe0001 (1) 1 writer active, or attempting lock. Waiters on queue.
47 * (ACTIVE_WRITE_BIAS + WAITING_BIAS)
48 *
49 * Note: Readers attempt to lock by adding ACTIVE_BIAS in down_read and checking
50 * the count becomes more than 0 for successful lock acquisition,
51 * i.e. the case where there are only readers or nobody has lock.
52 * (1st and 2nd case above).
53 *
54 * Writers attempt to lock by adding ACTIVE_WRITE_BIAS in down_write and
55 * checking the count becomes ACTIVE_WRITE_BIAS for successful lock
56 * acquisition (i.e. nobody else has lock or attempts lock). If
57 * unsuccessful, in rwsem_down_write_failed, we'll check to see if there
58 * are only waiters but none active (5th case above), and attempt to
59 * steal the lock.
60 *
61 */
62
63/*
15 * Initialize an rwsem: 64 * Initialize an rwsem:
16 */ 65 */
17void __init_rwsem(struct rw_semaphore *sem, const char *name, 66void __init_rwsem(struct rw_semaphore *sem, const char *name,
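
The new comment documents how the rwsem count field encodes readers, writers and waiters. A standalone sketch (not part of the patch) that reproduces the 32-bit arithmetic with the bias values the 32-bit rwsem implementation uses, printing several of the states from the table above:

/* Reproduce the 32-bit rwsem count encodings documented above. */
#include <stdio.h>
#include <stdint.h>

#define RWSEM_ACTIVE_BIAS	 0x00000001L
#define RWSEM_WAITING_BIAS	(-0x00010000L)
#define RWSEM_ACTIVE_WRITE_BIAS	(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

static void show(const char *what, long count)
{
	printf("%-30s 0x%08x\n", what, (unsigned int)(uint32_t)count);
}

int main(void)
{
	show("unlocked", 0);
	show("3 readers, no waiters", 3 * RWSEM_ACTIVE_BIAS);
	show("3 readers, waiters queued", 3 * RWSEM_ACTIVE_BIAS + RWSEM_WAITING_BIAS);
	show("1 writer, no waiters", RWSEM_ACTIVE_WRITE_BIAS);
	show("waiters queued, none active", RWSEM_WAITING_BIAS);
	show("1 writer, waiters queued", RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS);
	return 0;
}

A writer steals the lock from the "waiters queued, none active" state by cmpxchg()ing ACTIVE_WRITE_BIAS onto the count, exactly as the note in the comment describes.
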
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 7228258b85ec..221229cf0190 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2413,6 +2413,7 @@ int unregister_console(struct console *console)
2413 if (console_drivers != NULL && console->flags & CON_CONSDEV) 2413 if (console_drivers != NULL && console->flags & CON_CONSDEV)
2414 console_drivers->flags |= CON_CONSDEV; 2414 console_drivers->flags |= CON_CONSDEV;
2415 2415
2416 console->flags &= ~CON_ENABLED;
2416 console_unlock(); 2417 console_unlock();
2417 console_sysfs_notify(); 2418 console_sysfs_notify();
2418 return res; 2419 return res;
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index bd30bc61bc05..7fa34f86e5ba 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -58,9 +58,11 @@ torture_param(int, fqs_duration, 0,
58 "Duration of fqs bursts (us), 0 to disable"); 58 "Duration of fqs bursts (us), 0 to disable");
59torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)"); 59torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)");
60torture_param(int, fqs_stutter, 3, "Wait time between fqs bursts (s)"); 60torture_param(int, fqs_stutter, 3, "Wait time between fqs bursts (s)");
61torture_param(bool, gp_cond, false, "Use conditional/async GP wait primitives");
61torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); 62torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
62torture_param(bool, gp_normal, false, 63torture_param(bool, gp_normal, false,
63 "Use normal (non-expedited) GP wait primitives"); 64 "Use normal (non-expedited) GP wait primitives");
65torture_param(bool, gp_sync, false, "Use synchronous GP wait primitives");
64torture_param(int, irqreader, 1, "Allow RCU readers from irq handlers"); 66torture_param(int, irqreader, 1, "Allow RCU readers from irq handlers");
65torture_param(int, n_barrier_cbs, 0, 67torture_param(int, n_barrier_cbs, 0,
66 "# of callbacks/kthreads for barrier testing"); 68 "# of callbacks/kthreads for barrier testing");
@@ -138,6 +140,18 @@ static long n_barrier_attempts;
138static long n_barrier_successes; 140static long n_barrier_successes;
139static struct list_head rcu_torture_removed; 141static struct list_head rcu_torture_removed;
140 142
143static int rcu_torture_writer_state;
144#define RTWS_FIXED_DELAY 0
145#define RTWS_DELAY 1
146#define RTWS_REPLACE 2
147#define RTWS_DEF_FREE 3
148#define RTWS_EXP_SYNC 4
149#define RTWS_COND_GET 5
150#define RTWS_COND_SYNC 6
151#define RTWS_SYNC 7
152#define RTWS_STUTTER 8
153#define RTWS_STOPPING 9
154
141#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE) 155#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE)
142#define RCUTORTURE_RUNNABLE_INIT 1 156#define RCUTORTURE_RUNNABLE_INIT 1
143#else 157#else
@@ -214,6 +228,7 @@ rcu_torture_free(struct rcu_torture *p)
214 */ 228 */
215 229
216struct rcu_torture_ops { 230struct rcu_torture_ops {
231 int ttype;
217 void (*init)(void); 232 void (*init)(void);
218 int (*readlock)(void); 233 int (*readlock)(void);
219 void (*read_delay)(struct torture_random_state *rrsp); 234 void (*read_delay)(struct torture_random_state *rrsp);
@@ -222,6 +237,8 @@ struct rcu_torture_ops {
222 void (*deferred_free)(struct rcu_torture *p); 237 void (*deferred_free)(struct rcu_torture *p);
223 void (*sync)(void); 238 void (*sync)(void);
224 void (*exp_sync)(void); 239 void (*exp_sync)(void);
240 unsigned long (*get_state)(void);
241 void (*cond_sync)(unsigned long oldstate);
225 void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); 242 void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
226 void (*cb_barrier)(void); 243 void (*cb_barrier)(void);
227 void (*fqs)(void); 244 void (*fqs)(void);
@@ -273,10 +290,48 @@ static int rcu_torture_completed(void)
273 return rcu_batches_completed(); 290 return rcu_batches_completed();
274} 291}
275 292
293/*
294 * Update callback in the pipe. This should be invoked after a grace period.
295 */
296static bool
297rcu_torture_pipe_update_one(struct rcu_torture *rp)
298{
299 int i;
300
301 i = rp->rtort_pipe_count;
302 if (i > RCU_TORTURE_PIPE_LEN)
303 i = RCU_TORTURE_PIPE_LEN;
304 atomic_inc(&rcu_torture_wcount[i]);
305 if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) {
306 rp->rtort_mbtest = 0;
307 return true;
308 }
309 return false;
310}
311
312/*
313 * Update all callbacks in the pipe. Suitable for synchronous grace-period
314 * primitives.
315 */
316static void
317rcu_torture_pipe_update(struct rcu_torture *old_rp)
318{
319 struct rcu_torture *rp;
320 struct rcu_torture *rp1;
321
322 if (old_rp)
323 list_add(&old_rp->rtort_free, &rcu_torture_removed);
324 list_for_each_entry_safe(rp, rp1, &rcu_torture_removed, rtort_free) {
325 if (rcu_torture_pipe_update_one(rp)) {
326 list_del(&rp->rtort_free);
327 rcu_torture_free(rp);
328 }
329 }
330}
331
276static void 332static void
277rcu_torture_cb(struct rcu_head *p) 333rcu_torture_cb(struct rcu_head *p)
278{ 334{
279 int i;
280 struct rcu_torture *rp = container_of(p, struct rcu_torture, rtort_rcu); 335 struct rcu_torture *rp = container_of(p, struct rcu_torture, rtort_rcu);
281 336
282 if (torture_must_stop_irq()) { 337 if (torture_must_stop_irq()) {
@@ -284,16 +339,10 @@ rcu_torture_cb(struct rcu_head *p)
284 /* The next initialization will pick up the pieces. */ 339 /* The next initialization will pick up the pieces. */
285 return; 340 return;
286 } 341 }
287 i = rp->rtort_pipe_count; 342 if (rcu_torture_pipe_update_one(rp))
288 if (i > RCU_TORTURE_PIPE_LEN)
289 i = RCU_TORTURE_PIPE_LEN;
290 atomic_inc(&rcu_torture_wcount[i]);
291 if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) {
292 rp->rtort_mbtest = 0;
293 rcu_torture_free(rp); 343 rcu_torture_free(rp);
294 } else { 344 else
295 cur_ops->deferred_free(rp); 345 cur_ops->deferred_free(rp);
296 }
297} 346}
298 347
299static int rcu_no_completed(void) 348static int rcu_no_completed(void)
@@ -312,6 +361,7 @@ static void rcu_sync_torture_init(void)
312} 361}
313 362
314static struct rcu_torture_ops rcu_ops = { 363static struct rcu_torture_ops rcu_ops = {
364 .ttype = RCU_FLAVOR,
315 .init = rcu_sync_torture_init, 365 .init = rcu_sync_torture_init,
316 .readlock = rcu_torture_read_lock, 366 .readlock = rcu_torture_read_lock,
317 .read_delay = rcu_read_delay, 367 .read_delay = rcu_read_delay,
@@ -320,6 +370,8 @@ static struct rcu_torture_ops rcu_ops = {
320 .deferred_free = rcu_torture_deferred_free, 370 .deferred_free = rcu_torture_deferred_free,
321 .sync = synchronize_rcu, 371 .sync = synchronize_rcu,
322 .exp_sync = synchronize_rcu_expedited, 372 .exp_sync = synchronize_rcu_expedited,
373 .get_state = get_state_synchronize_rcu,
374 .cond_sync = cond_synchronize_rcu,
323 .call = call_rcu, 375 .call = call_rcu,
324 .cb_barrier = rcu_barrier, 376 .cb_barrier = rcu_barrier,
325 .fqs = rcu_force_quiescent_state, 377 .fqs = rcu_force_quiescent_state,
@@ -355,6 +407,7 @@ static void rcu_bh_torture_deferred_free(struct rcu_torture *p)
355} 407}
356 408
357static struct rcu_torture_ops rcu_bh_ops = { 409static struct rcu_torture_ops rcu_bh_ops = {
410 .ttype = RCU_BH_FLAVOR,
358 .init = rcu_sync_torture_init, 411 .init = rcu_sync_torture_init,
359 .readlock = rcu_bh_torture_read_lock, 412 .readlock = rcu_bh_torture_read_lock,
360 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 413 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
@@ -397,6 +450,7 @@ call_rcu_busted(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
397} 450}
398 451
399static struct rcu_torture_ops rcu_busted_ops = { 452static struct rcu_torture_ops rcu_busted_ops = {
453 .ttype = INVALID_RCU_FLAVOR,
400 .init = rcu_sync_torture_init, 454 .init = rcu_sync_torture_init,
401 .readlock = rcu_torture_read_lock, 455 .readlock = rcu_torture_read_lock,
402 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 456 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
@@ -479,9 +533,11 @@ static void srcu_torture_stats(char *page)
479 page += sprintf(page, "%s%s per-CPU(idx=%d):", 533 page += sprintf(page, "%s%s per-CPU(idx=%d):",
480 torture_type, TORTURE_FLAG, idx); 534 torture_type, TORTURE_FLAG, idx);
481 for_each_possible_cpu(cpu) { 535 for_each_possible_cpu(cpu) {
482 page += sprintf(page, " %d(%lu,%lu)", cpu, 536 long c0, c1;
483 per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx], 537
484 per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]); 538 c0 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx];
539 c1 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx];
540 page += sprintf(page, " %d(%ld,%ld)", cpu, c0, c1);
485 } 541 }
486 sprintf(page, "\n"); 542 sprintf(page, "\n");
487} 543}
@@ -492,6 +548,7 @@ static void srcu_torture_synchronize_expedited(void)
492} 548}
493 549
494static struct rcu_torture_ops srcu_ops = { 550static struct rcu_torture_ops srcu_ops = {
551 .ttype = SRCU_FLAVOR,
495 .init = rcu_sync_torture_init, 552 .init = rcu_sync_torture_init,
496 .readlock = srcu_torture_read_lock, 553 .readlock = srcu_torture_read_lock,
497 .read_delay = srcu_read_delay, 554 .read_delay = srcu_read_delay,
@@ -527,6 +584,7 @@ static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
527} 584}
528 585
529static struct rcu_torture_ops sched_ops = { 586static struct rcu_torture_ops sched_ops = {
587 .ttype = RCU_SCHED_FLAVOR,
530 .init = rcu_sync_torture_init, 588 .init = rcu_sync_torture_init,
531 .readlock = sched_torture_read_lock, 589 .readlock = sched_torture_read_lock,
532 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 590 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
@@ -688,23 +746,59 @@ rcu_torture_fqs(void *arg)
688static int 746static int
689rcu_torture_writer(void *arg) 747rcu_torture_writer(void *arg)
690{ 748{
691 bool exp; 749 unsigned long gp_snap;
750 bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal;
751 bool gp_sync1 = gp_sync;
692 int i; 752 int i;
693 struct rcu_torture *rp; 753 struct rcu_torture *rp;
694 struct rcu_torture *rp1;
695 struct rcu_torture *old_rp; 754 struct rcu_torture *old_rp;
696 static DEFINE_TORTURE_RANDOM(rand); 755 static DEFINE_TORTURE_RANDOM(rand);
756 int synctype[] = { RTWS_DEF_FREE, RTWS_EXP_SYNC,
757 RTWS_COND_GET, RTWS_SYNC };
758 int nsynctypes = 0;
697 759
698 VERBOSE_TOROUT_STRING("rcu_torture_writer task started"); 760 VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
699 set_user_nice(current, MAX_NICE); 761
762 /* Initialize synctype[] array. If none set, take default. */
763 if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync)
764 gp_cond1 = gp_exp1 = gp_normal1 = gp_sync1 = true;
765 if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync)
766 synctype[nsynctypes++] = RTWS_COND_GET;
767 else if (gp_cond && (!cur_ops->get_state || !cur_ops->cond_sync))
768 pr_alert("rcu_torture_writer: gp_cond without primitives.\n");
769 if (gp_exp1 && cur_ops->exp_sync)
770 synctype[nsynctypes++] = RTWS_EXP_SYNC;
771 else if (gp_exp && !cur_ops->exp_sync)
772 pr_alert("rcu_torture_writer: gp_exp without primitives.\n");
773 if (gp_normal1 && cur_ops->deferred_free)
774 synctype[nsynctypes++] = RTWS_DEF_FREE;
775 else if (gp_normal && !cur_ops->deferred_free)
776 pr_alert("rcu_torture_writer: gp_normal without primitives.\n");
777 if (gp_sync1 && cur_ops->sync)
778 synctype[nsynctypes++] = RTWS_SYNC;
779 else if (gp_sync && !cur_ops->sync)
780 pr_alert("rcu_torture_writer: gp_sync without primitives.\n");
781 if (WARN_ONCE(nsynctypes == 0,
782 "rcu_torture_writer: No update-side primitives.\n")) {
783 /*
 784 * No update-side primitives, so don't try updating.
785 * The resulting test won't be testing much, hence the
786 * above WARN_ONCE().
787 */
788 rcu_torture_writer_state = RTWS_STOPPING;
789 torture_kthread_stopping("rcu_torture_writer");
790 }
700 791
701 do { 792 do {
793 rcu_torture_writer_state = RTWS_FIXED_DELAY;
702 schedule_timeout_uninterruptible(1); 794 schedule_timeout_uninterruptible(1);
703 rp = rcu_torture_alloc(); 795 rp = rcu_torture_alloc();
704 if (rp == NULL) 796 if (rp == NULL)
705 continue; 797 continue;
706 rp->rtort_pipe_count = 0; 798 rp->rtort_pipe_count = 0;
799 rcu_torture_writer_state = RTWS_DELAY;
707 udelay(torture_random(&rand) & 0x3ff); 800 udelay(torture_random(&rand) & 0x3ff);
801 rcu_torture_writer_state = RTWS_REPLACE;
708 old_rp = rcu_dereference_check(rcu_torture_current, 802 old_rp = rcu_dereference_check(rcu_torture_current,
709 current == writer_task); 803 current == writer_task);
710 rp->rtort_mbtest = 1; 804 rp->rtort_mbtest = 1;
@@ -716,35 +810,42 @@ rcu_torture_writer(void *arg)
716 i = RCU_TORTURE_PIPE_LEN; 810 i = RCU_TORTURE_PIPE_LEN;
717 atomic_inc(&rcu_torture_wcount[i]); 811 atomic_inc(&rcu_torture_wcount[i]);
718 old_rp->rtort_pipe_count++; 812 old_rp->rtort_pipe_count++;
719 if (gp_normal == gp_exp) 813 switch (synctype[torture_random(&rand) % nsynctypes]) {
720 exp = !!(torture_random(&rand) & 0x80); 814 case RTWS_DEF_FREE:
721 else 815 rcu_torture_writer_state = RTWS_DEF_FREE;
722 exp = gp_exp;
723 if (!exp) {
724 cur_ops->deferred_free(old_rp); 816 cur_ops->deferred_free(old_rp);
725 } else { 817 break;
818 case RTWS_EXP_SYNC:
819 rcu_torture_writer_state = RTWS_EXP_SYNC;
726 cur_ops->exp_sync(); 820 cur_ops->exp_sync();
727 list_add(&old_rp->rtort_free, 821 rcu_torture_pipe_update(old_rp);
728 &rcu_torture_removed); 822 break;
729 list_for_each_entry_safe(rp, rp1, 823 case RTWS_COND_GET:
730 &rcu_torture_removed, 824 rcu_torture_writer_state = RTWS_COND_GET;
731 rtort_free) { 825 gp_snap = cur_ops->get_state();
732 i = rp->rtort_pipe_count; 826 i = torture_random(&rand) % 16;
733 if (i > RCU_TORTURE_PIPE_LEN) 827 if (i != 0)
734 i = RCU_TORTURE_PIPE_LEN; 828 schedule_timeout_interruptible(i);
735 atomic_inc(&rcu_torture_wcount[i]); 829 udelay(torture_random(&rand) % 1000);
736 if (++rp->rtort_pipe_count >= 830 rcu_torture_writer_state = RTWS_COND_SYNC;
737 RCU_TORTURE_PIPE_LEN) { 831 cur_ops->cond_sync(gp_snap);
738 rp->rtort_mbtest = 0; 832 rcu_torture_pipe_update(old_rp);
739 list_del(&rp->rtort_free); 833 break;
740 rcu_torture_free(rp); 834 case RTWS_SYNC:
741 } 835 rcu_torture_writer_state = RTWS_SYNC;
742 } 836 cur_ops->sync();
837 rcu_torture_pipe_update(old_rp);
838 break;
839 default:
840 WARN_ON_ONCE(1);
841 break;
743 } 842 }
744 } 843 }
745 rcutorture_record_progress(++rcu_torture_current_version); 844 rcutorture_record_progress(++rcu_torture_current_version);
845 rcu_torture_writer_state = RTWS_STUTTER;
746 stutter_wait("rcu_torture_writer"); 846 stutter_wait("rcu_torture_writer");
747 } while (!torture_must_stop()); 847 } while (!torture_must_stop());
848 rcu_torture_writer_state = RTWS_STOPPING;
748 torture_kthread_stopping("rcu_torture_writer"); 849 torture_kthread_stopping("rcu_torture_writer");
749 return 0; 850 return 0;
750} 851}
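[Editor's note: not part of the patch.] The RTWS_COND_GET/RTWS_COND_SYNC states above exercise the conditional grace-period primitives. A minimal sketch of the snapshot-then-conditionally-wait pattern that get_state_synchronize_rcu() and cond_synchronize_rcu() provide; "struct foo" and example_cond_free() are invented names, not rcutorture symbols:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
        int data;
};

static void example_cond_free(struct foo *old)
{
        unsigned long snap;

        snap = get_state_synchronize_rcu();     /* snapshot the grace-period state */
        /* ...unrelated update-side work can run here, overlapping the GP... */
        cond_synchronize_rcu(snap);             /* blocks only if no full GP elapsed since snap */
        kfree(old);                             /* all readers that could see 'old' are done */
}

The writer loop randomizes among this pattern, call_rcu()-style deferred freeing, expedited waits, and fully synchronous waits, so every available update-side path gets coverage.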
@@ -784,7 +885,7 @@ rcu_torture_fakewriter(void *arg)
784 return 0; 885 return 0;
785} 886}
786 887
787void rcutorture_trace_dump(void) 888static void rcutorture_trace_dump(void)
788{ 889{
789 static atomic_t beenhere = ATOMIC_INIT(0); 890 static atomic_t beenhere = ATOMIC_INIT(0);
790 891
@@ -918,11 +1019,13 @@ rcu_torture_reader(void *arg)
918 __this_cpu_inc(rcu_torture_batch[completed]); 1019 __this_cpu_inc(rcu_torture_batch[completed]);
919 preempt_enable(); 1020 preempt_enable();
920 cur_ops->readunlock(idx); 1021 cur_ops->readunlock(idx);
921 schedule(); 1022 cond_resched();
922 stutter_wait("rcu_torture_reader"); 1023 stutter_wait("rcu_torture_reader");
923 } while (!torture_must_stop()); 1024 } while (!torture_must_stop());
924 if (irqreader && cur_ops->irq_capable) 1025 if (irqreader && cur_ops->irq_capable) {
925 del_timer_sync(&t); 1026 del_timer_sync(&t);
1027 destroy_timer_on_stack(&t);
1028 }
926 torture_kthread_stopping("rcu_torture_reader"); 1029 torture_kthread_stopping("rcu_torture_reader");
927 return 0; 1030 return 0;
928} 1031}
@@ -937,6 +1040,7 @@ rcu_torture_printk(char *page)
937 int i; 1040 int i;
938 long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; 1041 long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
939 long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; 1042 long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
1043 static unsigned long rtcv_snap = ULONG_MAX;
940 1044
941 for_each_possible_cpu(cpu) { 1045 for_each_possible_cpu(cpu) {
942 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { 1046 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
@@ -997,6 +1101,22 @@ rcu_torture_printk(char *page)
997 page += sprintf(page, "\n"); 1101 page += sprintf(page, "\n");
998 if (cur_ops->stats) 1102 if (cur_ops->stats)
999 cur_ops->stats(page); 1103 cur_ops->stats(page);
1104 if (rtcv_snap == rcu_torture_current_version &&
1105 rcu_torture_current != NULL) {
1106 int __maybe_unused flags;
1107 unsigned long __maybe_unused gpnum;
1108 unsigned long __maybe_unused completed;
1109
1110 rcutorture_get_gp_data(cur_ops->ttype,
1111 &flags, &gpnum, &completed);
1112 page += sprintf(page,
1113 "??? Writer stall state %d g%lu c%lu f%#x\n",
1114 rcu_torture_writer_state,
1115 gpnum, completed, flags);
1116 show_rcu_gp_kthreads();
1117 rcutorture_trace_dump();
1118 }
1119 rtcv_snap = rcu_torture_current_version;
1000} 1120}
1001 1121
1002/* 1122/*
@@ -1146,7 +1266,7 @@ static int __init rcu_torture_stall_init(void)
1146} 1266}
1147 1267
1148/* Callback function for RCU barrier testing. */ 1268/* Callback function for RCU barrier testing. */
1149void rcu_torture_barrier_cbf(struct rcu_head *rcu) 1269static void rcu_torture_barrier_cbf(struct rcu_head *rcu)
1150{ 1270{
1151 atomic_inc(&barrier_cbs_invoked); 1271 atomic_inc(&barrier_cbs_invoked);
1152} 1272}
@@ -1416,7 +1536,8 @@ rcu_torture_init(void)
1416 &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops, 1536 &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops,
1417 }; 1537 };
1418 1538
1419 torture_init_begin(torture_type, verbose, &rcutorture_runnable); 1539 if (!torture_init_begin(torture_type, verbose, &rcutorture_runnable))
1540 return -EBUSY;
1420 1541
1421 /* Process args and tell the world that the torturer is on the job. */ 1542 /* Process args and tell the world that the torturer is on the job. */
1422 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { 1543 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
@@ -1441,10 +1562,13 @@ rcu_torture_init(void)
1441 if (cur_ops->init) 1562 if (cur_ops->init)
1442 cur_ops->init(); /* no "goto unwind" prior to this point!!! */ 1563 cur_ops->init(); /* no "goto unwind" prior to this point!!! */
1443 1564
1444 if (nreaders >= 0) 1565 if (nreaders >= 0) {
1445 nrealreaders = nreaders; 1566 nrealreaders = nreaders;
1446 else 1567 } else {
1447 nrealreaders = 2 * num_online_cpus(); 1568 nrealreaders = num_online_cpus() - 1;
1569 if (nrealreaders <= 0)
1570 nrealreaders = 1;
1571 }
1448 rcu_torture_print_module_parms(cur_ops, "Start of test"); 1572 rcu_torture_print_module_parms(cur_ops, "Start of test");
1449 1573
1450 /* Set up the freelist. */ 1574 /* Set up the freelist. */
@@ -1533,7 +1657,8 @@ rcu_torture_init(void)
1533 fqs_duration = 0; 1657 fqs_duration = 0;
1534 if (fqs_duration) { 1658 if (fqs_duration) {
1535 /* Create the fqs thread */ 1659 /* Create the fqs thread */
1536 torture_create_kthread(rcu_torture_fqs, NULL, fqs_task); 1660 firsterr = torture_create_kthread(rcu_torture_fqs, NULL,
1661 fqs_task);
1537 if (firsterr) 1662 if (firsterr)
1538 goto unwind; 1663 goto unwind;
1539 } 1664 }
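[Editor's note: not part of the patch.] The new rcu_torture_writer_state plus the rtcv_snap check in rcu_torture_printk() act as a simple progress watchdog: if the writer's version counter has not advanced between two stats passes, the test dumps the writer state and grace-period data. The idea, stripped to its essentials with invented names:

#include <linux/kernel.h>
#include <linux/printk.h>

static unsigned long example_last_version = ULONG_MAX;

static void example_check_writer_progress(unsigned long version, int state)
{
        /* Complain only if nothing has moved since the previous stats pass. */
        if (example_last_version == version)
                pr_alert("??? writer stalled: version %lu, state %d\n",
                         version, state);
        example_last_version = version;
}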
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
index 431528520562..858c56569127 100644
--- a/kernel/rcu/tiny_plugin.h
+++ b/kernel/rcu/tiny_plugin.h
@@ -144,7 +144,7 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp)
144 return; 144 return;
145 rcp->ticks_this_gp++; 145 rcp->ticks_this_gp++;
146 j = jiffies; 146 j = jiffies;
147 js = rcp->jiffies_stall; 147 js = ACCESS_ONCE(rcp->jiffies_stall);
148 if (*rcp->curtail && ULONG_CMP_GE(j, js)) { 148 if (*rcp->curtail && ULONG_CMP_GE(j, js)) {
149 pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n", 149 pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
150 rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting, 150 rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting,
@@ -152,17 +152,17 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp)
152 dump_stack(); 152 dump_stack();
153 } 153 }
154 if (*rcp->curtail && ULONG_CMP_GE(j, js)) 154 if (*rcp->curtail && ULONG_CMP_GE(j, js))
155 rcp->jiffies_stall = jiffies + 155 ACCESS_ONCE(rcp->jiffies_stall) = jiffies +
156 3 * rcu_jiffies_till_stall_check() + 3; 156 3 * rcu_jiffies_till_stall_check() + 3;
157 else if (ULONG_CMP_GE(j, js)) 157 else if (ULONG_CMP_GE(j, js))
158 rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check(); 158 ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check();
159} 159}
160 160
161static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp) 161static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
162{ 162{
163 rcp->ticks_this_gp = 0; 163 rcp->ticks_this_gp = 0;
164 rcp->gp_start = jiffies; 164 rcp->gp_start = jiffies;
165 rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check(); 165 ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check();
166} 166}
167 167
168static void check_cpu_stalls(void) 168static void check_cpu_stalls(void)
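[Editor's note: not part of the patch.] The tiny_plugin.h hunks wrap every lockless read and write of ->jiffies_stall in ACCESS_ONCE() so the compiler cannot tear, refetch, or cache those accesses. A rough illustration of the idiom only; the "deadline" field and helpers are invented:

#include <linux/compiler.h>
#include <linux/jiffies.h>
#include <linux/rcupdate.h>     /* ULONG_CMP_GE() */
#include <linux/types.h>

struct example_ctrl {
        unsigned long deadline;         /* read and written without a lock */
};

static void example_arm(struct example_ctrl *p, unsigned long delta)
{
        ACCESS_ONCE(p->deadline) = jiffies + delta;     /* single, untorn store */
}

static bool example_expired(struct example_ctrl *p)
{
        return ULONG_CMP_GE(jiffies, ACCESS_ONCE(p->deadline)); /* single load */
}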
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 0c47e300210a..f1ba77363fbb 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -101,7 +101,7 @@ DEFINE_PER_CPU(struct rcu_data, sname##_data)
101RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); 101RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
102RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); 102RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
103 103
104static struct rcu_state *rcu_state; 104static struct rcu_state *rcu_state_p;
105LIST_HEAD(rcu_struct_flavors); 105LIST_HEAD(rcu_struct_flavors);
106 106
107/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ 107/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
@@ -243,7 +243,7 @@ static ulong jiffies_till_next_fqs = ULONG_MAX;
243module_param(jiffies_till_first_fqs, ulong, 0644); 243module_param(jiffies_till_first_fqs, ulong, 0644);
244module_param(jiffies_till_next_fqs, ulong, 0644); 244module_param(jiffies_till_next_fqs, ulong, 0644);
245 245
246static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, 246static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
247 struct rcu_data *rdp); 247 struct rcu_data *rdp);
248static void force_qs_rnp(struct rcu_state *rsp, 248static void force_qs_rnp(struct rcu_state *rsp,
249 int (*f)(struct rcu_data *rsp, bool *isidle, 249 int (*f)(struct rcu_data *rsp, bool *isidle,
@@ -271,6 +271,15 @@ long rcu_batches_completed_bh(void)
271EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); 271EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
272 272
273/* 273/*
274 * Force a quiescent state.
275 */
276void rcu_force_quiescent_state(void)
277{
278 force_quiescent_state(rcu_state_p);
279}
280EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
281
282/*
274 * Force a quiescent state for RCU BH. 283 * Force a quiescent state for RCU BH.
275 */ 284 */
276void rcu_bh_force_quiescent_state(void) 285void rcu_bh_force_quiescent_state(void)
@@ -280,6 +289,21 @@ void rcu_bh_force_quiescent_state(void)
280EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); 289EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
281 290
282/* 291/*
292 * Show the state of the grace-period kthreads.
293 */
294void show_rcu_gp_kthreads(void)
295{
296 struct rcu_state *rsp;
297
298 for_each_rcu_flavor(rsp) {
299 pr_info("%s: wait state: %d ->state: %#lx\n",
300 rsp->name, rsp->gp_state, rsp->gp_kthread->state);
301 /* sched_show_task(rsp->gp_kthread); */
302 }
303}
304EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
305
306/*
283 * Record the number of times rcutorture tests have been initiated and 307 * Record the number of times rcutorture tests have been initiated and
284 * terminated. This information allows the debugfs tracing stats to be 308 * terminated. This information allows the debugfs tracing stats to be
285 * correlated to the rcutorture messages, even when the rcutorture module 309 * correlated to the rcutorture messages, even when the rcutorture module
@@ -294,6 +318,39 @@ void rcutorture_record_test_transition(void)
294EXPORT_SYMBOL_GPL(rcutorture_record_test_transition); 318EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);
295 319
296/* 320/*
321 * Send along grace-period-related data for rcutorture diagnostics.
322 */
323void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
324 unsigned long *gpnum, unsigned long *completed)
325{
326 struct rcu_state *rsp = NULL;
327
328 switch (test_type) {
329 case RCU_FLAVOR:
330 rsp = rcu_state_p;
331 break;
332 case RCU_BH_FLAVOR:
333 rsp = &rcu_bh_state;
334 break;
335 case RCU_SCHED_FLAVOR:
336 rsp = &rcu_sched_state;
337 break;
338 default:
339 break;
340 }
341 if (rsp != NULL) {
342 *flags = ACCESS_ONCE(rsp->gp_flags);
343 *gpnum = ACCESS_ONCE(rsp->gpnum);
344 *completed = ACCESS_ONCE(rsp->completed);
345 return;
346 }
347 *flags = 0;
348 *gpnum = 0;
349 *completed = 0;
350}
351EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
352
353/*
297 * Record the number of writer passes through the current rcutorture test. 354 * Record the number of writer passes through the current rcutorture test.
298 * This is also used to correlate debugfs tracing stats with the rcutorture 355 * This is also used to correlate debugfs tracing stats with the rcutorture
299 * messages. 356 * messages.
@@ -324,6 +381,28 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
324} 381}
325 382
326/* 383/*
384 * Return the root node of the specified rcu_state structure.
385 */
386static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
387{
388 return &rsp->node[0];
389}
390
391/*
392 * Is there any need for future grace periods?
393 * Interrupts must be disabled. If the caller does not hold the root
394 * rnp_node structure's ->lock, the results are advisory only.
395 */
396static int rcu_future_needs_gp(struct rcu_state *rsp)
397{
398 struct rcu_node *rnp = rcu_get_root(rsp);
399 int idx = (ACCESS_ONCE(rnp->completed) + 1) & 0x1;
400 int *fp = &rnp->need_future_gp[idx];
401
402 return ACCESS_ONCE(*fp);
403}
404
405/*
327 * Does the current CPU require a not-yet-started grace period? 406 * Does the current CPU require a not-yet-started grace period?
328 * The caller must have disabled interrupts to prevent races with 407 * The caller must have disabled interrupts to prevent races with
329 * normal callback registry. 408 * normal callback registry.
@@ -335,7 +414,7 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
335 414
336 if (rcu_gp_in_progress(rsp)) 415 if (rcu_gp_in_progress(rsp))
337 return 0; /* No, a grace period is already in progress. */ 416 return 0; /* No, a grace period is already in progress. */
338 if (rcu_nocb_needs_gp(rsp)) 417 if (rcu_future_needs_gp(rsp))
339 return 1; /* Yes, a no-CBs CPU needs one. */ 418 return 1; /* Yes, a no-CBs CPU needs one. */
340 if (!rdp->nxttail[RCU_NEXT_TAIL]) 419 if (!rdp->nxttail[RCU_NEXT_TAIL])
341 return 0; /* No, this is a no-CBs (or offline) CPU. */ 420 return 0; /* No, this is a no-CBs (or offline) CPU. */
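[Editor's note: not part of the patch.] The new rcu_future_needs_gp() helper above consults a two-slot array: a request for grace period number C is parked in slot (C & 0x1), so the check can run locklessly and is advisory unless the root rcu_node lock is held. Reduced to its essentials, with an invented structure name:

#include <linux/compiler.h>

struct example_node {
        unsigned long completed;        /* number of the most recently finished GP */
        int need_future_gp[2];          /* pending requests, indexed by GP number & 0x1 */
};

static int example_future_gp_needed(struct example_node *rnp)
{
        int idx = (ACCESS_ONCE(rnp->completed) + 1) & 0x1;

        return ACCESS_ONCE(rnp->need_future_gp[idx]);   /* advisory without rnp->lock */
}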
@@ -350,14 +429,6 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
350} 429}
351 430
352/* 431/*
353 * Return the root node of the specified rcu_state structure.
354 */
355static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
356{
357 return &rsp->node[0];
358}
359
360/*
361 * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state 432 * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state
362 * 433 *
363 * If the new value of the ->dynticks_nesting counter now is zero, 434 * If the new value of the ->dynticks_nesting counter now is zero,
@@ -387,9 +458,9 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
387 } 458 }
388 rcu_prepare_for_idle(smp_processor_id()); 459 rcu_prepare_for_idle(smp_processor_id());
389 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ 460 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
390 smp_mb__before_atomic_inc(); /* See above. */ 461 smp_mb__before_atomic(); /* See above. */
391 atomic_inc(&rdtp->dynticks); 462 atomic_inc(&rdtp->dynticks);
392 smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ 463 smp_mb__after_atomic(); /* Force ordering with next sojourn. */
393 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 464 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
394 465
395 /* 466 /*
@@ -507,10 +578,10 @@ void rcu_irq_exit(void)
507static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval, 578static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
508 int user) 579 int user)
509{ 580{
510 smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ 581 smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */
511 atomic_inc(&rdtp->dynticks); 582 atomic_inc(&rdtp->dynticks);
512 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 583 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
513 smp_mb__after_atomic_inc(); /* See above. */ 584 smp_mb__after_atomic(); /* See above. */
514 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); 585 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
515 rcu_cleanup_after_idle(smp_processor_id()); 586 rcu_cleanup_after_idle(smp_processor_id());
516 trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting); 587 trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
@@ -635,10 +706,10 @@ void rcu_nmi_enter(void)
635 (atomic_read(&rdtp->dynticks) & 0x1)) 706 (atomic_read(&rdtp->dynticks) & 0x1))
636 return; 707 return;
637 rdtp->dynticks_nmi_nesting++; 708 rdtp->dynticks_nmi_nesting++;
638 smp_mb__before_atomic_inc(); /* Force delay from prior write. */ 709 smp_mb__before_atomic(); /* Force delay from prior write. */
639 atomic_inc(&rdtp->dynticks); 710 atomic_inc(&rdtp->dynticks);
640 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 711 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
641 smp_mb__after_atomic_inc(); /* See above. */ 712 smp_mb__after_atomic(); /* See above. */
642 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); 713 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
643} 714}
644 715
@@ -657,9 +728,9 @@ void rcu_nmi_exit(void)
657 --rdtp->dynticks_nmi_nesting != 0) 728 --rdtp->dynticks_nmi_nesting != 0)
658 return; 729 return;
659 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ 730 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
660 smp_mb__before_atomic_inc(); /* See above. */ 731 smp_mb__before_atomic(); /* See above. */
661 atomic_inc(&rdtp->dynticks); 732 atomic_inc(&rdtp->dynticks);
662 smp_mb__after_atomic_inc(); /* Force delay to next write. */ 733 smp_mb__after_atomic(); /* Force delay to next write. */
663 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 734 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
664} 735}
665 736
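[Editor's note: not part of the patch.] These hunks are part of the tree-wide rename from smp_mb__{before,after}_atomic_{inc,dec}() to the generic smp_mb__before_atomic()/smp_mb__after_atomic(); the ordering guarantees around the ->dynticks counter are unchanged. A bare sketch of the pairing, using an illustrative counter rather than RCU's:

#include <linux/atomic.h>

static atomic_t example_phase = ATOMIC_INIT(0);

static void example_flip_phase(void)
{
        /* Earlier accesses must be visible before the counter changes... */
        smp_mb__before_atomic();
        atomic_inc(&example_phase);     /* atomic_inc() alone is not a full barrier */
        /* ...and the change must be visible before anything that follows. */
        smp_mb__after_atomic();
}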
@@ -758,7 +829,12 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
758{ 829{
759 rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); 830 rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
760 rcu_sysidle_check_cpu(rdp, isidle, maxj); 831 rcu_sysidle_check_cpu(rdp, isidle, maxj);
761 return (rdp->dynticks_snap & 0x1) == 0; 832 if ((rdp->dynticks_snap & 0x1) == 0) {
833 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
834 return 1;
835 } else {
836 return 0;
837 }
762} 838}
763 839
764/* 840/*
@@ -834,7 +910,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
834 * we will beat on the first one until it gets unstuck, then move 910 * we will beat on the first one until it gets unstuck, then move
835 * to the next. Only do this for the primary flavor of RCU. 911 * to the next. Only do this for the primary flavor of RCU.
836 */ 912 */
837 if (rdp->rsp == rcu_state && 913 if (rdp->rsp == rcu_state_p &&
838 ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { 914 ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
839 rdp->rsp->jiffies_resched += 5; 915 rdp->rsp->jiffies_resched += 5;
840 resched_cpu(rdp->cpu); 916 resched_cpu(rdp->cpu);
@@ -851,7 +927,7 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
851 rsp->gp_start = j; 927 rsp->gp_start = j;
852 smp_wmb(); /* Record start time before stall time. */ 928 smp_wmb(); /* Record start time before stall time. */
853 j1 = rcu_jiffies_till_stall_check(); 929 j1 = rcu_jiffies_till_stall_check();
854 rsp->jiffies_stall = j + j1; 930 ACCESS_ONCE(rsp->jiffies_stall) = j + j1;
855 rsp->jiffies_resched = j + j1 / 2; 931 rsp->jiffies_resched = j + j1 / 2;
856} 932}
857 933
@@ -890,12 +966,12 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
890 /* Only let one CPU complain about others per time interval. */ 966 /* Only let one CPU complain about others per time interval. */
891 967
892 raw_spin_lock_irqsave(&rnp->lock, flags); 968 raw_spin_lock_irqsave(&rnp->lock, flags);
893 delta = jiffies - rsp->jiffies_stall; 969 delta = jiffies - ACCESS_ONCE(rsp->jiffies_stall);
894 if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { 970 if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
895 raw_spin_unlock_irqrestore(&rnp->lock, flags); 971 raw_spin_unlock_irqrestore(&rnp->lock, flags);
896 return; 972 return;
897 } 973 }
898 rsp->jiffies_stall = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; 974 ACCESS_ONCE(rsp->jiffies_stall) = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
899 raw_spin_unlock_irqrestore(&rnp->lock, flags); 975 raw_spin_unlock_irqrestore(&rnp->lock, flags);
900 976
901 /* 977 /*
@@ -932,9 +1008,9 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
932 print_cpu_stall_info_end(); 1008 print_cpu_stall_info_end();
933 for_each_possible_cpu(cpu) 1009 for_each_possible_cpu(cpu)
934 totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; 1010 totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
935 pr_cont("(detected by %d, t=%ld jiffies, g=%lu, c=%lu, q=%lu)\n", 1011 pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n",
936 smp_processor_id(), (long)(jiffies - rsp->gp_start), 1012 smp_processor_id(), (long)(jiffies - rsp->gp_start),
937 rsp->gpnum, rsp->completed, totqlen); 1013 (long)rsp->gpnum, (long)rsp->completed, totqlen);
938 if (ndetected == 0) 1014 if (ndetected == 0)
939 pr_err("INFO: Stall ended before state dump start\n"); 1015 pr_err("INFO: Stall ended before state dump start\n");
940 else if (!trigger_all_cpu_backtrace()) 1016 else if (!trigger_all_cpu_backtrace())
@@ -947,12 +1023,6 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
947 force_quiescent_state(rsp); /* Kick them all. */ 1023 force_quiescent_state(rsp); /* Kick them all. */
948} 1024}
949 1025
950/*
951 * This function really isn't for public consumption, but RCU is special in
952 * that context switches can allow the state machine to make progress.
953 */
954extern void resched_cpu(int cpu);
955
956static void print_cpu_stall(struct rcu_state *rsp) 1026static void print_cpu_stall(struct rcu_state *rsp)
957{ 1027{
958 int cpu; 1028 int cpu;
@@ -971,14 +1041,15 @@ static void print_cpu_stall(struct rcu_state *rsp)
971 print_cpu_stall_info_end(); 1041 print_cpu_stall_info_end();
972 for_each_possible_cpu(cpu) 1042 for_each_possible_cpu(cpu)
973 totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; 1043 totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
974 pr_cont(" (t=%lu jiffies g=%lu c=%lu q=%lu)\n", 1044 pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n",
975 jiffies - rsp->gp_start, rsp->gpnum, rsp->completed, totqlen); 1045 jiffies - rsp->gp_start,
1046 (long)rsp->gpnum, (long)rsp->completed, totqlen);
976 if (!trigger_all_cpu_backtrace()) 1047 if (!trigger_all_cpu_backtrace())
977 dump_stack(); 1048 dump_stack();
978 1049
979 raw_spin_lock_irqsave(&rnp->lock, flags); 1050 raw_spin_lock_irqsave(&rnp->lock, flags);
980 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) 1051 if (ULONG_CMP_GE(jiffies, ACCESS_ONCE(rsp->jiffies_stall)))
981 rsp->jiffies_stall = jiffies + 1052 ACCESS_ONCE(rsp->jiffies_stall) = jiffies +
982 3 * rcu_jiffies_till_stall_check() + 3; 1053 3 * rcu_jiffies_till_stall_check() + 3;
983 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1054 raw_spin_unlock_irqrestore(&rnp->lock, flags);
984 1055
@@ -1062,7 +1133,7 @@ void rcu_cpu_stall_reset(void)
1062 struct rcu_state *rsp; 1133 struct rcu_state *rsp;
1063 1134
1064 for_each_rcu_flavor(rsp) 1135 for_each_rcu_flavor(rsp)
1065 rsp->jiffies_stall = jiffies + ULONG_MAX / 2; 1136 ACCESS_ONCE(rsp->jiffies_stall) = jiffies + ULONG_MAX / 2;
1066} 1137}
1067 1138
1068/* 1139/*
@@ -1123,15 +1194,18 @@ static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1123/* 1194/*
1124 * Start some future grace period, as needed to handle newly arrived 1195 * Start some future grace period, as needed to handle newly arrived
1125 * callbacks. The required future grace periods are recorded in each 1196 * callbacks. The required future grace periods are recorded in each
1126 * rcu_node structure's ->need_future_gp field. 1197 * rcu_node structure's ->need_future_gp field. Returns true if there
1198 * is reason to awaken the grace-period kthread.
1127 * 1199 *
1128 * The caller must hold the specified rcu_node structure's ->lock. 1200 * The caller must hold the specified rcu_node structure's ->lock.
1129 */ 1201 */
1130static unsigned long __maybe_unused 1202static bool __maybe_unused
1131rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp) 1203rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1204 unsigned long *c_out)
1132{ 1205{
1133 unsigned long c; 1206 unsigned long c;
1134 int i; 1207 int i;
1208 bool ret = false;
1135 struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); 1209 struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
1136 1210
1137 /* 1211 /*
@@ -1142,7 +1216,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
1142 trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf")); 1216 trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
1143 if (rnp->need_future_gp[c & 0x1]) { 1217 if (rnp->need_future_gp[c & 0x1]) {
1144 trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf")); 1218 trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
1145 return c; 1219 goto out;
1146 } 1220 }
1147 1221
1148 /* 1222 /*
@@ -1156,7 +1230,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
1156 ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) { 1230 ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
1157 rnp->need_future_gp[c & 0x1]++; 1231 rnp->need_future_gp[c & 0x1]++;
1158 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf")); 1232 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
1159 return c; 1233 goto out;
1160 } 1234 }
1161 1235
1162 /* 1236 /*
@@ -1197,12 +1271,15 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
1197 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot")); 1271 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
1198 } else { 1272 } else {
1199 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot")); 1273 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
1200 rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp); 1274 ret = rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
1201 } 1275 }
1202unlock_out: 1276unlock_out:
1203 if (rnp != rnp_root) 1277 if (rnp != rnp_root)
1204 raw_spin_unlock(&rnp_root->lock); 1278 raw_spin_unlock(&rnp_root->lock);
1205 return c; 1279out:
1280 if (c_out != NULL)
1281 *c_out = c;
1282 return ret;
1206} 1283}
1207 1284
1208/* 1285/*
@@ -1226,25 +1303,43 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
1226} 1303}
1227 1304
1228/* 1305/*
1306 * Awaken the grace-period kthread for the specified flavor of RCU.
1307 * Don't do a self-awaken, and don't bother awakening when there is
1308 * nothing for the grace-period kthread to do (as in several CPUs
1309 * raced to awaken, and we lost), and finally don't try to awaken
1310 * a kthread that has not yet been created.
1311 */
1312static void rcu_gp_kthread_wake(struct rcu_state *rsp)
1313{
1314 if (current == rsp->gp_kthread ||
1315 !ACCESS_ONCE(rsp->gp_flags) ||
1316 !rsp->gp_kthread)
1317 return;
1318 wake_up(&rsp->gp_wq);
1319}
1320
1321/*
1229 * If there is room, assign a ->completed number to any callbacks on 1322 * If there is room, assign a ->completed number to any callbacks on
1230 * this CPU that have not already been assigned. Also accelerate any 1323 * this CPU that have not already been assigned. Also accelerate any
1231 * callbacks that were previously assigned a ->completed number that has 1324 * callbacks that were previously assigned a ->completed number that has
1232 * since proven to be too conservative, which can happen if callbacks get 1325 * since proven to be too conservative, which can happen if callbacks get
1233 * assigned a ->completed number while RCU is idle, but with reference to 1326 * assigned a ->completed number while RCU is idle, but with reference to
1234 * a non-root rcu_node structure. This function is idempotent, so it does 1327 * a non-root rcu_node structure. This function is idempotent, so it does
 1235 * not hurt to call it repeatedly. 1328 * not hurt to call it repeatedly. Returns a flag saying that we should
1329 * awaken the RCU grace-period kthread.
1236 * 1330 *
1237 * The caller must hold rnp->lock with interrupts disabled. 1331 * The caller must hold rnp->lock with interrupts disabled.
1238 */ 1332 */
1239static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, 1333static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1240 struct rcu_data *rdp) 1334 struct rcu_data *rdp)
1241{ 1335{
1242 unsigned long c; 1336 unsigned long c;
1243 int i; 1337 int i;
1338 bool ret;
1244 1339
1245 /* If the CPU has no callbacks, nothing to do. */ 1340 /* If the CPU has no callbacks, nothing to do. */
1246 if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL]) 1341 if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
1247 return; 1342 return false;
1248 1343
1249 /* 1344 /*
1250 * Starting from the sublist containing the callbacks most 1345 * Starting from the sublist containing the callbacks most
@@ -1273,7 +1368,7 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1273 * be grouped into. 1368 * be grouped into.
1274 */ 1369 */
1275 if (++i >= RCU_NEXT_TAIL) 1370 if (++i >= RCU_NEXT_TAIL)
1276 return; 1371 return false;
1277 1372
1278 /* 1373 /*
1279 * Assign all subsequent callbacks' ->completed number to the next 1374 * Assign all subsequent callbacks' ->completed number to the next
@@ -1285,13 +1380,14 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1285 rdp->nxtcompleted[i] = c; 1380 rdp->nxtcompleted[i] = c;
1286 } 1381 }
1287 /* Record any needed additional grace periods. */ 1382 /* Record any needed additional grace periods. */
1288 rcu_start_future_gp(rnp, rdp); 1383 ret = rcu_start_future_gp(rnp, rdp, NULL);
1289 1384
1290 /* Trace depending on how much we were able to accelerate. */ 1385 /* Trace depending on how much we were able to accelerate. */
1291 if (!*rdp->nxttail[RCU_WAIT_TAIL]) 1386 if (!*rdp->nxttail[RCU_WAIT_TAIL])
1292 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB")); 1387 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB"));
1293 else 1388 else
1294 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB")); 1389 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB"));
1390 return ret;
1295} 1391}
1296 1392
1297/* 1393/*
@@ -1300,17 +1396,18 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1300 * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL 1396 * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL
1301 * sublist. This function is idempotent, so it does not hurt to 1397 * sublist. This function is idempotent, so it does not hurt to
1302 * invoke it repeatedly. As long as it is not invoked -too- often... 1398 * invoke it repeatedly. As long as it is not invoked -too- often...
1399 * Returns true if the RCU grace-period kthread needs to be awakened.
1303 * 1400 *
1304 * The caller must hold rnp->lock with interrupts disabled. 1401 * The caller must hold rnp->lock with interrupts disabled.
1305 */ 1402 */
1306static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp, 1403static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1307 struct rcu_data *rdp) 1404 struct rcu_data *rdp)
1308{ 1405{
1309 int i, j; 1406 int i, j;
1310 1407
1311 /* If the CPU has no callbacks, nothing to do. */ 1408 /* If the CPU has no callbacks, nothing to do. */
1312 if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL]) 1409 if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
1313 return; 1410 return false;
1314 1411
1315 /* 1412 /*
1316 * Find all callbacks whose ->completed numbers indicate that they 1413 * Find all callbacks whose ->completed numbers indicate that they
@@ -1334,26 +1431,30 @@ static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1334 } 1431 }
1335 1432
1336 /* Classify any remaining callbacks. */ 1433 /* Classify any remaining callbacks. */
1337 rcu_accelerate_cbs(rsp, rnp, rdp); 1434 return rcu_accelerate_cbs(rsp, rnp, rdp);
1338} 1435}
1339 1436
1340/* 1437/*
1341 * Update CPU-local rcu_data state to record the beginnings and ends of 1438 * Update CPU-local rcu_data state to record the beginnings and ends of
1342 * grace periods. The caller must hold the ->lock of the leaf rcu_node 1439 * grace periods. The caller must hold the ->lock of the leaf rcu_node
1343 * structure corresponding to the current CPU, and must have irqs disabled. 1440 * structure corresponding to the current CPU, and must have irqs disabled.
1441 * Returns true if the grace-period kthread needs to be awakened.
1344 */ 1442 */
1345static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) 1443static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
1444 struct rcu_data *rdp)
1346{ 1445{
1446 bool ret;
1447
1347 /* Handle the ends of any preceding grace periods first. */ 1448 /* Handle the ends of any preceding grace periods first. */
1348 if (rdp->completed == rnp->completed) { 1449 if (rdp->completed == rnp->completed) {
1349 1450
1350 /* No grace period end, so just accelerate recent callbacks. */ 1451 /* No grace period end, so just accelerate recent callbacks. */
1351 rcu_accelerate_cbs(rsp, rnp, rdp); 1452 ret = rcu_accelerate_cbs(rsp, rnp, rdp);
1352 1453
1353 } else { 1454 } else {
1354 1455
1355 /* Advance callbacks. */ 1456 /* Advance callbacks. */
1356 rcu_advance_cbs(rsp, rnp, rdp); 1457 ret = rcu_advance_cbs(rsp, rnp, rdp);
1357 1458
1358 /* Remember that we saw this grace-period completion. */ 1459 /* Remember that we saw this grace-period completion. */
1359 rdp->completed = rnp->completed; 1460 rdp->completed = rnp->completed;
@@ -1372,11 +1473,13 @@ static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struc
1372 rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); 1473 rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
1373 zero_cpu_stall_ticks(rdp); 1474 zero_cpu_stall_ticks(rdp);
1374 } 1475 }
1476 return ret;
1375} 1477}
1376 1478
1377static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) 1479static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
1378{ 1480{
1379 unsigned long flags; 1481 unsigned long flags;
1482 bool needwake;
1380 struct rcu_node *rnp; 1483 struct rcu_node *rnp;
1381 1484
1382 local_irq_save(flags); 1485 local_irq_save(flags);
@@ -1388,8 +1491,10 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
1388 return; 1491 return;
1389 } 1492 }
1390 smp_mb__after_unlock_lock(); 1493 smp_mb__after_unlock_lock();
1391 __note_gp_changes(rsp, rnp, rdp); 1494 needwake = __note_gp_changes(rsp, rnp, rdp);
1392 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1495 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1496 if (needwake)
1497 rcu_gp_kthread_wake(rsp);
1393} 1498}
1394 1499
1395/* 1500/*
@@ -1403,12 +1508,12 @@ static int rcu_gp_init(struct rcu_state *rsp)
1403 rcu_bind_gp_kthread(); 1508 rcu_bind_gp_kthread();
1404 raw_spin_lock_irq(&rnp->lock); 1509 raw_spin_lock_irq(&rnp->lock);
1405 smp_mb__after_unlock_lock(); 1510 smp_mb__after_unlock_lock();
1406 if (rsp->gp_flags == 0) { 1511 if (!ACCESS_ONCE(rsp->gp_flags)) {
1407 /* Spurious wakeup, tell caller to go back to sleep. */ 1512 /* Spurious wakeup, tell caller to go back to sleep. */
1408 raw_spin_unlock_irq(&rnp->lock); 1513 raw_spin_unlock_irq(&rnp->lock);
1409 return 0; 1514 return 0;
1410 } 1515 }
1411 rsp->gp_flags = 0; /* Clear all flags: New grace period. */ 1516 ACCESS_ONCE(rsp->gp_flags) = 0; /* Clear all flags: New grace period. */
1412 1517
1413 if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) { 1518 if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) {
1414 /* 1519 /*
@@ -1453,7 +1558,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
1453 WARN_ON_ONCE(rnp->completed != rsp->completed); 1558 WARN_ON_ONCE(rnp->completed != rsp->completed);
1454 ACCESS_ONCE(rnp->completed) = rsp->completed; 1559 ACCESS_ONCE(rnp->completed) = rsp->completed;
1455 if (rnp == rdp->mynode) 1560 if (rnp == rdp->mynode)
1456 __note_gp_changes(rsp, rnp, rdp); 1561 (void)__note_gp_changes(rsp, rnp, rdp);
1457 rcu_preempt_boost_start_gp(rnp); 1562 rcu_preempt_boost_start_gp(rnp);
1458 trace_rcu_grace_period_init(rsp->name, rnp->gpnum, 1563 trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
1459 rnp->level, rnp->grplo, 1564 rnp->level, rnp->grplo,
@@ -1501,7 +1606,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1501 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { 1606 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
1502 raw_spin_lock_irq(&rnp->lock); 1607 raw_spin_lock_irq(&rnp->lock);
1503 smp_mb__after_unlock_lock(); 1608 smp_mb__after_unlock_lock();
1504 rsp->gp_flags &= ~RCU_GP_FLAG_FQS; 1609 ACCESS_ONCE(rsp->gp_flags) &= ~RCU_GP_FLAG_FQS;
1505 raw_spin_unlock_irq(&rnp->lock); 1610 raw_spin_unlock_irq(&rnp->lock);
1506 } 1611 }
1507 return fqs_state; 1612 return fqs_state;
@@ -1513,6 +1618,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1513static void rcu_gp_cleanup(struct rcu_state *rsp) 1618static void rcu_gp_cleanup(struct rcu_state *rsp)
1514{ 1619{
1515 unsigned long gp_duration; 1620 unsigned long gp_duration;
1621 bool needgp = false;
1516 int nocb = 0; 1622 int nocb = 0;
1517 struct rcu_data *rdp; 1623 struct rcu_data *rdp;
1518 struct rcu_node *rnp = rcu_get_root(rsp); 1624 struct rcu_node *rnp = rcu_get_root(rsp);
@@ -1548,7 +1654,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1548 ACCESS_ONCE(rnp->completed) = rsp->gpnum; 1654 ACCESS_ONCE(rnp->completed) = rsp->gpnum;
1549 rdp = this_cpu_ptr(rsp->rda); 1655 rdp = this_cpu_ptr(rsp->rda);
1550 if (rnp == rdp->mynode) 1656 if (rnp == rdp->mynode)
1551 __note_gp_changes(rsp, rnp, rdp); 1657 needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
1552 /* smp_mb() provided by prior unlock-lock pair. */ 1658 /* smp_mb() provided by prior unlock-lock pair. */
1553 nocb += rcu_future_gp_cleanup(rsp, rnp); 1659 nocb += rcu_future_gp_cleanup(rsp, rnp);
1554 raw_spin_unlock_irq(&rnp->lock); 1660 raw_spin_unlock_irq(&rnp->lock);
@@ -1564,9 +1670,10 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1564 trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end")); 1670 trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
1565 rsp->fqs_state = RCU_GP_IDLE; 1671 rsp->fqs_state = RCU_GP_IDLE;
1566 rdp = this_cpu_ptr(rsp->rda); 1672 rdp = this_cpu_ptr(rsp->rda);
1567 rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */ 1673 /* Advance CBs to reduce false positives below. */
1568 if (cpu_needs_another_gp(rsp, rdp)) { 1674 needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp;
1569 rsp->gp_flags = RCU_GP_FLAG_INIT; 1675 if (needgp || cpu_needs_another_gp(rsp, rdp)) {
1676 ACCESS_ONCE(rsp->gp_flags) = RCU_GP_FLAG_INIT;
1570 trace_rcu_grace_period(rsp->name, 1677 trace_rcu_grace_period(rsp->name,
1571 ACCESS_ONCE(rsp->gpnum), 1678 ACCESS_ONCE(rsp->gpnum),
1572 TPS("newreq")); 1679 TPS("newreq"));
@@ -1593,6 +1700,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
1593 trace_rcu_grace_period(rsp->name, 1700 trace_rcu_grace_period(rsp->name,
1594 ACCESS_ONCE(rsp->gpnum), 1701 ACCESS_ONCE(rsp->gpnum),
1595 TPS("reqwait")); 1702 TPS("reqwait"));
1703 rsp->gp_state = RCU_GP_WAIT_GPS;
1596 wait_event_interruptible(rsp->gp_wq, 1704 wait_event_interruptible(rsp->gp_wq,
1597 ACCESS_ONCE(rsp->gp_flags) & 1705 ACCESS_ONCE(rsp->gp_flags) &
1598 RCU_GP_FLAG_INIT); 1706 RCU_GP_FLAG_INIT);
@@ -1620,6 +1728,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
1620 trace_rcu_grace_period(rsp->name, 1728 trace_rcu_grace_period(rsp->name,
1621 ACCESS_ONCE(rsp->gpnum), 1729 ACCESS_ONCE(rsp->gpnum),
1622 TPS("fqswait")); 1730 TPS("fqswait"));
1731 rsp->gp_state = RCU_GP_WAIT_FQS;
1623 ret = wait_event_interruptible_timeout(rsp->gp_wq, 1732 ret = wait_event_interruptible_timeout(rsp->gp_wq,
1624 ((gf = ACCESS_ONCE(rsp->gp_flags)) & 1733 ((gf = ACCESS_ONCE(rsp->gp_flags)) &
1625 RCU_GP_FLAG_FQS) || 1734 RCU_GP_FLAG_FQS) ||
@@ -1665,14 +1774,6 @@ static int __noreturn rcu_gp_kthread(void *arg)
1665 } 1774 }
1666} 1775}
1667 1776
1668static void rsp_wakeup(struct irq_work *work)
1669{
1670 struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work);
1671
1672 /* Wake up rcu_gp_kthread() to start the grace period. */
1673 wake_up(&rsp->gp_wq);
1674}
1675
1676/* 1777/*
1677 * Start a new RCU grace period if warranted, re-initializing the hierarchy 1778 * Start a new RCU grace period if warranted, re-initializing the hierarchy
1678 * in preparation for detecting the next grace period. The caller must hold 1779 * in preparation for detecting the next grace period. The caller must hold
@@ -1681,8 +1782,10 @@ static void rsp_wakeup(struct irq_work *work)
1681 * Note that it is legal for a dying CPU (which is marked as offline) to 1782 * Note that it is legal for a dying CPU (which is marked as offline) to
1682 * invoke this function. This can happen when the dying CPU reports its 1783 * invoke this function. This can happen when the dying CPU reports its
1683 * quiescent state. 1784 * quiescent state.
1785 *
1786 * Returns true if the grace-period kthread must be awakened.
1684 */ 1787 */
1685static void 1788static bool
1686rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, 1789rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
1687 struct rcu_data *rdp) 1790 struct rcu_data *rdp)
1688{ 1791{
@@ -1693,20 +1796,18 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
1693 * or a grace period is already in progress. 1796 * or a grace period is already in progress.
1694 * Either way, don't start a new grace period. 1797 * Either way, don't start a new grace period.
1695 */ 1798 */
1696 return; 1799 return false;
1697 } 1800 }
1698 rsp->gp_flags = RCU_GP_FLAG_INIT; 1801 ACCESS_ONCE(rsp->gp_flags) = RCU_GP_FLAG_INIT;
1699 trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), 1802 trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum),
1700 TPS("newreq")); 1803 TPS("newreq"));
1701 1804
1702 /* 1805 /*
1703 * We can't do wakeups while holding the rnp->lock, as that 1806 * We can't do wakeups while holding the rnp->lock, as that
1704 * could cause possible deadlocks with the rq->lock. Defer 1807 * could cause possible deadlocks with the rq->lock. Defer
1705 * the wakeup to interrupt context. And don't bother waking 1808 * the wakeup to our caller.
1706 * up the running kthread.
1707 */ 1809 */
1708 if (current != rsp->gp_kthread) 1810 return true;
1709 irq_work_queue(&rsp->wakeup_work);
1710} 1811}
1711 1812
1712/* 1813/*
@@ -1715,12 +1816,14 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
1715 * is invoked indirectly from rcu_advance_cbs(), which would result in 1816 * is invoked indirectly from rcu_advance_cbs(), which would result in
1716 * endless recursion -- or would do so if it wasn't for the self-deadlock 1817 * endless recursion -- or would do so if it wasn't for the self-deadlock
1717 * that is encountered beforehand. 1818 * that is encountered beforehand.
1819 *
1820 * Returns true if the grace-period kthread needs to be awakened.
1718 */ 1821 */
1719static void 1822static bool rcu_start_gp(struct rcu_state *rsp)
1720rcu_start_gp(struct rcu_state *rsp)
1721{ 1823{
1722 struct rcu_data *rdp = this_cpu_ptr(rsp->rda); 1824 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
1723 struct rcu_node *rnp = rcu_get_root(rsp); 1825 struct rcu_node *rnp = rcu_get_root(rsp);
1826 bool ret = false;
1724 1827
1725 /* 1828 /*
1726 * If there is no grace period in progress right now, any 1829 * If there is no grace period in progress right now, any
@@ -1730,8 +1833,9 @@ rcu_start_gp(struct rcu_state *rsp)
1730 * resulting in pointless grace periods. So, advance callbacks 1833 * resulting in pointless grace periods. So, advance callbacks
1731 * then start the grace period! 1834 * then start the grace period!
1732 */ 1835 */
1733 rcu_advance_cbs(rsp, rnp, rdp); 1836 ret = rcu_advance_cbs(rsp, rnp, rdp) || ret;
1734 rcu_start_gp_advanced(rsp, rnp, rdp); 1837 ret = rcu_start_gp_advanced(rsp, rnp, rdp) || ret;
1838 return ret;
1735} 1839}
1736 1840
1737/* 1841/*
@@ -1820,6 +1924,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
1820{ 1924{
1821 unsigned long flags; 1925 unsigned long flags;
1822 unsigned long mask; 1926 unsigned long mask;
1927 bool needwake;
1823 struct rcu_node *rnp; 1928 struct rcu_node *rnp;
1824 1929
1825 rnp = rdp->mynode; 1930 rnp = rdp->mynode;
@@ -1848,9 +1953,11 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
1848 * This GP can't end until cpu checks in, so all of our 1953 * This GP can't end until cpu checks in, so all of our
1849 * callbacks can be processed during the next GP. 1954 * callbacks can be processed during the next GP.
1850 */ 1955 */
1851 rcu_accelerate_cbs(rsp, rnp, rdp); 1956 needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
1852 1957
1853 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */ 1958 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
1959 if (needwake)
1960 rcu_gp_kthread_wake(rsp);
1854 } 1961 }
1855} 1962}
1856 1963
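[Editor's note: not part of the patch.] The recurring shape in these tree.c hunks is that functions called with rnp->lock held no longer wake the grace-period kthread directly; they return a needwake flag, and the caller invokes rcu_gp_kthread_wake() only after dropping the lock, so no wakeup (and no potential rq->lock deadlock) happens under a raw spinlock. In outline, with invented names:

#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>

struct example_state {
        spinlock_t lock;
        bool work_pending;
        wait_queue_head_t wq;
};

/* Decide under the lock whether a wakeup is needed, but do not issue it. */
static bool example_update(struct example_state *sp)
{
        /* ...modify state protected by sp->lock... */
        return sp->work_pending;
}

static void example_caller(struct example_state *sp)
{
        unsigned long flags;
        bool needwake;

        spin_lock_irqsave(&sp->lock, flags);
        needwake = example_update(sp);
        spin_unlock_irqrestore(&sp->lock, flags);
        if (needwake)
                wake_up(&sp->wq);       /* safe: no spinlocks held here */
}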
@@ -1951,7 +2058,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
1951static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags) 2058static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
1952{ 2059{
1953 int i; 2060 int i;
1954 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); 2061 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
1955 2062
1956 /* No-CBs CPUs are handled specially. */ 2063 /* No-CBs CPUs are handled specially. */
1957 if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags)) 2064 if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
@@ -2320,7 +2427,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
2320 raw_spin_unlock_irqrestore(&rnp_old->lock, flags); 2427 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2321 return; /* Someone beat us to it. */ 2428 return; /* Someone beat us to it. */
2322 } 2429 }
2323 rsp->gp_flags |= RCU_GP_FLAG_FQS; 2430 ACCESS_ONCE(rsp->gp_flags) |= RCU_GP_FLAG_FQS;
2324 raw_spin_unlock_irqrestore(&rnp_old->lock, flags); 2431 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2325 wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ 2432 wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
2326} 2433}
@@ -2334,7 +2441,8 @@ static void
2334__rcu_process_callbacks(struct rcu_state *rsp) 2441__rcu_process_callbacks(struct rcu_state *rsp)
2335{ 2442{
2336 unsigned long flags; 2443 unsigned long flags;
2337 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); 2444 bool needwake;
2445 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
2338 2446
2339 WARN_ON_ONCE(rdp->beenonline == 0); 2447 WARN_ON_ONCE(rdp->beenonline == 0);
2340 2448
@@ -2345,8 +2453,10 @@ __rcu_process_callbacks(struct rcu_state *rsp)
2345 local_irq_save(flags); 2453 local_irq_save(flags);
2346 if (cpu_needs_another_gp(rsp, rdp)) { 2454 if (cpu_needs_another_gp(rsp, rdp)) {
2347 raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */ 2455 raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
2348 rcu_start_gp(rsp); 2456 needwake = rcu_start_gp(rsp);
2349 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); 2457 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
2458 if (needwake)
2459 rcu_gp_kthread_wake(rsp);
2350 } else { 2460 } else {
2351 local_irq_restore(flags); 2461 local_irq_restore(flags);
2352 } 2462 }
@@ -2404,6 +2514,8 @@ static void invoke_rcu_core(void)
2404static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, 2514static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
2405 struct rcu_head *head, unsigned long flags) 2515 struct rcu_head *head, unsigned long flags)
2406{ 2516{
2517 bool needwake;
2518
2407 /* 2519 /*
2408 * If called from an extended quiescent state, invoke the RCU 2520 * If called from an extended quiescent state, invoke the RCU
2409 * core in order to force a re-evaluation of RCU's idleness. 2521 * core in order to force a re-evaluation of RCU's idleness.
@@ -2433,8 +2545,10 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
2433 2545
2434 raw_spin_lock(&rnp_root->lock); 2546 raw_spin_lock(&rnp_root->lock);
2435 smp_mb__after_unlock_lock(); 2547 smp_mb__after_unlock_lock();
2436 rcu_start_gp(rsp); 2548 needwake = rcu_start_gp(rsp);
2437 raw_spin_unlock(&rnp_root->lock); 2549 raw_spin_unlock(&rnp_root->lock);
2550 if (needwake)
2551 rcu_gp_kthread_wake(rsp);
2438 } else { 2552 } else {
2439 /* Give the grace period a kick. */ 2553 /* Give the grace period a kick. */
2440 rdp->blimit = LONG_MAX; 2554 rdp->blimit = LONG_MAX;
@@ -2537,6 +2651,20 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
2537EXPORT_SYMBOL_GPL(call_rcu_bh); 2651EXPORT_SYMBOL_GPL(call_rcu_bh);
2538 2652
2539/* 2653/*
2654 * Queue an RCU callback for lazy invocation after a grace period.
2655 * This will likely be later named something like "call_rcu_lazy()",
2656 * but this change will require some way of tagging the lazy RCU
2657 * callbacks in the list of pending callbacks. Until then, this
2658 * function may only be called from __kfree_rcu().
2659 */
2660void kfree_call_rcu(struct rcu_head *head,
2661 void (*func)(struct rcu_head *rcu))
2662{
2663 __call_rcu(head, func, rcu_state_p, -1, 1);
2664}
2665EXPORT_SYMBOL_GPL(kfree_call_rcu);
2666
2667/*
2540 * Because a context switch is a grace period for RCU-sched and RCU-bh, 2668 * Because a context switch is a grace period for RCU-sched and RCU-bh,
2541 * any blocking grace-period wait automatically implies a grace period 2669 * any blocking grace-period wait automatically implies a grace period
2542 * if there is only one CPU online at any point time during execution 2670 * if there is only one CPU online at any point time during execution
@@ -2659,7 +2787,7 @@ unsigned long get_state_synchronize_rcu(void)
2659 * time-consuming work between get_state_synchronize_rcu() 2787 * time-consuming work between get_state_synchronize_rcu()
2660 * and cond_synchronize_rcu(). 2788 * and cond_synchronize_rcu().
2661 */ 2789 */
2662 return smp_load_acquire(&rcu_state->gpnum); 2790 return smp_load_acquire(&rcu_state_p->gpnum);
2663} 2791}
2664EXPORT_SYMBOL_GPL(get_state_synchronize_rcu); 2792EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);
2665 2793
@@ -2685,7 +2813,7 @@ void cond_synchronize_rcu(unsigned long oldstate)
2685 * Ensure that this load happens before any RCU-destructive 2813 * Ensure that this load happens before any RCU-destructive
2686 * actions the caller might carry out after we return. 2814 * actions the caller might carry out after we return.
2687 */ 2815 */
2688 newstate = smp_load_acquire(&rcu_state->completed); 2816 newstate = smp_load_acquire(&rcu_state_p->completed);
2689 if (ULONG_CMP_GE(oldstate, newstate)) 2817 if (ULONG_CMP_GE(oldstate, newstate))
2690 synchronize_rcu(); 2818 synchronize_rcu();
2691} 2819}
@@ -2790,7 +2918,7 @@ void synchronize_sched_expedited(void)
2790 s = atomic_long_read(&rsp->expedited_done); 2918 s = atomic_long_read(&rsp->expedited_done);
2791 if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) { 2919 if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
2792 /* ensure test happens before caller kfree */ 2920 /* ensure test happens before caller kfree */
2793 smp_mb__before_atomic_inc(); /* ^^^ */ 2921 smp_mb__before_atomic(); /* ^^^ */
2794 atomic_long_inc(&rsp->expedited_workdone1); 2922 atomic_long_inc(&rsp->expedited_workdone1);
2795 return; 2923 return;
2796 } 2924 }
@@ -2808,7 +2936,7 @@ void synchronize_sched_expedited(void)
2808 s = atomic_long_read(&rsp->expedited_done); 2936 s = atomic_long_read(&rsp->expedited_done);
2809 if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) { 2937 if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
2810 /* ensure test happens before caller kfree */ 2938 /* ensure test happens before caller kfree */
2811 smp_mb__before_atomic_inc(); /* ^^^ */ 2939 smp_mb__before_atomic(); /* ^^^ */
2812 atomic_long_inc(&rsp->expedited_workdone2); 2940 atomic_long_inc(&rsp->expedited_workdone2);
2813 return; 2941 return;
2814 } 2942 }
@@ -2837,7 +2965,7 @@ void synchronize_sched_expedited(void)
2837 s = atomic_long_read(&rsp->expedited_done); 2965 s = atomic_long_read(&rsp->expedited_done);
2838 if (ULONG_CMP_GE((ulong)s, (ulong)snap)) { 2966 if (ULONG_CMP_GE((ulong)s, (ulong)snap)) {
2839 /* ensure test happens before caller kfree */ 2967 /* ensure test happens before caller kfree */
2840 smp_mb__before_atomic_inc(); /* ^^^ */ 2968 smp_mb__before_atomic(); /* ^^^ */
2841 atomic_long_inc(&rsp->expedited_done_lost); 2969 atomic_long_inc(&rsp->expedited_done_lost);
2842 break; 2970 break;
2843 } 2971 }
@@ -2988,7 +3116,7 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
2988static void rcu_barrier_func(void *type) 3116static void rcu_barrier_func(void *type)
2989{ 3117{
2990 struct rcu_state *rsp = type; 3118 struct rcu_state *rsp = type;
2991 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); 3119 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
2992 3120
2993 _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done); 3121 _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done);
2994 atomic_inc(&rsp->barrier_cpu_count); 3122 atomic_inc(&rsp->barrier_cpu_count);
@@ -3160,7 +3288,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
3160 * that this CPU cannot possibly have any RCU callbacks in flight yet. 3288 * that this CPU cannot possibly have any RCU callbacks in flight yet.
3161 */ 3289 */
3162static void 3290static void
3163rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) 3291rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
3164{ 3292{
3165 unsigned long flags; 3293 unsigned long flags;
3166 unsigned long mask; 3294 unsigned long mask;
@@ -3173,7 +3301,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
3173 /* Set up local state, ensuring consistent view of global state. */ 3301 /* Set up local state, ensuring consistent view of global state. */
3174 raw_spin_lock_irqsave(&rnp->lock, flags); 3302 raw_spin_lock_irqsave(&rnp->lock, flags);
3175 rdp->beenonline = 1; /* We have now been online. */ 3303 rdp->beenonline = 1; /* We have now been online. */
3176 rdp->preemptible = preemptible;
3177 rdp->qlen_last_fqs_check = 0; 3304 rdp->qlen_last_fqs_check = 0;
3178 rdp->n_force_qs_snap = rsp->n_force_qs; 3305 rdp->n_force_qs_snap = rsp->n_force_qs;
3179 rdp->blimit = blimit; 3306 rdp->blimit = blimit;
@@ -3217,8 +3344,7 @@ static void rcu_prepare_cpu(int cpu)
3217 struct rcu_state *rsp; 3344 struct rcu_state *rsp;
3218 3345
3219 for_each_rcu_flavor(rsp) 3346 for_each_rcu_flavor(rsp)
3220 rcu_init_percpu_data(cpu, rsp, 3347 rcu_init_percpu_data(cpu, rsp);
3221 strcmp(rsp->name, "rcu_preempt") == 0);
3222} 3348}
3223 3349
3224/* 3350/*
@@ -3228,7 +3354,7 @@ static int rcu_cpu_notify(struct notifier_block *self,
3228 unsigned long action, void *hcpu) 3354 unsigned long action, void *hcpu)
3229{ 3355{
3230 long cpu = (long)hcpu; 3356 long cpu = (long)hcpu;
3231 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); 3357 struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
3232 struct rcu_node *rnp = rdp->mynode; 3358 struct rcu_node *rnp = rdp->mynode;
3233 struct rcu_state *rsp; 3359 struct rcu_state *rsp;
3234 3360
@@ -3402,8 +3528,8 @@ static void __init rcu_init_one(struct rcu_state *rsp,
3402 rnp->qsmaskinit = 0; 3528 rnp->qsmaskinit = 0;
3403 rnp->grplo = j * cpustride; 3529 rnp->grplo = j * cpustride;
3404 rnp->grphi = (j + 1) * cpustride - 1; 3530 rnp->grphi = (j + 1) * cpustride - 1;
3405 if (rnp->grphi >= NR_CPUS) 3531 if (rnp->grphi >= nr_cpu_ids)
3406 rnp->grphi = NR_CPUS - 1; 3532 rnp->grphi = nr_cpu_ids - 1;
3407 if (i == 0) { 3533 if (i == 0) {
3408 rnp->grpnum = 0; 3534 rnp->grpnum = 0;
3409 rnp->grpmask = 0; 3535 rnp->grpmask = 0;
@@ -3422,7 +3548,6 @@ static void __init rcu_init_one(struct rcu_state *rsp,
3422 3548
3423 rsp->rda = rda; 3549 rsp->rda = rda;
3424 init_waitqueue_head(&rsp->gp_wq); 3550 init_waitqueue_head(&rsp->gp_wq);
3425 init_irq_work(&rsp->wakeup_work, rsp_wakeup);
3426 rnp = rsp->level[rcu_num_lvls - 1]; 3551 rnp = rsp->level[rcu_num_lvls - 1];
3427 for_each_possible_cpu(i) { 3552 for_each_possible_cpu(i) {
3428 while (i > rnp->grphi) 3553 while (i > rnp->grphi)
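
The kernel/rcu/tree.c hunks above convert rcu_start_gp(), rcu_advance_cbs(), rcu_accelerate_cbs() and rcu_start_future_gp() to return whether the grace-period kthread needs waking, so each caller invokes rcu_gp_kthread_wake() only after the relevant rcu_node lock has been dropped; the irq_work-based rsp_wakeup()/wakeup_work deferral is removed. A minimal userspace sketch of that "record the wakeup, do it outside the lock" pattern, assuming pthreads and made-up names (gp_ctl, start_gp_locked, queue_work) that are not kernel APIs:

#include <pthread.h>
#include <stdbool.h>

/* Hypothetical analogue of the needwake pattern: the helper only records
 * that the worker must be woken; the caller performs the wakeup after
 * dropping the lock, so the woken worker never contends for it. */
struct gp_ctl {
	pthread_mutex_t lock;
	pthread_cond_t  wake;     /* stands in for rcu_gp_kthread_wake() */
	int             pending;  /* stands in for callbacks needing a GP */
	bool            gp_in_progress;
};

/* Caller must hold ctl->lock; returns true if the worker needs a wakeup. */
static bool start_gp_locked(struct gp_ctl *ctl)
{
	if (ctl->gp_in_progress || ctl->pending == 0)
		return false;
	ctl->gp_in_progress = true;
	return true;              /* defer the actual wakeup to the caller */
}

static void queue_work(struct gp_ctl *ctl)
{
	bool needwake;

	pthread_mutex_lock(&ctl->lock);
	ctl->pending++;
	needwake = start_gp_locked(ctl);
	pthread_mutex_unlock(&ctl->lock);
	if (needwake)
		pthread_cond_signal(&ctl->wake);  /* wakeup happens lock-free */
}
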
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 75dc3c39a02a..bf2c1e669691 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -252,7 +252,6 @@ struct rcu_data {
252 bool passed_quiesce; /* User-mode/idle loop etc. */ 252 bool passed_quiesce; /* User-mode/idle loop etc. */
253 bool qs_pending; /* Core waits for quiesc state. */ 253 bool qs_pending; /* Core waits for quiesc state. */
254 bool beenonline; /* CPU online at least once. */ 254 bool beenonline; /* CPU online at least once. */
255 bool preemptible; /* Preemptible RCU? */
256 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ 255 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
257 unsigned long grpmask; /* Mask to apply to leaf qsmask. */ 256 unsigned long grpmask; /* Mask to apply to leaf qsmask. */
258#ifdef CONFIG_RCU_CPU_STALL_INFO 257#ifdef CONFIG_RCU_CPU_STALL_INFO
@@ -406,7 +405,8 @@ struct rcu_state {
406 unsigned long completed; /* # of last completed gp. */ 405 unsigned long completed; /* # of last completed gp. */
407 struct task_struct *gp_kthread; /* Task for grace periods. */ 406 struct task_struct *gp_kthread; /* Task for grace periods. */
408 wait_queue_head_t gp_wq; /* Where GP task waits. */ 407 wait_queue_head_t gp_wq; /* Where GP task waits. */
409 int gp_flags; /* Commands for GP task. */ 408 short gp_flags; /* Commands for GP task. */
409 short gp_state; /* GP kthread sleep state. */
410 410
411 /* End of fields guarded by root rcu_node's lock. */ 411 /* End of fields guarded by root rcu_node's lock. */
412 412
@@ -462,13 +462,17 @@ struct rcu_state {
462 const char *name; /* Name of structure. */ 462 const char *name; /* Name of structure. */
463 char abbr; /* Abbreviated name. */ 463 char abbr; /* Abbreviated name. */
464 struct list_head flavors; /* List of RCU flavors. */ 464 struct list_head flavors; /* List of RCU flavors. */
465 struct irq_work wakeup_work; /* Postponed wakeups */
466}; 465};
467 466
468/* Values for rcu_state structure's gp_flags field. */ 467/* Values for rcu_state structure's gp_flags field. */
469#define RCU_GP_FLAG_INIT 0x1 /* Need grace-period initialization. */ 468#define RCU_GP_FLAG_INIT 0x1 /* Need grace-period initialization. */
470#define RCU_GP_FLAG_FQS 0x2 /* Need grace-period quiescent-state forcing. */ 469#define RCU_GP_FLAG_FQS 0x2 /* Need grace-period quiescent-state forcing. */
471 470
 471/* Values for rcu_state structure's gp_state field. */
472#define RCU_GP_WAIT_INIT 0 /* Initial state. */
473#define RCU_GP_WAIT_GPS 1 /* Wait for grace-period start. */
474#define RCU_GP_WAIT_FQS 2 /* Wait for force-quiescent-state time. */
475
472extern struct list_head rcu_struct_flavors; 476extern struct list_head rcu_struct_flavors;
473 477
474/* Sequence through rcu_state structures for each RCU flavor. */ 478/* Sequence through rcu_state structures for each RCU flavor. */
@@ -547,7 +551,6 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
547static void print_cpu_stall_info_end(void); 551static void print_cpu_stall_info_end(void);
548static void zero_cpu_stall_ticks(struct rcu_data *rdp); 552static void zero_cpu_stall_ticks(struct rcu_data *rdp);
549static void increment_cpu_stall_ticks(void); 553static void increment_cpu_stall_ticks(void);
550static int rcu_nocb_needs_gp(struct rcu_state *rsp);
551static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq); 554static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
552static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp); 555static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
553static void rcu_init_one_nocb(struct rcu_node *rnp); 556static void rcu_init_one_nocb(struct rcu_node *rnp);
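
The kernel/rcu/tree.h hunk narrows gp_flags to a short and adds a gp_state field with the RCU_GP_WAIT_* values, recording what the grace-period kthread is currently sleeping on, presumably so diagnostics can say which wait it is stuck in. A small sketch of that bookkeeping idea, with made-up names (gp_worker, gp_record_sleep, gp_report_stall) that only mirror the RCU_GP_WAIT_* constants:

#include <stdio.h>

/* Hypothetical sketch: record what the worker is about to sleep on so a
 * stall report can name the wait. Everything except the three state names
 * is invented for illustration. */
enum gp_wait_state { GP_WAIT_INIT, GP_WAIT_GPS, GP_WAIT_FQS };

struct gp_worker {
	volatile enum gp_wait_state state;
};

static const char *const gp_state_names[] = {
	[GP_WAIT_INIT] = "no grace period yet",
	[GP_WAIT_GPS]  = "waiting for grace-period start",
	[GP_WAIT_FQS]  = "waiting for force-quiescent-state time",
};

static void gp_record_sleep(struct gp_worker *w, enum gp_wait_state s)
{
	w->state = s;           /* update before blocking; read by diagnostics */
}

static void gp_report_stall(const struct gp_worker *w)
{
	fprintf(stderr, "grace-period kthread stalled: %s\n",
		gp_state_names[w->state]);
}
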
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 962d1d589929..cbc2c45265e2 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -116,7 +116,7 @@ static void __init rcu_bootup_announce_oddness(void)
116#ifdef CONFIG_TREE_PREEMPT_RCU 116#ifdef CONFIG_TREE_PREEMPT_RCU
117 117
118RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); 118RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
119static struct rcu_state *rcu_state = &rcu_preempt_state; 119static struct rcu_state *rcu_state_p = &rcu_preempt_state;
120 120
121static int rcu_preempted_readers_exp(struct rcu_node *rnp); 121static int rcu_preempted_readers_exp(struct rcu_node *rnp);
122 122
@@ -149,15 +149,6 @@ long rcu_batches_completed(void)
149EXPORT_SYMBOL_GPL(rcu_batches_completed); 149EXPORT_SYMBOL_GPL(rcu_batches_completed);
150 150
151/* 151/*
152 * Force a quiescent state for preemptible RCU.
153 */
154void rcu_force_quiescent_state(void)
155{
156 force_quiescent_state(&rcu_preempt_state);
157}
158EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
159
160/*
161 * Record a preemptible-RCU quiescent state for the specified CPU. Note 152 * Record a preemptible-RCU quiescent state for the specified CPU. Note
162 * that this just means that the task currently running on the CPU is 153 * that this just means that the task currently running on the CPU is
163 * not in a quiescent state. There might be any number of tasks blocked 154 * not in a quiescent state. There might be any number of tasks blocked
@@ -688,20 +679,6 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
688} 679}
689EXPORT_SYMBOL_GPL(call_rcu); 680EXPORT_SYMBOL_GPL(call_rcu);
690 681
691/*
692 * Queue an RCU callback for lazy invocation after a grace period.
693 * This will likely be later named something like "call_rcu_lazy()",
694 * but this change will require some way of tagging the lazy RCU
695 * callbacks in the list of pending callbacks. Until then, this
696 * function may only be called from __kfree_rcu().
697 */
698void kfree_call_rcu(struct rcu_head *head,
699 void (*func)(struct rcu_head *rcu))
700{
701 __call_rcu(head, func, &rcu_preempt_state, -1, 1);
702}
703EXPORT_SYMBOL_GPL(kfree_call_rcu);
704
705/** 682/**
706 * synchronize_rcu - wait until a grace period has elapsed. 683 * synchronize_rcu - wait until a grace period has elapsed.
707 * 684 *
@@ -970,7 +947,7 @@ void exit_rcu(void)
970 947
971#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 948#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
972 949
973static struct rcu_state *rcu_state = &rcu_sched_state; 950static struct rcu_state *rcu_state_p = &rcu_sched_state;
974 951
975/* 952/*
976 * Tell them what RCU they are running. 953 * Tell them what RCU they are running.
@@ -991,16 +968,6 @@ long rcu_batches_completed(void)
991EXPORT_SYMBOL_GPL(rcu_batches_completed); 968EXPORT_SYMBOL_GPL(rcu_batches_completed);
992 969
993/* 970/*
994 * Force a quiescent state for RCU, which, because there is no preemptible
995 * RCU, becomes the same as rcu-sched.
996 */
997void rcu_force_quiescent_state(void)
998{
999 rcu_sched_force_quiescent_state();
1000}
1001EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
1002
1003/*
1004 * Because preemptible RCU does not exist, we never have to check for 971 * Because preemptible RCU does not exist, we never have to check for
1005 * CPUs being in quiescent states. 972 * CPUs being in quiescent states.
1006 */ 973 */
@@ -1080,22 +1047,6 @@ static void rcu_preempt_check_callbacks(int cpu)
1080} 1047}
1081 1048
1082/* 1049/*
1083 * Queue an RCU callback for lazy invocation after a grace period.
1084 * This will likely be later named something like "call_rcu_lazy()",
1085 * but this change will require some way of tagging the lazy RCU
1086 * callbacks in the list of pending callbacks. Until then, this
1087 * function may only be called from __kfree_rcu().
1088 *
1089 * Because there is no preemptible RCU, we use RCU-sched instead.
1090 */
1091void kfree_call_rcu(struct rcu_head *head,
1092 void (*func)(struct rcu_head *rcu))
1093{
1094 __call_rcu(head, func, &rcu_sched_state, -1, 1);
1095}
1096EXPORT_SYMBOL_GPL(kfree_call_rcu);
1097
1098/*
1099 * Wait for an rcu-preempt grace period, but make it happen quickly. 1050 * Wait for an rcu-preempt grace period, but make it happen quickly.
1100 * But because preemptible RCU does not exist, map to rcu-sched. 1051 * But because preemptible RCU does not exist, map to rcu-sched.
1101 */ 1052 */
@@ -1517,11 +1468,11 @@ static int __init rcu_spawn_kthreads(void)
1517 for_each_possible_cpu(cpu) 1468 for_each_possible_cpu(cpu)
1518 per_cpu(rcu_cpu_has_work, cpu) = 0; 1469 per_cpu(rcu_cpu_has_work, cpu) = 0;
1519 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); 1470 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
1520 rnp = rcu_get_root(rcu_state); 1471 rnp = rcu_get_root(rcu_state_p);
1521 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); 1472 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
1522 if (NUM_RCU_NODES > 1) { 1473 if (NUM_RCU_NODES > 1) {
1523 rcu_for_each_leaf_node(rcu_state, rnp) 1474 rcu_for_each_leaf_node(rcu_state_p, rnp)
1524 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); 1475 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
1525 } 1476 }
1526 return 0; 1477 return 0;
1527} 1478}
@@ -1529,12 +1480,12 @@ early_initcall(rcu_spawn_kthreads);
1529 1480
1530static void rcu_prepare_kthreads(int cpu) 1481static void rcu_prepare_kthreads(int cpu)
1531{ 1482{
1532 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); 1483 struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
1533 struct rcu_node *rnp = rdp->mynode; 1484 struct rcu_node *rnp = rdp->mynode;
1534 1485
1535 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ 1486 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
1536 if (rcu_scheduler_fully_active) 1487 if (rcu_scheduler_fully_active)
1537 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); 1488 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
1538} 1489}
1539 1490
1540#else /* #ifdef CONFIG_RCU_BOOST */ 1491#else /* #ifdef CONFIG_RCU_BOOST */
@@ -1744,6 +1695,7 @@ int rcu_needs_cpu(int cpu, unsigned long *dj)
1744static void rcu_prepare_for_idle(int cpu) 1695static void rcu_prepare_for_idle(int cpu)
1745{ 1696{
1746#ifndef CONFIG_RCU_NOCB_CPU_ALL 1697#ifndef CONFIG_RCU_NOCB_CPU_ALL
1698 bool needwake;
1747 struct rcu_data *rdp; 1699 struct rcu_data *rdp;
1748 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 1700 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
1749 struct rcu_node *rnp; 1701 struct rcu_node *rnp;
@@ -1792,8 +1744,10 @@ static void rcu_prepare_for_idle(int cpu)
1792 rnp = rdp->mynode; 1744 rnp = rdp->mynode;
1793 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 1745 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
1794 smp_mb__after_unlock_lock(); 1746 smp_mb__after_unlock_lock();
1795 rcu_accelerate_cbs(rsp, rnp, rdp); 1747 needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
1796 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1748 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1749 if (needwake)
1750 rcu_gp_kthread_wake(rsp);
1797 } 1751 }
1798#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ 1752#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
1799} 1753}
@@ -1855,7 +1809,7 @@ static void rcu_oom_notify_cpu(void *unused)
1855 struct rcu_data *rdp; 1809 struct rcu_data *rdp;
1856 1810
1857 for_each_rcu_flavor(rsp) { 1811 for_each_rcu_flavor(rsp) {
1858 rdp = __this_cpu_ptr(rsp->rda); 1812 rdp = raw_cpu_ptr(rsp->rda);
1859 if (rdp->qlen_lazy != 0) { 1813 if (rdp->qlen_lazy != 0) {
1860 atomic_inc(&oom_callback_count); 1814 atomic_inc(&oom_callback_count);
1861 rsp->call(&rdp->oom_head, rcu_oom_callback); 1815 rsp->call(&rdp->oom_head, rcu_oom_callback);
@@ -1997,7 +1951,7 @@ static void increment_cpu_stall_ticks(void)
1997 struct rcu_state *rsp; 1951 struct rcu_state *rsp;
1998 1952
1999 for_each_rcu_flavor(rsp) 1953 for_each_rcu_flavor(rsp)
2000 __this_cpu_ptr(rsp->rda)->ticks_this_gp++; 1954 raw_cpu_inc(rsp->rda->ticks_this_gp);
2001} 1955}
2002 1956
2003#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ 1957#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
@@ -2068,19 +2022,6 @@ static int __init parse_rcu_nocb_poll(char *arg)
2068early_param("rcu_nocb_poll", parse_rcu_nocb_poll); 2022early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
2069 2023
2070/* 2024/*
2071 * Do any no-CBs CPUs need another grace period?
2072 *
2073 * Interrupts must be disabled. If the caller does not hold the root
2074 * rnp_node structure's ->lock, the results are advisory only.
2075 */
2076static int rcu_nocb_needs_gp(struct rcu_state *rsp)
2077{
2078 struct rcu_node *rnp = rcu_get_root(rsp);
2079
2080 return rnp->need_future_gp[(ACCESS_ONCE(rnp->completed) + 1) & 0x1];
2081}
2082
2083/*
2084 * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended 2025 * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
2085 * grace period. 2026 * grace period.
2086 */ 2027 */
@@ -2109,7 +2050,7 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
2109} 2050}
2110 2051
2111#ifndef CONFIG_RCU_NOCB_CPU_ALL 2052#ifndef CONFIG_RCU_NOCB_CPU_ALL
2112/* Is the specified CPU a no-CPUs CPU? */ 2053/* Is the specified CPU a no-CBs CPU? */
2113bool rcu_is_nocb_cpu(int cpu) 2054bool rcu_is_nocb_cpu(int cpu)
2114{ 2055{
2115 if (have_rcu_nocb_mask) 2056 if (have_rcu_nocb_mask)
@@ -2243,12 +2184,15 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
2243 unsigned long c; 2184 unsigned long c;
2244 bool d; 2185 bool d;
2245 unsigned long flags; 2186 unsigned long flags;
2187 bool needwake;
2246 struct rcu_node *rnp = rdp->mynode; 2188 struct rcu_node *rnp = rdp->mynode;
2247 2189
2248 raw_spin_lock_irqsave(&rnp->lock, flags); 2190 raw_spin_lock_irqsave(&rnp->lock, flags);
2249 smp_mb__after_unlock_lock(); 2191 smp_mb__after_unlock_lock();
2250 c = rcu_start_future_gp(rnp, rdp); 2192 needwake = rcu_start_future_gp(rnp, rdp, &c);
2251 raw_spin_unlock_irqrestore(&rnp->lock, flags); 2193 raw_spin_unlock_irqrestore(&rnp->lock, flags);
2194 if (needwake)
2195 rcu_gp_kthread_wake(rdp->rsp);
2252 2196
2253 /* 2197 /*
2254 * Wait for the grace period. Do so interruptibly to avoid messing 2198 * Wait for the grace period. Do so interruptibly to avoid messing
@@ -2402,11 +2346,6 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
2402 2346
2403#else /* #ifdef CONFIG_RCU_NOCB_CPU */ 2347#else /* #ifdef CONFIG_RCU_NOCB_CPU */
2404 2348
2405static int rcu_nocb_needs_gp(struct rcu_state *rsp)
2406{
2407 return 0;
2408}
2409
2410static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) 2349static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
2411{ 2350{
2412} 2351}
@@ -2523,9 +2462,9 @@ static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq)
2523 /* Record start of fully idle period. */ 2462 /* Record start of fully idle period. */
2524 j = jiffies; 2463 j = jiffies;
2525 ACCESS_ONCE(rdtp->dynticks_idle_jiffies) = j; 2464 ACCESS_ONCE(rdtp->dynticks_idle_jiffies) = j;
2526 smp_mb__before_atomic_inc(); 2465 smp_mb__before_atomic();
2527 atomic_inc(&rdtp->dynticks_idle); 2466 atomic_inc(&rdtp->dynticks_idle);
2528 smp_mb__after_atomic_inc(); 2467 smp_mb__after_atomic();
2529 WARN_ON_ONCE(atomic_read(&rdtp->dynticks_idle) & 0x1); 2468 WARN_ON_ONCE(atomic_read(&rdtp->dynticks_idle) & 0x1);
2530} 2469}
2531 2470
@@ -2590,9 +2529,9 @@ static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq)
2590 } 2529 }
2591 2530
2592 /* Record end of idle period. */ 2531 /* Record end of idle period. */
2593 smp_mb__before_atomic_inc(); 2532 smp_mb__before_atomic();
2594 atomic_inc(&rdtp->dynticks_idle); 2533 atomic_inc(&rdtp->dynticks_idle);
2595 smp_mb__after_atomic_inc(); 2534 smp_mb__after_atomic();
2596 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks_idle) & 0x1)); 2535 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks_idle) & 0x1));
2597 2536
2598 /* 2537 /*
@@ -2657,20 +2596,6 @@ static bool is_sysidle_rcu_state(struct rcu_state *rsp)
2657} 2596}
2658 2597
2659/* 2598/*
2660 * Bind the grace-period kthread for the sysidle flavor of RCU to the
2661 * timekeeping CPU.
2662 */
2663static void rcu_bind_gp_kthread(void)
2664{
2665 int cpu = ACCESS_ONCE(tick_do_timer_cpu);
2666
2667 if (cpu < 0 || cpu >= nr_cpu_ids)
2668 return;
2669 if (raw_smp_processor_id() != cpu)
2670 set_cpus_allowed_ptr(current, cpumask_of(cpu));
2671}
2672
2673/*
2674 * Return a delay in jiffies based on the number of CPUs, rcu_node 2599 * Return a delay in jiffies based on the number of CPUs, rcu_node
2675 * leaf fanout, and jiffies tick rate. The idea is to allow larger 2600 * leaf fanout, and jiffies tick rate. The idea is to allow larger
2676 * systems more time to transition to full-idle state in order to 2601 * systems more time to transition to full-idle state in order to
@@ -2734,7 +2659,8 @@ static void rcu_sysidle(unsigned long j)
2734static void rcu_sysidle_cancel(void) 2659static void rcu_sysidle_cancel(void)
2735{ 2660{
2736 smp_mb(); 2661 smp_mb();
2737 ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_NOT; 2662 if (full_sysidle_state > RCU_SYSIDLE_SHORT)
2663 ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_NOT;
2738} 2664}
2739 2665
2740/* 2666/*
@@ -2880,10 +2806,6 @@ static bool is_sysidle_rcu_state(struct rcu_state *rsp)
2880 return false; 2806 return false;
2881} 2807}
2882 2808
2883static void rcu_bind_gp_kthread(void)
2884{
2885}
2886
2887static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle, 2809static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
2888 unsigned long maxj) 2810 unsigned long maxj)
2889{ 2811{
@@ -2914,3 +2836,19 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
2914#endif /* #ifdef CONFIG_NO_HZ_FULL */ 2836#endif /* #ifdef CONFIG_NO_HZ_FULL */
2915 return 0; 2837 return 0;
2916} 2838}
2839
2840/*
2841 * Bind the grace-period kthread for the sysidle flavor of RCU to the
2842 * timekeeping CPU.
2843 */
2844static void rcu_bind_gp_kthread(void)
2845{
2846#ifdef CONFIG_NO_HZ_FULL
2847 int cpu = ACCESS_ONCE(tick_do_timer_cpu);
2848
2849 if (cpu < 0 || cpu >= nr_cpu_ids)
2850 return;
2851 if (raw_smp_processor_id() != cpu)
2852 set_cpus_allowed_ptr(current, cpumask_of(cpu));
2853#endif /* #ifdef CONFIG_NO_HZ_FULL */
2854}
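
The relocated rcu_bind_gp_kthread() above pins the grace-period kthread to the timekeeping CPU (tick_do_timer_cpu) whenever CONFIG_NO_HZ_FULL is enabled, so its periodic work stays off the CPUs that are meant to remain tick-free. A rough userspace analogue using pthread affinity, with bind_to_housekeeping_cpu() and the hard-coded CPU argument being assumptions for illustration only:

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>

/* Hypothetical analogue of rcu_bind_gp_kthread(): pin the calling thread
 * to one housekeeping CPU so its background work never disturbs CPUs that
 * should stay idle/tick-free. Returns 0 on success, errno-style on error. */
static int bind_to_housekeeping_cpu(int cpu)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(cpu, &set);
	return pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
}
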
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 4c0a9b0af469..a2aeb4df0f60 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -320,6 +320,18 @@ int rcu_jiffies_till_stall_check(void)
320 return till_stall_check * HZ + RCU_STALL_DELAY_DELTA; 320 return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
321} 321}
322 322
323void rcu_sysrq_start(void)
324{
325 if (!rcu_cpu_stall_suppress)
326 rcu_cpu_stall_suppress = 2;
327}
328
329void rcu_sysrq_end(void)
330{
331 if (rcu_cpu_stall_suppress == 2)
332 rcu_cpu_stall_suppress = 0;
333}
334
323static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) 335static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
324{ 336{
325 rcu_cpu_stall_suppress = 1; 337 rcu_cpu_stall_suppress = 1;
@@ -338,3 +350,21 @@ static int __init check_cpu_stall_init(void)
338early_initcall(check_cpu_stall_init); 350early_initcall(check_cpu_stall_init);
339 351
340#endif /* #ifdef CONFIG_RCU_STALL_COMMON */ 352#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
353
354/*
355 * Hooks for cond_resched() and friends to avoid RCU CPU stall warnings.
356 */
357
358DEFINE_PER_CPU(int, rcu_cond_resched_count);
359
360/*
361 * Report a set of RCU quiescent states, for use by cond_resched()
362 * and friends. Out of line due to being called infrequently.
363 */
364void rcu_resched(void)
365{
366 preempt_disable();
367 __this_cpu_write(rcu_cond_resched_count, 0);
368 rcu_note_context_switch(smp_processor_id());
369 preempt_enable();
370}
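
The kernel/rcu/update.c additions give sysrq a way to suppress stall warnings temporarily and add rcu_resched(), the out-of-line slow path that resets the per-CPU rcu_cond_resched_count and reports a quiescent state via rcu_note_context_switch(). The cheap fast-path check lives in a header not shown here. A single-threaded sketch of that fast-path/slow-path split, where the 256 threshold, maybe_report_qs() and report_quiescent_state() are all made up:

#include <stdio.h>

/* Hypothetical sketch of the rcu_cond_resched() idea: a cheap counter
 * check decides whether the (rarer) out-of-line slow path should reset
 * the counter and report a quiescent state. */
static _Thread_local int cond_resched_count;

static void report_quiescent_state(void)
{
	cond_resched_count = 0;
	puts("quiescent state reported");  /* stands in for rcu_note_context_switch() */
}

static inline void maybe_report_qs(void)
{
	if (++cond_resched_count >= 256)   /* cheap fast path */
		report_quiescent_state();  /* infrequent slow path */
}
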
diff --git a/kernel/resource.c b/kernel/resource.c
index 8957d686e29b..3c2237ac32db 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1288,13 +1288,10 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size)
1288 if (p->flags & IORESOURCE_BUSY) 1288 if (p->flags & IORESOURCE_BUSY)
1289 continue; 1289 continue;
1290 1290
1291 printk(KERN_WARNING "resource map sanity check conflict: " 1291 printk(KERN_WARNING "resource sanity check: requesting [mem %#010llx-%#010llx], which spans more than %s %pR\n",
1292 "0x%llx 0x%llx 0x%llx 0x%llx %s\n",
1293 (unsigned long long)addr, 1292 (unsigned long long)addr,
1294 (unsigned long long)(addr + size - 1), 1293 (unsigned long long)(addr + size - 1),
1295 (unsigned long long)p->start, 1294 p->name, p);
1296 (unsigned long long)p->end,
1297 p->name);
1298 err = -1; 1295 err = -1;
1299 break; 1296 break;
1300 } 1297 }
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 321d800e4baa..913c6d6cc2c1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -90,6 +90,22 @@
90#define CREATE_TRACE_POINTS 90#define CREATE_TRACE_POINTS
91#include <trace/events/sched.h> 91#include <trace/events/sched.h>
92 92
93#ifdef smp_mb__before_atomic
94void __smp_mb__before_atomic(void)
95{
96 smp_mb__before_atomic();
97}
98EXPORT_SYMBOL(__smp_mb__before_atomic);
99#endif
100
101#ifdef smp_mb__after_atomic
102void __smp_mb__after_atomic(void)
103{
104 smp_mb__after_atomic();
105}
106EXPORT_SYMBOL(__smp_mb__after_atomic);
107#endif
108
93void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period) 109void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
94{ 110{
95 unsigned long delta; 111 unsigned long delta;
@@ -4101,6 +4117,7 @@ static void __cond_resched(void)
4101 4117
4102int __sched _cond_resched(void) 4118int __sched _cond_resched(void)
4103{ 4119{
4120 rcu_cond_resched();
4104 if (should_resched()) { 4121 if (should_resched()) {
4105 __cond_resched(); 4122 __cond_resched();
4106 return 1; 4123 return 1;
@@ -4119,15 +4136,18 @@ EXPORT_SYMBOL(_cond_resched);
4119 */ 4136 */
4120int __cond_resched_lock(spinlock_t *lock) 4137int __cond_resched_lock(spinlock_t *lock)
4121{ 4138{
4139 bool need_rcu_resched = rcu_should_resched();
4122 int resched = should_resched(); 4140 int resched = should_resched();
4123 int ret = 0; 4141 int ret = 0;
4124 4142
4125 lockdep_assert_held(lock); 4143 lockdep_assert_held(lock);
4126 4144
4127 if (spin_needbreak(lock) || resched) { 4145 if (spin_needbreak(lock) || resched || need_rcu_resched) {
4128 spin_unlock(lock); 4146 spin_unlock(lock);
4129 if (resched) 4147 if (resched)
4130 __cond_resched(); 4148 __cond_resched();
4149 else if (unlikely(need_rcu_resched))
4150 rcu_resched();
4131 else 4151 else
4132 cpu_relax(); 4152 cpu_relax();
4133 ret = 1; 4153 ret = 1;
@@ -4141,6 +4161,7 @@ int __sched __cond_resched_softirq(void)
4141{ 4161{
4142 BUG_ON(!in_softirq()); 4162 BUG_ON(!in_softirq());
4143 4163
4164 rcu_cond_resched(); /* BH disabled OK, just recording QSes. */
4144 if (should_resched()) { 4165 if (should_resched()) {
4145 local_bh_enable(); 4166 local_bh_enable();
4146 __cond_resched(); 4167 __cond_resched();
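
The kernel/sched/core.c hunks hook RCU into the cond_resched() family: _cond_resched() and __cond_resched_softirq() report quiescent states via rcu_cond_resched(), and __cond_resched_lock() now drops the lock even when only RCU, not the scheduler, wants a breather. A userspace sketch of that lock-dropping pattern, where should_yield(), rcu_wants_break() and cond_resched_lock() are stand-ins rather than kernel APIs:

#include <pthread.h>
#include <sched.h>
#include <stdbool.h>

/* Hypothetical analogue of __cond_resched_lock(): in a long loop that holds
 * a lock, briefly drop it when either the scheduler wants the CPU or some
 * other subsystem (RCU in the patch) has asked for a break. */
static bool should_yield(void)     { return false; }  /* placeholder policy */
static bool rcu_wants_break(void)  { return false; }  /* placeholder policy */

static int cond_resched_lock(pthread_mutex_t *lock)
{
	bool need_break = rcu_wants_break();

	if (should_yield() || need_break) {
		pthread_mutex_unlock(lock);
		if (should_yield())
			sched_yield();   /* give the CPU away */
		/* else: dropping the lock was the point for the RCU case */
		pthread_mutex_lock(lock);
		return 1;
	}
	return 0;
}
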
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 8834243abee2..981fcd7dc394 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -165,7 +165,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
165 * do a write memory barrier, and then update the count, to 165 * do a write memory barrier, and then update the count, to
166 * make sure the vector is visible when count is set. 166 * make sure the vector is visible when count is set.
167 */ 167 */
168 smp_mb__before_atomic_inc(); 168 smp_mb__before_atomic();
169 atomic_inc(&(vec)->count); 169 atomic_inc(&(vec)->count);
170 do_mb = 1; 170 do_mb = 1;
171 } 171 }
@@ -185,14 +185,14 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
185 * the new priority vec. 185 * the new priority vec.
186 */ 186 */
187 if (do_mb) 187 if (do_mb)
188 smp_mb__after_atomic_inc(); 188 smp_mb__after_atomic();
189 189
190 /* 190 /*
191 * When removing from the vector, we decrement the counter first 191 * When removing from the vector, we decrement the counter first
192 * do a memory barrier and then clear the mask. 192 * do a memory barrier and then clear the mask.
193 */ 193 */
194 atomic_dec(&(vec)->count); 194 atomic_dec(&(vec)->count);
195 smp_mb__after_atomic_inc(); 195 smp_mb__after_atomic();
196 cpumask_clear_cpu(cpu, vec->mask); 196 cpumask_clear_cpu(cpu, vec->mask);
197 } 197 }
198 198
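
The cpupri.c hunk only renames the barrier wrappers, but the ordering it preserves is worth spelling out: the vector's mask must be visible before the count that advertises it, and the count must be decremented before the mask bit is cleared. A C11 sketch of that ordering, where struct prio_vec, vec_add() and vec_del() are invented and the seq_cst fences stand in for the kernel's full barriers around atomics:

#include <stdatomic.h>
#include <stdbool.h>

/* Hypothetical sketch: publish the mask before the count on add, retire the
 * count before clearing the mask on delete. Real cpupri uses cpumasks and
 * smp_mb__{before,after}_atomic(). */
struct prio_vec {
	atomic_int  count;
	atomic_bool mask;      /* one "CPU" for brevity */
};

static void vec_add(struct prio_vec *v)
{
	atomic_store_explicit(&v->mask, true, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);   /* mask visible before count */
	atomic_fetch_add_explicit(&v->count, 1, memory_order_relaxed);
}

static void vec_del(struct prio_vec *v)
{
	atomic_fetch_sub_explicit(&v->count, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);   /* count retired before mask */
	atomic_store_explicit(&v->mask, false, memory_order_relaxed);
}
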
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 7d50f794e248..0ffa20ae657b 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -394,7 +394,7 @@ EXPORT_SYMBOL(__wake_up_bit);
394 * 394 *
395 * In order for this to function properly, as it uses waitqueue_active() 395 * In order for this to function properly, as it uses waitqueue_active()
396 * internally, some kind of memory barrier must be done prior to calling 396 * internally, some kind of memory barrier must be done prior to calling
397 * this. Typically, this will be smp_mb__after_clear_bit(), but in some 397 * this. Typically, this will be smp_mb__after_atomic(), but in some
398 * cases where bitflags are manipulated non-atomically under a lock, one 398 * cases where bitflags are manipulated non-atomically under a lock, one
 399 * may need to use a less regular barrier, such as fs/inode.c's smp_mb(), 399
400 * because spin_unlock() does not guarantee a memory barrier. 400 * because spin_unlock() does not guarantee a memory barrier.
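
The wait.c comment above warns about a classic lost-wakeup hazard: a waker that checks waitqueue_active() must order "make the condition true" before "look for waiters", otherwise it can see a stale waiter count and skip a wakeup a newly arrived waiter needs. A C11 sketch of the waker side, where wake_up_waiters(), busy_bit and nr_waiters are invented for illustration:

#include <stdatomic.h>
#include <stdbool.h>

/* Hypothetical waker: clear the condition bit, full fence, then check for
 * waiters. The fence plays the role of smp_mb__after_atomic() here. */
static atomic_bool busy_bit = true;
static atomic_int  nr_waiters;

static void wake_up_waiters(void)
{
	/* in a real program: futex wake or pthread_cond_broadcast() */
}

static void clear_bit_and_wake(void)
{
	atomic_store_explicit(&busy_bit, false, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);  /* the barrier the comment demands */
	if (atomic_load_explicit(&nr_waiters, memory_order_relaxed) > 0)
		wake_up_waiters();                  /* safe to skip only after the fence */
}
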
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 92f24f5e8d52..5918d227730f 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -232,7 +232,6 @@ asmlinkage __visible void __do_softirq(void)
232 bool in_hardirq; 232 bool in_hardirq;
233 __u32 pending; 233 __u32 pending;
234 int softirq_bit; 234 int softirq_bit;
235 int cpu;
236 235
237 /* 236 /*
 238 * Mask out PF_MEMALLOC as current task context is borrowed for the 237
@@ -247,7 +246,6 @@ asmlinkage __visible void __do_softirq(void)
247 __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); 246 __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
248 in_hardirq = lockdep_softirq_start(); 247 in_hardirq = lockdep_softirq_start();
249 248
250 cpu = smp_processor_id();
251restart: 249restart:
252 /* Reset the pending bitmask before enabling irqs */ 250 /* Reset the pending bitmask before enabling irqs */
253 set_softirq_pending(0); 251 set_softirq_pending(0);
@@ -276,11 +274,11 @@ restart:
276 prev_count, preempt_count()); 274 prev_count, preempt_count());
277 preempt_count_set(prev_count); 275 preempt_count_set(prev_count);
278 } 276 }
279 rcu_bh_qs(cpu);
280 h++; 277 h++;
281 pending >>= softirq_bit; 278 pending >>= softirq_bit;
282 } 279 }
283 280
281 rcu_bh_qs(smp_processor_id());
284 local_irq_disable(); 282 local_irq_disable();
285 283
286 pending = local_softirq_pending(); 284 pending = local_softirq_pending();
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 74f5b580fe34..bc966a8ffc3e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -643,7 +643,7 @@ static struct ctl_table kern_table[] = {
643 .extra2 = &one, 643 .extra2 = &one,
644 }, 644 },
645#endif 645#endif
646 646#ifdef CONFIG_UEVENT_HELPER
647 { 647 {
648 .procname = "hotplug", 648 .procname = "hotplug",
649 .data = &uevent_helper, 649 .data = &uevent_helper,
@@ -651,7 +651,7 @@ static struct ctl_table kern_table[] = {
651 .mode = 0644, 651 .mode = 0644,
652 .proc_handler = proc_dostring, 652 .proc_handler = proc_dostring,
653 }, 653 },
654 654#endif
655#ifdef CONFIG_CHR_DEV_SG 655#ifdef CONFIG_CHR_DEV_SG
656 { 656 {
657 .procname = "sg-big-buff", 657 .procname = "sg-big-buff",
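
The sysctl.c hunk wraps the "hotplug" entry in #ifdef CONFIG_UEVENT_HELPER so the knob disappears along with the uevent-helper support it controls. A tiny sketch of that conditionally-compiled table-entry pattern, where struct knob, knobs[] and HAVE_UEVENT_HELPER are stand-ins, not kernel names:

#include <stddef.h>

/* Hypothetical sketch: a table entry exists only when the matching feature
 * is built in, so the table shrinks when the feature is configured out. */
struct knob {
	const char *name;
	int        *data;
};

static int uevent_helper_enabled;

static struct knob knobs[] = {
#ifdef HAVE_UEVENT_HELPER
	{ .name = "hotplug", .data = &uevent_helper_enabled },
#endif
	{ .name = NULL }   /* terminator, like the empty ctl_table entry */
};
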
diff --git a/kernel/torture.c b/kernel/torture.c
index acc9afc2f26e..40bb511cca48 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -335,13 +335,8 @@ static void torture_shuffle_tasks(void)
335 shuffle_idle_cpu = cpumask_next(shuffle_idle_cpu, shuffle_tmp_mask); 335 shuffle_idle_cpu = cpumask_next(shuffle_idle_cpu, shuffle_tmp_mask);
336 if (shuffle_idle_cpu >= nr_cpu_ids) 336 if (shuffle_idle_cpu >= nr_cpu_ids)
337 shuffle_idle_cpu = -1; 337 shuffle_idle_cpu = -1;
338 if (shuffle_idle_cpu != -1) { 338 else
339 cpumask_clear_cpu(shuffle_idle_cpu, shuffle_tmp_mask); 339 cpumask_clear_cpu(shuffle_idle_cpu, shuffle_tmp_mask);
340 if (cpumask_empty(shuffle_tmp_mask)) {
341 put_online_cpus();
342 return;
343 }
344 }
345 340
346 mutex_lock(&shuffle_task_mutex); 341 mutex_lock(&shuffle_task_mutex);
347 list_for_each_entry(stp, &shuffle_task_list, st_l) 342 list_for_each_entry(stp, &shuffle_task_list, st_l)
@@ -533,7 +528,11 @@ void stutter_wait(const char *title)
533 while (ACCESS_ONCE(stutter_pause_test) || 528 while (ACCESS_ONCE(stutter_pause_test) ||
534 (torture_runnable && !ACCESS_ONCE(*torture_runnable))) { 529 (torture_runnable && !ACCESS_ONCE(*torture_runnable))) {
535 if (stutter_pause_test) 530 if (stutter_pause_test)
536 schedule_timeout_interruptible(1); 531 if (ACCESS_ONCE(stutter_pause_test) == 1)
532 schedule_timeout_interruptible(1);
533 else
534 while (ACCESS_ONCE(stutter_pause_test))
535 cond_resched();
537 else 536 else
538 schedule_timeout_interruptible(round_jiffies_relative(HZ)); 537 schedule_timeout_interruptible(round_jiffies_relative(HZ));
539 torture_shutdown_absorb(title); 538 torture_shutdown_absorb(title);
@@ -550,7 +549,11 @@ static int torture_stutter(void *arg)
550 VERBOSE_TOROUT_STRING("torture_stutter task started"); 549 VERBOSE_TOROUT_STRING("torture_stutter task started");
551 do { 550 do {
552 if (!torture_must_stop()) { 551 if (!torture_must_stop()) {
553 schedule_timeout_interruptible(stutter); 552 if (stutter > 1) {
553 schedule_timeout_interruptible(stutter - 1);
554 ACCESS_ONCE(stutter_pause_test) = 2;
555 }
556 schedule_timeout_interruptible(1);
554 ACCESS_ONCE(stutter_pause_test) = 1; 557 ACCESS_ONCE(stutter_pause_test) = 1;
555 } 558 }
556 if (!torture_must_stop()) 559 if (!torture_must_stop())
@@ -596,21 +599,27 @@ static void torture_stutter_cleanup(void)
596 * The runnable parameter points to a flag that controls whether or not 599 * The runnable parameter points to a flag that controls whether or not
597 * the test is currently runnable. If there is no such flag, pass in NULL. 600 * the test is currently runnable. If there is no such flag, pass in NULL.
598 */ 601 */
599void __init torture_init_begin(char *ttype, bool v, int *runnable) 602bool torture_init_begin(char *ttype, bool v, int *runnable)
600{ 603{
601 mutex_lock(&fullstop_mutex); 604 mutex_lock(&fullstop_mutex);
605 if (torture_type != NULL) {
606 pr_alert("torture_init_begin: refusing %s init: %s running",
607 ttype, torture_type);
608 mutex_unlock(&fullstop_mutex);
609 return false;
610 }
602 torture_type = ttype; 611 torture_type = ttype;
603 verbose = v; 612 verbose = v;
604 torture_runnable = runnable; 613 torture_runnable = runnable;
605 fullstop = FULLSTOP_DONTSTOP; 614 fullstop = FULLSTOP_DONTSTOP;
606 615 return true;
607} 616}
608EXPORT_SYMBOL_GPL(torture_init_begin); 617EXPORT_SYMBOL_GPL(torture_init_begin);
609 618
610/* 619/*
611 * Tell the torture module that initialization is complete. 620 * Tell the torture module that initialization is complete.
612 */ 621 */
613void __init torture_init_end(void) 622void torture_init_end(void)
614{ 623{
615 mutex_unlock(&fullstop_mutex); 624 mutex_unlock(&fullstop_mutex);
616 register_reboot_notifier(&torture_shutdown_nb); 625 register_reboot_notifier(&torture_shutdown_nb);
@@ -642,6 +651,9 @@ bool torture_cleanup(void)
642 torture_shuffle_cleanup(); 651 torture_shuffle_cleanup();
643 torture_stutter_cleanup(); 652 torture_stutter_cleanup();
644 torture_onoff_cleanup(); 653 torture_onoff_cleanup();
654 mutex_lock(&fullstop_mutex);
655 torture_type = NULL;
656 mutex_unlock(&fullstop_mutex);
645 return false; 657 return false;
646} 658}
647EXPORT_SYMBOL_GPL(torture_cleanup); 659EXPORT_SYMBOL_GPL(torture_cleanup);
@@ -674,8 +686,10 @@ EXPORT_SYMBOL_GPL(torture_must_stop_irq);
674 */ 686 */
675void torture_kthread_stopping(char *title) 687void torture_kthread_stopping(char *title)
676{ 688{
677 if (verbose) 689 char buf[128];
678 VERBOSE_TOROUT_STRING(title); 690
691 snprintf(buf, sizeof(buf), "Stopping %s", title);
692 VERBOSE_TOROUT_STRING(buf);
679 while (!kthread_should_stop()) { 693 while (!kthread_should_stop()) {
680 torture_shutdown_absorb(title); 694 torture_shutdown_absorb(title);
681 schedule_timeout_uninterruptible(1); 695 schedule_timeout_uninterruptible(1);
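
The torture.c changes make torture_init_begin() return bool and refuse a second concurrent torture test, with torture_cleanup() clearing the recorded type under fullstop_mutex. A userspace sketch of that "single active registration" pattern, where test_init_begin(), test_init_end(), test_cleanup() and active_type are invented names; as in the kernel code, the lock stays held from a successful init_begin until init_end:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical sketch: only one test type may be registered at a time; a
 * second registration attempt is refused instead of clobbering the first. */
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
static const char *active_type;    /* NULL means no test is running */

static bool test_init_begin(const char *type)
{
	pthread_mutex_lock(&init_lock);
	if (active_type != NULL) {
		fprintf(stderr, "refusing %s init: %s already running\n",
			type, active_type);
		pthread_mutex_unlock(&init_lock);
		return false;
	}
	active_type = type;
	return true;               /* lock held until test_init_end() */
}

static void test_init_end(void)
{
	pthread_mutex_unlock(&init_lock);
}

static void test_cleanup(void)
{
	pthread_mutex_lock(&init_lock);
	active_type = NULL;
	pthread_mutex_unlock(&init_lock);
}
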