aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Kconfig.locks4
-rw-r--r--kernel/cpu.c56
-rw-r--r--kernel/futex.c6
-rw-r--r--kernel/locking/Makefile3
-rw-r--r--kernel/locking/mcs_spinlock.h16
-rw-r--r--kernel/locking/mutex.c60
-rw-r--r--kernel/locking/osq_lock.c (renamed from kernel/locking/mcs_spinlock.c)9
-rw-r--r--kernel/locking/rtmutex.c7
-rw-r--r--kernel/locking/rwsem-spinlock.c2
-rw-r--r--kernel/locking/rwsem-xadd.c3
-rw-r--r--kernel/notifier.c3
-rw-r--r--kernel/power/Kconfig1
-rw-r--r--kernel/rcu/Makefile3
-rw-r--r--kernel/rcu/rcu.h6
-rw-r--r--kernel/rcu/rcutorture.c66
-rw-r--r--kernel/rcu/srcu.c2
-rw-r--r--kernel/rcu/tiny.c113
-rw-r--r--kernel/rcu/tiny_plugin.h9
-rw-r--r--kernel/rcu/tree.c355
-rw-r--r--kernel/rcu/tree.h62
-rw-r--r--kernel/rcu/tree_plugin.h271
-rw-r--r--kernel/rcu/tree_trace.c8
-rw-r--r--kernel/sched/completion.c18
-rw-r--r--kernel/sched/core.c36
-rw-r--r--kernel/sched/deadline.c3
-rw-r--r--kernel/sched/fair.c2
-rw-r--r--kernel/smpboot.c2
-rw-r--r--kernel/softirq.c9
-rw-r--r--kernel/time/hrtimer.c2
29 files changed, 540 insertions, 597 deletions
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 76768ee812b2..08561f1acd13 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -231,6 +231,10 @@ config RWSEM_SPIN_ON_OWNER
231 def_bool y 231 def_bool y
232 depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW 232 depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
233 233
234config LOCK_SPIN_ON_OWNER
235 def_bool y
236 depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER
237
234config ARCH_USE_QUEUE_RWLOCK 238config ARCH_USE_QUEUE_RWLOCK
235 bool 239 bool
236 240
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5d220234b3ca..1972b161c61e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -58,22 +58,23 @@ static int cpu_hotplug_disabled;
58 58
59static struct { 59static struct {
60 struct task_struct *active_writer; 60 struct task_struct *active_writer;
61 struct mutex lock; /* Synchronizes accesses to refcount, */ 61 /* wait queue to wake up the active_writer */
62 wait_queue_head_t wq;
63 /* verifies that no writer will get active while readers are active */
64 struct mutex lock;
62 /* 65 /*
63 * Also blocks the new readers during 66 * Also blocks the new readers during
64 * an ongoing cpu hotplug operation. 67 * an ongoing cpu hotplug operation.
65 */ 68 */
66 int refcount; 69 atomic_t refcount;
67 /* And allows lockless put_online_cpus(). */
68 atomic_t puts_pending;
69 70
70#ifdef CONFIG_DEBUG_LOCK_ALLOC 71#ifdef CONFIG_DEBUG_LOCK_ALLOC
71 struct lockdep_map dep_map; 72 struct lockdep_map dep_map;
72#endif 73#endif
73} cpu_hotplug = { 74} cpu_hotplug = {
74 .active_writer = NULL, 75 .active_writer = NULL,
76 .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
75 .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock), 77 .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
76 .refcount = 0,
77#ifdef CONFIG_DEBUG_LOCK_ALLOC 78#ifdef CONFIG_DEBUG_LOCK_ALLOC
78 .dep_map = {.name = "cpu_hotplug.lock" }, 79 .dep_map = {.name = "cpu_hotplug.lock" },
79#endif 80#endif
@@ -86,15 +87,6 @@ static struct {
86#define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) 87#define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
87#define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) 88#define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)
88 89
89static void apply_puts_pending(int max)
90{
91 int delta;
92
93 if (atomic_read(&cpu_hotplug.puts_pending) >= max) {
94 delta = atomic_xchg(&cpu_hotplug.puts_pending, 0);
95 cpu_hotplug.refcount -= delta;
96 }
97}
98 90
99void get_online_cpus(void) 91void get_online_cpus(void)
100{ 92{
@@ -103,8 +95,7 @@ void get_online_cpus(void)
103 return; 95 return;
104 cpuhp_lock_acquire_read(); 96 cpuhp_lock_acquire_read();
105 mutex_lock(&cpu_hotplug.lock); 97 mutex_lock(&cpu_hotplug.lock);
106 apply_puts_pending(65536); 98 atomic_inc(&cpu_hotplug.refcount);
107 cpu_hotplug.refcount++;
108 mutex_unlock(&cpu_hotplug.lock); 99 mutex_unlock(&cpu_hotplug.lock);
109} 100}
110EXPORT_SYMBOL_GPL(get_online_cpus); 101EXPORT_SYMBOL_GPL(get_online_cpus);
@@ -116,8 +107,7 @@ bool try_get_online_cpus(void)
116 if (!mutex_trylock(&cpu_hotplug.lock)) 107 if (!mutex_trylock(&cpu_hotplug.lock))
117 return false; 108 return false;
118 cpuhp_lock_acquire_tryread(); 109 cpuhp_lock_acquire_tryread();
119 apply_puts_pending(65536); 110 atomic_inc(&cpu_hotplug.refcount);
120 cpu_hotplug.refcount++;
121 mutex_unlock(&cpu_hotplug.lock); 111 mutex_unlock(&cpu_hotplug.lock);
122 return true; 112 return true;
123} 113}
@@ -125,20 +115,18 @@ EXPORT_SYMBOL_GPL(try_get_online_cpus);
125 115
126void put_online_cpus(void) 116void put_online_cpus(void)
127{ 117{
118 int refcount;
119
128 if (cpu_hotplug.active_writer == current) 120 if (cpu_hotplug.active_writer == current)
129 return; 121 return;
130 if (!mutex_trylock(&cpu_hotplug.lock)) {
131 atomic_inc(&cpu_hotplug.puts_pending);
132 cpuhp_lock_release();
133 return;
134 }
135 122
136 if (WARN_ON(!cpu_hotplug.refcount)) 123 refcount = atomic_dec_return(&cpu_hotplug.refcount);
137 cpu_hotplug.refcount++; /* try to fix things up */ 124 if (WARN_ON(refcount < 0)) /* try to fix things up */
125 atomic_inc(&cpu_hotplug.refcount);
126
127 if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
128 wake_up(&cpu_hotplug.wq);
138 129
139 if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
140 wake_up_process(cpu_hotplug.active_writer);
141 mutex_unlock(&cpu_hotplug.lock);
142 cpuhp_lock_release(); 130 cpuhp_lock_release();
143 131
144} 132}
@@ -168,18 +156,20 @@ EXPORT_SYMBOL_GPL(put_online_cpus);
168 */ 156 */
169void cpu_hotplug_begin(void) 157void cpu_hotplug_begin(void)
170{ 158{
171 cpu_hotplug.active_writer = current; 159 DEFINE_WAIT(wait);
172 160
161 cpu_hotplug.active_writer = current;
173 cpuhp_lock_acquire(); 162 cpuhp_lock_acquire();
163
174 for (;;) { 164 for (;;) {
175 mutex_lock(&cpu_hotplug.lock); 165 mutex_lock(&cpu_hotplug.lock);
176 apply_puts_pending(1); 166 prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
177 if (likely(!cpu_hotplug.refcount)) 167 if (likely(!atomic_read(&cpu_hotplug.refcount)))
178 break; 168 break;
179 __set_current_state(TASK_UNINTERRUPTIBLE);
180 mutex_unlock(&cpu_hotplug.lock); 169 mutex_unlock(&cpu_hotplug.lock);
181 schedule(); 170 schedule();
182 } 171 }
172 finish_wait(&cpu_hotplug.wq, &wait);
183} 173}
184 174
185void cpu_hotplug_done(void) 175void cpu_hotplug_done(void)
diff --git a/kernel/futex.c b/kernel/futex.c
index 63678b573d61..4eeb63de7e54 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2258,7 +2258,7 @@ static long futex_wait_restart(struct restart_block *restart)
2258 * if there are waiters then it will block, it does PI, etc. (Due to 2258 * if there are waiters then it will block, it does PI, etc. (Due to
2259 * races the kernel might see a 0 value of the futex too.) 2259 * races the kernel might see a 0 value of the futex too.)
2260 */ 2260 */
2261static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect, 2261static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
2262 ktime_t *time, int trylock) 2262 ktime_t *time, int trylock)
2263{ 2263{
2264 struct hrtimer_sleeper timeout, *to = NULL; 2264 struct hrtimer_sleeper timeout, *to = NULL;
@@ -2953,11 +2953,11 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
2953 case FUTEX_WAKE_OP: 2953 case FUTEX_WAKE_OP:
2954 return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3); 2954 return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
2955 case FUTEX_LOCK_PI: 2955 case FUTEX_LOCK_PI:
2956 return futex_lock_pi(uaddr, flags, val, timeout, 0); 2956 return futex_lock_pi(uaddr, flags, timeout, 0);
2957 case FUTEX_UNLOCK_PI: 2957 case FUTEX_UNLOCK_PI:
2958 return futex_unlock_pi(uaddr, flags); 2958 return futex_unlock_pi(uaddr, flags);
2959 case FUTEX_TRYLOCK_PI: 2959 case FUTEX_TRYLOCK_PI:
2960 return futex_lock_pi(uaddr, flags, 0, timeout, 1); 2960 return futex_lock_pi(uaddr, flags, NULL, 1);
2961 case FUTEX_WAIT_REQUEUE_PI: 2961 case FUTEX_WAIT_REQUEUE_PI:
2962 val3 = FUTEX_BITSET_MATCH_ANY; 2962 val3 = FUTEX_BITSET_MATCH_ANY;
2963 return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3, 2963 return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 8541bfdfd232..4ca8eb151975 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -1,5 +1,5 @@
1 1
2obj-y += mutex.o semaphore.o rwsem.o mcs_spinlock.o 2obj-y += mutex.o semaphore.o rwsem.o
3 3
4ifdef CONFIG_FUNCTION_TRACER 4ifdef CONFIG_FUNCTION_TRACER
5CFLAGS_REMOVE_lockdep.o = -pg 5CFLAGS_REMOVE_lockdep.o = -pg
@@ -14,6 +14,7 @@ ifeq ($(CONFIG_PROC_FS),y)
14obj-$(CONFIG_LOCKDEP) += lockdep_proc.o 14obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
15endif 15endif
16obj-$(CONFIG_SMP) += spinlock.o 16obj-$(CONFIG_SMP) += spinlock.o
17obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o
17obj-$(CONFIG_SMP) += lglock.o 18obj-$(CONFIG_SMP) += lglock.o
18obj-$(CONFIG_PROVE_LOCKING) += spinlock.o 19obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
19obj-$(CONFIG_RT_MUTEXES) += rtmutex.o 20obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index 4d60986fcbee..d1fe2ba5bac9 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -108,20 +108,4 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
108 arch_mcs_spin_unlock_contended(&next->locked); 108 arch_mcs_spin_unlock_contended(&next->locked);
109} 109}
110 110
111/*
112 * Cancellable version of the MCS lock above.
113 *
114 * Intended for adaptive spinning of sleeping locks:
115 * mutex_lock()/rwsem_down_{read,write}() etc.
116 */
117
118struct optimistic_spin_node {
119 struct optimistic_spin_node *next, *prev;
120 int locked; /* 1 if lock acquired */
121 int cpu; /* encoded CPU # value */
122};
123
124extern bool osq_lock(struct optimistic_spin_queue *lock);
125extern void osq_unlock(struct optimistic_spin_queue *lock);
126
127#endif /* __LINUX_MCS_SPINLOCK_H */ 111#endif /* __LINUX_MCS_SPINLOCK_H */
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 454195194d4a..57407062e209 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -147,7 +147,7 @@ static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
147} 147}
148 148
149/* 149/*
150 * after acquiring lock with fastpath or when we lost out in contested 150 * After acquiring lock with fastpath or when we lost out in contested
151 * slowpath, set ctx and wake up any waiters so they can recheck. 151 * slowpath, set ctx and wake up any waiters so they can recheck.
152 * 152 *
153 * This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set, 153 * This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set,
@@ -191,19 +191,32 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock,
191 spin_unlock_mutex(&lock->base.wait_lock, flags); 191 spin_unlock_mutex(&lock->base.wait_lock, flags);
192} 192}
193 193
194
195#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
196/* 194/*
197 * In order to avoid a stampede of mutex spinners from acquiring the mutex 195 * After acquiring lock in the slowpath set ctx and wake up any
198 * more or less simultaneously, the spinners need to acquire a MCS lock 196 * waiters so they can recheck.
199 * first before spinning on the owner field.
200 * 197 *
198 * Callers must hold the mutex wait_lock.
201 */ 199 */
200static __always_inline void
201ww_mutex_set_context_slowpath(struct ww_mutex *lock,
202 struct ww_acquire_ctx *ctx)
203{
204 struct mutex_waiter *cur;
202 205
203/* 206 ww_mutex_lock_acquired(lock, ctx);
204 * Mutex spinning code migrated from kernel/sched/core.c 207 lock->ctx = ctx;
205 */ 208
209 /*
210 * Give any possible sleeping processes the chance to wake up,
211 * so they can recheck if they have to back off.
212 */
213 list_for_each_entry(cur, &lock->base.wait_list, list) {
214 debug_mutex_wake_waiter(&lock->base, cur);
215 wake_up_process(cur->task);
216 }
217}
206 218
219#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
207static inline bool owner_running(struct mutex *lock, struct task_struct *owner) 220static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
208{ 221{
209 if (lock->owner != owner) 222 if (lock->owner != owner)
@@ -307,6 +320,11 @@ static bool mutex_optimistic_spin(struct mutex *lock,
307 if (!mutex_can_spin_on_owner(lock)) 320 if (!mutex_can_spin_on_owner(lock))
308 goto done; 321 goto done;
309 322
323 /*
324 * In order to avoid a stampede of mutex spinners trying to
325 * acquire the mutex all at once, the spinners need to take a
326 * MCS (queued) lock first before spinning on the owner field.
327 */
310 if (!osq_lock(&lock->osq)) 328 if (!osq_lock(&lock->osq))
311 goto done; 329 goto done;
312 330
@@ -469,7 +487,7 @@ void __sched ww_mutex_unlock(struct ww_mutex *lock)
469EXPORT_SYMBOL(ww_mutex_unlock); 487EXPORT_SYMBOL(ww_mutex_unlock);
470 488
471static inline int __sched 489static inline int __sched
472__mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) 490__ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
473{ 491{
474 struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); 492 struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
475 struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx); 493 struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
@@ -557,7 +575,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
557 } 575 }
558 576
559 if (use_ww_ctx && ww_ctx->acquired > 0) { 577 if (use_ww_ctx && ww_ctx->acquired > 0) {
560 ret = __mutex_lock_check_stamp(lock, ww_ctx); 578 ret = __ww_mutex_lock_check_stamp(lock, ww_ctx);
561 if (ret) 579 if (ret)
562 goto err; 580 goto err;
563 } 581 }
@@ -569,6 +587,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
569 schedule_preempt_disabled(); 587 schedule_preempt_disabled();
570 spin_lock_mutex(&lock->wait_lock, flags); 588 spin_lock_mutex(&lock->wait_lock, flags);
571 } 589 }
590 __set_task_state(task, TASK_RUNNING);
591
572 mutex_remove_waiter(lock, &waiter, current_thread_info()); 592 mutex_remove_waiter(lock, &waiter, current_thread_info());
573 /* set it to 0 if there are no waiters left: */ 593 /* set it to 0 if there are no waiters left: */
574 if (likely(list_empty(&lock->wait_list))) 594 if (likely(list_empty(&lock->wait_list)))
@@ -582,23 +602,7 @@ skip_wait:
582 602
583 if (use_ww_ctx) { 603 if (use_ww_ctx) {
584 struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); 604 struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
585 struct mutex_waiter *cur; 605 ww_mutex_set_context_slowpath(ww, ww_ctx);
586
587 /*
588 * This branch gets optimized out for the common case,
589 * and is only important for ww_mutex_lock.
590 */
591 ww_mutex_lock_acquired(ww, ww_ctx);
592 ww->ctx = ww_ctx;
593
594 /*
595 * Give any possible sleeping processes the chance to wake up,
596 * so they can recheck if they have to back off.
597 */
598 list_for_each_entry(cur, &lock->wait_list, list) {
599 debug_mutex_wake_waiter(lock, cur);
600 wake_up_process(cur->task);
601 }
602 } 606 }
603 607
604 spin_unlock_mutex(&lock->wait_lock, flags); 608 spin_unlock_mutex(&lock->wait_lock, flags);
diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/osq_lock.c
index 9887a905a762..c112d00341b0 100644
--- a/kernel/locking/mcs_spinlock.c
+++ b/kernel/locking/osq_lock.c
@@ -1,8 +1,6 @@
1#include <linux/percpu.h> 1#include <linux/percpu.h>
2#include <linux/sched.h> 2#include <linux/sched.h>
3#include "mcs_spinlock.h" 3#include <linux/osq_lock.h>
4
5#ifdef CONFIG_SMP
6 4
7/* 5/*
8 * An MCS like lock especially tailored for optimistic spinning for sleeping 6 * An MCS like lock especially tailored for optimistic spinning for sleeping
@@ -111,7 +109,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
111 * cmpxchg in an attempt to undo our queueing. 109 * cmpxchg in an attempt to undo our queueing.
112 */ 110 */
113 111
114 while (!smp_load_acquire(&node->locked)) { 112 while (!ACCESS_ONCE(node->locked)) {
115 /* 113 /*
116 * If we need to reschedule bail... so we can block. 114 * If we need to reschedule bail... so we can block.
117 */ 115 */
@@ -203,6 +201,3 @@ void osq_unlock(struct optimistic_spin_queue *lock)
203 if (next) 201 if (next)
204 ACCESS_ONCE(next->locked) = 1; 202 ACCESS_ONCE(next->locked) = 1;
205} 203}
206
207#endif
208
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 7c98873a3077..3059bc2f022d 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1130,6 +1130,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
1130 set_current_state(state); 1130 set_current_state(state);
1131 } 1131 }
1132 1132
1133 __set_current_state(TASK_RUNNING);
1133 return ret; 1134 return ret;
1134} 1135}
1135 1136
@@ -1188,10 +1189,9 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
1188 ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk); 1189 ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);
1189 1190
1190 if (likely(!ret)) 1191 if (likely(!ret))
1192 /* sleep on the mutex */
1191 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); 1193 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
1192 1194
1193 set_current_state(TASK_RUNNING);
1194
1195 if (unlikely(ret)) { 1195 if (unlikely(ret)) {
1196 remove_waiter(lock, &waiter); 1196 remove_waiter(lock, &waiter);
1197 rt_mutex_handle_deadlock(ret, chwalk, &waiter); 1197 rt_mutex_handle_deadlock(ret, chwalk, &waiter);
@@ -1626,10 +1626,9 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1626 1626
1627 set_current_state(TASK_INTERRUPTIBLE); 1627 set_current_state(TASK_INTERRUPTIBLE);
1628 1628
1629 /* sleep on the mutex */
1629 ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); 1630 ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
1630 1631
1631 set_current_state(TASK_RUNNING);
1632
1633 if (unlikely(ret)) 1632 if (unlikely(ret))
1634 remove_waiter(lock, waiter); 1633 remove_waiter(lock, waiter);
1635 1634
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index 2c93571162cb..2555ae15ec14 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -154,7 +154,7 @@ void __sched __down_read(struct rw_semaphore *sem)
154 set_task_state(tsk, TASK_UNINTERRUPTIBLE); 154 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
155 } 155 }
156 156
157 tsk->state = TASK_RUNNING; 157 __set_task_state(tsk, TASK_RUNNING);
158 out: 158 out:
159 ; 159 ;
160} 160}
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 7628c3fc37ca..2f7cc4076f50 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -242,8 +242,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
242 schedule(); 242 schedule();
243 } 243 }
244 244
245 tsk->state = TASK_RUNNING; 245 __set_task_state(tsk, TASK_RUNNING);
246
247 return sem; 246 return sem;
248} 247}
249EXPORT_SYMBOL(rwsem_down_read_failed); 248EXPORT_SYMBOL(rwsem_down_read_failed);
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 4803da6eab62..ae9fc7cc360e 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -402,6 +402,7 @@ int raw_notifier_call_chain(struct raw_notifier_head *nh,
402} 402}
403EXPORT_SYMBOL_GPL(raw_notifier_call_chain); 403EXPORT_SYMBOL_GPL(raw_notifier_call_chain);
404 404
405#ifdef CONFIG_SRCU
405/* 406/*
406 * SRCU notifier chain routines. Registration and unregistration 407 * SRCU notifier chain routines. Registration and unregistration
407 * use a mutex, and call_chain is synchronized by SRCU (no locks). 408 * use a mutex, and call_chain is synchronized by SRCU (no locks).
@@ -528,6 +529,8 @@ void srcu_init_notifier_head(struct srcu_notifier_head *nh)
528} 529}
529EXPORT_SYMBOL_GPL(srcu_init_notifier_head); 530EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
530 531
532#endif /* CONFIG_SRCU */
533
531static ATOMIC_NOTIFIER_HEAD(die_chain); 534static ATOMIC_NOTIFIER_HEAD(die_chain);
532 535
533int notrace notify_die(enum die_val val, const char *str, 536int notrace notify_die(enum die_val val, const char *str,
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 48b28d387c7f..7e01f78f0417 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -251,6 +251,7 @@ config APM_EMULATION
251 251
252config PM_OPP 252config PM_OPP
253 bool 253 bool
254 select SRCU
254 ---help--- 255 ---help---
255 SOCs have a standard set of tuples consisting of frequency and 256 SOCs have a standard set of tuples consisting of frequency and
256 voltage pairs that the device will support per voltage domain. This 257 voltage pairs that the device will support per voltage domain. This
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index e6fae503d1bc..50a808424b06 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -1,4 +1,5 @@
1obj-y += update.o srcu.o 1obj-y += update.o
2obj-$(CONFIG_SRCU) += srcu.o
2obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o 3obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
3obj-$(CONFIG_TREE_RCU) += tree.o 4obj-$(CONFIG_TREE_RCU) += tree.o
4obj-$(CONFIG_PREEMPT_RCU) += tree.o 5obj-$(CONFIG_PREEMPT_RCU) += tree.o
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 07bb02eda844..80adef7d4c3d 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -137,4 +137,10 @@ int rcu_jiffies_till_stall_check(void);
137 137
138void rcu_early_boot_tests(void); 138void rcu_early_boot_tests(void);
139 139
140/*
141 * This function really isn't for public consumption, but RCU is special in
142 * that context switches can allow the state machine to make progress.
143 */
144extern void resched_cpu(int cpu);
145
140#endif /* __LINUX_RCU_H */ 146#endif /* __LINUX_RCU_H */
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 4d559baf06e0..30d42aa55d83 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -244,7 +244,8 @@ struct rcu_torture_ops {
244 int (*readlock)(void); 244 int (*readlock)(void);
245 void (*read_delay)(struct torture_random_state *rrsp); 245 void (*read_delay)(struct torture_random_state *rrsp);
246 void (*readunlock)(int idx); 246 void (*readunlock)(int idx);
247 int (*completed)(void); 247 unsigned long (*started)(void);
248 unsigned long (*completed)(void);
248 void (*deferred_free)(struct rcu_torture *p); 249 void (*deferred_free)(struct rcu_torture *p);
249 void (*sync)(void); 250 void (*sync)(void);
250 void (*exp_sync)(void); 251 void (*exp_sync)(void);
@@ -296,11 +297,6 @@ static void rcu_torture_read_unlock(int idx) __releases(RCU)
296 rcu_read_unlock(); 297 rcu_read_unlock();
297} 298}
298 299
299static int rcu_torture_completed(void)
300{
301 return rcu_batches_completed();
302}
303
304/* 300/*
305 * Update callback in the pipe. This should be invoked after a grace period. 301 * Update callback in the pipe. This should be invoked after a grace period.
306 */ 302 */
@@ -356,7 +352,7 @@ rcu_torture_cb(struct rcu_head *p)
356 cur_ops->deferred_free(rp); 352 cur_ops->deferred_free(rp);
357} 353}
358 354
359static int rcu_no_completed(void) 355static unsigned long rcu_no_completed(void)
360{ 356{
361 return 0; 357 return 0;
362} 358}
@@ -377,7 +373,8 @@ static struct rcu_torture_ops rcu_ops = {
377 .readlock = rcu_torture_read_lock, 373 .readlock = rcu_torture_read_lock,
378 .read_delay = rcu_read_delay, 374 .read_delay = rcu_read_delay,
379 .readunlock = rcu_torture_read_unlock, 375 .readunlock = rcu_torture_read_unlock,
380 .completed = rcu_torture_completed, 376 .started = rcu_batches_started,
377 .completed = rcu_batches_completed,
381 .deferred_free = rcu_torture_deferred_free, 378 .deferred_free = rcu_torture_deferred_free,
382 .sync = synchronize_rcu, 379 .sync = synchronize_rcu,
383 .exp_sync = synchronize_rcu_expedited, 380 .exp_sync = synchronize_rcu_expedited,
@@ -407,11 +404,6 @@ static void rcu_bh_torture_read_unlock(int idx) __releases(RCU_BH)
407 rcu_read_unlock_bh(); 404 rcu_read_unlock_bh();
408} 405}
409 406
410static int rcu_bh_torture_completed(void)
411{
412 return rcu_batches_completed_bh();
413}
414
415static void rcu_bh_torture_deferred_free(struct rcu_torture *p) 407static void rcu_bh_torture_deferred_free(struct rcu_torture *p)
416{ 408{
417 call_rcu_bh(&p->rtort_rcu, rcu_torture_cb); 409 call_rcu_bh(&p->rtort_rcu, rcu_torture_cb);
@@ -423,7 +415,8 @@ static struct rcu_torture_ops rcu_bh_ops = {
423 .readlock = rcu_bh_torture_read_lock, 415 .readlock = rcu_bh_torture_read_lock,
424 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 416 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
425 .readunlock = rcu_bh_torture_read_unlock, 417 .readunlock = rcu_bh_torture_read_unlock,
426 .completed = rcu_bh_torture_completed, 418 .started = rcu_batches_started_bh,
419 .completed = rcu_batches_completed_bh,
427 .deferred_free = rcu_bh_torture_deferred_free, 420 .deferred_free = rcu_bh_torture_deferred_free,
428 .sync = synchronize_rcu_bh, 421 .sync = synchronize_rcu_bh,
429 .exp_sync = synchronize_rcu_bh_expedited, 422 .exp_sync = synchronize_rcu_bh_expedited,
@@ -466,6 +459,7 @@ static struct rcu_torture_ops rcu_busted_ops = {
466 .readlock = rcu_torture_read_lock, 459 .readlock = rcu_torture_read_lock,
467 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 460 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
468 .readunlock = rcu_torture_read_unlock, 461 .readunlock = rcu_torture_read_unlock,
462 .started = rcu_no_completed,
469 .completed = rcu_no_completed, 463 .completed = rcu_no_completed,
470 .deferred_free = rcu_busted_torture_deferred_free, 464 .deferred_free = rcu_busted_torture_deferred_free,
471 .sync = synchronize_rcu_busted, 465 .sync = synchronize_rcu_busted,
@@ -510,7 +504,7 @@ static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl)
510 srcu_read_unlock(&srcu_ctl, idx); 504 srcu_read_unlock(&srcu_ctl, idx);
511} 505}
512 506
513static int srcu_torture_completed(void) 507static unsigned long srcu_torture_completed(void)
514{ 508{
515 return srcu_batches_completed(&srcu_ctl); 509 return srcu_batches_completed(&srcu_ctl);
516} 510}
@@ -564,6 +558,7 @@ static struct rcu_torture_ops srcu_ops = {
564 .readlock = srcu_torture_read_lock, 558 .readlock = srcu_torture_read_lock,
565 .read_delay = srcu_read_delay, 559 .read_delay = srcu_read_delay,
566 .readunlock = srcu_torture_read_unlock, 560 .readunlock = srcu_torture_read_unlock,
561 .started = NULL,
567 .completed = srcu_torture_completed, 562 .completed = srcu_torture_completed,
568 .deferred_free = srcu_torture_deferred_free, 563 .deferred_free = srcu_torture_deferred_free,
569 .sync = srcu_torture_synchronize, 564 .sync = srcu_torture_synchronize,
@@ -600,7 +595,8 @@ static struct rcu_torture_ops sched_ops = {
600 .readlock = sched_torture_read_lock, 595 .readlock = sched_torture_read_lock,
601 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 596 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
602 .readunlock = sched_torture_read_unlock, 597 .readunlock = sched_torture_read_unlock,
603 .completed = rcu_no_completed, 598 .started = rcu_batches_started_sched,
599 .completed = rcu_batches_completed_sched,
604 .deferred_free = rcu_sched_torture_deferred_free, 600 .deferred_free = rcu_sched_torture_deferred_free,
605 .sync = synchronize_sched, 601 .sync = synchronize_sched,
606 .exp_sync = synchronize_sched_expedited, 602 .exp_sync = synchronize_sched_expedited,
@@ -638,6 +634,7 @@ static struct rcu_torture_ops tasks_ops = {
638 .readlock = tasks_torture_read_lock, 634 .readlock = tasks_torture_read_lock,
639 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 635 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
640 .readunlock = tasks_torture_read_unlock, 636 .readunlock = tasks_torture_read_unlock,
637 .started = rcu_no_completed,
641 .completed = rcu_no_completed, 638 .completed = rcu_no_completed,
642 .deferred_free = rcu_tasks_torture_deferred_free, 639 .deferred_free = rcu_tasks_torture_deferred_free,
643 .sync = synchronize_rcu_tasks, 640 .sync = synchronize_rcu_tasks,
@@ -1015,8 +1012,8 @@ static void rcutorture_trace_dump(void)
1015static void rcu_torture_timer(unsigned long unused) 1012static void rcu_torture_timer(unsigned long unused)
1016{ 1013{
1017 int idx; 1014 int idx;
1018 int completed; 1015 unsigned long started;
1019 int completed_end; 1016 unsigned long completed;
1020 static DEFINE_TORTURE_RANDOM(rand); 1017 static DEFINE_TORTURE_RANDOM(rand);
1021 static DEFINE_SPINLOCK(rand_lock); 1018 static DEFINE_SPINLOCK(rand_lock);
1022 struct rcu_torture *p; 1019 struct rcu_torture *p;
@@ -1024,7 +1021,10 @@ static void rcu_torture_timer(unsigned long unused)
1024 unsigned long long ts; 1021 unsigned long long ts;
1025 1022
1026 idx = cur_ops->readlock(); 1023 idx = cur_ops->readlock();
1027 completed = cur_ops->completed(); 1024 if (cur_ops->started)
1025 started = cur_ops->started();
1026 else
1027 started = cur_ops->completed();
1028 ts = rcu_trace_clock_local(); 1028 ts = rcu_trace_clock_local();
1029 p = rcu_dereference_check(rcu_torture_current, 1029 p = rcu_dereference_check(rcu_torture_current,
1030 rcu_read_lock_bh_held() || 1030 rcu_read_lock_bh_held() ||
@@ -1047,14 +1047,16 @@ static void rcu_torture_timer(unsigned long unused)
1047 /* Should not happen, but... */ 1047 /* Should not happen, but... */
1048 pipe_count = RCU_TORTURE_PIPE_LEN; 1048 pipe_count = RCU_TORTURE_PIPE_LEN;
1049 } 1049 }
1050 completed_end = cur_ops->completed(); 1050 completed = cur_ops->completed();
1051 if (pipe_count > 1) { 1051 if (pipe_count > 1) {
1052 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts, 1052 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts,
1053 completed, completed_end); 1053 started, completed);
1054 rcutorture_trace_dump(); 1054 rcutorture_trace_dump();
1055 } 1055 }
1056 __this_cpu_inc(rcu_torture_count[pipe_count]); 1056 __this_cpu_inc(rcu_torture_count[pipe_count]);
1057 completed = completed_end - completed; 1057 completed = completed - started;
1058 if (cur_ops->started)
1059 completed++;
1058 if (completed > RCU_TORTURE_PIPE_LEN) { 1060 if (completed > RCU_TORTURE_PIPE_LEN) {
1059 /* Should not happen, but... */ 1061 /* Should not happen, but... */
1060 completed = RCU_TORTURE_PIPE_LEN; 1062 completed = RCU_TORTURE_PIPE_LEN;
@@ -1073,8 +1075,8 @@ static void rcu_torture_timer(unsigned long unused)
1073static int 1075static int
1074rcu_torture_reader(void *arg) 1076rcu_torture_reader(void *arg)
1075{ 1077{
1076 int completed; 1078 unsigned long started;
1077 int completed_end; 1079 unsigned long completed;
1078 int idx; 1080 int idx;
1079 DEFINE_TORTURE_RANDOM(rand); 1081 DEFINE_TORTURE_RANDOM(rand);
1080 struct rcu_torture *p; 1082 struct rcu_torture *p;
@@ -1093,7 +1095,10 @@ rcu_torture_reader(void *arg)
1093 mod_timer(&t, jiffies + 1); 1095 mod_timer(&t, jiffies + 1);
1094 } 1096 }
1095 idx = cur_ops->readlock(); 1097 idx = cur_ops->readlock();
1096 completed = cur_ops->completed(); 1098 if (cur_ops->started)
1099 started = cur_ops->started();
1100 else
1101 started = cur_ops->completed();
1097 ts = rcu_trace_clock_local(); 1102 ts = rcu_trace_clock_local();
1098 p = rcu_dereference_check(rcu_torture_current, 1103 p = rcu_dereference_check(rcu_torture_current,
1099 rcu_read_lock_bh_held() || 1104 rcu_read_lock_bh_held() ||
@@ -1114,14 +1119,16 @@ rcu_torture_reader(void *arg)
1114 /* Should not happen, but... */ 1119 /* Should not happen, but... */
1115 pipe_count = RCU_TORTURE_PIPE_LEN; 1120 pipe_count = RCU_TORTURE_PIPE_LEN;
1116 } 1121 }
1117 completed_end = cur_ops->completed(); 1122 completed = cur_ops->completed();
1118 if (pipe_count > 1) { 1123 if (pipe_count > 1) {
1119 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, 1124 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu,
1120 ts, completed, completed_end); 1125 ts, started, completed);
1121 rcutorture_trace_dump(); 1126 rcutorture_trace_dump();
1122 } 1127 }
1123 __this_cpu_inc(rcu_torture_count[pipe_count]); 1128 __this_cpu_inc(rcu_torture_count[pipe_count]);
1124 completed = completed_end - completed; 1129 completed = completed - started;
1130 if (cur_ops->started)
1131 completed++;
1125 if (completed > RCU_TORTURE_PIPE_LEN) { 1132 if (completed > RCU_TORTURE_PIPE_LEN) {
1126 /* Should not happen, but... */ 1133 /* Should not happen, but... */
1127 completed = RCU_TORTURE_PIPE_LEN; 1134 completed = RCU_TORTURE_PIPE_LEN;
@@ -1420,6 +1427,9 @@ static int rcu_torture_barrier(void *arg)
1420 cur_ops->cb_barrier(); /* Implies smp_mb() for wait_event(). */ 1427 cur_ops->cb_barrier(); /* Implies smp_mb() for wait_event(). */
1421 if (atomic_read(&barrier_cbs_invoked) != n_barrier_cbs) { 1428 if (atomic_read(&barrier_cbs_invoked) != n_barrier_cbs) {
1422 n_rcu_torture_barrier_error++; 1429 n_rcu_torture_barrier_error++;
1430 pr_err("barrier_cbs_invoked = %d, n_barrier_cbs = %d\n",
1431 atomic_read(&barrier_cbs_invoked),
1432 n_barrier_cbs);
1423 WARN_ON_ONCE(1); 1433 WARN_ON_ONCE(1);
1424 } 1434 }
1425 n_barrier_successes++; 1435 n_barrier_successes++;
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index e037f3eb2f7b..445bf8ffe3fb 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -546,7 +546,7 @@ EXPORT_SYMBOL_GPL(srcu_barrier);
546 * Report the number of batches, correlated with, but not necessarily 546 * Report the number of batches, correlated with, but not necessarily
547 * precisely the same as, the number of grace periods that have elapsed. 547 * precisely the same as, the number of grace periods that have elapsed.
548 */ 548 */
549long srcu_batches_completed(struct srcu_struct *sp) 549unsigned long srcu_batches_completed(struct srcu_struct *sp)
550{ 550{
551 return sp->completed; 551 return sp->completed;
552} 552}
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index 0db5649f8817..cc9ceca7bde1 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -47,54 +47,14 @@ static void __call_rcu(struct rcu_head *head,
47 void (*func)(struct rcu_head *rcu), 47 void (*func)(struct rcu_head *rcu),
48 struct rcu_ctrlblk *rcp); 48 struct rcu_ctrlblk *rcp);
49 49
50static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
51
52#include "tiny_plugin.h" 50#include "tiny_plugin.h"
53 51
54/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcu/tree.c. */
55static void rcu_idle_enter_common(long long newval)
56{
57 if (newval) {
58 RCU_TRACE(trace_rcu_dyntick(TPS("--="),
59 rcu_dynticks_nesting, newval));
60 rcu_dynticks_nesting = newval;
61 return;
62 }
63 RCU_TRACE(trace_rcu_dyntick(TPS("Start"),
64 rcu_dynticks_nesting, newval));
65 if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) {
66 struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
67
68 RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"),
69 rcu_dynticks_nesting, newval));
70 ftrace_dump(DUMP_ALL);
71 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
72 current->pid, current->comm,
73 idle->pid, idle->comm); /* must be idle task! */
74 }
75 rcu_sched_qs(); /* implies rcu_bh_inc() */
76 barrier();
77 rcu_dynticks_nesting = newval;
78}
79
80/* 52/*
81 * Enter idle, which is an extended quiescent state if we have fully 53 * Enter idle, which is an extended quiescent state if we have fully
82 * entered that mode (i.e., if the new value of dynticks_nesting is zero). 54 * entered that mode.
83 */ 55 */
84void rcu_idle_enter(void) 56void rcu_idle_enter(void)
85{ 57{
86 unsigned long flags;
87 long long newval;
88
89 local_irq_save(flags);
90 WARN_ON_ONCE((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0);
91 if ((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) ==
92 DYNTICK_TASK_NEST_VALUE)
93 newval = 0;
94 else
95 newval = rcu_dynticks_nesting - DYNTICK_TASK_NEST_VALUE;
96 rcu_idle_enter_common(newval);
97 local_irq_restore(flags);
98} 58}
99EXPORT_SYMBOL_GPL(rcu_idle_enter); 59EXPORT_SYMBOL_GPL(rcu_idle_enter);
100 60
@@ -103,55 +63,14 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
103 */ 63 */
104void rcu_irq_exit(void) 64void rcu_irq_exit(void)
105{ 65{
106 unsigned long flags;
107 long long newval;
108
109 local_irq_save(flags);
110 newval = rcu_dynticks_nesting - 1;
111 WARN_ON_ONCE(newval < 0);
112 rcu_idle_enter_common(newval);
113 local_irq_restore(flags);
114} 66}
115EXPORT_SYMBOL_GPL(rcu_irq_exit); 67EXPORT_SYMBOL_GPL(rcu_irq_exit);
116 68
117/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcu/tree.c. */
118static void rcu_idle_exit_common(long long oldval)
119{
120 if (oldval) {
121 RCU_TRACE(trace_rcu_dyntick(TPS("++="),
122 oldval, rcu_dynticks_nesting));
123 return;
124 }
125 RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting));
126 if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) {
127 struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
128
129 RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"),
130 oldval, rcu_dynticks_nesting));
131 ftrace_dump(DUMP_ALL);
132 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
133 current->pid, current->comm,
134 idle->pid, idle->comm); /* must be idle task! */
135 }
136}
137
138/* 69/*
139 * Exit idle, so that we are no longer in an extended quiescent state. 70 * Exit idle, so that we are no longer in an extended quiescent state.
140 */ 71 */
141void rcu_idle_exit(void) 72void rcu_idle_exit(void)
142{ 73{
143 unsigned long flags;
144 long long oldval;
145
146 local_irq_save(flags);
147 oldval = rcu_dynticks_nesting;
148 WARN_ON_ONCE(rcu_dynticks_nesting < 0);
149 if (rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK)
150 rcu_dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
151 else
152 rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
153 rcu_idle_exit_common(oldval);
154 local_irq_restore(flags);
155} 74}
156EXPORT_SYMBOL_GPL(rcu_idle_exit); 75EXPORT_SYMBOL_GPL(rcu_idle_exit);
157 76
@@ -160,15 +79,6 @@ EXPORT_SYMBOL_GPL(rcu_idle_exit);
160 */ 79 */
161void rcu_irq_enter(void) 80void rcu_irq_enter(void)
162{ 81{
163 unsigned long flags;
164 long long oldval;
165
166 local_irq_save(flags);
167 oldval = rcu_dynticks_nesting;
168 rcu_dynticks_nesting++;
169 WARN_ON_ONCE(rcu_dynticks_nesting == 0);
170 rcu_idle_exit_common(oldval);
171 local_irq_restore(flags);
172} 82}
173EXPORT_SYMBOL_GPL(rcu_irq_enter); 83EXPORT_SYMBOL_GPL(rcu_irq_enter);
174 84
@@ -179,23 +89,13 @@ EXPORT_SYMBOL_GPL(rcu_irq_enter);
179 */ 89 */
180bool notrace __rcu_is_watching(void) 90bool notrace __rcu_is_watching(void)
181{ 91{
182 return rcu_dynticks_nesting; 92 return true;
183} 93}
184EXPORT_SYMBOL(__rcu_is_watching); 94EXPORT_SYMBOL(__rcu_is_watching);
185 95
186#endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ 96#endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
187 97
188/* 98/*
189 * Test whether the current CPU was interrupted from idle. Nested
190 * interrupts don't count, we must be running at the first interrupt
191 * level.
192 */
193static int rcu_is_cpu_rrupt_from_idle(void)
194{
195 return rcu_dynticks_nesting <= 1;
196}
197
198/*
199 * Helper function for rcu_sched_qs() and rcu_bh_qs(). 99 * Helper function for rcu_sched_qs() and rcu_bh_qs().
200 * Also irqs are disabled to avoid confusion due to interrupt handlers 100 * Also irqs are disabled to avoid confusion due to interrupt handlers
201 * invoking call_rcu(). 101 * invoking call_rcu().
@@ -250,7 +150,7 @@ void rcu_bh_qs(void)
250void rcu_check_callbacks(int user) 150void rcu_check_callbacks(int user)
251{ 151{
252 RCU_TRACE(check_cpu_stalls()); 152 RCU_TRACE(check_cpu_stalls());
253 if (user || rcu_is_cpu_rrupt_from_idle()) 153 if (user)
254 rcu_sched_qs(); 154 rcu_sched_qs();
255 else if (!in_softirq()) 155 else if (!in_softirq())
256 rcu_bh_qs(); 156 rcu_bh_qs();
@@ -357,6 +257,11 @@ static void __call_rcu(struct rcu_head *head,
357 rcp->curtail = &head->next; 257 rcp->curtail = &head->next;
358 RCU_TRACE(rcp->qlen++); 258 RCU_TRACE(rcp->qlen++);
359 local_irq_restore(flags); 259 local_irq_restore(flags);
260
261 if (unlikely(is_idle_task(current))) {
262 /* force scheduling for rcu_sched_qs() */
263 resched_cpu(0);
264 }
360} 265}
361 266
362/* 267/*
@@ -383,6 +288,8 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
383void __init rcu_init(void) 288void __init rcu_init(void)
384{ 289{
385 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); 290 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
291 RCU_TRACE(reset_cpu_stall_ticks(&rcu_sched_ctrlblk));
292 RCU_TRACE(reset_cpu_stall_ticks(&rcu_bh_ctrlblk));
386 293
387 rcu_early_boot_tests(); 294 rcu_early_boot_tests();
388} 295}
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
index 858c56569127..f94e209a10d6 100644
--- a/kernel/rcu/tiny_plugin.h
+++ b/kernel/rcu/tiny_plugin.h
@@ -145,17 +145,16 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp)
145 rcp->ticks_this_gp++; 145 rcp->ticks_this_gp++;
146 j = jiffies; 146 j = jiffies;
147 js = ACCESS_ONCE(rcp->jiffies_stall); 147 js = ACCESS_ONCE(rcp->jiffies_stall);
148 if (*rcp->curtail && ULONG_CMP_GE(j, js)) { 148 if (rcp->rcucblist && ULONG_CMP_GE(j, js)) {
149 pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n", 149 pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
150 rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting, 150 rcp->name, rcp->ticks_this_gp, DYNTICK_TASK_EXIT_IDLE,
151 jiffies - rcp->gp_start, rcp->qlen); 151 jiffies - rcp->gp_start, rcp->qlen);
152 dump_stack(); 152 dump_stack();
153 }
154 if (*rcp->curtail && ULONG_CMP_GE(j, js))
155 ACCESS_ONCE(rcp->jiffies_stall) = jiffies + 153 ACCESS_ONCE(rcp->jiffies_stall) = jiffies +
156 3 * rcu_jiffies_till_stall_check() + 3; 154 3 * rcu_jiffies_till_stall_check() + 3;
157 else if (ULONG_CMP_GE(j, js)) 155 } else if (ULONG_CMP_GE(j, js)) {
158 ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check(); 156 ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check();
157 }
159} 158}
160 159
161static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp) 160static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 7680fc275036..48d640ca1a05 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -156,6 +156,10 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
156static void invoke_rcu_core(void); 156static void invoke_rcu_core(void);
157static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); 157static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
158 158
159/* rcuc/rcub kthread realtime priority */
160static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO;
161module_param(kthread_prio, int, 0644);
162
159/* 163/*
160 * Track the rcutorture test sequence number and the update version 164 * Track the rcutorture test sequence number and the update version
161 * number within a given test. The rcutorture_testseq is incremented 165 * number within a given test. The rcutorture_testseq is incremented
@@ -215,6 +219,9 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
215#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ 219#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
216}; 220};
217 221
222DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
223EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr);
224
218/* 225/*
219 * Let the RCU core know that this CPU has gone through the scheduler, 226 * Let the RCU core know that this CPU has gone through the scheduler,
220 * which is a quiescent state. This is called when the need for a 227 * which is a quiescent state. This is called when the need for a
@@ -284,6 +291,22 @@ void rcu_note_context_switch(void)
284} 291}
285EXPORT_SYMBOL_GPL(rcu_note_context_switch); 292EXPORT_SYMBOL_GPL(rcu_note_context_switch);
286 293
294/*
295 * Register a quiesecent state for all RCU flavors. If there is an
296 * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight
297 * dyntick-idle quiescent state visible to other CPUs (but only for those
298 * RCU flavors in desparate need of a quiescent state, which will normally
299 * be none of them). Either way, do a lightweight quiescent state for
300 * all RCU flavors.
301 */
302void rcu_all_qs(void)
303{
304 if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
305 rcu_momentary_dyntick_idle();
306 this_cpu_inc(rcu_qs_ctr);
307}
308EXPORT_SYMBOL_GPL(rcu_all_qs);
309
287static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */ 310static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */
288static long qhimark = 10000; /* If this many pending, ignore blimit. */ 311static long qhimark = 10000; /* If this many pending, ignore blimit. */
289static long qlowmark = 100; /* Once only this many pending, use blimit. */ 312static long qlowmark = 100; /* Once only this many pending, use blimit. */
@@ -315,18 +338,54 @@ static void force_quiescent_state(struct rcu_state *rsp);
315static int rcu_pending(void); 338static int rcu_pending(void);
316 339
317/* 340/*
318 * Return the number of RCU-sched batches processed thus far for debug & stats. 341 * Return the number of RCU batches started thus far for debug & stats.
342 */
343unsigned long rcu_batches_started(void)
344{
345 return rcu_state_p->gpnum;
346}
347EXPORT_SYMBOL_GPL(rcu_batches_started);
348
349/*
350 * Return the number of RCU-sched batches started thus far for debug & stats.
351 */
352unsigned long rcu_batches_started_sched(void)
353{
354 return rcu_sched_state.gpnum;
355}
356EXPORT_SYMBOL_GPL(rcu_batches_started_sched);
357
358/*
359 * Return the number of RCU BH batches started thus far for debug & stats.
319 */ 360 */
320long rcu_batches_completed_sched(void) 361unsigned long rcu_batches_started_bh(void)
362{
363 return rcu_bh_state.gpnum;
364}
365EXPORT_SYMBOL_GPL(rcu_batches_started_bh);
366
367/*
368 * Return the number of RCU batches completed thus far for debug & stats.
369 */
370unsigned long rcu_batches_completed(void)
371{
372 return rcu_state_p->completed;
373}
374EXPORT_SYMBOL_GPL(rcu_batches_completed);
375
376/*
377 * Return the number of RCU-sched batches completed thus far for debug & stats.
378 */
379unsigned long rcu_batches_completed_sched(void)
321{ 380{
322 return rcu_sched_state.completed; 381 return rcu_sched_state.completed;
323} 382}
324EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); 383EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
325 384
326/* 385/*
327 * Return the number of RCU BH batches processed thus far for debug & stats. 386 * Return the number of RCU BH batches completed thus far for debug & stats.
328 */ 387 */
329long rcu_batches_completed_bh(void) 388unsigned long rcu_batches_completed_bh(void)
330{ 389{
331 return rcu_bh_state.completed; 390 return rcu_bh_state.completed;
332} 391}
@@ -759,39 +818,71 @@ void rcu_irq_enter(void)
759/** 818/**
760 * rcu_nmi_enter - inform RCU of entry to NMI context 819 * rcu_nmi_enter - inform RCU of entry to NMI context
761 * 820 *
762 * If the CPU was idle with dynamic ticks active, and there is no 821 * If the CPU was idle from RCU's viewpoint, update rdtp->dynticks and
763 * irq handler running, this updates rdtp->dynticks_nmi to let the 822 * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling know
764 * RCU grace-period handling know that the CPU is active. 823 * that the CPU is active. This implementation permits nested NMIs, as
824 * long as the nesting level does not overflow an int. (You will probably
825 * run out of stack space first.)
765 */ 826 */
766void rcu_nmi_enter(void) 827void rcu_nmi_enter(void)
767{ 828{
768 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); 829 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
830 int incby = 2;
769 831
770 if (rdtp->dynticks_nmi_nesting == 0 && 832 /* Complain about underflow. */
771 (atomic_read(&rdtp->dynticks) & 0x1)) 833 WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0);
772 return; 834
773 rdtp->dynticks_nmi_nesting++; 835 /*
774 smp_mb__before_atomic(); /* Force delay from prior write. */ 836 * If idle from RCU viewpoint, atomically increment ->dynticks
775 atomic_inc(&rdtp->dynticks); 837 * to mark non-idle and increment ->dynticks_nmi_nesting by one.
776 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 838 * Otherwise, increment ->dynticks_nmi_nesting by two. This means
777 smp_mb__after_atomic(); /* See above. */ 839 * if ->dynticks_nmi_nesting is equal to one, we are guaranteed
778 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); 840 * to be in the outermost NMI handler that interrupted an RCU-idle
841 * period (observation due to Andy Lutomirski).
842 */
843 if (!(atomic_read(&rdtp->dynticks) & 0x1)) {
844 smp_mb__before_atomic(); /* Force delay from prior write. */
845 atomic_inc(&rdtp->dynticks);
846 /* atomic_inc() before later RCU read-side crit sects */
847 smp_mb__after_atomic(); /* See above. */
848 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
849 incby = 1;
850 }
851 rdtp->dynticks_nmi_nesting += incby;
852 barrier();
779} 853}
780 854
781/** 855/**
782 * rcu_nmi_exit - inform RCU of exit from NMI context 856 * rcu_nmi_exit - inform RCU of exit from NMI context
783 * 857 *
784 * If the CPU was idle with dynamic ticks active, and there is no 858 * If we are returning from the outermost NMI handler that interrupted an
785 * irq handler running, this updates rdtp->dynticks_nmi to let the 859 * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting
786 * RCU grace-period handling know that the CPU is no longer active. 860 * to let the RCU grace-period handling know that the CPU is back to
861 * being RCU-idle.
787 */ 862 */
788void rcu_nmi_exit(void) 863void rcu_nmi_exit(void)
789{ 864{
790 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); 865 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
791 866
792 if (rdtp->dynticks_nmi_nesting == 0 || 867 /*
793 --rdtp->dynticks_nmi_nesting != 0) 868 * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
869 * (We are exiting an NMI handler, so RCU better be paying attention
870 * to us!)
871 */
872 WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0);
873 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
874
875 /*
876 * If the nesting level is not 1, the CPU wasn't RCU-idle, so
877 * leave it in non-RCU-idle state.
878 */
879 if (rdtp->dynticks_nmi_nesting != 1) {
880 rdtp->dynticks_nmi_nesting -= 2;
794 return; 881 return;
882 }
883
884 /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
885 rdtp->dynticks_nmi_nesting = 0;
795 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ 886 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
796 smp_mb__before_atomic(); /* See above. */ 887 smp_mb__before_atomic(); /* See above. */
797 atomic_inc(&rdtp->dynticks); 888 atomic_inc(&rdtp->dynticks);
@@ -898,17 +989,14 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
898 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); 989 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
899 return 1; 990 return 1;
900 } else { 991 } else {
992 if (ULONG_CMP_LT(ACCESS_ONCE(rdp->gpnum) + ULONG_MAX / 4,
993 rdp->mynode->gpnum))
994 ACCESS_ONCE(rdp->gpwrap) = true;
901 return 0; 995 return 0;
902 } 996 }
903} 997}
904 998
905/* 999/*
906 * This function really isn't for public consumption, but RCU is special in
907 * that context switches can allow the state machine to make progress.
908 */
909extern void resched_cpu(int cpu);
910
911/*
912 * Return true if the specified CPU has passed through a quiescent 1000 * Return true if the specified CPU has passed through a quiescent
913 * state by virtue of being in or having passed through an dynticks 1001 * state by virtue of being in or having passed through an dynticks
914 * idle state since the last call to dyntick_save_progress_counter() 1002 * idle state since the last call to dyntick_save_progress_counter()
@@ -1011,6 +1099,22 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
1011 j1 = rcu_jiffies_till_stall_check(); 1099 j1 = rcu_jiffies_till_stall_check();
1012 ACCESS_ONCE(rsp->jiffies_stall) = j + j1; 1100 ACCESS_ONCE(rsp->jiffies_stall) = j + j1;
1013 rsp->jiffies_resched = j + j1 / 2; 1101 rsp->jiffies_resched = j + j1 / 2;
1102 rsp->n_force_qs_gpstart = ACCESS_ONCE(rsp->n_force_qs);
1103}
1104
1105/*
1106 * Complain about starvation of grace-period kthread.
1107 */
1108static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
1109{
1110 unsigned long gpa;
1111 unsigned long j;
1112
1113 j = jiffies;
1114 gpa = ACCESS_ONCE(rsp->gp_activity);
1115 if (j - gpa > 2 * HZ)
1116 pr_err("%s kthread starved for %ld jiffies!\n",
1117 rsp->name, j - gpa);
1014} 1118}
1015 1119
1016/* 1120/*
@@ -1033,11 +1137,13 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
1033 } 1137 }
1034} 1138}
1035 1139
1036static void print_other_cpu_stall(struct rcu_state *rsp) 1140static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
1037{ 1141{
1038 int cpu; 1142 int cpu;
1039 long delta; 1143 long delta;
1040 unsigned long flags; 1144 unsigned long flags;
1145 unsigned long gpa;
1146 unsigned long j;
1041 int ndetected = 0; 1147 int ndetected = 0;
1042 struct rcu_node *rnp = rcu_get_root(rsp); 1148 struct rcu_node *rnp = rcu_get_root(rsp);
1043 long totqlen = 0; 1149 long totqlen = 0;
@@ -1075,30 +1181,34 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
1075 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1181 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1076 } 1182 }
1077 1183
1078 /*
1079 * Now rat on any tasks that got kicked up to the root rcu_node
1080 * due to CPU offlining.
1081 */
1082 rnp = rcu_get_root(rsp);
1083 raw_spin_lock_irqsave(&rnp->lock, flags);
1084 ndetected += rcu_print_task_stall(rnp);
1085 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1086
1087 print_cpu_stall_info_end(); 1184 print_cpu_stall_info_end();
1088 for_each_possible_cpu(cpu) 1185 for_each_possible_cpu(cpu)
1089 totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; 1186 totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
1090 pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n", 1187 pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n",
1091 smp_processor_id(), (long)(jiffies - rsp->gp_start), 1188 smp_processor_id(), (long)(jiffies - rsp->gp_start),
1092 (long)rsp->gpnum, (long)rsp->completed, totqlen); 1189 (long)rsp->gpnum, (long)rsp->completed, totqlen);
1093 if (ndetected == 0) 1190 if (ndetected) {
1094 pr_err("INFO: Stall ended before state dump start\n");
1095 else
1096 rcu_dump_cpu_stacks(rsp); 1191 rcu_dump_cpu_stacks(rsp);
1192 } else {
1193 if (ACCESS_ONCE(rsp->gpnum) != gpnum ||
1194 ACCESS_ONCE(rsp->completed) == gpnum) {
1195 pr_err("INFO: Stall ended before state dump start\n");
1196 } else {
1197 j = jiffies;
1198 gpa = ACCESS_ONCE(rsp->gp_activity);
1199 pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld\n",
1200 rsp->name, j - gpa, j, gpa,
1201 jiffies_till_next_fqs);
1202 /* In this case, the current CPU might be at fault. */
1203 sched_show_task(current);
1204 }
1205 }
1097 1206
1098 /* Complain about tasks blocking the grace period. */ 1207 /* Complain about tasks blocking the grace period. */
1099
1100 rcu_print_detail_task_stall(rsp); 1208 rcu_print_detail_task_stall(rsp);
1101 1209
1210 rcu_check_gp_kthread_starvation(rsp);
1211
1102 force_quiescent_state(rsp); /* Kick them all. */ 1212 force_quiescent_state(rsp); /* Kick them all. */
1103} 1213}
1104 1214
@@ -1123,6 +1233,9 @@ static void print_cpu_stall(struct rcu_state *rsp)
1123 pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n", 1233 pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n",
1124 jiffies - rsp->gp_start, 1234 jiffies - rsp->gp_start,
1125 (long)rsp->gpnum, (long)rsp->completed, totqlen); 1235 (long)rsp->gpnum, (long)rsp->completed, totqlen);
1236
1237 rcu_check_gp_kthread_starvation(rsp);
1238
1126 rcu_dump_cpu_stacks(rsp); 1239 rcu_dump_cpu_stacks(rsp);
1127 1240
1128 raw_spin_lock_irqsave(&rnp->lock, flags); 1241 raw_spin_lock_irqsave(&rnp->lock, flags);
@@ -1193,7 +1306,7 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
1193 ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) { 1306 ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) {
1194 1307
1195 /* They had a few time units to dump stack, so complain. */ 1308 /* They had a few time units to dump stack, so complain. */
1196 print_other_cpu_stall(rsp); 1309 print_other_cpu_stall(rsp, gpnum);
1197 } 1310 }
1198} 1311}
1199 1312
@@ -1530,7 +1643,8 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
1530 bool ret; 1643 bool ret;
1531 1644
1532 /* Handle the ends of any preceding grace periods first. */ 1645 /* Handle the ends of any preceding grace periods first. */
1533 if (rdp->completed == rnp->completed) { 1646 if (rdp->completed == rnp->completed &&
1647 !unlikely(ACCESS_ONCE(rdp->gpwrap))) {
1534 1648
1535 /* No grace period end, so just accelerate recent callbacks. */ 1649 /* No grace period end, so just accelerate recent callbacks. */
1536 ret = rcu_accelerate_cbs(rsp, rnp, rdp); 1650 ret = rcu_accelerate_cbs(rsp, rnp, rdp);
@@ -1545,7 +1659,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
1545 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend")); 1659 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend"));
1546 } 1660 }
1547 1661
1548 if (rdp->gpnum != rnp->gpnum) { 1662 if (rdp->gpnum != rnp->gpnum || unlikely(ACCESS_ONCE(rdp->gpwrap))) {
1549 /* 1663 /*
1550 * If the current grace period is waiting for this CPU, 1664 * If the current grace period is waiting for this CPU,
1551 * set up to detect a quiescent state, otherwise don't 1665 * set up to detect a quiescent state, otherwise don't
@@ -1554,8 +1668,10 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
1554 rdp->gpnum = rnp->gpnum; 1668 rdp->gpnum = rnp->gpnum;
1555 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart")); 1669 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
1556 rdp->passed_quiesce = 0; 1670 rdp->passed_quiesce = 0;
1671 rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
1557 rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); 1672 rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
1558 zero_cpu_stall_ticks(rdp); 1673 zero_cpu_stall_ticks(rdp);
1674 ACCESS_ONCE(rdp->gpwrap) = false;
1559 } 1675 }
1560 return ret; 1676 return ret;
1561} 1677}
@@ -1569,7 +1685,8 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
1569 local_irq_save(flags); 1685 local_irq_save(flags);
1570 rnp = rdp->mynode; 1686 rnp = rdp->mynode;
1571 if ((rdp->gpnum == ACCESS_ONCE(rnp->gpnum) && 1687 if ((rdp->gpnum == ACCESS_ONCE(rnp->gpnum) &&
1572 rdp->completed == ACCESS_ONCE(rnp->completed)) || /* w/out lock. */ 1688 rdp->completed == ACCESS_ONCE(rnp->completed) &&
1689 !unlikely(ACCESS_ONCE(rdp->gpwrap))) || /* w/out lock. */
1573 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ 1690 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
1574 local_irq_restore(flags); 1691 local_irq_restore(flags);
1575 return; 1692 return;
@@ -1589,6 +1706,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
1589 struct rcu_data *rdp; 1706 struct rcu_data *rdp;
1590 struct rcu_node *rnp = rcu_get_root(rsp); 1707 struct rcu_node *rnp = rcu_get_root(rsp);
1591 1708
1709 ACCESS_ONCE(rsp->gp_activity) = jiffies;
1592 rcu_bind_gp_kthread(); 1710 rcu_bind_gp_kthread();
1593 raw_spin_lock_irq(&rnp->lock); 1711 raw_spin_lock_irq(&rnp->lock);
1594 smp_mb__after_unlock_lock(); 1712 smp_mb__after_unlock_lock();
@@ -1649,6 +1767,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
1649 rnp->grphi, rnp->qsmask); 1767 rnp->grphi, rnp->qsmask);
1650 raw_spin_unlock_irq(&rnp->lock); 1768 raw_spin_unlock_irq(&rnp->lock);
1651 cond_resched_rcu_qs(); 1769 cond_resched_rcu_qs();
1770 ACCESS_ONCE(rsp->gp_activity) = jiffies;
1652 } 1771 }
1653 1772
1654 mutex_unlock(&rsp->onoff_mutex); 1773 mutex_unlock(&rsp->onoff_mutex);
@@ -1665,6 +1784,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1665 unsigned long maxj; 1784 unsigned long maxj;
1666 struct rcu_node *rnp = rcu_get_root(rsp); 1785 struct rcu_node *rnp = rcu_get_root(rsp);
1667 1786
1787 ACCESS_ONCE(rsp->gp_activity) = jiffies;
1668 rsp->n_force_qs++; 1788 rsp->n_force_qs++;
1669 if (fqs_state == RCU_SAVE_DYNTICK) { 1789 if (fqs_state == RCU_SAVE_DYNTICK) {
1670 /* Collect dyntick-idle snapshots. */ 1790 /* Collect dyntick-idle snapshots. */
@@ -1703,6 +1823,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1703 struct rcu_data *rdp; 1823 struct rcu_data *rdp;
1704 struct rcu_node *rnp = rcu_get_root(rsp); 1824 struct rcu_node *rnp = rcu_get_root(rsp);
1705 1825
1826 ACCESS_ONCE(rsp->gp_activity) = jiffies;
1706 raw_spin_lock_irq(&rnp->lock); 1827 raw_spin_lock_irq(&rnp->lock);
1707 smp_mb__after_unlock_lock(); 1828 smp_mb__after_unlock_lock();
1708 gp_duration = jiffies - rsp->gp_start; 1829 gp_duration = jiffies - rsp->gp_start;
@@ -1739,6 +1860,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1739 nocb += rcu_future_gp_cleanup(rsp, rnp); 1860 nocb += rcu_future_gp_cleanup(rsp, rnp);
1740 raw_spin_unlock_irq(&rnp->lock); 1861 raw_spin_unlock_irq(&rnp->lock);
1741 cond_resched_rcu_qs(); 1862 cond_resched_rcu_qs();
1863 ACCESS_ONCE(rsp->gp_activity) = jiffies;
1742 } 1864 }
1743 rnp = rcu_get_root(rsp); 1865 rnp = rcu_get_root(rsp);
1744 raw_spin_lock_irq(&rnp->lock); 1866 raw_spin_lock_irq(&rnp->lock);
@@ -1788,6 +1910,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
1788 if (rcu_gp_init(rsp)) 1910 if (rcu_gp_init(rsp))
1789 break; 1911 break;
1790 cond_resched_rcu_qs(); 1912 cond_resched_rcu_qs();
1913 ACCESS_ONCE(rsp->gp_activity) = jiffies;
1791 WARN_ON(signal_pending(current)); 1914 WARN_ON(signal_pending(current));
1792 trace_rcu_grace_period(rsp->name, 1915 trace_rcu_grace_period(rsp->name,
1793 ACCESS_ONCE(rsp->gpnum), 1916 ACCESS_ONCE(rsp->gpnum),
@@ -1831,9 +1954,11 @@ static int __noreturn rcu_gp_kthread(void *arg)
1831 ACCESS_ONCE(rsp->gpnum), 1954 ACCESS_ONCE(rsp->gpnum),
1832 TPS("fqsend")); 1955 TPS("fqsend"));
1833 cond_resched_rcu_qs(); 1956 cond_resched_rcu_qs();
1957 ACCESS_ONCE(rsp->gp_activity) = jiffies;
1834 } else { 1958 } else {
1835 /* Deal with stray signal. */ 1959 /* Deal with stray signal. */
1836 cond_resched_rcu_qs(); 1960 cond_resched_rcu_qs();
1961 ACCESS_ONCE(rsp->gp_activity) = jiffies;
1837 WARN_ON(signal_pending(current)); 1962 WARN_ON(signal_pending(current));
1838 trace_rcu_grace_period(rsp->name, 1963 trace_rcu_grace_period(rsp->name,
1839 ACCESS_ONCE(rsp->gpnum), 1964 ACCESS_ONCE(rsp->gpnum),
@@ -2010,8 +2135,10 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
2010 rnp = rdp->mynode; 2135 rnp = rdp->mynode;
2011 raw_spin_lock_irqsave(&rnp->lock, flags); 2136 raw_spin_lock_irqsave(&rnp->lock, flags);
2012 smp_mb__after_unlock_lock(); 2137 smp_mb__after_unlock_lock();
2013 if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum || 2138 if ((rdp->passed_quiesce == 0 &&
2014 rnp->completed == rnp->gpnum) { 2139 rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) ||
2140 rdp->gpnum != rnp->gpnum || rnp->completed == rnp->gpnum ||
2141 rdp->gpwrap) {
2015 2142
2016 /* 2143 /*
2017 * The grace period in which this quiescent state was 2144 * The grace period in which this quiescent state was
@@ -2020,6 +2147,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
2020 * within the current grace period. 2147 * within the current grace period.
2021 */ 2148 */
2022 rdp->passed_quiesce = 0; /* need qs for new gp. */ 2149 rdp->passed_quiesce = 0; /* need qs for new gp. */
2150 rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
2023 raw_spin_unlock_irqrestore(&rnp->lock, flags); 2151 raw_spin_unlock_irqrestore(&rnp->lock, flags);
2024 return; 2152 return;
2025 } 2153 }
@@ -2064,7 +2192,8 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
2064 * Was there a quiescent state since the beginning of the grace 2192 * Was there a quiescent state since the beginning of the grace
2065 * period? If no, then exit and wait for the next call. 2193 * period? If no, then exit and wait for the next call.
2066 */ 2194 */
2067 if (!rdp->passed_quiesce) 2195 if (!rdp->passed_quiesce &&
2196 rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr))
2068 return; 2197 return;
2069 2198
2070 /* 2199 /*
@@ -2195,6 +2324,46 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
2195} 2324}
2196 2325
2197/* 2326/*
2327 * All CPUs for the specified rcu_node structure have gone offline,
2328 * and all tasks that were preempted within an RCU read-side critical
2329 * section while running on one of those CPUs have since exited their RCU
2330 * read-side critical section. Some other CPU is reporting this fact with
2331 * the specified rcu_node structure's ->lock held and interrupts disabled.
2332 * This function therefore goes up the tree of rcu_node structures,
2333 * clearing the corresponding bits in the ->qsmaskinit fields. Note that
2334 * the leaf rcu_node structure's ->qsmaskinit field has already been
2335 * updated
2336 *
2337 * This function does check that the specified rcu_node structure has
2338 * all CPUs offline and no blocked tasks, so it is OK to invoke it
2339 * prematurely. That said, invoking it after the fact will cost you
2340 * a needless lock acquisition. So once it has done its work, don't
2341 * invoke it again.
2342 */
2343static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
2344{
2345 long mask;
2346 struct rcu_node *rnp = rnp_leaf;
2347
2348 if (rnp->qsmaskinit || rcu_preempt_has_tasks(rnp))
2349 return;
2350 for (;;) {
2351 mask = rnp->grpmask;
2352 rnp = rnp->parent;
2353 if (!rnp)
2354 break;
2355 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
2356 smp_mb__after_unlock_lock(); /* GP memory ordering. */
2357 rnp->qsmaskinit &= ~mask;
2358 if (rnp->qsmaskinit) {
2359 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
2360 return;
2361 }
2362 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
2363 }
2364}
2365
2366/*
2198 * The CPU has been completely removed, and some other CPU is reporting 2367 * The CPU has been completely removed, and some other CPU is reporting
2199 * this fact from process context. Do the remainder of the cleanup, 2368 * this fact from process context. Do the remainder of the cleanup,
2200 * including orphaning the outgoing CPU's RCU callbacks, and also 2369 * including orphaning the outgoing CPU's RCU callbacks, and also
@@ -2204,8 +2373,6 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
2204static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) 2373static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
2205{ 2374{
2206 unsigned long flags; 2375 unsigned long flags;
2207 unsigned long mask;
2208 int need_report = 0;
2209 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 2376 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2210 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ 2377 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
2211 2378
@@ -2219,40 +2386,15 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
2219 /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ 2386 /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
2220 rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp); 2387 rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
2221 rcu_adopt_orphan_cbs(rsp, flags); 2388 rcu_adopt_orphan_cbs(rsp, flags);
2389 raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags);
2222 2390
2223 /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ 2391 /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
2224 mask = rdp->grpmask; /* rnp->grplo is constant. */ 2392 raw_spin_lock_irqsave(&rnp->lock, flags);
2225 do { 2393 smp_mb__after_unlock_lock(); /* Enforce GP memory-order guarantee. */
2226 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 2394 rnp->qsmaskinit &= ~rdp->grpmask;
2227 smp_mb__after_unlock_lock(); 2395 if (rnp->qsmaskinit == 0 && !rcu_preempt_has_tasks(rnp))
2228 rnp->qsmaskinit &= ~mask; 2396 rcu_cleanup_dead_rnp(rnp);
2229 if (rnp->qsmaskinit != 0) { 2397 rcu_report_qs_rnp(rdp->grpmask, rsp, rnp, flags); /* Rlses rnp->lock. */
2230 if (rnp != rdp->mynode)
2231 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
2232 break;
2233 }
2234 if (rnp == rdp->mynode)
2235 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
2236 else
2237 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
2238 mask = rnp->grpmask;
2239 rnp = rnp->parent;
2240 } while (rnp != NULL);
2241
2242 /*
2243 * We still hold the leaf rcu_node structure lock here, and
2244 * irqs are still disabled. The reason for this subterfuge is
2245 * because invoking rcu_report_unblock_qs_rnp() with ->orphan_lock
2246 * held leads to deadlock.
2247 */
2248 raw_spin_unlock(&rsp->orphan_lock); /* irqs remain disabled. */
2249 rnp = rdp->mynode;
2250 if (need_report & RCU_OFL_TASKS_NORM_GP)
2251 rcu_report_unblock_qs_rnp(rnp, flags);
2252 else
2253 raw_spin_unlock_irqrestore(&rnp->lock, flags);
2254 if (need_report & RCU_OFL_TASKS_EXP_GP)
2255 rcu_report_exp_rnp(rsp, rnp, true);
2256 WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, 2398 WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
2257 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", 2399 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
2258 cpu, rdp->qlen, rdp->nxtlist); 2400 cpu, rdp->qlen, rdp->nxtlist);
@@ -2268,6 +2410,10 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
2268{ 2410{
2269} 2411}
2270 2412
2413static void __maybe_unused rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
2414{
2415}
2416
2271static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) 2417static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
2272{ 2418{
2273} 2419}
@@ -2464,12 +2610,6 @@ static void force_qs_rnp(struct rcu_state *rsp,
2464 } 2610 }
2465 raw_spin_unlock_irqrestore(&rnp->lock, flags); 2611 raw_spin_unlock_irqrestore(&rnp->lock, flags);
2466 } 2612 }
2467 rnp = rcu_get_root(rsp);
2468 if (rnp->qsmask == 0) {
2469 raw_spin_lock_irqsave(&rnp->lock, flags);
2470 smp_mb__after_unlock_lock();
2471 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
2472 }
2473} 2613}
2474 2614
2475/* 2615/*
@@ -2569,7 +2709,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
2569 * Schedule RCU callback invocation. If the specified type of RCU 2709 * Schedule RCU callback invocation. If the specified type of RCU
2570 * does not support RCU priority boosting, just do a direct call, 2710 * does not support RCU priority boosting, just do a direct call,
2571 * otherwise wake up the per-CPU kernel kthread. Note that because we 2711 * otherwise wake up the per-CPU kernel kthread. Note that because we
2572 * are running on the current CPU with interrupts disabled, the 2712 * are running on the current CPU with softirqs disabled, the
2573 * rcu_cpu_kthread_task cannot disappear out from under us. 2713 * rcu_cpu_kthread_task cannot disappear out from under us.
2574 */ 2714 */
2575static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) 2715static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
@@ -3109,9 +3249,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
3109 3249
3110 /* Is the RCU core waiting for a quiescent state from this CPU? */ 3250 /* Is the RCU core waiting for a quiescent state from this CPU? */
3111 if (rcu_scheduler_fully_active && 3251 if (rcu_scheduler_fully_active &&
3112 rdp->qs_pending && !rdp->passed_quiesce) { 3252 rdp->qs_pending && !rdp->passed_quiesce &&
3253 rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) {
3113 rdp->n_rp_qs_pending++; 3254 rdp->n_rp_qs_pending++;
3114 } else if (rdp->qs_pending && rdp->passed_quiesce) { 3255 } else if (rdp->qs_pending &&
3256 (rdp->passed_quiesce ||
3257 rdp->rcu_qs_ctr_snap != __this_cpu_read(rcu_qs_ctr))) {
3115 rdp->n_rp_report_qs++; 3258 rdp->n_rp_report_qs++;
3116 return 1; 3259 return 1;
3117 } 3260 }
@@ -3135,7 +3278,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
3135 } 3278 }
3136 3279
3137 /* Has a new RCU grace period started? */ 3280 /* Has a new RCU grace period started? */
3138 if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */ 3281 if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum ||
3282 unlikely(ACCESS_ONCE(rdp->gpwrap))) { /* outside lock */
3139 rdp->n_rp_gp_started++; 3283 rdp->n_rp_gp_started++;
3140 return 1; 3284 return 1;
3141 } 3285 }
@@ -3318,6 +3462,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
3318 } else { 3462 } else {
3319 _rcu_barrier_trace(rsp, "OnlineNoCB", cpu, 3463 _rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
3320 rsp->n_barrier_done); 3464 rsp->n_barrier_done);
3465 smp_mb__before_atomic();
3321 atomic_inc(&rsp->barrier_cpu_count); 3466 atomic_inc(&rsp->barrier_cpu_count);
3322 __call_rcu(&rdp->barrier_head, 3467 __call_rcu(&rdp->barrier_head,
3323 rcu_barrier_callback, rsp, cpu, 0); 3468 rcu_barrier_callback, rsp, cpu, 0);
@@ -3385,9 +3530,6 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
3385 /* Set up local state, ensuring consistent view of global state. */ 3530 /* Set up local state, ensuring consistent view of global state. */
3386 raw_spin_lock_irqsave(&rnp->lock, flags); 3531 raw_spin_lock_irqsave(&rnp->lock, flags);
3387 rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); 3532 rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
3388 init_callback_list(rdp);
3389 rdp->qlen_lazy = 0;
3390 ACCESS_ONCE(rdp->qlen) = 0;
3391 rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 3533 rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
3392 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); 3534 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
3393 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); 3535 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
@@ -3444,6 +3586,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
3444 rdp->gpnum = rnp->completed; 3586 rdp->gpnum = rnp->completed;
3445 rdp->completed = rnp->completed; 3587 rdp->completed = rnp->completed;
3446 rdp->passed_quiesce = 0; 3588 rdp->passed_quiesce = 0;
3589 rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
3447 rdp->qs_pending = 0; 3590 rdp->qs_pending = 0;
3448 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); 3591 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
3449 } 3592 }
@@ -3535,17 +3678,35 @@ static int rcu_pm_notify(struct notifier_block *self,
3535static int __init rcu_spawn_gp_kthread(void) 3678static int __init rcu_spawn_gp_kthread(void)
3536{ 3679{
3537 unsigned long flags; 3680 unsigned long flags;
3681 int kthread_prio_in = kthread_prio;
3538 struct rcu_node *rnp; 3682 struct rcu_node *rnp;
3539 struct rcu_state *rsp; 3683 struct rcu_state *rsp;
3684 struct sched_param sp;
3540 struct task_struct *t; 3685 struct task_struct *t;
3541 3686
3687 /* Force priority into range. */
3688 if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1)
3689 kthread_prio = 1;
3690 else if (kthread_prio < 0)
3691 kthread_prio = 0;
3692 else if (kthread_prio > 99)
3693 kthread_prio = 99;
3694 if (kthread_prio != kthread_prio_in)
3695 pr_alert("rcu_spawn_gp_kthread(): Limited prio to %d from %d\n",
3696 kthread_prio, kthread_prio_in);
3697
3542 rcu_scheduler_fully_active = 1; 3698 rcu_scheduler_fully_active = 1;
3543 for_each_rcu_flavor(rsp) { 3699 for_each_rcu_flavor(rsp) {
3544 t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name); 3700 t = kthread_create(rcu_gp_kthread, rsp, "%s", rsp->name);
3545 BUG_ON(IS_ERR(t)); 3701 BUG_ON(IS_ERR(t));
3546 rnp = rcu_get_root(rsp); 3702 rnp = rcu_get_root(rsp);
3547 raw_spin_lock_irqsave(&rnp->lock, flags); 3703 raw_spin_lock_irqsave(&rnp->lock, flags);
3548 rsp->gp_kthread = t; 3704 rsp->gp_kthread = t;
3705 if (kthread_prio) {
3706 sp.sched_priority = kthread_prio;
3707 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
3708 }
3709 wake_up_process(t);
3549 raw_spin_unlock_irqrestore(&rnp->lock, flags); 3710 raw_spin_unlock_irqrestore(&rnp->lock, flags);
3550 } 3711 }
3551 rcu_spawn_nocb_kthreads(); 3712 rcu_spawn_nocb_kthreads();
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 8e7b1843896e..119de399eb2f 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -27,7 +27,6 @@
27#include <linux/threads.h> 27#include <linux/threads.h>
28#include <linux/cpumask.h> 28#include <linux/cpumask.h>
29#include <linux/seqlock.h> 29#include <linux/seqlock.h>
30#include <linux/irq_work.h>
31 30
32/* 31/*
33 * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and 32 * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
@@ -172,11 +171,6 @@ struct rcu_node {
172 /* queued on this rcu_node structure that */ 171 /* queued on this rcu_node structure that */
173 /* are blocking the current grace period, */ 172 /* are blocking the current grace period, */
174 /* there can be no such task. */ 173 /* there can be no such task. */
175 struct completion boost_completion;
176 /* Used to ensure that the rt_mutex used */
177 /* to carry out the boosting is fully */
178 /* released with no future boostee accesses */
179 /* before that rt_mutex is re-initialized. */
180 struct rt_mutex boost_mtx; 174 struct rt_mutex boost_mtx;
181 /* Used only for the priority-boosting */ 175 /* Used only for the priority-boosting */
182 /* side effect, not as a lock. */ 176 /* side effect, not as a lock. */
@@ -257,9 +251,12 @@ struct rcu_data {
257 /* in order to detect GP end. */ 251 /* in order to detect GP end. */
258 unsigned long gpnum; /* Highest gp number that this CPU */ 252 unsigned long gpnum; /* Highest gp number that this CPU */
259 /* is aware of having started. */ 253 /* is aware of having started. */
254 unsigned long rcu_qs_ctr_snap;/* Snapshot of rcu_qs_ctr to check */
255 /* for rcu_all_qs() invocations. */
260 bool passed_quiesce; /* User-mode/idle loop etc. */ 256 bool passed_quiesce; /* User-mode/idle loop etc. */
261 bool qs_pending; /* Core waits for quiesc state. */ 257 bool qs_pending; /* Core waits for quiesc state. */
262 bool beenonline; /* CPU online at least once. */ 258 bool beenonline; /* CPU online at least once. */
259 bool gpwrap; /* Possible gpnum/completed wrap. */
263 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ 260 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
264 unsigned long grpmask; /* Mask to apply to leaf qsmask. */ 261 unsigned long grpmask; /* Mask to apply to leaf qsmask. */
265#ifdef CONFIG_RCU_CPU_STALL_INFO 262#ifdef CONFIG_RCU_CPU_STALL_INFO
@@ -340,14 +337,10 @@ struct rcu_data {
340#ifdef CONFIG_RCU_NOCB_CPU 337#ifdef CONFIG_RCU_NOCB_CPU
341 struct rcu_head *nocb_head; /* CBs waiting for kthread. */ 338 struct rcu_head *nocb_head; /* CBs waiting for kthread. */
342 struct rcu_head **nocb_tail; 339 struct rcu_head **nocb_tail;
343 atomic_long_t nocb_q_count; /* # CBs waiting for kthread */ 340 atomic_long_t nocb_q_count; /* # CBs waiting for nocb */
344 atomic_long_t nocb_q_count_lazy; /* (approximate). */ 341 atomic_long_t nocb_q_count_lazy; /* invocation (all stages). */
345 struct rcu_head *nocb_follower_head; /* CBs ready to invoke. */ 342 struct rcu_head *nocb_follower_head; /* CBs ready to invoke. */
346 struct rcu_head **nocb_follower_tail; 343 struct rcu_head **nocb_follower_tail;
347 atomic_long_t nocb_follower_count; /* # CBs ready to invoke. */
348 atomic_long_t nocb_follower_count_lazy; /* (approximate). */
349 int nocb_p_count; /* # CBs being invoked by kthread */
350 int nocb_p_count_lazy; /* (approximate). */
351 wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */ 344 wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
352 struct task_struct *nocb_kthread; 345 struct task_struct *nocb_kthread;
353 int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */ 346 int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */
@@ -356,8 +349,6 @@ struct rcu_data {
356 struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp; 349 struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp;
357 /* CBs waiting for GP. */ 350 /* CBs waiting for GP. */
358 struct rcu_head **nocb_gp_tail; 351 struct rcu_head **nocb_gp_tail;
359 long nocb_gp_count;
360 long nocb_gp_count_lazy;
361 bool nocb_leader_sleep; /* Is the nocb leader thread asleep? */ 352 bool nocb_leader_sleep; /* Is the nocb leader thread asleep? */
362 struct rcu_data *nocb_next_follower; 353 struct rcu_data *nocb_next_follower;
363 /* Next follower in wakeup chain. */ 354 /* Next follower in wakeup chain. */
@@ -488,10 +479,14 @@ struct rcu_state {
488 /* due to no GP active. */ 479 /* due to no GP active. */
489 unsigned long gp_start; /* Time at which GP started, */ 480 unsigned long gp_start; /* Time at which GP started, */
490 /* but in jiffies. */ 481 /* but in jiffies. */
482 unsigned long gp_activity; /* Time of last GP kthread */
483 /* activity in jiffies. */
491 unsigned long jiffies_stall; /* Time at which to check */ 484 unsigned long jiffies_stall; /* Time at which to check */
492 /* for CPU stalls. */ 485 /* for CPU stalls. */
493 unsigned long jiffies_resched; /* Time at which to resched */ 486 unsigned long jiffies_resched; /* Time at which to resched */
494 /* a reluctant CPU. */ 487 /* a reluctant CPU. */
488 unsigned long n_force_qs_gpstart; /* Snapshot of n_force_qs at */
489 /* GP start. */
495 unsigned long gp_max; /* Maximum GP duration in */ 490 unsigned long gp_max; /* Maximum GP duration in */
496 /* jiffies. */ 491 /* jiffies. */
497 const char *name; /* Name of structure. */ 492 const char *name; /* Name of structure. */
@@ -514,13 +509,6 @@ extern struct list_head rcu_struct_flavors;
514#define for_each_rcu_flavor(rsp) \ 509#define for_each_rcu_flavor(rsp) \
515 list_for_each_entry((rsp), &rcu_struct_flavors, flavors) 510 list_for_each_entry((rsp), &rcu_struct_flavors, flavors)
516 511
517/* Return values for rcu_preempt_offline_tasks(). */
518
519#define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */
520 /* GP were moved to root. */
521#define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */
522 /* GP were moved to root. */
523
524/* 512/*
525 * RCU implementation internal declarations: 513 * RCU implementation internal declarations:
526 */ 514 */
@@ -546,27 +534,16 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work);
546 534
547/* Forward declarations for rcutree_plugin.h */ 535/* Forward declarations for rcutree_plugin.h */
548static void rcu_bootup_announce(void); 536static void rcu_bootup_announce(void);
549long rcu_batches_completed(void);
550static void rcu_preempt_note_context_switch(void); 537static void rcu_preempt_note_context_switch(void);
551static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); 538static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
552#ifdef CONFIG_HOTPLUG_CPU 539#ifdef CONFIG_HOTPLUG_CPU
553static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, 540static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
554 unsigned long flags);
555#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 541#endif /* #ifdef CONFIG_HOTPLUG_CPU */
556static void rcu_print_detail_task_stall(struct rcu_state *rsp); 542static void rcu_print_detail_task_stall(struct rcu_state *rsp);
557static int rcu_print_task_stall(struct rcu_node *rnp); 543static int rcu_print_task_stall(struct rcu_node *rnp);
558static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); 544static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
559#ifdef CONFIG_HOTPLUG_CPU
560static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
561 struct rcu_node *rnp,
562 struct rcu_data *rdp);
563#endif /* #ifdef CONFIG_HOTPLUG_CPU */
564static void rcu_preempt_check_callbacks(void); 545static void rcu_preempt_check_callbacks(void);
565void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); 546void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
566#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PREEMPT_RCU)
567static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
568 bool wake);
569#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PREEMPT_RCU) */
570static void __init __rcu_init_preempt(void); 547static void __init __rcu_init_preempt(void);
571static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); 548static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
572static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); 549static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
@@ -622,24 +599,15 @@ static void rcu_dynticks_task_exit(void);
622#endif /* #ifndef RCU_TREE_NONCORE */ 599#endif /* #ifndef RCU_TREE_NONCORE */
623 600
624#ifdef CONFIG_RCU_TRACE 601#ifdef CONFIG_RCU_TRACE
625#ifdef CONFIG_RCU_NOCB_CPU 602/* Read out queue lengths for tracing. */
626/* Sum up queue lengths for tracing. */
627static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll) 603static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
628{ 604{
629 *ql = atomic_long_read(&rdp->nocb_q_count) + 605#ifdef CONFIG_RCU_NOCB_CPU
630 rdp->nocb_p_count + 606 *ql = atomic_long_read(&rdp->nocb_q_count);
631 atomic_long_read(&rdp->nocb_follower_count) + 607 *qll = atomic_long_read(&rdp->nocb_q_count_lazy);
632 rdp->nocb_p_count + rdp->nocb_gp_count;
633 *qll = atomic_long_read(&rdp->nocb_q_count_lazy) +
634 rdp->nocb_p_count_lazy +
635 atomic_long_read(&rdp->nocb_follower_count_lazy) +
636 rdp->nocb_p_count_lazy + rdp->nocb_gp_count_lazy;
637}
638#else /* #ifdef CONFIG_RCU_NOCB_CPU */ 608#else /* #ifdef CONFIG_RCU_NOCB_CPU */
639static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
640{
641 *ql = 0; 609 *ql = 0;
642 *qll = 0; 610 *qll = 0;
643}
644#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ 611#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
612}
645#endif /* #ifdef CONFIG_RCU_TRACE */ 613#endif /* #ifdef CONFIG_RCU_TRACE */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 3ec85cb5d544..2e850a51bb8f 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -34,10 +34,6 @@
34 34
35#include "../locking/rtmutex_common.h" 35#include "../locking/rtmutex_common.h"
36 36
37/* rcuc/rcub kthread realtime priority */
38static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO;
39module_param(kthread_prio, int, 0644);
40
41/* 37/*
42 * Control variables for per-CPU and per-rcu_node kthreads. These 38 * Control variables for per-CPU and per-rcu_node kthreads. These
43 * handle all flavors of RCU. 39 * handle all flavors of RCU.
@@ -103,6 +99,8 @@ RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
103static struct rcu_state *rcu_state_p = &rcu_preempt_state; 99static struct rcu_state *rcu_state_p = &rcu_preempt_state;
104 100
105static int rcu_preempted_readers_exp(struct rcu_node *rnp); 101static int rcu_preempted_readers_exp(struct rcu_node *rnp);
102static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
103 bool wake);
106 104
107/* 105/*
108 * Tell them what RCU they are running. 106 * Tell them what RCU they are running.
@@ -114,25 +112,6 @@ static void __init rcu_bootup_announce(void)
114} 112}
115 113
116/* 114/*
117 * Return the number of RCU-preempt batches processed thus far
118 * for debug and statistics.
119 */
120static long rcu_batches_completed_preempt(void)
121{
122 return rcu_preempt_state.completed;
123}
124EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
125
126/*
127 * Return the number of RCU batches processed thus far for debug & stats.
128 */
129long rcu_batches_completed(void)
130{
131 return rcu_batches_completed_preempt();
132}
133EXPORT_SYMBOL_GPL(rcu_batches_completed);
134
135/*
136 * Record a preemptible-RCU quiescent state for the specified CPU. Note 115 * Record a preemptible-RCU quiescent state for the specified CPU. Note
137 * that this just means that the task currently running on the CPU is 116 * that this just means that the task currently running on the CPU is
138 * not in a quiescent state. There might be any number of tasks blocked 117 * not in a quiescent state. There might be any number of tasks blocked
@@ -307,15 +286,25 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t,
307} 286}
308 287
309/* 288/*
289 * Return true if the specified rcu_node structure has tasks that were
290 * preempted within an RCU read-side critical section.
291 */
292static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
293{
294 return !list_empty(&rnp->blkd_tasks);
295}
296
297/*
310 * Handle special cases during rcu_read_unlock(), such as needing to 298 * Handle special cases during rcu_read_unlock(), such as needing to
311 * notify RCU core processing or task having blocked during the RCU 299 * notify RCU core processing or task having blocked during the RCU
312 * read-side critical section. 300 * read-side critical section.
313 */ 301 */
314void rcu_read_unlock_special(struct task_struct *t) 302void rcu_read_unlock_special(struct task_struct *t)
315{ 303{
316 int empty; 304 bool empty;
317 int empty_exp; 305 bool empty_exp;
318 int empty_exp_now; 306 bool empty_norm;
307 bool empty_exp_now;
319 unsigned long flags; 308 unsigned long flags;
320 struct list_head *np; 309 struct list_head *np;
321#ifdef CONFIG_RCU_BOOST 310#ifdef CONFIG_RCU_BOOST
@@ -367,7 +356,8 @@ void rcu_read_unlock_special(struct task_struct *t)
367 break; 356 break;
368 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 357 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
369 } 358 }
370 empty = !rcu_preempt_blocked_readers_cgp(rnp); 359 empty = !rcu_preempt_has_tasks(rnp);
360 empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
371 empty_exp = !rcu_preempted_readers_exp(rnp); 361 empty_exp = !rcu_preempted_readers_exp(rnp);
372 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ 362 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
373 np = rcu_next_node_entry(t, rnp); 363 np = rcu_next_node_entry(t, rnp);
@@ -387,13 +377,21 @@ void rcu_read_unlock_special(struct task_struct *t)
387#endif /* #ifdef CONFIG_RCU_BOOST */ 377#endif /* #ifdef CONFIG_RCU_BOOST */
388 378
389 /* 379 /*
380 * If this was the last task on the list, go see if we
381 * need to propagate ->qsmaskinit bit clearing up the
382 * rcu_node tree.
383 */
384 if (!empty && !rcu_preempt_has_tasks(rnp))
385 rcu_cleanup_dead_rnp(rnp);
386
387 /*
390 * If this was the last task on the current list, and if 388 * If this was the last task on the current list, and if
391 * we aren't waiting on any CPUs, report the quiescent state. 389 * we aren't waiting on any CPUs, report the quiescent state.
392 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, 390 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
393 * so we must take a snapshot of the expedited state. 391 * so we must take a snapshot of the expedited state.
394 */ 392 */
395 empty_exp_now = !rcu_preempted_readers_exp(rnp); 393 empty_exp_now = !rcu_preempted_readers_exp(rnp);
396 if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { 394 if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) {
397 trace_rcu_quiescent_state_report(TPS("preempt_rcu"), 395 trace_rcu_quiescent_state_report(TPS("preempt_rcu"),
398 rnp->gpnum, 396 rnp->gpnum,
399 0, rnp->qsmask, 397 0, rnp->qsmask,
@@ -408,10 +406,8 @@ void rcu_read_unlock_special(struct task_struct *t)
408 406
409#ifdef CONFIG_RCU_BOOST 407#ifdef CONFIG_RCU_BOOST
410 /* Unboost if we were boosted. */ 408 /* Unboost if we were boosted. */
411 if (drop_boost_mutex) { 409 if (drop_boost_mutex)
412 rt_mutex_unlock(&rnp->boost_mtx); 410 rt_mutex_unlock(&rnp->boost_mtx);
413 complete(&rnp->boost_completion);
414 }
415#endif /* #ifdef CONFIG_RCU_BOOST */ 411#endif /* #ifdef CONFIG_RCU_BOOST */
416 412
417 /* 413 /*
@@ -519,99 +515,13 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
519static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 515static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
520{ 516{
521 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); 517 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
522 if (!list_empty(&rnp->blkd_tasks)) 518 if (rcu_preempt_has_tasks(rnp))
523 rnp->gp_tasks = rnp->blkd_tasks.next; 519 rnp->gp_tasks = rnp->blkd_tasks.next;
524 WARN_ON_ONCE(rnp->qsmask); 520 WARN_ON_ONCE(rnp->qsmask);
525} 521}
526 522
527#ifdef CONFIG_HOTPLUG_CPU 523#ifdef CONFIG_HOTPLUG_CPU
528 524
529/*
530 * Handle tasklist migration for case in which all CPUs covered by the
531 * specified rcu_node have gone offline. Move them up to the root
532 * rcu_node. The reason for not just moving them to the immediate
533 * parent is to remove the need for rcu_read_unlock_special() to
534 * make more than two attempts to acquire the target rcu_node's lock.
535 * Returns true if there were tasks blocking the current RCU grace
536 * period.
537 *
538 * Returns 1 if there was previously a task blocking the current grace
539 * period on the specified rcu_node structure.
540 *
541 * The caller must hold rnp->lock with irqs disabled.
542 */
543static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
544 struct rcu_node *rnp,
545 struct rcu_data *rdp)
546{
547 struct list_head *lp;
548 struct list_head *lp_root;
549 int retval = 0;
550 struct rcu_node *rnp_root = rcu_get_root(rsp);
551 struct task_struct *t;
552
553 if (rnp == rnp_root) {
554 WARN_ONCE(1, "Last CPU thought to be offlined?");
555 return 0; /* Shouldn't happen: at least one CPU online. */
556 }
557
558 /* If we are on an internal node, complain bitterly. */
559 WARN_ON_ONCE(rnp != rdp->mynode);
560
561 /*
562 * Move tasks up to root rcu_node. Don't try to get fancy for
563 * this corner-case operation -- just put this node's tasks
564 * at the head of the root node's list, and update the root node's
565 * ->gp_tasks and ->exp_tasks pointers to those of this node's,
566 * if non-NULL. This might result in waiting for more tasks than
567 * absolutely necessary, but this is a good performance/complexity
568 * tradeoff.
569 */
570 if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0)
571 retval |= RCU_OFL_TASKS_NORM_GP;
572 if (rcu_preempted_readers_exp(rnp))
573 retval |= RCU_OFL_TASKS_EXP_GP;
574 lp = &rnp->blkd_tasks;
575 lp_root = &rnp_root->blkd_tasks;
576 while (!list_empty(lp)) {
577 t = list_entry(lp->next, typeof(*t), rcu_node_entry);
578 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
579 smp_mb__after_unlock_lock();
580 list_del(&t->rcu_node_entry);
581 t->rcu_blocked_node = rnp_root;
582 list_add(&t->rcu_node_entry, lp_root);
583 if (&t->rcu_node_entry == rnp->gp_tasks)
584 rnp_root->gp_tasks = rnp->gp_tasks;
585 if (&t->rcu_node_entry == rnp->exp_tasks)
586 rnp_root->exp_tasks = rnp->exp_tasks;
587#ifdef CONFIG_RCU_BOOST
588 if (&t->rcu_node_entry == rnp->boost_tasks)
589 rnp_root->boost_tasks = rnp->boost_tasks;
590#endif /* #ifdef CONFIG_RCU_BOOST */
591 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
592 }
593
594 rnp->gp_tasks = NULL;
595 rnp->exp_tasks = NULL;
596#ifdef CONFIG_RCU_BOOST
597 rnp->boost_tasks = NULL;
598 /*
599 * In case root is being boosted and leaf was not. Make sure
600 * that we boost the tasks blocking the current grace period
601 * in this case.
602 */
603 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
604 smp_mb__after_unlock_lock();
605 if (rnp_root->boost_tasks != NULL &&
606 rnp_root->boost_tasks != rnp_root->gp_tasks &&
607 rnp_root->boost_tasks != rnp_root->exp_tasks)
608 rnp_root->boost_tasks = rnp_root->gp_tasks;
609 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
610#endif /* #ifdef CONFIG_RCU_BOOST */
611
612 return retval;
613}
614
615#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 525#endif /* #ifdef CONFIG_HOTPLUG_CPU */
616 526
617/* 527/*
@@ -771,7 +681,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
771 681
772 raw_spin_lock_irqsave(&rnp->lock, flags); 682 raw_spin_lock_irqsave(&rnp->lock, flags);
773 smp_mb__after_unlock_lock(); 683 smp_mb__after_unlock_lock();
774 if (list_empty(&rnp->blkd_tasks)) { 684 if (!rcu_preempt_has_tasks(rnp)) {
775 raw_spin_unlock_irqrestore(&rnp->lock, flags); 685 raw_spin_unlock_irqrestore(&rnp->lock, flags);
776 } else { 686 } else {
777 rnp->exp_tasks = rnp->blkd_tasks.next; 687 rnp->exp_tasks = rnp->blkd_tasks.next;
@@ -933,15 +843,6 @@ static void __init rcu_bootup_announce(void)
933} 843}
934 844
935/* 845/*
936 * Return the number of RCU batches processed thus far for debug & stats.
937 */
938long rcu_batches_completed(void)
939{
940 return rcu_batches_completed_sched();
941}
942EXPORT_SYMBOL_GPL(rcu_batches_completed);
943
944/*
945 * Because preemptible RCU does not exist, we never have to check for 846 * Because preemptible RCU does not exist, we never have to check for
946 * CPUs being in quiescent states. 847 * CPUs being in quiescent states.
947 */ 848 */
@@ -960,11 +861,12 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
960 861
961#ifdef CONFIG_HOTPLUG_CPU 862#ifdef CONFIG_HOTPLUG_CPU
962 863
963/* Because preemptible RCU does not exist, no quieting of tasks. */ 864/*
964static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) 865 * Because there is no preemptible RCU, there can be no readers blocked.
965 __releases(rnp->lock) 866 */
867static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
966{ 868{
967 raw_spin_unlock_irqrestore(&rnp->lock, flags); 869 return false;
968} 870}
969 871
970#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 872#endif /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -996,23 +898,6 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
996 WARN_ON_ONCE(rnp->qsmask); 898 WARN_ON_ONCE(rnp->qsmask);
997} 899}
998 900
999#ifdef CONFIG_HOTPLUG_CPU
1000
1001/*
1002 * Because preemptible RCU does not exist, it never needs to migrate
1003 * tasks that were blocked within RCU read-side critical sections, and
1004 * such non-existent tasks cannot possibly have been blocking the current
1005 * grace period.
1006 */
1007static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
1008 struct rcu_node *rnp,
1009 struct rcu_data *rdp)
1010{
1011 return 0;
1012}
1013
1014#endif /* #ifdef CONFIG_HOTPLUG_CPU */
1015
1016/* 901/*
1017 * Because preemptible RCU does not exist, it never has any callbacks 902 * Because preemptible RCU does not exist, it never has any callbacks
1018 * to check. 903 * to check.
@@ -1031,20 +916,6 @@ void synchronize_rcu_expedited(void)
1031} 916}
1032EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 917EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
1033 918
1034#ifdef CONFIG_HOTPLUG_CPU
1035
1036/*
1037 * Because preemptible RCU does not exist, there is never any need to
1038 * report on tasks preempted in RCU read-side critical sections during
1039 * expedited RCU grace periods.
1040 */
1041static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
1042 bool wake)
1043{
1044}
1045
1046#endif /* #ifdef CONFIG_HOTPLUG_CPU */
1047
1048/* 919/*
1049 * Because preemptible RCU does not exist, rcu_barrier() is just 920 * Because preemptible RCU does not exist, rcu_barrier() is just
1050 * another name for rcu_barrier_sched(). 921 * another name for rcu_barrier_sched().
@@ -1080,7 +951,7 @@ void exit_rcu(void)
1080 951
1081static void rcu_initiate_boost_trace(struct rcu_node *rnp) 952static void rcu_initiate_boost_trace(struct rcu_node *rnp)
1082{ 953{
1083 if (list_empty(&rnp->blkd_tasks)) 954 if (!rcu_preempt_has_tasks(rnp))
1084 rnp->n_balk_blkd_tasks++; 955 rnp->n_balk_blkd_tasks++;
1085 else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL) 956 else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
1086 rnp->n_balk_exp_gp_tasks++; 957 rnp->n_balk_exp_gp_tasks++;
@@ -1127,7 +998,8 @@ static int rcu_boost(struct rcu_node *rnp)
1127 struct task_struct *t; 998 struct task_struct *t;
1128 struct list_head *tb; 999 struct list_head *tb;
1129 1000
1130 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) 1001 if (ACCESS_ONCE(rnp->exp_tasks) == NULL &&
1002 ACCESS_ONCE(rnp->boost_tasks) == NULL)
1131 return 0; /* Nothing left to boost. */ 1003 return 0; /* Nothing left to boost. */
1132 1004
1133 raw_spin_lock_irqsave(&rnp->lock, flags); 1005 raw_spin_lock_irqsave(&rnp->lock, flags);
@@ -1175,15 +1047,11 @@ static int rcu_boost(struct rcu_node *rnp)
1175 */ 1047 */
1176 t = container_of(tb, struct task_struct, rcu_node_entry); 1048 t = container_of(tb, struct task_struct, rcu_node_entry);
1177 rt_mutex_init_proxy_locked(&rnp->boost_mtx, t); 1049 rt_mutex_init_proxy_locked(&rnp->boost_mtx, t);
1178 init_completion(&rnp->boost_completion);
1179 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1050 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1180 /* Lock only for side effect: boosts task t's priority. */ 1051 /* Lock only for side effect: boosts task t's priority. */
1181 rt_mutex_lock(&rnp->boost_mtx); 1052 rt_mutex_lock(&rnp->boost_mtx);
1182 rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */ 1053 rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */
1183 1054
1184 /* Wait for boostee to be done w/boost_mtx before reinitializing. */
1185 wait_for_completion(&rnp->boost_completion);
1186
1187 return ACCESS_ONCE(rnp->exp_tasks) != NULL || 1055 return ACCESS_ONCE(rnp->exp_tasks) != NULL ||
1188 ACCESS_ONCE(rnp->boost_tasks) != NULL; 1056 ACCESS_ONCE(rnp->boost_tasks) != NULL;
1189} 1057}
@@ -1416,12 +1284,8 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1416 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) 1284 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
1417 if ((mask & 0x1) && cpu != outgoingcpu) 1285 if ((mask & 0x1) && cpu != outgoingcpu)
1418 cpumask_set_cpu(cpu, cm); 1286 cpumask_set_cpu(cpu, cm);
1419 if (cpumask_weight(cm) == 0) { 1287 if (cpumask_weight(cm) == 0)
1420 cpumask_setall(cm); 1288 cpumask_setall(cm);
1421 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++)
1422 cpumask_clear_cpu(cpu, cm);
1423 WARN_ON_ONCE(cpumask_weight(cm) == 0);
1424 }
1425 set_cpus_allowed_ptr(t, cm); 1289 set_cpus_allowed_ptr(t, cm);
1426 free_cpumask_var(cm); 1290 free_cpumask_var(cm);
1427} 1291}
@@ -1446,12 +1310,8 @@ static void __init rcu_spawn_boost_kthreads(void)
1446 for_each_possible_cpu(cpu) 1310 for_each_possible_cpu(cpu)
1447 per_cpu(rcu_cpu_has_work, cpu) = 0; 1311 per_cpu(rcu_cpu_has_work, cpu) = 0;
1448 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); 1312 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
1449 rnp = rcu_get_root(rcu_state_p); 1313 rcu_for_each_leaf_node(rcu_state_p, rnp)
1450 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); 1314 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
1451 if (NUM_RCU_NODES > 1) {
1452 rcu_for_each_leaf_node(rcu_state_p, rnp)
1453 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
1454 }
1455} 1315}
1456 1316
1457static void rcu_prepare_kthreads(int cpu) 1317static void rcu_prepare_kthreads(int cpu)
@@ -1605,7 +1465,8 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
1605 * completed since we last checked and there are 1465 * completed since we last checked and there are
1606 * callbacks not yet ready to invoke. 1466 * callbacks not yet ready to invoke.
1607 */ 1467 */
1608 if (rdp->completed != rnp->completed && 1468 if ((rdp->completed != rnp->completed ||
1469 unlikely(ACCESS_ONCE(rdp->gpwrap))) &&
1609 rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]) 1470 rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL])
1610 note_gp_changes(rsp, rdp); 1471 note_gp_changes(rsp, rdp);
1611 1472
@@ -1898,11 +1759,12 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
1898 ticks_value = rsp->gpnum - rdp->gpnum; 1759 ticks_value = rsp->gpnum - rdp->gpnum;
1899 } 1760 }
1900 print_cpu_stall_fast_no_hz(fast_no_hz, cpu); 1761 print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
1901 pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u %s\n", 1762 pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n",
1902 cpu, ticks_value, ticks_title, 1763 cpu, ticks_value, ticks_title,
1903 atomic_read(&rdtp->dynticks) & 0xfff, 1764 atomic_read(&rdtp->dynticks) & 0xfff,
1904 rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting, 1765 rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
1905 rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), 1766 rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
1767 ACCESS_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart,
1906 fast_no_hz); 1768 fast_no_hz);
1907} 1769}
1908 1770
@@ -2056,9 +1918,26 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
2056static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) 1918static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
2057{ 1919{
2058 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 1920 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1921 unsigned long ret;
1922#ifdef CONFIG_PROVE_RCU
2059 struct rcu_head *rhp; 1923 struct rcu_head *rhp;
1924#endif /* #ifdef CONFIG_PROVE_RCU */
2060 1925
2061 /* No-CBs CPUs might have callbacks on any of three lists. */ 1926 /*
1927 * Check count of all no-CBs callbacks awaiting invocation.
1928 * There needs to be a barrier before this function is called,
1929 * but associated with a prior determination that no more
1930 * callbacks would be posted. In the worst case, the first
1931 * barrier in _rcu_barrier() suffices (but the caller cannot
1932 * necessarily rely on this, not a substitute for the caller
1933 * getting the concurrency design right!). There must also be
1934 * a barrier between the following load an posting of a callback
1935 * (if a callback is in fact needed). This is associated with an
1936 * atomic_inc() in the caller.
1937 */
1938 ret = atomic_long_read(&rdp->nocb_q_count);
1939
1940#ifdef CONFIG_PROVE_RCU
2062 rhp = ACCESS_ONCE(rdp->nocb_head); 1941 rhp = ACCESS_ONCE(rdp->nocb_head);
2063 if (!rhp) 1942 if (!rhp)
2064 rhp = ACCESS_ONCE(rdp->nocb_gp_head); 1943 rhp = ACCESS_ONCE(rdp->nocb_gp_head);
@@ -2072,8 +1951,9 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
2072 cpu, rhp->func); 1951 cpu, rhp->func);
2073 WARN_ON_ONCE(1); 1952 WARN_ON_ONCE(1);
2074 } 1953 }
1954#endif /* #ifdef CONFIG_PROVE_RCU */
2075 1955
2076 return !!rhp; 1956 return !!ret;
2077} 1957}
2078 1958
2079/* 1959/*
@@ -2095,9 +1975,10 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
2095 struct task_struct *t; 1975 struct task_struct *t;
2096 1976
2097 /* Enqueue the callback on the nocb list and update counts. */ 1977 /* Enqueue the callback on the nocb list and update counts. */
1978 atomic_long_add(rhcount, &rdp->nocb_q_count);
1979 /* rcu_barrier() relies on ->nocb_q_count add before xchg. */
2098 old_rhpp = xchg(&rdp->nocb_tail, rhtp); 1980 old_rhpp = xchg(&rdp->nocb_tail, rhtp);
2099 ACCESS_ONCE(*old_rhpp) = rhp; 1981 ACCESS_ONCE(*old_rhpp) = rhp;
2100 atomic_long_add(rhcount, &rdp->nocb_q_count);
2101 atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy); 1982 atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);
2102 smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */ 1983 smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */
2103 1984
@@ -2288,9 +2169,6 @@ wait_again:
2288 /* Move callbacks to wait-for-GP list, which is empty. */ 2169 /* Move callbacks to wait-for-GP list, which is empty. */
2289 ACCESS_ONCE(rdp->nocb_head) = NULL; 2170 ACCESS_ONCE(rdp->nocb_head) = NULL;
2290 rdp->nocb_gp_tail = xchg(&rdp->nocb_tail, &rdp->nocb_head); 2171 rdp->nocb_gp_tail = xchg(&rdp->nocb_tail, &rdp->nocb_head);
2291 rdp->nocb_gp_count = atomic_long_xchg(&rdp->nocb_q_count, 0);
2292 rdp->nocb_gp_count_lazy =
2293 atomic_long_xchg(&rdp->nocb_q_count_lazy, 0);
2294 gotcbs = true; 2172 gotcbs = true;
2295 } 2173 }
2296 2174
@@ -2338,9 +2216,6 @@ wait_again:
2338 /* Append callbacks to follower's "done" list. */ 2216 /* Append callbacks to follower's "done" list. */
2339 tail = xchg(&rdp->nocb_follower_tail, rdp->nocb_gp_tail); 2217 tail = xchg(&rdp->nocb_follower_tail, rdp->nocb_gp_tail);
2340 *tail = rdp->nocb_gp_head; 2218 *tail = rdp->nocb_gp_head;
2341 atomic_long_add(rdp->nocb_gp_count, &rdp->nocb_follower_count);
2342 atomic_long_add(rdp->nocb_gp_count_lazy,
2343 &rdp->nocb_follower_count_lazy);
2344 smp_mb__after_atomic(); /* Store *tail before wakeup. */ 2219 smp_mb__after_atomic(); /* Store *tail before wakeup. */
2345 if (rdp != my_rdp && tail == &rdp->nocb_follower_head) { 2220 if (rdp != my_rdp && tail == &rdp->nocb_follower_head) {
2346 /* 2221 /*
@@ -2415,13 +2290,11 @@ static int rcu_nocb_kthread(void *arg)
2415 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty"); 2290 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty");
2416 ACCESS_ONCE(rdp->nocb_follower_head) = NULL; 2291 ACCESS_ONCE(rdp->nocb_follower_head) = NULL;
2417 tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head); 2292 tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head);
2418 c = atomic_long_xchg(&rdp->nocb_follower_count, 0);
2419 cl = atomic_long_xchg(&rdp->nocb_follower_count_lazy, 0);
2420 rdp->nocb_p_count += c;
2421 rdp->nocb_p_count_lazy += cl;
2422 2293
2423 /* Each pass through the following loop invokes a callback. */ 2294 /* Each pass through the following loop invokes a callback. */
2424 trace_rcu_batch_start(rdp->rsp->name, cl, c, -1); 2295 trace_rcu_batch_start(rdp->rsp->name,
2296 atomic_long_read(&rdp->nocb_q_count_lazy),
2297 atomic_long_read(&rdp->nocb_q_count), -1);
2425 c = cl = 0; 2298 c = cl = 0;
2426 while (list) { 2299 while (list) {
2427 next = list->next; 2300 next = list->next;
@@ -2443,9 +2316,9 @@ static int rcu_nocb_kthread(void *arg)
2443 list = next; 2316 list = next;
2444 } 2317 }
2445 trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1); 2318 trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
2446 ACCESS_ONCE(rdp->nocb_p_count) = rdp->nocb_p_count - c; 2319 smp_mb__before_atomic(); /* _add after CB invocation. */
2447 ACCESS_ONCE(rdp->nocb_p_count_lazy) = 2320 atomic_long_add(-c, &rdp->nocb_q_count);
2448 rdp->nocb_p_count_lazy - cl; 2321 atomic_long_add(-cl, &rdp->nocb_q_count_lazy);
2449 rdp->n_nocbs_invoked += c; 2322 rdp->n_nocbs_invoked += c;
2450 } 2323 }
2451 return 0; 2324 return 0;
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 5cdc62e1beeb..fbb6240509ea 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -46,6 +46,8 @@
46#define RCU_TREE_NONCORE 46#define RCU_TREE_NONCORE
47#include "tree.h" 47#include "tree.h"
48 48
49DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
50
49static int r_open(struct inode *inode, struct file *file, 51static int r_open(struct inode *inode, struct file *file,
50 const struct seq_operations *op) 52 const struct seq_operations *op)
51{ 53{
@@ -115,11 +117,13 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
115 117
116 if (!rdp->beenonline) 118 if (!rdp->beenonline)
117 return; 119 return;
118 seq_printf(m, "%3d%cc=%ld g=%ld pq=%d qp=%d", 120 seq_printf(m, "%3d%cc=%ld g=%ld pq=%d/%d qp=%d",
119 rdp->cpu, 121 rdp->cpu,
120 cpu_is_offline(rdp->cpu) ? '!' : ' ', 122 cpu_is_offline(rdp->cpu) ? '!' : ' ',
121 ulong2long(rdp->completed), ulong2long(rdp->gpnum), 123 ulong2long(rdp->completed), ulong2long(rdp->gpnum),
122 rdp->passed_quiesce, rdp->qs_pending); 124 rdp->passed_quiesce,
125 rdp->rcu_qs_ctr_snap == per_cpu(rcu_qs_ctr, rdp->cpu),
126 rdp->qs_pending);
123 seq_printf(m, " dt=%d/%llx/%d df=%lu", 127 seq_printf(m, " dt=%d/%llx/%d df=%lu",
124 atomic_read(&rdp->dynticks->dynticks), 128 atomic_read(&rdp->dynticks->dynticks),
125 rdp->dynticks->dynticks_nesting, 129 rdp->dynticks->dynticks_nesting,
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 607f852b4d04..7052d3fd4e7b 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -268,6 +268,15 @@ bool try_wait_for_completion(struct completion *x)
268 unsigned long flags; 268 unsigned long flags;
269 int ret = 1; 269 int ret = 1;
270 270
271 /*
272 * Since x->done will need to be locked only
273 * in the non-blocking case, we check x->done
274 * first without taking the lock so we can
275 * return early in the blocking case.
276 */
277 if (!ACCESS_ONCE(x->done))
278 return 0;
279
271 spin_lock_irqsave(&x->wait.lock, flags); 280 spin_lock_irqsave(&x->wait.lock, flags);
272 if (!x->done) 281 if (!x->done)
273 ret = 0; 282 ret = 0;
@@ -288,13 +297,6 @@ EXPORT_SYMBOL(try_wait_for_completion);
288 */ 297 */
289bool completion_done(struct completion *x) 298bool completion_done(struct completion *x)
290{ 299{
291 unsigned long flags; 300 return !!ACCESS_ONCE(x->done);
292 int ret = 1;
293
294 spin_lock_irqsave(&x->wait.lock, flags);
295 if (!x->done)
296 ret = 0;
297 spin_unlock_irqrestore(&x->wait.lock, flags);
298 return ret;
299} 301}
300EXPORT_SYMBOL(completion_done); 302EXPORT_SYMBOL(completion_done);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ae1188f62693..1612578a5b7a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1814,6 +1814,10 @@ void __dl_clear_params(struct task_struct *p)
1814 dl_se->dl_period = 0; 1814 dl_se->dl_period = 0;
1815 dl_se->flags = 0; 1815 dl_se->flags = 0;
1816 dl_se->dl_bw = 0; 1816 dl_se->dl_bw = 0;
1817
1818 dl_se->dl_throttled = 0;
1819 dl_se->dl_new = 1;
1820 dl_se->dl_yielded = 0;
1817} 1821}
1818 1822
1819/* 1823/*
@@ -1839,7 +1843,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
1839#endif 1843#endif
1840 1844
1841 RB_CLEAR_NODE(&p->dl.rb_node); 1845 RB_CLEAR_NODE(&p->dl.rb_node);
1842 hrtimer_init(&p->dl.dl_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 1846 init_dl_task_timer(&p->dl);
1843 __dl_clear_params(p); 1847 __dl_clear_params(p);
1844 1848
1845 INIT_LIST_HEAD(&p->rt.run_list); 1849 INIT_LIST_HEAD(&p->rt.run_list);
@@ -2049,6 +2053,9 @@ static inline int dl_bw_cpus(int i)
2049 * allocated bandwidth to reflect the new situation. 2053 * allocated bandwidth to reflect the new situation.
2050 * 2054 *
2051 * This function is called while holding p's rq->lock. 2055 * This function is called while holding p's rq->lock.
2056 *
2057 * XXX we should delay bw change until the task's 0-lag point, see
2058 * __setparam_dl().
2052 */ 2059 */
2053static int dl_overflow(struct task_struct *p, int policy, 2060static int dl_overflow(struct task_struct *p, int policy,
2054 const struct sched_attr *attr) 2061 const struct sched_attr *attr)
@@ -3251,15 +3258,31 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
3251{ 3258{
3252 struct sched_dl_entity *dl_se = &p->dl; 3259 struct sched_dl_entity *dl_se = &p->dl;
3253 3260
3254 init_dl_task_timer(dl_se);
3255 dl_se->dl_runtime = attr->sched_runtime; 3261 dl_se->dl_runtime = attr->sched_runtime;
3256 dl_se->dl_deadline = attr->sched_deadline; 3262 dl_se->dl_deadline = attr->sched_deadline;
3257 dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline; 3263 dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
3258 dl_se->flags = attr->sched_flags; 3264 dl_se->flags = attr->sched_flags;
3259 dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime); 3265 dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
3260 dl_se->dl_throttled = 0; 3266
3261 dl_se->dl_new = 1; 3267 /*
3262 dl_se->dl_yielded = 0; 3268 * Changing the parameters of a task is 'tricky' and we're not doing
3269 * the correct thing -- also see task_dead_dl() and switched_from_dl().
3270 *
3271 * What we SHOULD do is delay the bandwidth release until the 0-lag
3272 * point. This would include retaining the task_struct until that time
3273 * and change dl_overflow() to not immediately decrement the current
3274 * amount.
3275 *
3276 * Instead we retain the current runtime/deadline and let the new
3277 * parameters take effect after the current reservation period lapses.
3278 * This is safe (albeit pessimistic) because the 0-lag point is always
3279 * before the current scheduling deadline.
3280 *
3281 * We can still have temporary overloads because we do not delay the
3282 * change in bandwidth until that time; so admission control is
3283 * not on the safe side. It does however guarantee tasks will never
3284 * consume more than promised.
3285 */
3263} 3286}
3264 3287
3265/* 3288/*
@@ -4642,6 +4665,9 @@ int cpuset_cpumask_can_shrink(const struct cpumask *cur,
4642 struct dl_bw *cur_dl_b; 4665 struct dl_bw *cur_dl_b;
4643 unsigned long flags; 4666 unsigned long flags;
4644 4667
4668 if (!cpumask_weight(cur))
4669 return ret;
4670
4645 rcu_read_lock_sched(); 4671 rcu_read_lock_sched();
4646 cur_dl_b = dl_bw_of(cpumask_any(cur)); 4672 cur_dl_b = dl_bw_of(cpumask_any(cur));
4647 trial_cpus = cpumask_weight(trial); 4673 trial_cpus = cpumask_weight(trial);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index b52092f2636d..726470d47f87 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1094,6 +1094,7 @@ static void task_dead_dl(struct task_struct *p)
1094 * Since we are TASK_DEAD we won't slip out of the domain! 1094 * Since we are TASK_DEAD we won't slip out of the domain!
1095 */ 1095 */
1096 raw_spin_lock_irq(&dl_b->lock); 1096 raw_spin_lock_irq(&dl_b->lock);
1097 /* XXX we should retain the bw until 0-lag */
1097 dl_b->total_bw -= p->dl.dl_bw; 1098 dl_b->total_bw -= p->dl.dl_bw;
1098 raw_spin_unlock_irq(&dl_b->lock); 1099 raw_spin_unlock_irq(&dl_b->lock);
1099 1100
@@ -1614,8 +1615,8 @@ static void cancel_dl_timer(struct rq *rq, struct task_struct *p)
1614 1615
1615static void switched_from_dl(struct rq *rq, struct task_struct *p) 1616static void switched_from_dl(struct rq *rq, struct task_struct *p)
1616{ 1617{
1618 /* XXX we should retain the bw until 0-lag */
1617 cancel_dl_timer(rq, p); 1619 cancel_dl_timer(rq, p);
1618
1619 __dl_clear_params(p); 1620 __dl_clear_params(p);
1620 1621
1621 /* 1622 /*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 40667cbf371b..fe331fc391f5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1730,7 +1730,7 @@ static int preferred_group_nid(struct task_struct *p, int nid)
1730 nodes = node_online_map; 1730 nodes = node_online_map;
1731 for (dist = sched_max_numa_distance; dist > LOCAL_DISTANCE; dist--) { 1731 for (dist = sched_max_numa_distance; dist > LOCAL_DISTANCE; dist--) {
1732 unsigned long max_faults = 0; 1732 unsigned long max_faults = 0;
1733 nodemask_t max_group; 1733 nodemask_t max_group = NODE_MASK_NONE;
1734 int a, b; 1734 int a, b;
1735 1735
1736 /* Are there nodes at this distance from each other? */ 1736 /* Are there nodes at this distance from each other? */
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index f032fb5284e3..40190f28db35 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -280,6 +280,7 @@ int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
280 unsigned int cpu; 280 unsigned int cpu;
281 int ret = 0; 281 int ret = 0;
282 282
283 get_online_cpus();
283 mutex_lock(&smpboot_threads_lock); 284 mutex_lock(&smpboot_threads_lock);
284 for_each_online_cpu(cpu) { 285 for_each_online_cpu(cpu) {
285 ret = __smpboot_create_thread(plug_thread, cpu); 286 ret = __smpboot_create_thread(plug_thread, cpu);
@@ -292,6 +293,7 @@ int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
292 list_add(&plug_thread->list, &hotplug_threads); 293 list_add(&plug_thread->list, &hotplug_threads);
293out: 294out:
294 mutex_unlock(&smpboot_threads_lock); 295 mutex_unlock(&smpboot_threads_lock);
296 put_online_cpus();
295 return ret; 297 return ret;
296} 298}
297EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread); 299EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 501baa9ac1be..479e4436f787 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -114,8 +114,12 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
114 trace_softirqs_off(ip); 114 trace_softirqs_off(ip);
115 raw_local_irq_restore(flags); 115 raw_local_irq_restore(flags);
116 116
117 if (preempt_count() == cnt) 117 if (preempt_count() == cnt) {
118#ifdef CONFIG_DEBUG_PREEMPT
119 current->preempt_disable_ip = get_parent_ip(CALLER_ADDR1);
120#endif
118 trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); 121 trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
122 }
119} 123}
120EXPORT_SYMBOL(__local_bh_disable_ip); 124EXPORT_SYMBOL(__local_bh_disable_ip);
121#endif /* CONFIG_TRACE_IRQFLAGS */ 125#endif /* CONFIG_TRACE_IRQFLAGS */
@@ -656,9 +660,8 @@ static void run_ksoftirqd(unsigned int cpu)
656 * in the task stack here. 660 * in the task stack here.
657 */ 661 */
658 __do_softirq(); 662 __do_softirq();
659 rcu_note_context_switch();
660 local_irq_enable(); 663 local_irq_enable();
661 cond_resched(); 664 cond_resched_rcu_qs();
662 return; 665 return;
663 } 666 }
664 local_irq_enable(); 667 local_irq_enable();
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 37e50aadd471..d8c724cda37b 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -122,7 +122,7 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
122 mono = ktime_get_update_offsets_tick(&off_real, &off_boot, &off_tai); 122 mono = ktime_get_update_offsets_tick(&off_real, &off_boot, &off_tai);
123 boot = ktime_add(mono, off_boot); 123 boot = ktime_add(mono, off_boot);
124 xtim = ktime_add(mono, off_real); 124 xtim = ktime_add(mono, off_real);
125 tai = ktime_add(xtim, off_tai); 125 tai = ktime_add(mono, off_tai);
126 126
127 base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim; 127 base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
128 base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono; 128 base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;