Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Kconfig.locks            |   9
-rw-r--r--  kernel/events/core.c            |  34
-rw-r--r--  kernel/kexec.c                  |   4
-rw-r--r--  kernel/kprobes.c                |  14
-rw-r--r--  kernel/locking/mcs_spinlock.c   |  64
-rw-r--r--  kernel/locking/mcs_spinlock.h   |   9
-rw-r--r--  kernel/locking/mutex.c          |   2
-rw-r--r--  kernel/locking/rwsem-spinlock.c |  28
-rw-r--r--  kernel/locking/rwsem-xadd.c     |  16
-rw-r--r--  kernel/locking/rwsem.c          |   2
-rw-r--r--  kernel/power/process.c          |   1
-rw-r--r--  kernel/power/suspend.c          |   4
-rw-r--r--  kernel/rcu/rcutorture.c         |   4
-rw-r--r--  kernel/rcu/tree.c               | 140
-rw-r--r--  kernel/rcu/tree.h               |   6
-rw-r--r--  kernel/rcu/tree_plugin.h        |   2
-rw-r--r--  kernel/rcu/update.c             |  22
-rw-r--r--  kernel/sched/core.c             |   7
-rw-r--r--  kernel/sched/debug.c            |   2
-rw-r--r--  kernel/time/alarmtimer.c        |  20
-rw-r--r--  kernel/time/clockevents.c       |  10
-rw-r--r--  kernel/time/sched_clock.c       |   4
-rw-r--r--  kernel/trace/ftrace.c           |   4
-rw-r--r--  kernel/trace/ring_buffer.c      |   4
-rw-r--r--  kernel/trace/trace.c            |  20
-rw-r--r--  kernel/trace/trace_clock.c      |   9
-rw-r--r--  kernel/trace/trace_events.c     |   1
27 files changed, 308 insertions(+), 134 deletions(-)
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 35536d9c0964..76768ee812b2 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -220,9 +220,16 @@ config INLINE_WRITE_UNLOCK_IRQRESTORE
 
 endif
 
+config ARCH_SUPPORTS_ATOMIC_RMW
+	bool
+
 config MUTEX_SPIN_ON_OWNER
 	def_bool y
-	depends on SMP && !DEBUG_MUTEXES
+	depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW
+
+config RWSEM_SPIN_ON_OWNER
+	def_bool y
+	depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
 
 config ARCH_USE_QUEUE_RWLOCK
 	bool
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a33d9a2bcbd7..6b17ac1b0c2a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2320,7 +2320,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 	next_parent = rcu_dereference(next_ctx->parent_ctx);
 
 	/* If neither context have a parent context; they cannot be clones. */
-	if (!parent && !next_parent)
+	if (!parent || !next_parent)
 		goto unlock;
 
 	if (next_parent == ctx || next_ctx == parent || next_parent == parent) {
@@ -7458,7 +7458,19 @@ __perf_event_exit_task(struct perf_event *child_event,
 			 struct perf_event_context *child_ctx,
 			 struct task_struct *child)
 {
-	perf_remove_from_context(child_event, true);
+	/*
+	 * Do not destroy the 'original' grouping; because of the context
+	 * switch optimization the original events could've ended up in a
+	 * random child task.
+	 *
+	 * If we were to destroy the original group, all group related
+	 * operations would cease to function properly after this random
+	 * child dies.
+	 *
+	 * Do destroy all inherited groups, we don't care about those
+	 * and being thorough is better.
+	 */
+	perf_remove_from_context(child_event, !!child_event->parent);
 
 	/*
 	 * It can happen that the parent exits first, and has events
@@ -7474,7 +7486,7 @@ __perf_event_exit_task(struct perf_event *child_event,
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 {
 	struct perf_event *child_event, *next;
-	struct perf_event_context *child_ctx;
+	struct perf_event_context *child_ctx, *parent_ctx;
 	unsigned long flags;
 
 	if (likely(!child->perf_event_ctxp[ctxn])) {
@@ -7499,6 +7511,15 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	raw_spin_lock(&child_ctx->lock);
 	task_ctx_sched_out(child_ctx);
 	child->perf_event_ctxp[ctxn] = NULL;
+
+	/*
+	 * In order to avoid freeing: child_ctx->parent_ctx->task
+	 * under perf_event_context::lock, grab another reference.
+	 */
+	parent_ctx = child_ctx->parent_ctx;
+	if (parent_ctx)
+		get_ctx(parent_ctx);
+
 	/*
 	 * If this context is a clone; unclone it so it can't get
 	 * swapped to another process while we're removing all
@@ -7509,6 +7530,13 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
 
 	/*
+	 * Now that we no longer hold perf_event_context::lock, drop
+	 * our extra child_ctx->parent_ctx reference.
+	 */
+	if (parent_ctx)
+		put_ctx(parent_ctx);
+
+	/*
 	 * Report the task dead after unscheduling the events so that we
 	 * won't get any samples after PERF_RECORD_EXIT. We can however still
 	 * get a few PERF_RECORD_READ events.
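
Note: the get_ctx()/put_ctx() pair added above is an instance of a general pattern: take an extra reference to an object while the lock protecting the pointer is still held, and drop that reference only after the lock is released, so that the final free never runs under the lock. A minimal userspace sketch of the pattern follows; the names (obj_get, obj_put, detach_shared) are invented for illustration and are not the perf helpers.

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct obj {
	atomic_int refcount;
	/* ... payload ... */
};

static struct obj *obj_get(struct obj *o)
{
	atomic_fetch_add(&o->refcount, 1);
	return o;
}

static void obj_put(struct obj *o)
{
	/* Last reference frees the object; freeing may do arbitrary work. */
	if (atomic_fetch_sub(&o->refcount, 1) == 1)
		free(o);
}

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct obj *shared;	/* protected by 'lock' */

static void detach_shared(void)
{
	struct obj *o;

	pthread_mutex_lock(&lock);
	o = shared;
	if (o)
		obj_get(o);	/* pin it while still under the lock */
	shared = NULL;
	pthread_mutex_unlock(&lock);

	if (o)
		obj_put(o);	/* potential free happens outside the lock */
}

int main(void)
{
	shared = calloc(1, sizeof(*shared));
	atomic_init(&shared->refcount, 1);
	detach_shared();
	return 0;
}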
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 369f41a94124..4b8f0c925884 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -33,6 +33,7 @@
 #include <linux/swap.h>
 #include <linux/syscore_ops.h>
 #include <linux/compiler.h>
+#include <linux/hugetlb.h>
 
 #include <asm/page.h>
 #include <asm/uaccess.h>
@@ -1619,6 +1620,9 @@ static int __init crash_save_vmcoreinfo_init(void)
 #endif
 	VMCOREINFO_NUMBER(PG_head_mask);
 	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
+#ifdef CONFIG_HUGETLBFS
+	VMCOREINFO_SYMBOL(free_huge_page);
+#endif
 
 	arch_crash_save_vmcoreinfo();
 	update_vmcoreinfo_note();
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3214289df5a7..734e9a7d280b 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2037,19 +2037,23 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
 {
 	unsigned long *iter;
 	struct kprobe_blacklist_entry *ent;
-	unsigned long offset = 0, size = 0;
+	unsigned long entry, offset = 0, size = 0;
 
 	for (iter = start; iter < end; iter++) {
-		if (!kallsyms_lookup_size_offset(*iter, &size, &offset)) {
-			pr_err("Failed to find blacklist %p\n", (void *)*iter);
+		entry = arch_deref_entry_point((void *)*iter);
+
+		if (!kernel_text_address(entry) ||
+		    !kallsyms_lookup_size_offset(entry, &size, &offset)) {
+			pr_err("Failed to find blacklist at %p\n",
+				(void *)entry);
 			continue;
 		}
 
 		ent = kmalloc(sizeof(*ent), GFP_KERNEL);
 		if (!ent)
 			return -ENOMEM;
-		ent->start_addr = *iter;
-		ent->end_addr = *iter + size;
+		ent->start_addr = entry;
+		ent->end_addr = entry + size;
 		INIT_LIST_HEAD(&ent->list);
 		list_add_tail(&ent->list, &kprobe_blacklist);
 	}
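
Note: arch_deref_entry_point() is needed here because on some architectures (ia64, or powerpc64 with the ELFv1 ABI, for example) a function symbol points at a function descriptor rather than at the first instruction, so the raw blacklist address is not a kernel text address. A rough userspace model of that indirection follows; the descriptor layout and helper name are illustrative only, not the kernel's actual definitions.

#include <stdio.h>

/* Hypothetical "function descriptor": points at the real entry. */
struct func_desc {
	void *entry;	/* address of the first instruction */
	void *toc;	/* table-of-contents / gp pointer on such ABIs */
};

/* With descriptors, dereference once to reach the text address. */
static void *deref_entry_point(void *addr, int abi_uses_descriptors)
{
	if (abi_uses_descriptors)
		return ((struct func_desc *)addr)->entry;
	return addr;	/* default: the symbol already is the entry */
}

static void target(void) { }

int main(void)
{
	struct func_desc desc = { .entry = (void *)target, .toc = 0 };

	printf("direct: %p\n", deref_entry_point((void *)target, 0));
	printf("via descriptor: %p\n", deref_entry_point(&desc, 1));
	return 0;
}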
diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
index 838dc9e00669..be9ee1559fca 100644
--- a/kernel/locking/mcs_spinlock.c
+++ b/kernel/locking/mcs_spinlock.c
@@ -14,21 +14,47 @@
  * called from interrupt context and we have preemption disabled while
  * spinning.
  */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_queue, osq_node);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node);
+
+/*
+ * We use the value 0 to represent "no CPU", thus the encoded value
+ * will be the CPU number incremented by 1.
+ */
+static inline int encode_cpu(int cpu_nr)
+{
+	return cpu_nr + 1;
+}
+
+static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val)
+{
+	int cpu_nr = encoded_cpu_val - 1;
+
+	return per_cpu_ptr(&osq_node, cpu_nr);
+}
 
 /*
  * Get a stable @node->next pointer, either for unlock() or unqueue() purposes.
  * Can return NULL in case we were the last queued and we updated @lock instead.
  */
-static inline struct optimistic_spin_queue *
-osq_wait_next(struct optimistic_spin_queue **lock,
-	      struct optimistic_spin_queue *node,
-	      struct optimistic_spin_queue *prev)
+static inline struct optimistic_spin_node *
+osq_wait_next(struct optimistic_spin_queue *lock,
+	      struct optimistic_spin_node *node,
+	      struct optimistic_spin_node *prev)
 {
-	struct optimistic_spin_queue *next = NULL;
+	struct optimistic_spin_node *next = NULL;
+	int curr = encode_cpu(smp_processor_id());
+	int old;
+
+	/*
+	 * If there is a prev node in queue, then the 'old' value will be
+	 * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if
+	 * we're currently last in queue, then the queue will then become empty.
+	 */
+	old = prev ? prev->cpu : OSQ_UNLOCKED_VAL;
 
 	for (;;) {
-		if (*lock == node && cmpxchg(lock, node, prev) == node) {
+		if (atomic_read(&lock->tail) == curr &&
+		    atomic_cmpxchg(&lock->tail, curr, old) == curr) {
 			/*
 			 * We were the last queued, we moved @lock back. @prev
 			 * will now observe @lock and will complete its
@@ -59,18 +85,23 @@ osq_wait_next(struct optimistic_spin_queue **lock,
 	return next;
 }
 
-bool osq_lock(struct optimistic_spin_queue **lock)
+bool osq_lock(struct optimistic_spin_queue *lock)
 {
-	struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node);
-	struct optimistic_spin_queue *prev, *next;
+	struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
+	struct optimistic_spin_node *prev, *next;
+	int curr = encode_cpu(smp_processor_id());
+	int old;
 
 	node->locked = 0;
 	node->next = NULL;
+	node->cpu = curr;
 
-	node->prev = prev = xchg(lock, node);
-	if (likely(prev == NULL))
+	old = atomic_xchg(&lock->tail, curr);
+	if (old == OSQ_UNLOCKED_VAL)
 		return true;
 
+	prev = decode_cpu(old);
+	node->prev = prev;
 	ACCESS_ONCE(prev->next) = node;
 
 	/*
@@ -149,20 +180,21 @@ unqueue:
 	return false;
 }
 
-void osq_unlock(struct optimistic_spin_queue **lock)
+void osq_unlock(struct optimistic_spin_queue *lock)
 {
-	struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node);
-	struct optimistic_spin_queue *next;
+	struct optimistic_spin_node *node, *next;
+	int curr = encode_cpu(smp_processor_id());
 
 	/*
 	 * Fast path for the uncontended case.
 	 */
-	if (likely(cmpxchg(lock, node, NULL) == node))
+	if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr))
 		return;
 
 	/*
 	 * Second most likely case.
 	 */
+	node = this_cpu_ptr(&osq_node);
 	next = xchg(&node->next, NULL);
 	if (next) {
 		ACCESS_ONCE(next->locked) = 1;
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index a2dbac4aca6b..74356dc0ce29 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -118,12 +118,13 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
  * mutex_lock()/rwsem_down_{read,write}() etc.
  */
 
-struct optimistic_spin_queue {
-	struct optimistic_spin_queue *next, *prev;
+struct optimistic_spin_node {
+	struct optimistic_spin_node *next, *prev;
 	int locked; /* 1 if lock acquired */
+	int cpu; /* encoded CPU # value */
 };
 
-extern bool osq_lock(struct optimistic_spin_queue **lock);
-extern void osq_unlock(struct optimistic_spin_queue **lock);
+extern bool osq_lock(struct optimistic_spin_queue *lock);
+extern void osq_unlock(struct optimistic_spin_queue *lock);
 
 #endif /* __LINUX_MCS_SPINLOCK_H */
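
Note: taken together with the mcs_spinlock.c changes above, the point of the split is that the lock word (struct optimistic_spin_queue) holds only an atomic tail encoding a CPU number plus one, so 0 can mean "unlocked", while the queue nodes live in per-CPU data as struct optimistic_spin_node. The simplified, single-file userspace sketch below illustrates only that encoding; it uses a plain array in place of per-CPU variables and models the uncontended fast paths, not the queueing or unqueue logic.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS			4
#define OSQ_UNLOCKED_VAL	0

struct spin_node {
	struct spin_node *next, *prev;
	int locked;
	int cpu;		/* encoded CPU # value */
};

struct spin_queue {
	atomic_int tail;	/* 0 = unlocked, else cpu + 1 */
};

static struct spin_node nodes[NR_CPUS];	/* stand-in for per-CPU data */

static int encode_cpu(int cpu)			{ return cpu + 1; }
static struct spin_node *decode_cpu(int val)	{ return &nodes[val - 1]; }

/* Uncontended fast path only: queueing behind a previous owner omitted. */
static bool osq_trylock(struct spin_queue *lock, int cpu)
{
	int curr = encode_cpu(cpu);
	struct spin_node *node = &nodes[cpu];

	node->locked = 0;
	node->next = NULL;
	node->cpu = curr;

	/* xchg installs us as tail; the old value says whether it was free. */
	return atomic_exchange(&lock->tail, curr) == OSQ_UNLOCKED_VAL;
}

static bool osq_unlock_fast(struct spin_queue *lock, int cpu)
{
	int curr = encode_cpu(cpu);

	return atomic_compare_exchange_strong(&lock->tail, &curr,
					      OSQ_UNLOCKED_VAL);
}

int main(void)
{
	struct spin_queue lock = { .tail = OSQ_UNLOCKED_VAL };

	printf("cpu1 trylock: %d\n", osq_trylock(&lock, 1));
	printf("tail decodes to node of cpu %d\n",
	       decode_cpu(atomic_load(&lock.tail))->cpu - 1);
	printf("cpu1 unlock fast path: %d\n", osq_unlock_fast(&lock, 1));
	return 0;
}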
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index bc73d33c6760..acca2c1a3c5e 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -60,7 +60,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 	INIT_LIST_HEAD(&lock->wait_list);
 	mutex_clear_owner(lock);
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-	lock->osq = NULL;
+	osq_lock_init(&lock->osq);
 #endif
 
 	debug_mutex_init(lock, name, key);
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index 9be8a9144978..2c93571162cb 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -26,7 +26,7 @@ int rwsem_is_locked(struct rw_semaphore *sem)
 	unsigned long flags;
 
 	if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) {
-		ret = (sem->activity != 0);
+		ret = (sem->count != 0);
 		raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 	}
 	return ret;
@@ -46,7 +46,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
 	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
 	lockdep_init_map(&sem->dep_map, name, key, 0);
 #endif
-	sem->activity = 0;
+	sem->count = 0;
 	raw_spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->wait_list);
 }
@@ -95,7 +95,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
 		waiter = list_entry(next, struct rwsem_waiter, list);
 	} while (waiter->type != RWSEM_WAITING_FOR_WRITE);
 
-	sem->activity += woken;
+	sem->count += woken;
 
  out:
 	return sem;
@@ -126,9 +126,9 @@ void __sched __down_read(struct rw_semaphore *sem)
 
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-	if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
+	if (sem->count >= 0 && list_empty(&sem->wait_list)) {
 		/* granted */
-		sem->activity++;
+		sem->count++;
 		raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 		goto out;
 	}
@@ -170,9 +170,9 @@ int __down_read_trylock(struct rw_semaphore *sem)
 
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-	if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
+	if (sem->count >= 0 && list_empty(&sem->wait_list)) {
 		/* granted */
-		sem->activity++;
+		sem->count++;
 		ret = 1;
 	}
 
@@ -206,7 +206,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
 	 * itself into sleep and waiting for system woke it or someone
 	 * else in the head of the wait list up.
 	 */
-	if (sem->activity == 0)
+	if (sem->count == 0)
 		break;
 	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
@@ -214,7 +214,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
 		raw_spin_lock_irqsave(&sem->wait_lock, flags);
 	}
 	/* got the lock */
-	sem->activity = -1;
+	sem->count = -1;
 	list_del(&waiter.list);
 
 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
@@ -235,9 +235,9 @@ int __down_write_trylock(struct rw_semaphore *sem)
 
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-	if (sem->activity == 0) {
+	if (sem->count == 0) {
 		/* got the lock */
-		sem->activity = -1;
+		sem->count = -1;
 		ret = 1;
 	}
 
@@ -255,7 +255,7 @@ void __up_read(struct rw_semaphore *sem)
 
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-	if (--sem->activity == 0 && !list_empty(&sem->wait_list))
+	if (--sem->count == 0 && !list_empty(&sem->wait_list))
 		sem = __rwsem_wake_one_writer(sem);
 
 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
@@ -270,7 +270,7 @@ void __up_write(struct rw_semaphore *sem)
 
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-	sem->activity = 0;
+	sem->count = 0;
 	if (!list_empty(&sem->wait_list))
 		sem = __rwsem_do_wake(sem, 1);
 
@@ -287,7 +287,7 @@ void __downgrade_write(struct rw_semaphore *sem)
 
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-	sem->activity = 1;
+	sem->count = 1;
 	if (!list_empty(&sem->wait_list))
 		sem = __rwsem_do_wake(sem, 0);
 
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index dacc32142fcc..a2391ac135c8 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -82,9 +82,9 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
 	sem->count = RWSEM_UNLOCKED_VALUE;
 	raw_spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->wait_list);
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
 	sem->owner = NULL;
-	sem->osq = NULL;
+	osq_lock_init(&sem->osq);
 #endif
 }
 
@@ -262,7 +262,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
 	return false;
 }
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
 /*
  * Try to acquire write lock before the writer has been put on wait queue.
  */
@@ -285,10 +285,10 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
 static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
 {
 	struct task_struct *owner;
-	bool on_cpu = true;
+	bool on_cpu = false;
 
 	if (need_resched())
-		return 0;
+		return false;
 
 	rcu_read_lock();
 	owner = ACCESS_ONCE(sem->owner);
@@ -297,9 +297,9 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
 	rcu_read_unlock();
 
 	/*
-	 * If sem->owner is not set, the rwsem owner may have
-	 * just acquired it and not set the owner yet or the rwsem
-	 * has been released.
+	 * If sem->owner is not set, yet we have just recently entered the
+	 * slowpath, then there is a possibility reader(s) may have the lock.
+	 * To be safe, avoid spinning in these situations.
 	 */
 	return on_cpu;
 }
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 42f806de49d4..e2d3bc7f03b4 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -12,7 +12,7 @@
 
 #include <linux/atomic.h>
 
-#if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM)
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
 static inline void rwsem_set_owner(struct rw_semaphore *sem)
 {
 	sem->owner = current;
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 0ca8d83e2369..4ee194eb524b 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -186,6 +186,7 @@ void thaw_processes(void)
 
 	printk("Restarting tasks ... ");
 
+	__usermodehelper_set_disable_depth(UMH_FREEZING);
 	thaw_workqueues();
 
 	read_lock(&tasklist_lock);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 4dd8822f732a..ed35a4790afe 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -306,7 +306,7 @@ int suspend_devices_and_enter(suspend_state_t state)
 		error = suspend_ops->begin(state);
 		if (error)
 			goto Close;
-	} else if (state == PM_SUSPEND_FREEZE && freeze_ops->begin) {
+	} else if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->begin) {
 		error = freeze_ops->begin();
 		if (error)
 			goto Close;
@@ -335,7 +335,7 @@ int suspend_devices_and_enter(suspend_state_t state)
  Close:
 	if (need_suspend_ops(state) && suspend_ops->end)
 		suspend_ops->end();
-	else if (state == PM_SUSPEND_FREEZE && freeze_ops->end)
+	else if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->end)
 		freeze_ops->end();
 
 	return error;
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 7fa34f86e5ba..948a7693748e 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -18,7 +18,7 @@
  * Copyright (C) IBM Corporation, 2005, 2006
  *
  * Authors: Paul E. McKenney <paulmck@us.ibm.com>
- *	    Josh Triplett <josh@freedesktop.org>
+ *	    Josh Triplett <josh@joshtriplett.org>
  *
  * See also: Documentation/RCU/torture.txt
  */
@@ -51,7 +51,7 @@
 #include <linux/torture.h>
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@freedesktop.org>");
+MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@joshtriplett.org>");
 
 
 torture_param(int, fqs_duration, 0,
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f1ba77363fbb..625d0b0cd75a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -206,6 +206,70 @@ void rcu_bh_qs(int cpu)
 	rdp->passed_quiesce = 1;
 }
 
+static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
+
+static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
+	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
+	.dynticks = ATOMIC_INIT(1),
+#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
+	.dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
+	.dynticks_idle = ATOMIC_INIT(1),
+#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
+};
+
+/*
+ * Let the RCU core know that this CPU has gone through the scheduler,
+ * which is a quiescent state. This is called when the need for a
+ * quiescent state is urgent, so we burn an atomic operation and full
+ * memory barriers to let the RCU core know about it, regardless of what
+ * this CPU might (or might not) do in the near future.
+ *
+ * We inform the RCU core by emulating a zero-duration dyntick-idle
+ * period, which we in turn do by incrementing the ->dynticks counter
+ * by two.
+ */
+static void rcu_momentary_dyntick_idle(void)
+{
+	unsigned long flags;
+	struct rcu_data *rdp;
+	struct rcu_dynticks *rdtp;
+	int resched_mask;
+	struct rcu_state *rsp;
+
+	local_irq_save(flags);
+
+	/*
+	 * Yes, we can lose flag-setting operations. This is OK, because
+	 * the flag will be set again after some delay.
+	 */
+	resched_mask = raw_cpu_read(rcu_sched_qs_mask);
+	raw_cpu_write(rcu_sched_qs_mask, 0);
+
+	/* Find the flavor that needs a quiescent state. */
+	for_each_rcu_flavor(rsp) {
+		rdp = raw_cpu_ptr(rsp->rda);
+		if (!(resched_mask & rsp->flavor_mask))
+			continue;
+		smp_mb(); /* rcu_sched_qs_mask before cond_resched_completed. */
+		if (ACCESS_ONCE(rdp->mynode->completed) !=
+		    ACCESS_ONCE(rdp->cond_resched_completed))
+			continue;
+
+		/*
+		 * Pretend to be momentarily idle for the quiescent state.
+		 * This allows the grace-period kthread to record the
+		 * quiescent state, with no need for this CPU to do anything
+		 * further.
+		 */
+		rdtp = this_cpu_ptr(&rcu_dynticks);
+		smp_mb__before_atomic(); /* Earlier stuff before QS. */
+		atomic_add(2, &rdtp->dynticks); /* QS. */
+		smp_mb__after_atomic(); /* Later stuff after QS. */
+		break;
+	}
+	local_irq_restore(flags);
+}
+
 /*
  * Note a context switch. This is a quiescent state for RCU-sched,
  * and requires special handling for preemptible RCU.
@@ -216,19 +280,12 @@ void rcu_note_context_switch(int cpu)
 	trace_rcu_utilization(TPS("Start context switch"));
 	rcu_sched_qs(cpu);
 	rcu_preempt_note_context_switch(cpu);
+	if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
+		rcu_momentary_dyntick_idle();
 	trace_rcu_utilization(TPS("End context switch"));
 }
 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
-static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
-	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
-	.dynticks = ATOMIC_INIT(1),
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-	.dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
-	.dynticks_idle = ATOMIC_INIT(1),
-#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-};
-
 static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */
 static long qhimark = 10000; /* If this many pending, ignore blimit. */
 static long qlowmark = 100; /* Once only this many pending, use blimit. */
@@ -243,6 +300,13 @@ static ulong jiffies_till_next_fqs = ULONG_MAX;
 module_param(jiffies_till_first_fqs, ulong, 0644);
 module_param(jiffies_till_next_fqs, ulong, 0644);
 
+/*
+ * How long the grace period must be before we start recruiting
+ * quiescent-state help from rcu_note_context_switch().
+ */
+static ulong jiffies_till_sched_qs = HZ / 20;
+module_param(jiffies_till_sched_qs, ulong, 0644);
+
 static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
 				  struct rcu_data *rdp);
 static void force_qs_rnp(struct rcu_state *rsp,
@@ -853,6 +917,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 				    bool *isidle, unsigned long *maxj)
 {
 	unsigned int curr;
+	int *rcrmp;
 	unsigned int snap;
 
 	curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks);
@@ -893,27 +958,43 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 	}
 
 	/*
-	 * There is a possibility that a CPU in adaptive-ticks state
-	 * might run in the kernel with the scheduling-clock tick disabled
-	 * for an extended time period. Invoke rcu_kick_nohz_cpu() to
-	 * force the CPU to restart the scheduling-clock tick in this
-	 * CPU is in this state.
-	 */
-	rcu_kick_nohz_cpu(rdp->cpu);
-
-	/*
-	 * Alternatively, the CPU might be running in the kernel
-	 * for an extended period of time without a quiescent state.
-	 * Attempt to force the CPU through the scheduler to gain the
-	 * needed quiescent state, but only if the grace period has gone
-	 * on for an uncommonly long time. If there are many stuck CPUs,
-	 * we will beat on the first one until it gets unstuck, then move
-	 * to the next. Only do this for the primary flavor of RCU.
+	 * A CPU running for an extended time within the kernel can
+	 * delay RCU grace periods. When the CPU is in NO_HZ_FULL mode,
+	 * even context-switching back and forth between a pair of
+	 * in-kernel CPU-bound tasks cannot advance grace periods.
+	 * So if the grace period is old enough, make the CPU pay attention.
+	 * Note that the unsynchronized assignments to the per-CPU
+	 * rcu_sched_qs_mask variable are safe. Yes, setting of
+	 * bits can be lost, but they will be set again on the next
+	 * force-quiescent-state pass. So lost bit sets do not result
+	 * in incorrect behavior, merely in a grace period lasting
+	 * a few jiffies longer than it might otherwise. Because
+	 * there are at most four threads involved, and because the
+	 * updates are only once every few jiffies, the probability of
+	 * lossage (and thus of slight grace-period extension) is
+	 * quite low.
+	 *
+	 * Note that if the jiffies_till_sched_qs boot/sysfs parameter
+	 * is set too high, we override with half of the RCU CPU stall
+	 * warning delay.
 	 */
-	if (rdp->rsp == rcu_state_p &&
+	rcrmp = &per_cpu(rcu_sched_qs_mask, rdp->cpu);
+	if (ULONG_CMP_GE(jiffies,
+			 rdp->rsp->gp_start + jiffies_till_sched_qs) ||
 	    ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
-		rdp->rsp->jiffies_resched += 5;
-		resched_cpu(rdp->cpu);
+		if (!(ACCESS_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) {
+			ACCESS_ONCE(rdp->cond_resched_completed) =
+				ACCESS_ONCE(rdp->mynode->completed);
+			smp_mb(); /* ->cond_resched_completed before *rcrmp. */
+			ACCESS_ONCE(*rcrmp) =
+				ACCESS_ONCE(*rcrmp) + rdp->rsp->flavor_mask;
+			resched_cpu(rdp->cpu); /* Force CPU into scheduler. */
+			rdp->rsp->jiffies_resched += 5; /* Enable beating. */
+		} else if (ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
+			/* Time to beat on that CPU again! */
+			resched_cpu(rdp->cpu); /* Force CPU into scheduler. */
+			rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
+		}
 	}
 
 	return 0;
@@ -3491,6 +3572,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 					   "rcu_node_fqs_1",
 					   "rcu_node_fqs_2",
 					   "rcu_node_fqs_3" }; /* Match MAX_RCU_LVLS */
+	static u8 fl_mask = 0x1;
 	int cpustride = 1;
 	int i;
 	int j;
@@ -3509,6 +3591,8 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 	for (i = 1; i < rcu_num_lvls; i++)
 		rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
 	rcu_init_levelspread(rsp);
+	rsp->flavor_mask = fl_mask;
+	fl_mask <<= 1;
 
 	/* Initialize the elements themselves, starting from the leaves. */
 
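
Note: the rcu_momentary_dyntick_idle() comment above depends on the ->dynticks convention: the counter is odd while the CPU is non-idle and even while idle, and an observer that sees the value change (or sees an even value) since its snapshot may count the CPU as having passed a quiescent state. Incrementing by two keeps the parity but changes the value, i.e. a zero-duration idle period. The following is only a toy userspace model of that observation, with invented names; it is not the kernel's RCU machinery.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_uint dynticks = 1;	/* odd: "CPU is not idle" */

/* Observer side: take a snapshot, re-check later. */
static unsigned int snapshot(void)
{
	return atomic_load(&dynticks);
}

static bool passed_quiescent_state(unsigned int snap)
{
	unsigned int curr = atomic_load(&dynticks);

	/* Even means idle right now; any change since the snapshot means
	 * the CPU was idle (or pretended to be) at some point in between. */
	return (curr & 1) == 0 || curr != snap;
}

/* CPU side: emulate a zero-duration idle period. */
static void momentary_dyntick_idle(void)
{
	atomic_fetch_add(&dynticks, 2);	/* parity preserved, value changed */
}

int main(void)
{
	unsigned int snap = snapshot();

	printf("before: passed QS? %d\n", passed_quiescent_state(snap));
	momentary_dyntick_idle();
	printf("after:  passed QS? %d\n", passed_quiescent_state(snap));
	return 0;
}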
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index bf2c1e669691..0f69a79c5b7d 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -307,6 +307,9 @@ struct rcu_data {
 /* 4) reasons this CPU needed to be kicked by force_quiescent_state */
 	unsigned long dynticks_fqs;	/* Kicked due to dynticks idle. */
 	unsigned long offline_fqs;	/* Kicked due to being offline. */
+	unsigned long cond_resched_completed;
+					/* Grace period that needs help */
+					/*  from cond_resched(). */
 
 	/* 5) __rcu_pending() statistics. */
 	unsigned long n_rcu_pending;	/* rcu_pending() calls since boot. */
@@ -392,6 +395,7 @@ struct rcu_state {
 	struct rcu_node *level[RCU_NUM_LVLS];	/* Hierarchy levels. */
 	u32 levelcnt[MAX_RCU_LVLS + 1];		/* # nodes in each level. */
 	u8 levelspread[RCU_NUM_LVLS];		/* kids/node in each level. */
+	u8 flavor_mask;				/* bit in flavor mask. */
 	struct rcu_data __percpu *rda;		/* pointer of percu rcu_data. */
 	void (*call)(struct rcu_head *head,	/* call_rcu() flavor. */
 		     void (*func)(struct rcu_head *head));
@@ -563,7 +567,7 @@ static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
 static void do_nocb_deferred_wakeup(struct rcu_data *rdp);
 static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
 static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
-static void rcu_kick_nohz_cpu(int cpu);
+static void __maybe_unused rcu_kick_nohz_cpu(int cpu);
 static bool init_nocb_callback_list(struct rcu_data *rdp);
 static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq);
 static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index cbc2c45265e2..02ac0fb186b8 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2404,7 +2404,7 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
  * if an adaptive-ticks CPU is failing to respond to the current grace
  * period and has not be idle from an RCU perspective, kick it.
  */
-static void rcu_kick_nohz_cpu(int cpu)
+static void __maybe_unused rcu_kick_nohz_cpu(int cpu)
 {
 #ifdef CONFIG_NO_HZ_FULL
 	if (tick_nohz_full_cpu(cpu))
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index a2aeb4df0f60..bc7883570530 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -200,12 +200,12 @@ void wait_rcu_gp(call_rcu_func_t crf)
 EXPORT_SYMBOL_GPL(wait_rcu_gp);
 
 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
-static inline void debug_init_rcu_head(struct rcu_head *head)
+void init_rcu_head(struct rcu_head *head)
 {
 	debug_object_init(head, &rcuhead_debug_descr);
 }
 
-static inline void debug_rcu_head_free(struct rcu_head *head)
+void destroy_rcu_head(struct rcu_head *head)
 {
 	debug_object_free(head, &rcuhead_debug_descr);
 }
@@ -350,21 +350,3 @@ static int __init check_cpu_stall_init(void)
 early_initcall(check_cpu_stall_init);
 
 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
-
-/*
- * Hooks for cond_resched() and friends to avoid RCU CPU stall warnings.
- */
-
-DEFINE_PER_CPU(int, rcu_cond_resched_count);
-
-/*
- * Report a set of RCU quiescent states, for use by cond_resched()
- * and friends. Out of line due to being called infrequently.
- */
-void rcu_resched(void)
-{
-	preempt_disable();
-	__this_cpu_write(rcu_cond_resched_count, 0);
-	rcu_note_context_switch(smp_processor_id());
-	preempt_enable();
-}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3bdf01b494fe..bc1638b33449 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4147,7 +4147,6 @@ static void __cond_resched(void)
 
 int __sched _cond_resched(void)
 {
-	rcu_cond_resched();
 	if (should_resched()) {
 		__cond_resched();
 		return 1;
@@ -4166,18 +4165,15 @@ EXPORT_SYMBOL(_cond_resched);
  */
 int __cond_resched_lock(spinlock_t *lock)
 {
-	bool need_rcu_resched = rcu_should_resched();
 	int resched = should_resched();
 	int ret = 0;
 
 	lockdep_assert_held(lock);
 
-	if (spin_needbreak(lock) || resched || need_rcu_resched) {
+	if (spin_needbreak(lock) || resched) {
 		spin_unlock(lock);
 		if (resched)
 			__cond_resched();
-		else if (unlikely(need_rcu_resched))
-			rcu_resched();
 		else
 			cpu_relax();
 		ret = 1;
@@ -4191,7 +4187,6 @@ int __sched __cond_resched_softirq(void)
 {
 	BUG_ON(!in_softirq());
 
-	rcu_cond_resched(); /* BH disabled OK, just recording QSes. */
 	if (should_resched()) {
 		local_bh_enable();
 		__cond_resched();
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 695f9773bb60..627b3c34b821 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -608,7 +608,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 
 	avg_atom = p->se.sum_exec_runtime;
 	if (nr_switches)
-		do_div(avg_atom, nr_switches);
+		avg_atom = div64_ul(avg_atom, nr_switches);
 	else
 		avg_atom = -1LL;
 
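
Note: the do_div() to div64_ul() switch matters because do_div() divides a 64-bit dividend by a 32-bit divisor; nr_switches is a 64-bit count here, and truncating a large value to 32 bits can even yield zero. div64_ul() does the full 64-bit division. The userspace sketch below emulates the two helpers (it does not use the kernel ones) to show the truncation hazard.

#include <stdint.h>
#include <stdio.h>

/* Rough stand-in for do_div(): the divisor is only 32 bits wide. */
static uint64_t div_u64_by_u32(uint64_t dividend, uint32_t divisor)
{
	return dividend / divisor;	/* divide-by-zero if truncation gave 0 */
}

/* Rough stand-in for div64_ul(): full 64-bit by 64-bit division. */
static uint64_t div_u64_by_u64(uint64_t dividend, uint64_t divisor)
{
	return dividend / divisor;
}

int main(void)
{
	uint64_t sum_exec_runtime = 123456789012345ULL;	/* made-up numbers */
	uint64_t nr_switches = 0x100000001ULL;		/* wider than 32 bits */

	/* Truncating the divisor gives a wildly wrong (or crashing) answer. */
	uint32_t truncated = (uint32_t)nr_switches;	/* == 1 here */

	printf("truncated divisor: %u\n", truncated);
	printf("wrong avg:   %llu\n",
	       (unsigned long long)div_u64_by_u32(sum_exec_runtime, truncated));
	printf("correct avg: %llu\n",
	       (unsigned long long)div_u64_by_u64(sum_exec_runtime, nr_switches));
	return 0;
}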
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 88c9c65a430d..fe75444ae7ec 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -585,9 +585,14 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
 			   struct itimerspec *new_setting,
 			   struct itimerspec *old_setting)
 {
+	ktime_t exp;
+
 	if (!rtcdev)
 		return -ENOTSUPP;
 
+	if (flags & ~TIMER_ABSTIME)
+		return -EINVAL;
+
 	if (old_setting)
 		alarm_timer_get(timr, old_setting);
 
@@ -597,8 +602,16 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
 
 	/* start the timer */
 	timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval);
-	alarm_start(&timr->it.alarm.alarmtimer,
-		    timespec_to_ktime(new_setting->it_value));
+	exp = timespec_to_ktime(new_setting->it_value);
+	/* Convert (if necessary) to absolute time */
+	if (flags != TIMER_ABSTIME) {
+		ktime_t now;
+
+		now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime();
+		exp = ktime_add(now, exp);
+	}
+
+	alarm_start(&timr->it.alarm.alarmtimer, exp);
 	return 0;
 }
 
@@ -730,6 +743,9 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
 	if (!alarmtimer_get_rtcdev())
 		return -ENOTSUPP;
 
+	if (flags & ~TIMER_ABSTIME)
+		return -EINVAL;
+
 	if (!capable(CAP_WAKE_ALARM))
 		return -EPERM;
 
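
Note: two small patterns appear in the alarmtimer hunks: unknown flag bits are rejected with `flags & ~TIMER_ABSTIME`, and a relative expiry is converted to an absolute one by adding the current time of the underlying clock before the timer is armed. A minimal userspace sketch of the same conversion follows, using plain timespec arithmetic and an invented flag constant rather than the kernel's ktime helpers.

#include <stdio.h>
#include <time.h>

#define MY_TIMER_ABSTIME	0x01	/* stand-in for TIMER_ABSTIME */

static struct timespec timespec_add(struct timespec a, struct timespec b)
{
	struct timespec r = { a.tv_sec + b.tv_sec, a.tv_nsec + b.tv_nsec };

	if (r.tv_nsec >= 1000000000L) {
		r.tv_sec++;
		r.tv_nsec -= 1000000000L;
	}
	return r;
}

/* Computes the absolute expiry; returns -1 on an unsupported flag. */
static int arm_expiry(int flags, struct timespec value, struct timespec *exp)
{
	if (flags & ~MY_TIMER_ABSTIME)
		return -1;			/* unknown flag bits: reject */

	if (flags != MY_TIMER_ABSTIME) {	/* relative: make it absolute */
		struct timespec now;

		clock_gettime(CLOCK_REALTIME, &now);
		value = timespec_add(now, value);
	}
	*exp = value;
	return 0;
}

int main(void)
{
	struct timespec in = { .tv_sec = 5 }, exp;

	if (arm_expiry(0, in, &exp) == 0)
		printf("relative 5s -> absolute %lld.%09ld\n",
		       (long long)exp.tv_sec, exp.tv_nsec);
	printf("bogus flags rejected: %d\n", arm_expiry(0xff, in, &exp));
	return 0;
}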
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index ad362c260ef4..9c94c19f1305 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -146,7 +146,8 @@ static int clockevents_increase_min_delta(struct clock_event_device *dev)
 {
 	/* Nothing to do if we already reached the limit */
 	if (dev->min_delta_ns >= MIN_DELTA_LIMIT) {
-		printk(KERN_WARNING "CE: Reprogramming failure. Giving up\n");
+		printk_deferred(KERN_WARNING
+				"CE: Reprogramming failure. Giving up\n");
 		dev->next_event.tv64 = KTIME_MAX;
 		return -ETIME;
 	}
@@ -159,9 +160,10 @@ static int clockevents_increase_min_delta(struct clock_event_device *dev)
 	if (dev->min_delta_ns > MIN_DELTA_LIMIT)
 		dev->min_delta_ns = MIN_DELTA_LIMIT;
 
-	printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n",
-	       dev->name ? dev->name : "?",
-	       (unsigned long long) dev->min_delta_ns);
+	printk_deferred(KERN_WARNING
+			"CE: %s increased min_delta_ns to %llu nsec\n",
+			dev->name ? dev->name : "?",
+			(unsigned long long) dev->min_delta_ns);
 	return 0;
 }
 
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 445106d2c729..01d2d15aa662 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -191,7 +191,8 @@ void __init sched_clock_postinit(void)
 
 static int sched_clock_suspend(void)
 {
-	sched_clock_poll(&sched_clock_timer);
+	update_sched_clock();
+	hrtimer_cancel(&sched_clock_timer);
 	cd.suspended = true;
 	return 0;
 }
@@ -199,6 +200,7 @@ static int sched_clock_suspend(void)
 static void sched_clock_resume(void)
 {
 	cd.epoch_cyc = read_sched_clock();
+	hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
 	cd.suspended = false;
 }
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 5b372e3ed675..ac9d1dad630b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -265,12 +265,12 @@ static void update_ftrace_function(void)
 		func = ftrace_ops_list_func;
 	}
 
+	update_function_graph_func();
+
 	/* If there's no change, then do nothing more here */
 	if (ftrace_trace_function == func)
 		return;
 
-	update_function_graph_func();
-
 	/*
 	 * If we are using the list function, it doesn't care
 	 * about the function_trace_ops.
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7c56c3d06943..ff7027199a9a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -616,10 +616,6 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct rb_irq_work *work;
 
-	if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
-	    (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
-		return POLLIN | POLLRDNORM;
-
 	if (cpu == RING_BUFFER_ALL_CPUS)
 		work = &buffer->irq_work;
 	else {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f243444a3772..291397e66669 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -466,6 +466,12 @@ int __trace_puts(unsigned long ip, const char *str, int size)
 	struct print_entry *entry;
 	unsigned long irq_flags;
 	int alloc;
+	int pc;
+
+	if (!(trace_flags & TRACE_ITER_PRINTK))
+		return 0;
+
+	pc = preempt_count();
 
 	if (unlikely(tracing_selftest_running || tracing_disabled))
 		return 0;
@@ -475,7 +481,7 @@ int __trace_puts(unsigned long ip, const char *str, int size)
 	local_save_flags(irq_flags);
 	buffer = global_trace.trace_buffer.buffer;
 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
-					  irq_flags, preempt_count());
+					  irq_flags, pc);
 	if (!event)
 		return 0;
 
@@ -492,6 +498,7 @@ int __trace_puts(unsigned long ip, const char *str, int size)
 	entry->buf[size] = '\0';
 
 	__buffer_unlock_commit(buffer, event);
+	ftrace_trace_stack(buffer, irq_flags, 4, pc);
 
 	return size;
 }
@@ -509,6 +516,12 @@ int __trace_bputs(unsigned long ip, const char *str)
 	struct bputs_entry *entry;
 	unsigned long irq_flags;
 	int size = sizeof(struct bputs_entry);
+	int pc;
+
+	if (!(trace_flags & TRACE_ITER_PRINTK))
+		return 0;
+
+	pc = preempt_count();
 
 	if (unlikely(tracing_selftest_running || tracing_disabled))
 		return 0;
@@ -516,7 +529,7 @@ int __trace_bputs(unsigned long ip, const char *str)
 	local_save_flags(irq_flags);
 	buffer = global_trace.trace_buffer.buffer;
 	event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
-					  irq_flags, preempt_count());
+					  irq_flags, pc);
 	if (!event)
 		return 0;
 
@@ -525,6 +538,7 @@ int __trace_bputs(unsigned long ip, const char *str)
 	entry->str = str;
 
 	__buffer_unlock_commit(buffer, event);
+	ftrace_trace_stack(buffer, irq_flags, 4, pc);
 
 	return 1;
 }
@@ -809,7 +823,7 @@ static struct {
 	{ trace_clock_local,	"local",	1 },
 	{ trace_clock_global,	"global",	1 },
 	{ trace_clock_counter,	"counter",	0 },
-	{ trace_clock_jiffies,	"uptime",	1 },
+	{ trace_clock_jiffies,	"uptime",	0 },
 	{ trace_clock,		"perf",		1 },
 	ARCH_TRACE_CLOCKS
 };
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 26dc348332b7..57b67b1f24d1 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -59,13 +59,14 @@ u64 notrace trace_clock(void)
 
 /*
  * trace_jiffy_clock(): Simply use jiffies as a clock counter.
+ * Note that this use of jiffies_64 is not completely safe on
+ * 32-bit systems. But the window is tiny, and the effect if
+ * we are affected is that we will have an obviously bogus
+ * timestamp on a trace event - i.e. not life threatening.
  */
 u64 notrace trace_clock_jiffies(void)
 {
-	u64 jiffy = jiffies - INITIAL_JIFFIES;
-
-	/* Return nsecs */
-	return (u64)jiffies_to_usecs(jiffy) * 1000ULL;
+	return jiffies_64_to_clock_t(jiffies_64 - INITIAL_JIFFIES);
 }
 
 /*
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f99e0b3bca8c..2de53628689f 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -470,6 +470,7 @@ static void remove_event_file_dir(struct ftrace_event_file *file)
 
 	list_del(&file->list);
 	remove_subsystem(file->system);
+	free_event_filter(file->filter);
 	kmem_cache_free(file_cachep, file);
 }
 