author	Ingo Molnar <mingo@kernel.org>	2014-06-06 01:55:06 -0400
committer	Ingo Molnar <mingo@kernel.org>	2014-06-06 01:55:06 -0400
commit	ec00010972a0971b2c1da4fbe4e5c7d8ed1ecb05 (patch)
tree	c28975d7daf6d8a3aa23afe8f42837b71105b269 /kernel
parent	8c6e549a447c51f4f8c0ba7f1e444469f75a354a (diff)
parent	e041e328c4b41e1db79bfe5ba9992c2ed771ad19 (diff)
Merge branch 'perf/urgent' into perf/core, to resolve conflict and to prepare for new patches
Conflicts:
	arch/x86/kernel/traps.c

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/cgroup.c	10
-rw-r--r--	kernel/cgroup_freezer.c	116
-rw-r--r--	kernel/context_tracking.c	2
-rw-r--r--	kernel/cpu.c	6
-rw-r--r--	kernel/events/core.c	153
-rw-r--r--	kernel/futex.c	52
-rw-r--r--	kernel/hrtimer.c	8
-rw-r--r--	kernel/kexec.c	8
-rw-r--r--	kernel/locking/lockdep.c	2
-rw-r--r--	kernel/locking/rtmutex.c	32
-rw-r--r--	kernel/power/snapshot.c	2
-rw-r--r--	kernel/printk/printk.c	4
-rw-r--r--	kernel/sched/core.c	80
-rw-r--r--	kernel/sched/cpudeadline.c	37
-rw-r--r--	kernel/sched/cpudeadline.h	6
-rw-r--r--	kernel/sched/cpupri.c	10
-rw-r--r--	kernel/sched/cpupri.h	2
-rw-r--r--	kernel/sched/cputime.c	32
-rw-r--r--	kernel/sched/deadline.c	5
-rw-r--r--	kernel/sched/fair.c	16
-rw-r--r--	kernel/softirq.c	4
-rw-r--r--	kernel/tracepoint.c	4
-rw-r--r--	kernel/workqueue.c	36
23 files changed, 377 insertions, 250 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9fcdaa705b6c..3f1ca934a237 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -348,7 +348,7 @@ struct cgrp_cset_link {
  * reference-counted, to improve performance when child cgroups
  * haven't been created.
  */
-static struct css_set init_css_set = {
+struct css_set init_css_set = {
 	.refcount		= ATOMIC_INIT(1),
 	.cgrp_links		= LIST_HEAD_INIT(init_css_set.cgrp_links),
 	.tasks			= LIST_HEAD_INIT(init_css_set.tasks),
@@ -1495,7 +1495,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 	 */
 	if (!use_task_css_set_links)
 		cgroup_enable_task_cg_lists();
-retry:
+
 	mutex_lock(&cgroup_tree_mutex);
 	mutex_lock(&cgroup_mutex);
 
@@ -1503,7 +1503,7 @@ retry:
 	ret = parse_cgroupfs_options(data, &opts);
 	if (ret)
 		goto out_unlock;
-
+retry:
 	/* look for a matching existing root */
 	if (!opts.subsys_mask && !opts.none && !opts.name) {
 		cgrp_dfl_root_visible = true;
@@ -1562,9 +1562,9 @@ retry:
 	if (!atomic_inc_not_zero(&root->cgrp.refcnt)) {
 		mutex_unlock(&cgroup_mutex);
 		mutex_unlock(&cgroup_tree_mutex);
-		kfree(opts.release_agent);
-		kfree(opts.name);
 		msleep(10);
+		mutex_lock(&cgroup_tree_mutex);
+		mutex_lock(&cgroup_mutex);
 		goto retry;
 	}
 
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 2bc4a2256444..345628c78b5b 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -21,6 +21,7 @@
 #include <linux/uaccess.h>
 #include <linux/freezer.h>
 #include <linux/seq_file.h>
+#include <linux/mutex.h>
 
 /*
  * A cgroup is freezing if any FREEZING flags are set.  FREEZING_SELF is
@@ -42,9 +43,10 @@ enum freezer_state_flags {
 struct freezer {
 	struct cgroup_subsys_state	css;
 	unsigned int			state;
-	spinlock_t			lock;
 };
 
+static DEFINE_MUTEX(freezer_mutex);
+
 static inline struct freezer *css_freezer(struct cgroup_subsys_state *css)
 {
 	return css ? container_of(css, struct freezer, css) : NULL;
@@ -93,7 +95,6 @@ freezer_css_alloc(struct cgroup_subsys_state *parent_css)
 	if (!freezer)
 		return ERR_PTR(-ENOMEM);
 
-	spin_lock_init(&freezer->lock);
 	return &freezer->css;
 }
 
@@ -110,14 +111,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css)
 	struct freezer *freezer = css_freezer(css);
 	struct freezer *parent = parent_freezer(freezer);
 
-	/*
-	 * The following double locking and freezing state inheritance
-	 * guarantee that @cgroup can never escape ancestors' freezing
-	 * states.  See css_for_each_descendant_pre() for details.
-	 */
-	if (parent)
-		spin_lock_irq(&parent->lock);
-	spin_lock_nested(&freezer->lock, SINGLE_DEPTH_NESTING);
+	mutex_lock(&freezer_mutex);
 
 	freezer->state |= CGROUP_FREEZER_ONLINE;
 
@@ -126,10 +120,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css)
 		atomic_inc(&system_freezing_cnt);
 	}
 
-	spin_unlock(&freezer->lock);
-	if (parent)
-		spin_unlock_irq(&parent->lock);
-
+	mutex_unlock(&freezer_mutex);
 	return 0;
 }
 
@@ -144,14 +135,14 @@ static void freezer_css_offline(struct cgroup_subsys_state *css)
 {
 	struct freezer *freezer = css_freezer(css);
 
-	spin_lock_irq(&freezer->lock);
+	mutex_lock(&freezer_mutex);
 
 	if (freezer->state & CGROUP_FREEZING)
 		atomic_dec(&system_freezing_cnt);
 
 	freezer->state = 0;
 
-	spin_unlock_irq(&freezer->lock);
+	mutex_unlock(&freezer_mutex);
 }
 
 static void freezer_css_free(struct cgroup_subsys_state *css)
@@ -175,7 +166,7 @@ static void freezer_attach(struct cgroup_subsys_state *new_css,
 	struct task_struct *task;
 	bool clear_frozen = false;
 
-	spin_lock_irq(&freezer->lock);
+	mutex_lock(&freezer_mutex);
 
 	/*
 	 * Make the new tasks conform to the current state of @new_css.
@@ -197,21 +188,13 @@ static void freezer_attach(struct cgroup_subsys_state *new_css,
 		}
 	}
 
-	spin_unlock_irq(&freezer->lock);
-
-	/*
-	 * Propagate FROZEN clearing upwards.  We may race with
-	 * update_if_frozen(), but as long as both work bottom-up, either
-	 * update_if_frozen() sees child's FROZEN cleared or we clear the
-	 * parent's FROZEN later.  No parent w/ !FROZEN children can be
-	 * left FROZEN.
-	 */
+	/* propagate FROZEN clearing upwards */
 	while (clear_frozen && (freezer = parent_freezer(freezer))) {
-		spin_lock_irq(&freezer->lock);
 		freezer->state &= ~CGROUP_FROZEN;
 		clear_frozen = freezer->state & CGROUP_FREEZING;
-		spin_unlock_irq(&freezer->lock);
 	}
+
+	mutex_unlock(&freezer_mutex);
 }
 
 /**
@@ -228,9 +211,6 @@ static void freezer_fork(struct task_struct *task)
 {
 	struct freezer *freezer;
 
-	rcu_read_lock();
-	freezer = task_freezer(task);
-
 	/*
 	 * The root cgroup is non-freezable, so we can skip locking the
 	 * freezer.  This is safe regardless of race with task migration.
@@ -238,24 +218,18 @@ static void freezer_fork(struct task_struct *task)
 	 * to do.  If we lost and root is the new cgroup, noop is still the
 	 * right thing to do.
 	 */
-	if (!parent_freezer(freezer))
-		goto out;
+	if (task_css_is_root(task, freezer_cgrp_id))
+		return;
 
-	/*
-	 * Grab @freezer->lock and freeze @task after verifying @task still
-	 * belongs to @freezer and it's freezing.  The former is for the
-	 * case where we have raced against task migration and lost and
-	 * @task is already in a different cgroup which may not be frozen.
-	 * This isn't strictly necessary as freeze_task() is allowed to be
-	 * called spuriously but let's do it anyway for, if nothing else,
-	 * documentation.
-	 */
-	spin_lock_irq(&freezer->lock);
-	if (freezer == task_freezer(task) && (freezer->state & CGROUP_FREEZING))
+	mutex_lock(&freezer_mutex);
+	rcu_read_lock();
+
+	freezer = task_freezer(task);
+	if (freezer->state & CGROUP_FREEZING)
 		freeze_task(task);
-	spin_unlock_irq(&freezer->lock);
-out:
+
 	rcu_read_unlock();
+	mutex_unlock(&freezer_mutex);
 }
 
 /**
@@ -281,22 +255,24 @@ static void update_if_frozen(struct cgroup_subsys_state *css)
 	struct css_task_iter it;
 	struct task_struct *task;
 
-	WARN_ON_ONCE(!rcu_read_lock_held());
-
-	spin_lock_irq(&freezer->lock);
+	lockdep_assert_held(&freezer_mutex);
 
 	if (!(freezer->state & CGROUP_FREEZING) ||
 	    (freezer->state & CGROUP_FROZEN))
-		goto out_unlock;
+		return;
 
 	/* are all (live) children frozen? */
+	rcu_read_lock();
 	css_for_each_child(pos, css) {
 		struct freezer *child = css_freezer(pos);
 
 		if ((child->state & CGROUP_FREEZER_ONLINE) &&
-		    !(child->state & CGROUP_FROZEN))
-			goto out_unlock;
+		    !(child->state & CGROUP_FROZEN)) {
+			rcu_read_unlock();
+			return;
+		}
 	}
+	rcu_read_unlock();
 
 	/* are all tasks frozen? */
 	css_task_iter_start(css, &it);
@@ -317,21 +293,29 @@ static void update_if_frozen(struct cgroup_subsys_state *css)
 		freezer->state |= CGROUP_FROZEN;
 out_iter_end:
 	css_task_iter_end(&it);
-out_unlock:
-	spin_unlock_irq(&freezer->lock);
 }
 
 static int freezer_read(struct seq_file *m, void *v)
 {
 	struct cgroup_subsys_state *css = seq_css(m), *pos;
 
+	mutex_lock(&freezer_mutex);
 	rcu_read_lock();
 
 	/* update states bottom-up */
-	css_for_each_descendant_post(pos, css)
+	css_for_each_descendant_post(pos, css) {
+		if (!css_tryget(pos))
+			continue;
+		rcu_read_unlock();
+
 		update_if_frozen(pos);
 
+		rcu_read_lock();
+		css_put(pos);
+	}
+
 	rcu_read_unlock();
+	mutex_unlock(&freezer_mutex);
 
 	seq_puts(m, freezer_state_strs(css_freezer(css)->state));
 	seq_putc(m, '\n');
@@ -373,7 +357,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
 			      unsigned int state)
 {
 	/* also synchronizes against task migration, see freezer_attach() */
-	lockdep_assert_held(&freezer->lock);
+	lockdep_assert_held(&freezer_mutex);
 
 	if (!(freezer->state & CGROUP_FREEZER_ONLINE))
 		return;
@@ -414,31 +398,29 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
 	 * descendant will try to inherit its parent's FREEZING state as
 	 * CGROUP_FREEZING_PARENT.
 	 */
+	mutex_lock(&freezer_mutex);
 	rcu_read_lock();
 	css_for_each_descendant_pre(pos, &freezer->css) {
 		struct freezer *pos_f = css_freezer(pos);
 		struct freezer *parent = parent_freezer(pos_f);
 
-		spin_lock_irq(&pos_f->lock);
+		if (!css_tryget(pos))
+			continue;
+		rcu_read_unlock();
 
-		if (pos_f == freezer) {
+		if (pos_f == freezer)
 			freezer_apply_state(pos_f, freeze,
 					    CGROUP_FREEZING_SELF);
-		} else {
-			/*
-			 * Our update to @parent->state is already visible
-			 * which is all we need.  No need to lock @parent.
-			 * For more info on synchronization, see
-			 * freezer_post_create().
-			 */
+		else
 			freezer_apply_state(pos_f,
 					    parent->state & CGROUP_FREEZING,
 					    CGROUP_FREEZING_PARENT);
-		}
 
-		spin_unlock_irq(&pos_f->lock);
+		rcu_read_lock();
+		css_put(pos);
 	}
 	rcu_read_unlock();
+	mutex_unlock(&freezer_mutex);
 }
 
 static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft,
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 6cb20d2e7ee0..019d45008448 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -120,7 +120,7 @@ void context_tracking_user_enter(void)
  * instead of preempt_schedule() to exit user context if needed before
  * calling the scheduler.
  */
-asmlinkage void __sched notrace preempt_schedule_context(void)
+asmlinkage __visible void __sched notrace preempt_schedule_context(void)
 {
 	enum ctx_state prev_ctx;
 
diff --git a/kernel/cpu.c b/kernel/cpu.c
index a9e710eef0e2..247979a1b815 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -726,10 +726,12 @@ void set_cpu_present(unsigned int cpu, bool present)
 
 void set_cpu_online(unsigned int cpu, bool online)
 {
-	if (online)
+	if (online) {
 		cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits));
-	else
+		cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits));
+	} else {
 		cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits));
+	}
 }
 
 void set_cpu_active(unsigned int cpu, bool active)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e9ef0c6646af..8fac2056d51e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2973,6 +2973,22 @@ out:
 	local_irq_restore(flags);
 }
 
+void perf_event_exec(void)
+{
+	struct perf_event_context *ctx;
+	int ctxn;
+
+	rcu_read_lock();
+	for_each_task_context_nr(ctxn) {
+		ctx = current->perf_event_ctxp[ctxn];
+		if (!ctx)
+			continue;
+
+		perf_event_enable_on_exec(ctx);
+	}
+	rcu_read_unlock();
+}
+
 /*
  * Cross CPU call to read the hardware event
  */
@@ -3195,7 +3211,8 @@ static void free_event_rcu(struct rcu_head *head)
 }
 
 static void ring_buffer_put(struct ring_buffer *rb);
-static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb);
+static void ring_buffer_attach(struct perf_event *event,
+			       struct ring_buffer *rb);
 
 static void unaccount_event_cpu(struct perf_event *event, int cpu)
 {
@@ -3259,8 +3276,6 @@ static void _free_event(struct perf_event *event)
 	unaccount_event(event);
 
 	if (event->rb) {
-		struct ring_buffer *rb;
-
 		/*
 		 * Can happen when we close an event with re-directed output.
 		 *
@@ -3268,12 +3283,7 @@ static void _free_event(struct perf_event *event)
 		 * over us; possibly making our ring_buffer_put() the last.
 		 */
 		mutex_lock(&event->mmap_mutex);
-		rb = event->rb;
-		if (rb) {
-			rcu_assign_pointer(event->rb, NULL);
-			ring_buffer_detach(event, rb);
-			ring_buffer_put(rb); /* could be last */
-		}
+		ring_buffer_attach(event, NULL);
 		mutex_unlock(&event->mmap_mutex);
 	}
 
@@ -3870,28 +3880,47 @@ unlock:
 static void ring_buffer_attach(struct perf_event *event,
 			       struct ring_buffer *rb)
 {
+	struct ring_buffer *old_rb = NULL;
 	unsigned long flags;
 
-	if (!list_empty(&event->rb_entry))
-		return;
+	if (event->rb) {
+		/*
+		 * Should be impossible, we set this when removing
+		 * event->rb_entry and wait/clear when adding event->rb_entry.
+		 */
+		WARN_ON_ONCE(event->rcu_pending);
 
-	spin_lock_irqsave(&rb->event_lock, flags);
-	if (list_empty(&event->rb_entry))
-		list_add(&event->rb_entry, &rb->event_list);
-	spin_unlock_irqrestore(&rb->event_lock, flags);
-}
+		old_rb = event->rb;
+		event->rcu_batches = get_state_synchronize_rcu();
+		event->rcu_pending = 1;
 
-static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb)
-{
-	unsigned long flags;
+		spin_lock_irqsave(&old_rb->event_lock, flags);
+		list_del_rcu(&event->rb_entry);
+		spin_unlock_irqrestore(&old_rb->event_lock, flags);
+	}
 
-	if (list_empty(&event->rb_entry))
-		return;
+	if (event->rcu_pending && rb) {
+		cond_synchronize_rcu(event->rcu_batches);
+		event->rcu_pending = 0;
+	}
+
+	if (rb) {
+		spin_lock_irqsave(&rb->event_lock, flags);
+		list_add_rcu(&event->rb_entry, &rb->event_list);
+		spin_unlock_irqrestore(&rb->event_lock, flags);
+	}
+
+	rcu_assign_pointer(event->rb, rb);
 
-	spin_lock_irqsave(&rb->event_lock, flags);
-	list_del_init(&event->rb_entry);
-	wake_up_all(&event->waitq);
-	spin_unlock_irqrestore(&rb->event_lock, flags);
+	if (old_rb) {
+		ring_buffer_put(old_rb);
+		/*
+		 * Since we detached before setting the new rb, so that we
+		 * could attach the new rb, we could have missed a wakeup.
+		 * Provide it now.
+		 */
+		wake_up_all(&event->waitq);
+	}
 }
 
 static void ring_buffer_wakeup(struct perf_event *event)
@@ -3960,7 +3989,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 {
 	struct perf_event *event = vma->vm_file->private_data;
 
-	struct ring_buffer *rb = event->rb;
+	struct ring_buffer *rb = ring_buffer_get(event);
 	struct user_struct *mmap_user = rb->mmap_user;
 	int mmap_locked = rb->mmap_locked;
 	unsigned long size = perf_data_size(rb);
@@ -3968,18 +3997,14 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	atomic_dec(&rb->mmap_count);
 
 	if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
-		return;
+		goto out_put;
 
-	/* Detach current event from the buffer. */
-	rcu_assign_pointer(event->rb, NULL);
-	ring_buffer_detach(event, rb);
+	ring_buffer_attach(event, NULL);
 	mutex_unlock(&event->mmap_mutex);
 
 	/* If there's still other mmap()s of this buffer, we're done. */
-	if (atomic_read(&rb->mmap_count)) {
-		ring_buffer_put(rb); /* can't be last */
-		return;
-	}
+	if (atomic_read(&rb->mmap_count))
+		goto out_put;
 
 	/*
 	 * No other mmap()s, detach from all other events that might redirect
@@ -4009,11 +4034,9 @@ again:
 	 * still restart the iteration to make sure we're not now
 	 * iterating the wrong list.
 	 */
-	if (event->rb == rb) {
-		rcu_assign_pointer(event->rb, NULL);
-		ring_buffer_detach(event, rb);
-		ring_buffer_put(rb); /* can't be last, we still have one */
-	}
+	if (event->rb == rb)
+		ring_buffer_attach(event, NULL);
+
 	mutex_unlock(&event->mmap_mutex);
 	put_event(event);
 
@@ -4038,6 +4061,7 @@ again:
 	vma->vm_mm->pinned_vm -= mmap_locked;
 	free_uid(mmap_user);
 
+out_put:
 	ring_buffer_put(rb); /* could be last */
 }
 
@@ -4155,7 +4179,6 @@ again:
 	vma->vm_mm->pinned_vm += extra;
 
 	ring_buffer_attach(event, rb);
-	rcu_assign_pointer(event->rb, rb);
 
 	perf_event_init_userpage(event);
 	perf_event_update_userpage(event);
@@ -5070,18 +5093,6 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
 void perf_event_comm(struct task_struct *task)
 {
 	struct perf_comm_event comm_event;
-	struct perf_event_context *ctx;
-	int ctxn;
-
-	rcu_read_lock();
-	for_each_task_context_nr(ctxn) {
-		ctx = task->perf_event_ctxp[ctxn];
-		if (!ctx)
-			continue;
-
-		perf_event_enable_on_exec(ctx);
-	}
-	rcu_read_unlock();
 
 	if (!atomic_read(&nr_comm_events))
 		return;
@@ -5439,6 +5450,9 @@ struct swevent_htable {
 
 	/* Recursion avoidance in each contexts */
 	int				recursion[PERF_NR_CONTEXTS];
+
+	/* Keeps track of cpu being initialized/exited */
+	bool				online;
 };
 
 static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
@@ -5685,8 +5699,14 @@ static int perf_swevent_add(struct perf_event *event, int flags)
 	hwc->state = !(flags & PERF_EF_START);
 
 	head = find_swevent_head(swhash, event);
-	if (WARN_ON_ONCE(!head))
+	if (!head) {
+		/*
+		 * We can race with cpu hotplug code. Do not
+		 * WARN if the cpu just got unplugged.
+		 */
+		WARN_ON_ONCE(swhash->online);
 		return -EINVAL;
+	}
 
 	hlist_add_head_rcu(&event->hlist_entry, head);
 
@@ -6956,7 +6976,7 @@ err_size:
 static int
 perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 {
-	struct ring_buffer *rb = NULL, *old_rb = NULL;
+	struct ring_buffer *rb = NULL;
 	int ret = -EINVAL;
 
 	if (!output_event)
@@ -6984,8 +7004,6 @@ set:
 	if (atomic_read(&event->mmap_count))
 		goto unlock;
 
-	old_rb = event->rb;
-
 	if (output_event) {
 		/* get the rb we want to redirect to */
 		rb = ring_buffer_get(output_event);
@@ -6993,23 +7011,7 @@ set:
 			goto unlock;
 	}
 
-	if (old_rb)
-		ring_buffer_detach(event, old_rb);
-
-	if (rb)
-		ring_buffer_attach(event, rb);
-
-	rcu_assign_pointer(event->rb, rb);
-
-	if (old_rb) {
-		ring_buffer_put(old_rb);
-		/*
-		 * Since we detached before setting the new rb, so that we
-		 * could attach the new rb, we could have missed a wakeup.
-		 * Provide it now.
-		 */
-		wake_up_all(&event->waitq);
-	}
+	ring_buffer_attach(event, rb);
 
 	ret = 0;
 unlock:
@@ -7060,6 +7062,9 @@ SYSCALL_DEFINE5(perf_event_open,
 	if (attr.freq) {
 		if (attr.sample_freq > sysctl_perf_event_sample_rate)
 			return -EINVAL;
+	} else {
+		if (attr.sample_period & (1ULL << 63))
+			return -EINVAL;
 	}
 
 	/*
@@ -7872,6 +7877,7 @@ static void perf_event_init_cpu(int cpu)
 	struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
 	mutex_lock(&swhash->hlist_mutex);
+	swhash->online = true;
 	if (swhash->hlist_refcount > 0) {
 		struct swevent_hlist *hlist;
 
@@ -7929,6 +7935,7 @@ static void perf_event_exit_cpu(int cpu)
 	perf_event_exit_cpu_context(cpu);
 
 	mutex_lock(&swhash->hlist_mutex);
+	swhash->online = false;
 	swevent_hlist_release(swhash);
 	mutex_unlock(&swhash->hlist_mutex);
 }
diff --git a/kernel/futex.c b/kernel/futex.c
index 5f589279e462..81dbe773ce4c 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -745,7 +745,8 @@ void exit_pi_state_list(struct task_struct *curr)
 
 static int
 lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
-		union futex_key *key, struct futex_pi_state **ps)
+		union futex_key *key, struct futex_pi_state **ps,
+		struct task_struct *task)
 {
 	struct futex_pi_state *pi_state = NULL;
 	struct futex_q *this, *next;
@@ -786,6 +787,16 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 				return -EINVAL;
 			}
 
+			/*
+			 * Protect against a corrupted uval. If uval
+			 * is 0x80000000 then pid is 0 and the waiter
+			 * bit is set. So the deadlock check in the
+			 * calling code has failed and we did not fall
+			 * into the check above due to !pid.
+			 */
+			if (task && pi_state->owner == task)
+				return -EDEADLK;
+
 			atomic_inc(&pi_state->refcount);
 			*ps = pi_state;
 
@@ -803,6 +814,11 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 	if (!p)
 		return -ESRCH;
 
+	if (!p->mm) {
+		put_task_struct(p);
+		return -EPERM;
+	}
+
 	/*
 	 * We need to look at the task state flags to figure out,
 	 * whether the task is exiting. To protect against the do_exit
@@ -935,7 +951,7 @@ retry:
 	 * We dont have the lock. Look up the PI state (or create it if
 	 * we are the first waiter):
 	 */
-	ret = lookup_pi_state(uval, hb, key, ps);
+	ret = lookup_pi_state(uval, hb, key, ps, task);
 
 	if (unlikely(ret)) {
 		switch (ret) {
@@ -1347,7 +1363,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
  *
  * Return:
  *  0 - failed to acquire the lock atomically;
- *  1 - acquired the lock;
+ * >0 - acquired the lock, return value is vpid of the top_waiter
  * <0 - error
  */
 static int futex_proxy_trylock_atomic(u32 __user *pifutex,
@@ -1358,7 +1374,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
 {
 	struct futex_q *top_waiter = NULL;
 	u32 curval;
-	int ret;
+	int ret, vpid;
 
 	if (get_futex_value_locked(&curval, pifutex))
 		return -EFAULT;
@@ -1386,11 +1402,13 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
 	 * the contended case or if set_waiters is 1.  The pi_state is returned
 	 * in ps in contended cases.
 	 */
+	vpid = task_pid_vnr(top_waiter->task);
 	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
 				   set_waiters);
-	if (ret == 1)
+	if (ret == 1) {
 		requeue_pi_wake_futex(top_waiter, key2, hb2);
-
+		return vpid;
+	}
 	return ret;
 }
 
@@ -1421,7 +1439,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
 	struct futex_pi_state *pi_state = NULL;
 	struct futex_hash_bucket *hb1, *hb2;
 	struct futex_q *this, *next;
-	u32 curval2;
 
 	if (requeue_pi) {
 		/*
@@ -1509,16 +1526,25 @@ retry_private:
 		 * At this point the top_waiter has either taken uaddr2 or is
 		 * waiting on it.  If the former, then the pi_state will not
 		 * exist yet, look it up one more time to ensure we have a
-		 * reference to it.
+		 * reference to it. If the lock was taken, ret contains the
+		 * vpid of the top waiter task.
 		 */
-		if (ret == 1) {
+		if (ret > 0) {
 			WARN_ON(pi_state);
 			drop_count++;
 			task_count++;
-			ret = get_futex_value_locked(&curval2, uaddr2);
-			if (!ret)
-				ret = lookup_pi_state(curval2, hb2, &key2,
-						      &pi_state);
+			/*
+			 * If we acquired the lock, then the user
+			 * space value of uaddr2 should be vpid. It
+			 * cannot be changed by the top waiter as it
+			 * is blocked on hb2 lock if it tries to do
+			 * so. If something fiddled with it behind our
+			 * back the pi state lookup might unearth
+			 * it. So we rather use the known value than
+			 * rereading and handing potential crap to
+			 * lookup_pi_state.
			 */
+			ret = lookup_pi_state(ret, hb2, &key2, &pi_state, NULL);
 		}
 
 		switch (ret) {
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index d10eba8089d1..3ab28993f6e0 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -990,11 +990,8 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 	/* Remove an active timer from the queue: */
 	ret = remove_hrtimer(timer, base);
 
-	/* Switch the timer base, if necessary: */
-	new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
-
 	if (mode & HRTIMER_MODE_REL) {
-		tim = ktime_add_safe(tim, new_base->get_time());
+		tim = ktime_add_safe(tim, base->get_time());
 		/*
 		 * CONFIG_TIME_LOW_RES is a temporary way for architectures
 		 * to signal that they simply return xtime in
@@ -1009,6 +1006,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 
 	hrtimer_set_expires_range_ns(timer, tim, delta_ns);
 
+	/* Switch the timer base, if necessary: */
+	new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
+
 	timer_stats_hrtimer_set_start_info(timer);
 
 	leftmost = enqueue_hrtimer(timer, new_base);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index c8380ad203bc..28c57069ef68 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1683,6 +1683,14 @@ int kernel_kexec(void)
 		kexec_in_progress = true;
 		kernel_restart_prepare(NULL);
 		migrate_to_reboot_cpu();
+
+		/*
+		 * migrate_to_reboot_cpu() disables CPU hotplug assuming that
+		 * no further code needs to use CPU hotplug (which is true in
+		 * the reboot case). However, the kexec path depends on using
+		 * CPU hotplug again; so re-enable it here.
+		 */
+		cpu_hotplug_enable();
 		printk(KERN_EMERG "Starting new kernel\n");
 		machine_shutdown();
 	}
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index b0e9467922e1..d24e4339b46d 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4188,7 +4188,7 @@ void debug_show_held_locks(struct task_struct *task)
 }
 EXPORT_SYMBOL_GPL(debug_show_held_locks);
 
-asmlinkage void lockdep_sys_exit(void)
+asmlinkage __visible void lockdep_sys_exit(void)
 {
 	struct task_struct *curr = current;
 
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index aa4dff04b594..a620d4d08ca6 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -343,9 +343,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 	 * top_waiter can be NULL, when we are in the deboosting
 	 * mode!
 	 */
-	if (top_waiter && (!task_has_pi_waiters(task) ||
-			   top_waiter != task_top_pi_waiter(task)))
-		goto out_unlock_pi;
+	if (top_waiter) {
+		if (!task_has_pi_waiters(task))
+			goto out_unlock_pi;
+		/*
+		 * If deadlock detection is off, we stop here if we
+		 * are not the top pi waiter of the task.
+		 */
+		if (!detect_deadlock && top_waiter != task_top_pi_waiter(task))
+			goto out_unlock_pi;
+	}
 
 	/*
 	 * When deadlock detection is off then we check, if further
@@ -361,7 +368,12 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 		goto retry;
 	}
 
-	/* Deadlock detection */
+	/*
+	 * Deadlock detection. If the lock is the same as the original
+	 * lock which caused us to walk the lock chain or if the
+	 * current lock is owned by the task which initiated the chain
+	 * walk, we detected a deadlock.
+	 */
 	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
 		debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
 		raw_spin_unlock(&lock->wait_lock);
@@ -527,6 +539,18 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 	unsigned long flags;
 	int chain_walk = 0, res;
 
+	/*
+	 * Early deadlock detection. We really don't want the task to
+	 * enqueue on itself just to untangle the mess later. It's not
+	 * only an optimization. We drop the locks, so another waiter
+	 * can come in before the chain walk detects the deadlock. So
+	 * the other will detect the deadlock and return -EDEADLOCK,
+	 * which is wrong, as the other waiter is not in a deadlock
+	 * situation.
+	 */
+	if (detect_deadlock && owner == task)
+		return -EDEADLK;
+
 	raw_spin_lock_irqsave(&task->pi_lock, flags);
 	__rt_mutex_adjust_prio(task);
 	waiter->task = task;
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 18fb7a2fb14b..1ea328aafdc9 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1586,7 +1586,7 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
 		return -ENOMEM;
 }
 
-asmlinkage int swsusp_save(void)
+asmlinkage __visible int swsusp_save(void)
 {
 	unsigned int nr_pages, nr_highmem;
 
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index a45b50962295..7228258b85ec 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1674,7 +1674,7 @@ EXPORT_SYMBOL(printk_emit);
  *
  * See the vsnprintf() documentation for format string extensions over C99.
  */
-asmlinkage int printk(const char *fmt, ...)
+asmlinkage __visible int printk(const char *fmt, ...)
 {
 	va_list args;
 	int r;
@@ -1737,7 +1737,7 @@ void early_vprintk(const char *fmt, va_list ap)
 	}
 }
 
-asmlinkage void early_printk(const char *fmt, ...)
+asmlinkage __visible void early_printk(const char *fmt, ...)
 {
 	va_list ap;
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 00781cc38047..a8c0fde25e4a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2192,7 +2192,7 @@ static inline void post_schedule(struct rq *rq)
  * schedule_tail - first thing a freshly forked thread must call.
  * @prev: the thread we just switched away from.
  */
-asmlinkage void schedule_tail(struct task_struct *prev)
+asmlinkage __visible void schedule_tail(struct task_struct *prev)
 	__releases(rq->lock)
 {
 	struct rq *rq = this_rq();
@@ -2594,8 +2594,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev)
 	if (likely(prev->sched_class == class &&
 		   rq->nr_running == rq->cfs.h_nr_running)) {
 		p = fair_sched_class.pick_next_task(rq, prev);
-		if (likely(p && p != RETRY_TASK))
-			return p;
+		if (unlikely(p == RETRY_TASK))
+			goto again;
+
+		/* assumes fair_sched_class->next == idle_sched_class */
+		if (unlikely(!p))
+			p = idle_sched_class.pick_next_task(rq, prev);
+
+		return p;
 	}
 
 again:
@@ -2743,7 +2749,7 @@ static inline void sched_submit_work(struct task_struct *tsk)
 	blk_schedule_flush_plug(tsk);
 }
 
-asmlinkage void __sched schedule(void)
+asmlinkage __visible void __sched schedule(void)
 {
 	struct task_struct *tsk = current;
 
@@ -2753,7 +2759,7 @@ asmlinkage void __sched schedule(void)
 EXPORT_SYMBOL(schedule);
 
 #ifdef CONFIG_CONTEXT_TRACKING
-asmlinkage void __sched schedule_user(void)
+asmlinkage __visible void __sched schedule_user(void)
 {
 	/*
 	 * If we come here after a random call to set_need_resched(),
@@ -2785,7 +2791,7 @@ void __sched schedule_preempt_disabled(void)
  * off of preempt_enable. Kernel preemptions off return from interrupt
  * occur there and call schedule directly.
  */
-asmlinkage void __sched notrace preempt_schedule(void)
+asmlinkage __visible void __sched notrace preempt_schedule(void)
 {
 	/*
 	 * If there is a non-zero preempt_count or interrupts are disabled,
@@ -2816,7 +2822,7 @@ EXPORT_SYMBOL(preempt_schedule);
 * Note, that this is called and return with irqs disabled. This will
 * protect us against recursive calling from irq.
 */
-asmlinkage void __sched preempt_schedule_irq(void)
+asmlinkage __visible void __sched preempt_schedule_irq(void)
 {
 	enum ctx_state prev_state;
 
@@ -3127,6 +3133,7 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
 	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
 	dl_se->dl_throttled = 0;
 	dl_se->dl_new = 1;
+	dl_se->dl_yielded = 0;
 }
 
 static void __setscheduler_params(struct task_struct *p,
@@ -3191,17 +3198,40 @@ __getparam_dl(struct task_struct *p, struct sched_attr *attr)
 * We ask for the deadline not being zero, and greater or equal
 * than the runtime, as well as the period of being zero or
 * greater than deadline. Furthermore, we have to be sure that
- * user parameters are above the internal resolution (1us); we
- * check sched_runtime only since it is always the smaller one.
+ * user parameters are above the internal resolution of 1us (we
+ * check sched_runtime only since it is always the smaller one) and
+ * below 2^63 ns (we have to check both sched_deadline and
+ * sched_period, as the latter can be zero).
 */
 static bool
 __checkparam_dl(const struct sched_attr *attr)
 {
-	return attr && attr->sched_deadline != 0 &&
-		(attr->sched_period == 0 ||
-		(s64)(attr->sched_period - attr->sched_deadline) >= 0) &&
-		(s64)(attr->sched_deadline - attr->sched_runtime) >= 0 &&
-		attr->sched_runtime >= (2 << (DL_SCALE - 1));
+	/* deadline != 0 */
+	if (attr->sched_deadline == 0)
+		return false;
+
+	/*
+	 * Since we truncate DL_SCALE bits, make sure we're at least
+	 * that big.
+	 */
+	if (attr->sched_runtime < (1ULL << DL_SCALE))
+		return false;
+
+	/*
+	 * Since we use the MSB for wrap-around and sign issues, make
+	 * sure it's not set (mind that period can be equal to zero).
+	 */
+	if (attr->sched_deadline & (1ULL << 63) ||
+	    attr->sched_period & (1ULL << 63))
+		return false;
+
+	/* runtime <= deadline <= period (if period != 0) */
+	if ((attr->sched_period != 0 &&
+	     attr->sched_period < attr->sched_deadline) ||
+	    attr->sched_deadline < attr->sched_runtime)
+		return false;
+
+	return true;
 }
 
 /*
@@ -3642,6 +3672,7 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
 * sys_sched_setattr - same as above, but with extended sched_attr
 * @pid: the pid in question.
 * @uattr: structure containing the extended parameters.
+ * @flags: for future extension.
 */
 SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr,
 		unsigned int, flags)
@@ -3653,8 +3684,12 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr,
 	if (!uattr || pid < 0 || flags)
 		return -EINVAL;
 
-	if (sched_copy_attr(uattr, &attr))
-		return -EFAULT;
+	retval = sched_copy_attr(uattr, &attr);
+	if (retval)
+		return retval;
+
+	if (attr.sched_policy < 0)
+		return -EINVAL;
 
 	rcu_read_lock();
 	retval = -ESRCH;
@@ -3704,7 +3739,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
 */
 SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
 {
-	struct sched_param lp;
+	struct sched_param lp = { .sched_priority = 0 };
 	struct task_struct *p;
 	int retval;
 
@@ -3721,11 +3756,8 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
 	if (retval)
 		goto out_unlock;
 
-	if (task_has_dl_policy(p)) {
-		retval = -EINVAL;
-		goto out_unlock;
-	}
-	lp.sched_priority = p->rt_priority;
+	if (task_has_rt_policy(p))
+		lp.sched_priority = p->rt_priority;
 	rcu_read_unlock();
 
 	/*
@@ -3786,6 +3818,7 @@ err_size:
 * @pid: the pid in question.
 * @uattr: structure containing the extended parameters.
 * @size: sizeof(attr) for fwd/bwd comp.
+ * @flags: for future extension.
 */
 SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
 		unsigned int, size, unsigned int, flags)
@@ -5046,7 +5079,6 @@ static int sched_cpu_active(struct notifier_block *nfb,
 			  unsigned long action, void *hcpu)
 {
 	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_STARTING:
 	case CPU_DOWN_FAILED:
 		set_cpu_active((long)hcpu, true);
 		return NOTIFY_OK;
@@ -6020,6 +6052,8 @@ sd_numa_init(struct sched_domain_topology_level *tl, int cpu)
 					,
 		.last_balance		= jiffies,
 		.balance_interval	= sd_weight,
+		.max_newidle_lb_cost	= 0,
+		.next_decay_max_lb_cost	= jiffies,
 	};
 	SD_INIT_NAME(sd, NUMA);
 	sd->private = &tl->data;
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 5b9bb42b2d47..bd95963dae80 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -13,6 +13,7 @@
 
 #include <linux/gfp.h>
 #include <linux/kernel.h>
+#include <linux/slab.h>
 #include "cpudeadline.h"
 
 static inline int parent(int i)
@@ -39,8 +40,10 @@ static void cpudl_exchange(struct cpudl *cp, int a, int b)
 {
 	int cpu_a = cp->elements[a].cpu, cpu_b = cp->elements[b].cpu;
 
-	swap(cp->elements[a], cp->elements[b]);
-	swap(cp->cpu_to_idx[cpu_a], cp->cpu_to_idx[cpu_b]);
+	swap(cp->elements[a].cpu, cp->elements[b].cpu);
+	swap(cp->elements[a].dl , cp->elements[b].dl );
+
+	swap(cp->elements[cpu_a].idx, cp->elements[cpu_b].idx);
 }
 
 static void cpudl_heapify(struct cpudl *cp, int idx)
@@ -140,7 +143,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
 	WARN_ON(!cpu_present(cpu));
 
 	raw_spin_lock_irqsave(&cp->lock, flags);
-	old_idx = cp->cpu_to_idx[cpu];
+	old_idx = cp->elements[cpu].idx;
 	if (!is_valid) {
 		/* remove item */
 		if (old_idx == IDX_INVALID) {
@@ -155,8 +158,8 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
 		cp->elements[old_idx].dl = cp->elements[cp->size - 1].dl;
 		cp->elements[old_idx].cpu = new_cpu;
 		cp->size--;
-		cp->cpu_to_idx[new_cpu] = old_idx;
-		cp->cpu_to_idx[cpu] = IDX_INVALID;
+		cp->elements[new_cpu].idx = old_idx;
+		cp->elements[cpu].idx = IDX_INVALID;
 		while (old_idx > 0 && dl_time_before(
 				cp->elements[parent(old_idx)].dl,
 				cp->elements[old_idx].dl)) {
@@ -173,7 +176,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
 		cp->size++;
 		cp->elements[cp->size - 1].dl = 0;
 		cp->elements[cp->size - 1].cpu = cpu;
-		cp->cpu_to_idx[cpu] = cp->size - 1;
+		cp->elements[cpu].idx = cp->size - 1;
 		cpudl_change_key(cp, cp->size - 1, dl);
 		cpumask_clear_cpu(cpu, cp->free_cpus);
 	} else {
@@ -195,10 +198,21 @@ int cpudl_init(struct cpudl *cp)
 	memset(cp, 0, sizeof(*cp));
 	raw_spin_lock_init(&cp->lock);
 	cp->size = 0;
-	for (i = 0; i < NR_CPUS; i++)
-		cp->cpu_to_idx[i] = IDX_INVALID;
-	if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL))
+
+	cp->elements = kcalloc(nr_cpu_ids,
+			       sizeof(struct cpudl_item),
+			       GFP_KERNEL);
+	if (!cp->elements)
+		return -ENOMEM;
+
+	if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) {
+		kfree(cp->elements);
 		return -ENOMEM;
+	}
+
+	for_each_possible_cpu(i)
+		cp->elements[i].idx = IDX_INVALID;
+
 	cpumask_setall(cp->free_cpus);
 
 	return 0;
@@ -210,7 +224,6 @@ int cpudl_init(struct cpudl *cp)
 */
 void cpudl_cleanup(struct cpudl *cp)
 {
-	/*
-	 * nothing to do for the moment
-	 */
+	free_cpumask_var(cp->free_cpus);
+	kfree(cp->elements);
 }
diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h
index a202789a412c..538c9796ad4a 100644
--- a/kernel/sched/cpudeadline.h
+++ b/kernel/sched/cpudeadline.h
@@ -5,17 +5,17 @@
 
 #define IDX_INVALID     -1
 
-struct array_item {
+struct cpudl_item {
 	u64 dl;
 	int cpu;
+	int idx;
 };
 
 struct cpudl {
 	raw_spinlock_t lock;
 	int size;
-	int cpu_to_idx[NR_CPUS];
-	struct array_item elements[NR_CPUS];
 	cpumask_var_t free_cpus;
+	struct cpudl_item *elements;
 };
 
 
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 8b836b376d91..8834243abee2 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -30,6 +30,7 @@
 #include <linux/gfp.h>
 #include <linux/sched.h>
 #include <linux/sched/rt.h>
+#include <linux/slab.h>
 #include "cpupri.h"
 
 /* Convert between a 140 based task->prio, and our 102 based cpupri */
@@ -70,8 +71,7 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
 	int idx = 0;
 	int task_pri = convert_prio(p->prio);
 
-	if (task_pri >= MAX_RT_PRIO)
-		return 0;
+	BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES);
 
 	for (idx = 0; idx < task_pri; idx++) {
 		struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
@@ -219,8 +219,13 @@ int cpupri_init(struct cpupri *cp)
 			goto cleanup;
 	}
 
+	cp->cpu_to_pri = kcalloc(nr_cpu_ids, sizeof(int), GFP_KERNEL);
+	if (!cp->cpu_to_pri)
+		goto cleanup;
+
 	for_each_possible_cpu(i)
 		cp->cpu_to_pri[i] = CPUPRI_INVALID;
+
 	return 0;
 
 cleanup:
@@ -237,6 +242,7 @@ void cpupri_cleanup(struct cpupri *cp)
 {
 	int i;
 
+	kfree(cp->cpu_to_pri);
 	for (i = 0; i < CPUPRI_NR_PRIORITIES; i++)
 		free_cpumask_var(cp->pri_to_cpu[i].mask);
 }
diff --git a/kernel/sched/cpupri.h b/kernel/sched/cpupri.h
index f6d756173491..6b033347fdfd 100644
--- a/kernel/sched/cpupri.h
+++ b/kernel/sched/cpupri.h
@@ -17,7 +17,7 @@ struct cpupri_vec {
 
 struct cpupri {
 	struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES];
-	int cpu_to_pri[NR_CPUS];
+	int *cpu_to_pri;
 };
 
 #ifdef CONFIG_SMP
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index a95097cb4591..72fdf06ef865 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -332,50 +332,50 @@ out:
332 * softirq as those do not count in task exec_runtime any more. 332 * softirq as those do not count in task exec_runtime any more.
333 */ 333 */
334static void irqtime_account_process_tick(struct task_struct *p, int user_tick, 334static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
335 struct rq *rq) 335 struct rq *rq, int ticks)
336{ 336{
337 cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); 337 cputime_t scaled = cputime_to_scaled(cputime_one_jiffy);
338 u64 cputime = (__force u64) cputime_one_jiffy;
338 u64 *cpustat = kcpustat_this_cpu->cpustat; 339 u64 *cpustat = kcpustat_this_cpu->cpustat;
339 340
340 if (steal_account_process_tick()) 341 if (steal_account_process_tick())
341 return; 342 return;
342 343
344 cputime *= ticks;
345 scaled *= ticks;
346
343 if (irqtime_account_hi_update()) { 347 if (irqtime_account_hi_update()) {
344 cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy; 348 cpustat[CPUTIME_IRQ] += cputime;
345 } else if (irqtime_account_si_update()) { 349 } else if (irqtime_account_si_update()) {
346 cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy; 350 cpustat[CPUTIME_SOFTIRQ] += cputime;
347 } else if (this_cpu_ksoftirqd() == p) { 351 } else if (this_cpu_ksoftirqd() == p) {
348 /* 352 /*
349 * ksoftirqd time do not get accounted in cpu_softirq_time. 353 * ksoftirqd time do not get accounted in cpu_softirq_time.
350 * So, we have to handle it separately here. 354 * So, we have to handle it separately here.
351 * Also, p->stime needs to be updated for ksoftirqd. 355 * Also, p->stime needs to be updated for ksoftirqd.
352 */ 356 */
353 __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, 357 __account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ);
354 CPUTIME_SOFTIRQ);
355 } else if (user_tick) { 358 } else if (user_tick) {
356 account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); 359 account_user_time(p, cputime, scaled);
357 } else if (p == rq->idle) { 360 } else if (p == rq->idle) {
358 account_idle_time(cputime_one_jiffy); 361 account_idle_time(cputime);
359 } else if (p->flags & PF_VCPU) { /* System time or guest time */ 362 } else if (p->flags & PF_VCPU) { /* System time or guest time */
360 account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); 363 account_guest_time(p, cputime, scaled);
361 } else { 364 } else {
362 __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, 365 __account_system_time(p, cputime, scaled, CPUTIME_SYSTEM);
363 CPUTIME_SYSTEM);
364 } 366 }
365} 367}
366 368
367static void irqtime_account_idle_ticks(int ticks) 369static void irqtime_account_idle_ticks(int ticks)
368{ 370{
369 int i;
370 struct rq *rq = this_rq(); 371 struct rq *rq = this_rq();
371 372
372 for (i = 0; i < ticks; i++) 373 irqtime_account_process_tick(current, 0, rq, ticks);
373 irqtime_account_process_tick(current, 0, rq);
374} 374}
375#else /* CONFIG_IRQ_TIME_ACCOUNTING */ 375#else /* CONFIG_IRQ_TIME_ACCOUNTING */
376static inline void irqtime_account_idle_ticks(int ticks) {} 376static inline void irqtime_account_idle_ticks(int ticks) {}
377static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick, 377static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
378 struct rq *rq) {} 378 struct rq *rq, int nr_ticks) {}
379#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ 379#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
380 380
381/* 381/*
@@ -464,7 +464,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
464 return; 464 return;
465 465
466 if (sched_clock_irqtime) { 466 if (sched_clock_irqtime) {
467 irqtime_account_process_tick(p, user_tick, rq); 467 irqtime_account_process_tick(p, user_tick, rq, 1);
468 return; 468 return;
469 } 469 }
470 470
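
The cputime change folds the per-tick loop in irqtime_account_idle_ticks() into a single call: the per-tick amounts are multiplied by the tick count once, so N idle ticks cost one accounting pass instead of N, while the existing if/else chain in the tick handler stays as it was. A compilable sketch of the before/after shape (plain C, hypothetical names):

#include <stdio.h>

static unsigned long long idle_time;
static const unsigned long long one_tick = 4000000ULL; /* e.g. 4ms per tick */

/* old shape: one call accounts exactly one tick */
static void account_one_tick(void)
{
	idle_time += one_tick;
}

/* new shape: scale the per-tick amount by the number of ticks and
 * account the whole batch in one call */
static void account_ticks(int ticks)
{
	idle_time += one_tick * (unsigned long long)ticks;
}

int main(void)
{
	int i;

	for (i = 0; i < 1000; i++)   /* old: 1000 passes through the accounting */
		account_one_tick();

	idle_time = 0;
	account_ticks(1000);         /* new: one pass, same total */

	printf("%llu\n", idle_time);
	return 0;
}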
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index b08095786cb8..800e99b99075 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -528,6 +528,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
528 sched_clock_tick(); 528 sched_clock_tick();
529 update_rq_clock(rq); 529 update_rq_clock(rq);
530 dl_se->dl_throttled = 0; 530 dl_se->dl_throttled = 0;
531 dl_se->dl_yielded = 0;
531 if (p->on_rq) { 532 if (p->on_rq) {
532 enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); 533 enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
533 if (task_has_dl_policy(rq->curr)) 534 if (task_has_dl_policy(rq->curr))
@@ -893,10 +894,10 @@ static void yield_task_dl(struct rq *rq)
893 * We make the task go to sleep until its current deadline by 894 * We make the task go to sleep until its current deadline by
894 * forcing its runtime to zero. This way, update_curr_dl() stops 895 * forcing its runtime to zero. This way, update_curr_dl() stops
895 * it and the bandwidth timer will wake it up and will give it 896 * it and the bandwidth timer will wake it up and will give it
896 * new scheduling parameters (thanks to dl_new=1). 897 * new scheduling parameters (thanks to dl_yielded=1).
897 */ 898 */
898 if (p->dl.runtime > 0) { 899 if (p->dl.runtime > 0) {
899 rq->curr->dl.dl_new = 1; 900 rq->curr->dl.dl_yielded = 1;
900 p->dl.runtime = 0; 901 p->dl.runtime = 0;
901 } 902 }
902 update_curr_dl(rq); 903 update_curr_dl(rq);
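
yield_task_dl() used to abuse dl_new to get fresh parameters at the next replenishment; the patch introduces a dedicated dl_yielded flag, set on yield and cleared by the bandwidth timer before the task is requeued. A hedged sketch of that handshake (not the scheduler code, just the flag protocol):

struct dl_entity_like {
	long long runtime;   /* remaining budget in the current period */
	int throttled;
	int yielded;
};

/* yield path: give up the rest of the budget and remember that this
 * was voluntary, so the timer hands out fresh parameters */
static void yield_like(struct dl_entity_like *se)
{
	if (se->runtime > 0) {
		se->yielded = 1;
		se->runtime = 0;
	}
}

/* replenishment timer: refill the budget and clear both markers
 * before the task is made runnable again */
static void replenish_like(struct dl_entity_like *se, long long budget)
{
	se->runtime = budget;
	se->throttled = 0;
	se->yielded = 0;
}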
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7570dd969c28..0fdb96de81a5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6653,6 +6653,7 @@ static int idle_balance(struct rq *this_rq)
6653 int this_cpu = this_rq->cpu; 6653 int this_cpu = this_rq->cpu;
6654 6654
6655 idle_enter_fair(this_rq); 6655 idle_enter_fair(this_rq);
6656
6656 /* 6657 /*
6657 * We must set idle_stamp _before_ calling idle_balance(), such that we 6658 * We must set idle_stamp _before_ calling idle_balance(), such that we
6658 * measure the duration of idle_balance() as idle time. 6659 * measure the duration of idle_balance() as idle time.
@@ -6705,14 +6706,16 @@ static int idle_balance(struct rq *this_rq)
6705 6706
6706 raw_spin_lock(&this_rq->lock); 6707 raw_spin_lock(&this_rq->lock);
6707 6708
6709 if (curr_cost > this_rq->max_idle_balance_cost)
6710 this_rq->max_idle_balance_cost = curr_cost;
6711
6708 /* 6712 /*
6709 * While browsing the domains, we released the rq lock. 6713 * While browsing the domains, we released the rq lock, a task could
6710 * A task could have be enqueued in the meantime 6714 * have been enqueued in the meantime. Since we're not going idle,
6715 * pretend we pulled a task.
6711 */ 6716 */
6712 if (this_rq->cfs.h_nr_running && !pulled_task) { 6717 if (this_rq->cfs.h_nr_running && !pulled_task)
6713 pulled_task = 1; 6718 pulled_task = 1;
6714 goto out;
6715 }
6716 6719
6717 if (pulled_task || time_after(jiffies, this_rq->next_balance)) { 6720 if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
6718 /* 6721 /*
@@ -6722,9 +6725,6 @@ static int idle_balance(struct rq *this_rq)
6722 this_rq->next_balance = next_balance; 6725 this_rq->next_balance = next_balance;
6723 } 6726 }
6724 6727
6725 if (curr_cost > this_rq->max_idle_balance_cost)
6726 this_rq->max_idle_balance_cost = curr_cost;
6727
6728out: 6728out:
6729 /* Is there a task of a high priority class? */ 6729 /* Is there a task of a high priority class? */
6730 if (this_rq->nr_running != this_rq->cfs.h_nr_running && 6730 if (this_rq->nr_running != this_rq->cfs.h_nr_running &&
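
Two fixes ride together in idle_balance(): max_idle_balance_cost is now updated right after the rq lock is retaken, before the early "pretend we pulled a task" path can skip it, and that path no longer jumps over the next_balance update. The general pattern, recheck state and update bookkeeping immediately after reacquiring a dropped lock, sketched in plain C with hypothetical names:

#include <pthread.h>

static pthread_mutex_t rq_lock = PTHREAD_MUTEX_INITIALIZER;
static int queue_len, max_cost;

/* returns nonzero if the caller should treat the balance as successful */
static int balance_like(int pulled, int cur_cost)
{
	/* ... the lock was dropped here while other queues were scanned ... */

	pthread_mutex_lock(&rq_lock);

	/* update statistics first, so no early-out below can skip them */
	if (cur_cost > max_cost)
		max_cost = cur_cost;

	/* work may have arrived while the lock was dropped; report it
	 * instead of going idle, but fall through to the remaining
	 * bookkeeping rather than jumping straight out */
	if (queue_len > 0 && !pulled)
		pulled = 1;

	pthread_mutex_unlock(&rq_lock);
	return pulled;
}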
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 33e4648ae0e7..92f24f5e8d52 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -223,7 +223,7 @@ static inline bool lockdep_softirq_start(void) { return false; }
223static inline void lockdep_softirq_end(bool in_hardirq) { } 223static inline void lockdep_softirq_end(bool in_hardirq) { }
224#endif 224#endif
225 225
226asmlinkage void __do_softirq(void) 226asmlinkage __visible void __do_softirq(void)
227{ 227{
228 unsigned long end = jiffies + MAX_SOFTIRQ_TIME; 228 unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
229 unsigned long old_flags = current->flags; 229 unsigned long old_flags = current->flags;
@@ -299,7 +299,7 @@ restart:
299 tsk_restore_flags(current, old_flags, PF_MEMALLOC); 299 tsk_restore_flags(current, old_flags, PF_MEMALLOC);
300} 300}
301 301
302asmlinkage void do_softirq(void) 302asmlinkage __visible void do_softirq(void)
303{ 303{
304 __u32 pending; 304 __u32 pending;
305 unsigned long flags; 305 unsigned long flags;
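
do_softirq() and __do_softirq() are entered from assembly, so an LTO or whole-program build must not localize or drop them; __visible expresses exactly that. On GCC of that era the kernel defines it roughly as below (approximate, the real header gates it on compiler version):

#define __visible __attribute__((externally_visible))

/* a function reached only from assembly: without the attribute, a
 * whole-program optimizer may assume it is unused and discard it */
__visible void called_from_asm_only(void)
{
}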
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index ac5b23cf7212..6620e5837ce2 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -188,7 +188,6 @@ static int tracepoint_add_func(struct tracepoint *tp,
188 WARN_ON_ONCE(1); 188 WARN_ON_ONCE(1);
189 return PTR_ERR(old); 189 return PTR_ERR(old);
190 } 190 }
191 release_probes(old);
192 191
193 /* 192 /*
194 * rcu_assign_pointer has a smp_wmb() which makes sure that the new 193 * rcu_assign_pointer has a smp_wmb() which makes sure that the new
@@ -200,6 +199,7 @@ static int tracepoint_add_func(struct tracepoint *tp,
200 rcu_assign_pointer(tp->funcs, tp_funcs); 199 rcu_assign_pointer(tp->funcs, tp_funcs);
201 if (!static_key_enabled(&tp->key)) 200 if (!static_key_enabled(&tp->key))
202 static_key_slow_inc(&tp->key); 201 static_key_slow_inc(&tp->key);
202 release_probes(old);
203 return 0; 203 return 0;
204} 204}
205 205
@@ -221,7 +221,6 @@ static int tracepoint_remove_func(struct tracepoint *tp,
221 WARN_ON_ONCE(1); 221 WARN_ON_ONCE(1);
222 return PTR_ERR(old); 222 return PTR_ERR(old);
223 } 223 }
224 release_probes(old);
225 224
226 if (!tp_funcs) { 225 if (!tp_funcs) {
227 /* Removed last function */ 226 /* Removed last function */
@@ -232,6 +231,7 @@ static int tracepoint_remove_func(struct tracepoint *tp,
232 static_key_slow_dec(&tp->key); 231 static_key_slow_dec(&tp->key);
233 } 232 }
234 rcu_assign_pointer(tp->funcs, tp_funcs); 233 rcu_assign_pointer(tp->funcs, tp_funcs);
234 release_probes(old);
235 return 0; 235 return 0;
236} 236}
237 237
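
Both tracepoint_add_func() and tracepoint_remove_func() now publish the new funcs array with rcu_assign_pointer() before releasing the old one; releasing first opens a window where a reader that dereferences the still-published old array can outlive the grace period started by release_probes(). A userspace analogue of the publish-then-reclaim ordering, using a rwlock as a crude stand-in for an RCU grace period (hypothetical names, not the tracepoint code):

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

static _Atomic(int *) funcs;                        /* stand-in for tp->funcs */
static pthread_rwlock_t grace = PTHREAD_RWLOCK_INITIALIZER;

/* readers bracket their access, like rcu_read_lock()/unlock() */
static int read_first(void)
{
	int v;

	pthread_rwlock_rdlock(&grace);
	int *p = atomic_load(&funcs);
	v = p ? p[0] : 0;
	pthread_rwlock_unlock(&grace);
	return v;
}

/* updater: publish the replacement first, then wait out the readers,
 * and only then free the old array */
static void replace(int *new_funcs)
{
	int *old = atomic_exchange(&funcs, new_funcs);  /* publish */

	pthread_rwlock_wrlock(&grace);                  /* "grace period" */
	pthread_rwlock_unlock(&grace);

	free(old);                                      /* now safe to reclaim */
}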
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 0ee63af30bd1..8edc87185427 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1916,6 +1916,12 @@ static void send_mayday(struct work_struct *work)
1916 1916
1917 /* mayday mayday mayday */ 1917 /* mayday mayday mayday */
1918 if (list_empty(&pwq->mayday_node)) { 1918 if (list_empty(&pwq->mayday_node)) {
1919 /*
1920 * If @pwq is for an unbound wq, its base ref may be put at
1921 * any time due to an attribute change. Pin @pwq until the
1922 * rescuer is done with it.
1923 */
1924 get_pwq(pwq);
1919 list_add_tail(&pwq->mayday_node, &wq->maydays); 1925 list_add_tail(&pwq->mayday_node, &wq->maydays);
1920 wake_up_process(wq->rescuer->task); 1926 wake_up_process(wq->rescuer->task);
1921 } 1927 }
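
send_mayday() now takes a reference on the pwq before putting it on the mayday list, because an unbound pwq's base reference can be dropped at any time once attributes change; the rescuer drops that reference when it is done (see the matching put_pwq() in the rescuer hunk below). The underlying pattern, pin an object before exposing it on a list that outlives your lock, sketched with a plain refcount and hypothetical names:

#include <stdlib.h>

struct pinned {
	int refcnt;            /* protected by the owner's lock in real code */
	struct pinned *next;   /* mayday-list style linkage */
};

static void get_ref(struct pinned *p)
{
	p->refcnt++;
}

static void put_ref(struct pinned *p)
{
	if (--p->refcnt == 0)
		free(p);
}

/* enqueue for later servicing: the list may be walked long after the
 * caller's lock is dropped, so the entry must hold its own reference */
static void queue_for_rescue(struct pinned **list, struct pinned *p)
{
	get_ref(p);            /* pin before publishing on the list */
	p->next = *list;
	*list = p;
}

/* consumer side: drop the pin only after the entry has been serviced */
static void service_one(struct pinned **list)
{
	struct pinned *p = *list;

	if (!p)
		return;
	*list = p->next;
	/* ... do the actual rescue work on p ... */
	put_ref(p);
}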
@@ -2398,6 +2404,7 @@ static int rescuer_thread(void *__rescuer)
2398 struct worker *rescuer = __rescuer; 2404 struct worker *rescuer = __rescuer;
2399 struct workqueue_struct *wq = rescuer->rescue_wq; 2405 struct workqueue_struct *wq = rescuer->rescue_wq;
2400 struct list_head *scheduled = &rescuer->scheduled; 2406 struct list_head *scheduled = &rescuer->scheduled;
2407 bool should_stop;
2401 2408
2402 set_user_nice(current, RESCUER_NICE_LEVEL); 2409 set_user_nice(current, RESCUER_NICE_LEVEL);
2403 2410
@@ -2409,11 +2416,15 @@ static int rescuer_thread(void *__rescuer)
2409repeat: 2416repeat:
2410 set_current_state(TASK_INTERRUPTIBLE); 2417 set_current_state(TASK_INTERRUPTIBLE);
2411 2418
2412 if (kthread_should_stop()) { 2419 /*
2413 __set_current_state(TASK_RUNNING); 2420 * By the time the rescuer is requested to stop, the workqueue
2414 rescuer->task->flags &= ~PF_WQ_WORKER; 2421 * shouldn't have any work pending, but @wq->maydays may still have
2415 return 0; 2422 * pwq(s) queued. This can happen by non-rescuer workers consuming
2416 } 2423 * all the work items before the rescuer got to them. Go through
2424 * @wq->maydays processing before acting on should_stop so that the
2425 * list is always empty on exit.
2426 */
2427 should_stop = kthread_should_stop();
2417 2428
2418 /* see whether any pwq is asking for help */ 2429 /* see whether any pwq is asking for help */
2419 spin_lock_irq(&wq_mayday_lock); 2430 spin_lock_irq(&wq_mayday_lock);
@@ -2445,6 +2456,12 @@ repeat:
2445 process_scheduled_works(rescuer); 2456 process_scheduled_works(rescuer);
2446 2457
2447 /* 2458 /*
2459 * Put the reference grabbed by send_mayday(). @pool won't
2460 * go away while we're holding its lock.
2461 */
2462 put_pwq(pwq);
2463
2464 /*
2448 * Leave this pool. If keep_working() is %true, notify a 2465 * Leave this pool. If keep_working() is %true, notify a
2449 * regular worker; otherwise, we end up with 0 concurrency 2466 * regular worker; otherwise, we end up with 0 concurrency
2450 * and stalling the execution. 2467 * and stalling the execution.
@@ -2459,6 +2476,12 @@ repeat:
2459 2476
2460 spin_unlock_irq(&wq_mayday_lock); 2477 spin_unlock_irq(&wq_mayday_lock);
2461 2478
2479 if (should_stop) {
2480 __set_current_state(TASK_RUNNING);
2481 rescuer->task->flags &= ~PF_WQ_WORKER;
2482 return 0;
2483 }
2484
2462 /* rescuers should never participate in concurrency management */ 2485 /* rescuers should never participate in concurrency management */
2463 WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING)); 2486 WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
2464 schedule(); 2487 schedule();
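
The rescuer no longer exits the moment kthread_should_stop() is true: it samples the stop request once, drains whatever is still on @wq->maydays, and only then acts on it, so the list is guaranteed empty on exit. The same sample-then-drain shape in a generic worker loop (hypothetical stand-in functions, declared but not defined here):

#include <stdbool.h>
#include <stddef.h>

extern bool stop_requested(void);        /* stand-in for kthread_should_stop() */
extern void *dequeue_mayday(void);       /* returns NULL when the list is empty */
extern void service(void *item);

static int rescuer_like(void)
{
	for (;;) {
		void *item;

		/* sample the stop request first ... */
		bool should_stop = stop_requested();

		/* ... but finish everything already queued before honoring it,
		 * so nothing is left behind on the list when we exit */
		while ((item = dequeue_mayday()) != NULL)
			service(item);

		if (should_stop)
			return 0;

		/* sleep until woken by the next mayday (elided) */
	}
}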
@@ -4100,7 +4123,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
4100 if (!pwq) { 4123 if (!pwq) {
4101 pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n", 4124 pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
4102 wq->name); 4125 wq->name);
4103 goto out_unlock; 4126 mutex_lock(&wq->mutex);
4127 goto use_dfl_pwq;
4104 } 4128 }
4105 4129
4106 /* 4130 /*