diff options
Diffstat (limited to 'kernel')
41 files changed, 525 insertions, 345 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index 7c2893602d06..47845c57eb19 100644 --- a/kernel/audit.c +++ b/kernel/audit.c | |||
@@ -643,13 +643,13 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) | |||
643 | if ((task_active_pid_ns(current) != &init_pid_ns)) | 643 | if ((task_active_pid_ns(current) != &init_pid_ns)) |
644 | return -EPERM; | 644 | return -EPERM; |
645 | 645 | ||
646 | if (!capable(CAP_AUDIT_CONTROL)) | 646 | if (!netlink_capable(skb, CAP_AUDIT_CONTROL)) |
647 | err = -EPERM; | 647 | err = -EPERM; |
648 | break; | 648 | break; |
649 | case AUDIT_USER: | 649 | case AUDIT_USER: |
650 | case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: | 650 | case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: |
651 | case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: | 651 | case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: |
652 | if (!capable(CAP_AUDIT_WRITE)) | 652 | if (!netlink_capable(skb, CAP_AUDIT_WRITE)) |
653 | err = -EPERM; | 653 | err = -EPERM; |
654 | break; | 654 | break; |
655 | default: /* bad msg */ | 655 | default: /* bad msg */ |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9fcdaa705b6c..3f1ca934a237 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -348,7 +348,7 @@ struct cgrp_cset_link { | |||
348 | * reference-counted, to improve performance when child cgroups | 348 | * reference-counted, to improve performance when child cgroups |
349 | * haven't been created. | 349 | * haven't been created. |
350 | */ | 350 | */ |
351 | static struct css_set init_css_set = { | 351 | struct css_set init_css_set = { |
352 | .refcount = ATOMIC_INIT(1), | 352 | .refcount = ATOMIC_INIT(1), |
353 | .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), | 353 | .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), |
354 | .tasks = LIST_HEAD_INIT(init_css_set.tasks), | 354 | .tasks = LIST_HEAD_INIT(init_css_set.tasks), |
@@ -1495,7 +1495,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1495 | */ | 1495 | */ |
1496 | if (!use_task_css_set_links) | 1496 | if (!use_task_css_set_links) |
1497 | cgroup_enable_task_cg_lists(); | 1497 | cgroup_enable_task_cg_lists(); |
1498 | retry: | 1498 | |
1499 | mutex_lock(&cgroup_tree_mutex); | 1499 | mutex_lock(&cgroup_tree_mutex); |
1500 | mutex_lock(&cgroup_mutex); | 1500 | mutex_lock(&cgroup_mutex); |
1501 | 1501 | ||
@@ -1503,7 +1503,7 @@ retry: | |||
1503 | ret = parse_cgroupfs_options(data, &opts); | 1503 | ret = parse_cgroupfs_options(data, &opts); |
1504 | if (ret) | 1504 | if (ret) |
1505 | goto out_unlock; | 1505 | goto out_unlock; |
1506 | 1506 | retry: | |
1507 | /* look for a matching existing root */ | 1507 | /* look for a matching existing root */ |
1508 | if (!opts.subsys_mask && !opts.none && !opts.name) { | 1508 | if (!opts.subsys_mask && !opts.none && !opts.name) { |
1509 | cgrp_dfl_root_visible = true; | 1509 | cgrp_dfl_root_visible = true; |
@@ -1562,9 +1562,9 @@ retry: | |||
1562 | if (!atomic_inc_not_zero(&root->cgrp.refcnt)) { | 1562 | if (!atomic_inc_not_zero(&root->cgrp.refcnt)) { |
1563 | mutex_unlock(&cgroup_mutex); | 1563 | mutex_unlock(&cgroup_mutex); |
1564 | mutex_unlock(&cgroup_tree_mutex); | 1564 | mutex_unlock(&cgroup_tree_mutex); |
1565 | kfree(opts.release_agent); | ||
1566 | kfree(opts.name); | ||
1567 | msleep(10); | 1565 | msleep(10); |
1566 | mutex_lock(&cgroup_tree_mutex); | ||
1567 | mutex_lock(&cgroup_mutex); | ||
1568 | goto retry; | 1568 | goto retry; |
1569 | } | 1569 | } |
1570 | 1570 | ||
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 2bc4a2256444..345628c78b5b 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/uaccess.h> | 21 | #include <linux/uaccess.h> |
22 | #include <linux/freezer.h> | 22 | #include <linux/freezer.h> |
23 | #include <linux/seq_file.h> | 23 | #include <linux/seq_file.h> |
24 | #include <linux/mutex.h> | ||
24 | 25 | ||
25 | /* | 26 | /* |
26 | * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is | 27 | * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is |
@@ -42,9 +43,10 @@ enum freezer_state_flags { | |||
42 | struct freezer { | 43 | struct freezer { |
43 | struct cgroup_subsys_state css; | 44 | struct cgroup_subsys_state css; |
44 | unsigned int state; | 45 | unsigned int state; |
45 | spinlock_t lock; | ||
46 | }; | 46 | }; |
47 | 47 | ||
48 | static DEFINE_MUTEX(freezer_mutex); | ||
49 | |||
48 | static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) | 50 | static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) |
49 | { | 51 | { |
50 | return css ? container_of(css, struct freezer, css) : NULL; | 52 | return css ? container_of(css, struct freezer, css) : NULL; |
@@ -93,7 +95,6 @@ freezer_css_alloc(struct cgroup_subsys_state *parent_css) | |||
93 | if (!freezer) | 95 | if (!freezer) |
94 | return ERR_PTR(-ENOMEM); | 96 | return ERR_PTR(-ENOMEM); |
95 | 97 | ||
96 | spin_lock_init(&freezer->lock); | ||
97 | return &freezer->css; | 98 | return &freezer->css; |
98 | } | 99 | } |
99 | 100 | ||
@@ -110,14 +111,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css) | |||
110 | struct freezer *freezer = css_freezer(css); | 111 | struct freezer *freezer = css_freezer(css); |
111 | struct freezer *parent = parent_freezer(freezer); | 112 | struct freezer *parent = parent_freezer(freezer); |
112 | 113 | ||
113 | /* | 114 | mutex_lock(&freezer_mutex); |
114 | * The following double locking and freezing state inheritance | ||
115 | * guarantee that @cgroup can never escape ancestors' freezing | ||
116 | * states. See css_for_each_descendant_pre() for details. | ||
117 | */ | ||
118 | if (parent) | ||
119 | spin_lock_irq(&parent->lock); | ||
120 | spin_lock_nested(&freezer->lock, SINGLE_DEPTH_NESTING); | ||
121 | 115 | ||
122 | freezer->state |= CGROUP_FREEZER_ONLINE; | 116 | freezer->state |= CGROUP_FREEZER_ONLINE; |
123 | 117 | ||
@@ -126,10 +120,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css) | |||
126 | atomic_inc(&system_freezing_cnt); | 120 | atomic_inc(&system_freezing_cnt); |
127 | } | 121 | } |
128 | 122 | ||
129 | spin_unlock(&freezer->lock); | 123 | mutex_unlock(&freezer_mutex); |
130 | if (parent) | ||
131 | spin_unlock_irq(&parent->lock); | ||
132 | |||
133 | return 0; | 124 | return 0; |
134 | } | 125 | } |
135 | 126 | ||
@@ -144,14 +135,14 @@ static void freezer_css_offline(struct cgroup_subsys_state *css) | |||
144 | { | 135 | { |
145 | struct freezer *freezer = css_freezer(css); | 136 | struct freezer *freezer = css_freezer(css); |
146 | 137 | ||
147 | spin_lock_irq(&freezer->lock); | 138 | mutex_lock(&freezer_mutex); |
148 | 139 | ||
149 | if (freezer->state & CGROUP_FREEZING) | 140 | if (freezer->state & CGROUP_FREEZING) |
150 | atomic_dec(&system_freezing_cnt); | 141 | atomic_dec(&system_freezing_cnt); |
151 | 142 | ||
152 | freezer->state = 0; | 143 | freezer->state = 0; |
153 | 144 | ||
154 | spin_unlock_irq(&freezer->lock); | 145 | mutex_unlock(&freezer_mutex); |
155 | } | 146 | } |
156 | 147 | ||
157 | static void freezer_css_free(struct cgroup_subsys_state *css) | 148 | static void freezer_css_free(struct cgroup_subsys_state *css) |
@@ -175,7 +166,7 @@ static void freezer_attach(struct cgroup_subsys_state *new_css, | |||
175 | struct task_struct *task; | 166 | struct task_struct *task; |
176 | bool clear_frozen = false; | 167 | bool clear_frozen = false; |
177 | 168 | ||
178 | spin_lock_irq(&freezer->lock); | 169 | mutex_lock(&freezer_mutex); |
179 | 170 | ||
180 | /* | 171 | /* |
181 | * Make the new tasks conform to the current state of @new_css. | 172 | * Make the new tasks conform to the current state of @new_css. |
@@ -197,21 +188,13 @@ static void freezer_attach(struct cgroup_subsys_state *new_css, | |||
197 | } | 188 | } |
198 | } | 189 | } |
199 | 190 | ||
200 | spin_unlock_irq(&freezer->lock); | 191 | /* propagate FROZEN clearing upwards */ |
201 | |||
202 | /* | ||
203 | * Propagate FROZEN clearing upwards. We may race with | ||
204 | * update_if_frozen(), but as long as both work bottom-up, either | ||
205 | * update_if_frozen() sees child's FROZEN cleared or we clear the | ||
206 | * parent's FROZEN later. No parent w/ !FROZEN children can be | ||
207 | * left FROZEN. | ||
208 | */ | ||
209 | while (clear_frozen && (freezer = parent_freezer(freezer))) { | 192 | while (clear_frozen && (freezer = parent_freezer(freezer))) { |
210 | spin_lock_irq(&freezer->lock); | ||
211 | freezer->state &= ~CGROUP_FROZEN; | 193 | freezer->state &= ~CGROUP_FROZEN; |
212 | clear_frozen = freezer->state & CGROUP_FREEZING; | 194 | clear_frozen = freezer->state & CGROUP_FREEZING; |
213 | spin_unlock_irq(&freezer->lock); | ||
214 | } | 195 | } |
196 | |||
197 | mutex_unlock(&freezer_mutex); | ||
215 | } | 198 | } |
216 | 199 | ||
217 | /** | 200 | /** |
@@ -228,9 +211,6 @@ static void freezer_fork(struct task_struct *task) | |||
228 | { | 211 | { |
229 | struct freezer *freezer; | 212 | struct freezer *freezer; |
230 | 213 | ||
231 | rcu_read_lock(); | ||
232 | freezer = task_freezer(task); | ||
233 | |||
234 | /* | 214 | /* |
235 | * The root cgroup is non-freezable, so we can skip locking the | 215 | * The root cgroup is non-freezable, so we can skip locking the |
236 | * freezer. This is safe regardless of race with task migration. | 216 | * freezer. This is safe regardless of race with task migration. |
@@ -238,24 +218,18 @@ static void freezer_fork(struct task_struct *task) | |||
238 | * to do. If we lost and root is the new cgroup, noop is still the | 218 | * to do. If we lost and root is the new cgroup, noop is still the |
239 | * right thing to do. | 219 | * right thing to do. |
240 | */ | 220 | */ |
241 | if (!parent_freezer(freezer)) | 221 | if (task_css_is_root(task, freezer_cgrp_id)) |
242 | goto out; | 222 | return; |
243 | 223 | ||
244 | /* | 224 | mutex_lock(&freezer_mutex); |
245 | * Grab @freezer->lock and freeze @task after verifying @task still | 225 | rcu_read_lock(); |
246 | * belongs to @freezer and it's freezing. The former is for the | 226 | |
247 | * case where we have raced against task migration and lost and | 227 | freezer = task_freezer(task); |
248 | * @task is already in a different cgroup which may not be frozen. | 228 | if (freezer->state & CGROUP_FREEZING) |
249 | * This isn't strictly necessary as freeze_task() is allowed to be | ||
250 | * called spuriously but let's do it anyway for, if nothing else, | ||
251 | * documentation. | ||
252 | */ | ||
253 | spin_lock_irq(&freezer->lock); | ||
254 | if (freezer == task_freezer(task) && (freezer->state & CGROUP_FREEZING)) | ||
255 | freeze_task(task); | 229 | freeze_task(task); |
256 | spin_unlock_irq(&freezer->lock); | 230 | |
257 | out: | ||
258 | rcu_read_unlock(); | 231 | rcu_read_unlock(); |
232 | mutex_unlock(&freezer_mutex); | ||
259 | } | 233 | } |
260 | 234 | ||
261 | /** | 235 | /** |
@@ -281,22 +255,24 @@ static void update_if_frozen(struct cgroup_subsys_state *css) | |||
281 | struct css_task_iter it; | 255 | struct css_task_iter it; |
282 | struct task_struct *task; | 256 | struct task_struct *task; |
283 | 257 | ||
284 | WARN_ON_ONCE(!rcu_read_lock_held()); | 258 | lockdep_assert_held(&freezer_mutex); |
285 | |||
286 | spin_lock_irq(&freezer->lock); | ||
287 | 259 | ||
288 | if (!(freezer->state & CGROUP_FREEZING) || | 260 | if (!(freezer->state & CGROUP_FREEZING) || |
289 | (freezer->state & CGROUP_FROZEN)) | 261 | (freezer->state & CGROUP_FROZEN)) |
290 | goto out_unlock; | 262 | return; |
291 | 263 | ||
292 | /* are all (live) children frozen? */ | 264 | /* are all (live) children frozen? */ |
265 | rcu_read_lock(); | ||
293 | css_for_each_child(pos, css) { | 266 | css_for_each_child(pos, css) { |
294 | struct freezer *child = css_freezer(pos); | 267 | struct freezer *child = css_freezer(pos); |
295 | 268 | ||
296 | if ((child->state & CGROUP_FREEZER_ONLINE) && | 269 | if ((child->state & CGROUP_FREEZER_ONLINE) && |
297 | !(child->state & CGROUP_FROZEN)) | 270 | !(child->state & CGROUP_FROZEN)) { |
298 | goto out_unlock; | 271 | rcu_read_unlock(); |
272 | return; | ||
273 | } | ||
299 | } | 274 | } |
275 | rcu_read_unlock(); | ||
300 | 276 | ||
301 | /* are all tasks frozen? */ | 277 | /* are all tasks frozen? */ |
302 | css_task_iter_start(css, &it); | 278 | css_task_iter_start(css, &it); |
@@ -317,21 +293,29 @@ static void update_if_frozen(struct cgroup_subsys_state *css) | |||
317 | freezer->state |= CGROUP_FROZEN; | 293 | freezer->state |= CGROUP_FROZEN; |
318 | out_iter_end: | 294 | out_iter_end: |
319 | css_task_iter_end(&it); | 295 | css_task_iter_end(&it); |
320 | out_unlock: | ||
321 | spin_unlock_irq(&freezer->lock); | ||
322 | } | 296 | } |
323 | 297 | ||
324 | static int freezer_read(struct seq_file *m, void *v) | 298 | static int freezer_read(struct seq_file *m, void *v) |
325 | { | 299 | { |
326 | struct cgroup_subsys_state *css = seq_css(m), *pos; | 300 | struct cgroup_subsys_state *css = seq_css(m), *pos; |
327 | 301 | ||
302 | mutex_lock(&freezer_mutex); | ||
328 | rcu_read_lock(); | 303 | rcu_read_lock(); |
329 | 304 | ||
330 | /* update states bottom-up */ | 305 | /* update states bottom-up */ |
331 | css_for_each_descendant_post(pos, css) | 306 | css_for_each_descendant_post(pos, css) { |
307 | if (!css_tryget(pos)) | ||
308 | continue; | ||
309 | rcu_read_unlock(); | ||
310 | |||
332 | update_if_frozen(pos); | 311 | update_if_frozen(pos); |
333 | 312 | ||
313 | rcu_read_lock(); | ||
314 | css_put(pos); | ||
315 | } | ||
316 | |||
334 | rcu_read_unlock(); | 317 | rcu_read_unlock(); |
318 | mutex_unlock(&freezer_mutex); | ||
335 | 319 | ||
336 | seq_puts(m, freezer_state_strs(css_freezer(css)->state)); | 320 | seq_puts(m, freezer_state_strs(css_freezer(css)->state)); |
337 | seq_putc(m, '\n'); | 321 | seq_putc(m, '\n'); |
@@ -373,7 +357,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze, | |||
373 | unsigned int state) | 357 | unsigned int state) |
374 | { | 358 | { |
375 | /* also synchronizes against task migration, see freezer_attach() */ | 359 | /* also synchronizes against task migration, see freezer_attach() */ |
376 | lockdep_assert_held(&freezer->lock); | 360 | lockdep_assert_held(&freezer_mutex); |
377 | 361 | ||
378 | if (!(freezer->state & CGROUP_FREEZER_ONLINE)) | 362 | if (!(freezer->state & CGROUP_FREEZER_ONLINE)) |
379 | return; | 363 | return; |
@@ -414,31 +398,29 @@ static void freezer_change_state(struct freezer *freezer, bool freeze) | |||
414 | * descendant will try to inherit its parent's FREEZING state as | 398 | * descendant will try to inherit its parent's FREEZING state as |
415 | * CGROUP_FREEZING_PARENT. | 399 | * CGROUP_FREEZING_PARENT. |
416 | */ | 400 | */ |
401 | mutex_lock(&freezer_mutex); | ||
417 | rcu_read_lock(); | 402 | rcu_read_lock(); |
418 | css_for_each_descendant_pre(pos, &freezer->css) { | 403 | css_for_each_descendant_pre(pos, &freezer->css) { |
419 | struct freezer *pos_f = css_freezer(pos); | 404 | struct freezer *pos_f = css_freezer(pos); |
420 | struct freezer *parent = parent_freezer(pos_f); | 405 | struct freezer *parent = parent_freezer(pos_f); |
421 | 406 | ||
422 | spin_lock_irq(&pos_f->lock); | 407 | if (!css_tryget(pos)) |
408 | continue; | ||
409 | rcu_read_unlock(); | ||
423 | 410 | ||
424 | if (pos_f == freezer) { | 411 | if (pos_f == freezer) |
425 | freezer_apply_state(pos_f, freeze, | 412 | freezer_apply_state(pos_f, freeze, |
426 | CGROUP_FREEZING_SELF); | 413 | CGROUP_FREEZING_SELF); |
427 | } else { | 414 | else |
428 | /* | ||
429 | * Our update to @parent->state is already visible | ||
430 | * which is all we need. No need to lock @parent. | ||
431 | * For more info on synchronization, see | ||
432 | * freezer_post_create(). | ||
433 | */ | ||
434 | freezer_apply_state(pos_f, | 415 | freezer_apply_state(pos_f, |
435 | parent->state & CGROUP_FREEZING, | 416 | parent->state & CGROUP_FREEZING, |
436 | CGROUP_FREEZING_PARENT); | 417 | CGROUP_FREEZING_PARENT); |
437 | } | ||
438 | 418 | ||
439 | spin_unlock_irq(&pos_f->lock); | 419 | rcu_read_lock(); |
420 | css_put(pos); | ||
440 | } | 421 | } |
441 | rcu_read_unlock(); | 422 | rcu_read_unlock(); |
423 | mutex_unlock(&freezer_mutex); | ||
442 | } | 424 | } |
443 | 425 | ||
444 | static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft, | 426 | static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft, |
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 6cb20d2e7ee0..019d45008448 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c | |||
@@ -120,7 +120,7 @@ void context_tracking_user_enter(void) | |||
120 | * instead of preempt_schedule() to exit user context if needed before | 120 | * instead of preempt_schedule() to exit user context if needed before |
121 | * calling the scheduler. | 121 | * calling the scheduler. |
122 | */ | 122 | */ |
123 | asmlinkage void __sched notrace preempt_schedule_context(void) | 123 | asmlinkage __visible void __sched notrace preempt_schedule_context(void) |
124 | { | 124 | { |
125 | enum ctx_state prev_ctx; | 125 | enum ctx_state prev_ctx; |
126 | 126 | ||
diff --git a/kernel/cpu.c b/kernel/cpu.c index a9e710eef0e2..247979a1b815 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -726,10 +726,12 @@ void set_cpu_present(unsigned int cpu, bool present) | |||
726 | 726 | ||
727 | void set_cpu_online(unsigned int cpu, bool online) | 727 | void set_cpu_online(unsigned int cpu, bool online) |
728 | { | 728 | { |
729 | if (online) | 729 | if (online) { |
730 | cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits)); | 730 | cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits)); |
731 | else | 731 | cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits)); |
732 | } else { | ||
732 | cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits)); | 733 | cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits)); |
734 | } | ||
733 | } | 735 | } |
734 | 736 | ||
735 | void set_cpu_active(unsigned int cpu, bool active) | 737 | void set_cpu_active(unsigned int cpu, bool active) |
diff --git a/kernel/events/core.c b/kernel/events/core.c index f83a71a3e46d..440eefc67397 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -1443,6 +1443,11 @@ group_sched_out(struct perf_event *group_event, | |||
1443 | cpuctx->exclusive = 0; | 1443 | cpuctx->exclusive = 0; |
1444 | } | 1444 | } |
1445 | 1445 | ||
1446 | struct remove_event { | ||
1447 | struct perf_event *event; | ||
1448 | bool detach_group; | ||
1449 | }; | ||
1450 | |||
1446 | /* | 1451 | /* |
1447 | * Cross CPU call to remove a performance event | 1452 | * Cross CPU call to remove a performance event |
1448 | * | 1453 | * |
@@ -1451,12 +1456,15 @@ group_sched_out(struct perf_event *group_event, | |||
1451 | */ | 1456 | */ |
1452 | static int __perf_remove_from_context(void *info) | 1457 | static int __perf_remove_from_context(void *info) |
1453 | { | 1458 | { |
1454 | struct perf_event *event = info; | 1459 | struct remove_event *re = info; |
1460 | struct perf_event *event = re->event; | ||
1455 | struct perf_event_context *ctx = event->ctx; | 1461 | struct perf_event_context *ctx = event->ctx; |
1456 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | 1462 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); |
1457 | 1463 | ||
1458 | raw_spin_lock(&ctx->lock); | 1464 | raw_spin_lock(&ctx->lock); |
1459 | event_sched_out(event, cpuctx, ctx); | 1465 | event_sched_out(event, cpuctx, ctx); |
1466 | if (re->detach_group) | ||
1467 | perf_group_detach(event); | ||
1460 | list_del_event(event, ctx); | 1468 | list_del_event(event, ctx); |
1461 | if (!ctx->nr_events && cpuctx->task_ctx == ctx) { | 1469 | if (!ctx->nr_events && cpuctx->task_ctx == ctx) { |
1462 | ctx->is_active = 0; | 1470 | ctx->is_active = 0; |
@@ -1481,10 +1489,14 @@ static int __perf_remove_from_context(void *info) | |||
1481 | * When called from perf_event_exit_task, it's OK because the | 1489 | * When called from perf_event_exit_task, it's OK because the |
1482 | * context has been detached from its task. | 1490 | * context has been detached from its task. |
1483 | */ | 1491 | */ |
1484 | static void perf_remove_from_context(struct perf_event *event) | 1492 | static void perf_remove_from_context(struct perf_event *event, bool detach_group) |
1485 | { | 1493 | { |
1486 | struct perf_event_context *ctx = event->ctx; | 1494 | struct perf_event_context *ctx = event->ctx; |
1487 | struct task_struct *task = ctx->task; | 1495 | struct task_struct *task = ctx->task; |
1496 | struct remove_event re = { | ||
1497 | .event = event, | ||
1498 | .detach_group = detach_group, | ||
1499 | }; | ||
1488 | 1500 | ||
1489 | lockdep_assert_held(&ctx->mutex); | 1501 | lockdep_assert_held(&ctx->mutex); |
1490 | 1502 | ||
@@ -1493,12 +1505,12 @@ static void perf_remove_from_context(struct perf_event *event) | |||
1493 | * Per cpu events are removed via an smp call and | 1505 | * Per cpu events are removed via an smp call and |
1494 | * the removal is always successful. | 1506 | * the removal is always successful. |
1495 | */ | 1507 | */ |
1496 | cpu_function_call(event->cpu, __perf_remove_from_context, event); | 1508 | cpu_function_call(event->cpu, __perf_remove_from_context, &re); |
1497 | return; | 1509 | return; |
1498 | } | 1510 | } |
1499 | 1511 | ||
1500 | retry: | 1512 | retry: |
1501 | if (!task_function_call(task, __perf_remove_from_context, event)) | 1513 | if (!task_function_call(task, __perf_remove_from_context, &re)) |
1502 | return; | 1514 | return; |
1503 | 1515 | ||
1504 | raw_spin_lock_irq(&ctx->lock); | 1516 | raw_spin_lock_irq(&ctx->lock); |
@@ -1515,6 +1527,8 @@ retry: | |||
1515 | * Since the task isn't running, its safe to remove the event, us | 1527 | * Since the task isn't running, its safe to remove the event, us |
1516 | * holding the ctx->lock ensures the task won't get scheduled in. | 1528 | * holding the ctx->lock ensures the task won't get scheduled in. |
1517 | */ | 1529 | */ |
1530 | if (detach_group) | ||
1531 | perf_group_detach(event); | ||
1518 | list_del_event(event, ctx); | 1532 | list_del_event(event, ctx); |
1519 | raw_spin_unlock_irq(&ctx->lock); | 1533 | raw_spin_unlock_irq(&ctx->lock); |
1520 | } | 1534 | } |
@@ -3178,7 +3192,8 @@ static void free_event_rcu(struct rcu_head *head) | |||
3178 | } | 3192 | } |
3179 | 3193 | ||
3180 | static void ring_buffer_put(struct ring_buffer *rb); | 3194 | static void ring_buffer_put(struct ring_buffer *rb); |
3181 | static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb); | 3195 | static void ring_buffer_attach(struct perf_event *event, |
3196 | struct ring_buffer *rb); | ||
3182 | 3197 | ||
3183 | static void unaccount_event_cpu(struct perf_event *event, int cpu) | 3198 | static void unaccount_event_cpu(struct perf_event *event, int cpu) |
3184 | { | 3199 | { |
@@ -3238,8 +3253,6 @@ static void free_event(struct perf_event *event) | |||
3238 | unaccount_event(event); | 3253 | unaccount_event(event); |
3239 | 3254 | ||
3240 | if (event->rb) { | 3255 | if (event->rb) { |
3241 | struct ring_buffer *rb; | ||
3242 | |||
3243 | /* | 3256 | /* |
3244 | * Can happen when we close an event with re-directed output. | 3257 | * Can happen when we close an event with re-directed output. |
3245 | * | 3258 | * |
@@ -3247,12 +3260,7 @@ static void free_event(struct perf_event *event) | |||
3247 | * over us; possibly making our ring_buffer_put() the last. | 3260 | * over us; possibly making our ring_buffer_put() the last. |
3248 | */ | 3261 | */ |
3249 | mutex_lock(&event->mmap_mutex); | 3262 | mutex_lock(&event->mmap_mutex); |
3250 | rb = event->rb; | 3263 | ring_buffer_attach(event, NULL); |
3251 | if (rb) { | ||
3252 | rcu_assign_pointer(event->rb, NULL); | ||
3253 | ring_buffer_detach(event, rb); | ||
3254 | ring_buffer_put(rb); /* could be last */ | ||
3255 | } | ||
3256 | mutex_unlock(&event->mmap_mutex); | 3264 | mutex_unlock(&event->mmap_mutex); |
3257 | } | 3265 | } |
3258 | 3266 | ||
@@ -3281,10 +3289,7 @@ int perf_event_release_kernel(struct perf_event *event) | |||
3281 | * to trigger the AB-BA case. | 3289 | * to trigger the AB-BA case. |
3282 | */ | 3290 | */ |
3283 | mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); | 3291 | mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); |
3284 | raw_spin_lock_irq(&ctx->lock); | 3292 | perf_remove_from_context(event, true); |
3285 | perf_group_detach(event); | ||
3286 | raw_spin_unlock_irq(&ctx->lock); | ||
3287 | perf_remove_from_context(event); | ||
3288 | mutex_unlock(&ctx->mutex); | 3293 | mutex_unlock(&ctx->mutex); |
3289 | 3294 | ||
3290 | free_event(event); | 3295 | free_event(event); |
@@ -3839,28 +3844,47 @@ unlock: | |||
3839 | static void ring_buffer_attach(struct perf_event *event, | 3844 | static void ring_buffer_attach(struct perf_event *event, |
3840 | struct ring_buffer *rb) | 3845 | struct ring_buffer *rb) |
3841 | { | 3846 | { |
3847 | struct ring_buffer *old_rb = NULL; | ||
3842 | unsigned long flags; | 3848 | unsigned long flags; |
3843 | 3849 | ||
3844 | if (!list_empty(&event->rb_entry)) | 3850 | if (event->rb) { |
3845 | return; | 3851 | /* |
3852 | * Should be impossible, we set this when removing | ||
3853 | * event->rb_entry and wait/clear when adding event->rb_entry. | ||
3854 | */ | ||
3855 | WARN_ON_ONCE(event->rcu_pending); | ||
3846 | 3856 | ||
3847 | spin_lock_irqsave(&rb->event_lock, flags); | 3857 | old_rb = event->rb; |
3848 | if (list_empty(&event->rb_entry)) | 3858 | event->rcu_batches = get_state_synchronize_rcu(); |
3849 | list_add(&event->rb_entry, &rb->event_list); | 3859 | event->rcu_pending = 1; |
3850 | spin_unlock_irqrestore(&rb->event_lock, flags); | ||
3851 | } | ||
3852 | 3860 | ||
3853 | static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb) | 3861 | spin_lock_irqsave(&old_rb->event_lock, flags); |
3854 | { | 3862 | list_del_rcu(&event->rb_entry); |
3855 | unsigned long flags; | 3863 | spin_unlock_irqrestore(&old_rb->event_lock, flags); |
3864 | } | ||
3856 | 3865 | ||
3857 | if (list_empty(&event->rb_entry)) | 3866 | if (event->rcu_pending && rb) { |
3858 | return; | 3867 | cond_synchronize_rcu(event->rcu_batches); |
3868 | event->rcu_pending = 0; | ||
3869 | } | ||
3870 | |||
3871 | if (rb) { | ||
3872 | spin_lock_irqsave(&rb->event_lock, flags); | ||
3873 | list_add_rcu(&event->rb_entry, &rb->event_list); | ||
3874 | spin_unlock_irqrestore(&rb->event_lock, flags); | ||
3875 | } | ||
3876 | |||
3877 | rcu_assign_pointer(event->rb, rb); | ||
3859 | 3878 | ||
3860 | spin_lock_irqsave(&rb->event_lock, flags); | 3879 | if (old_rb) { |
3861 | list_del_init(&event->rb_entry); | 3880 | ring_buffer_put(old_rb); |
3862 | wake_up_all(&event->waitq); | 3881 | /* |
3863 | spin_unlock_irqrestore(&rb->event_lock, flags); | 3882 | * Since we detached before setting the new rb, so that we |
3883 | * could attach the new rb, we could have missed a wakeup. | ||
3884 | * Provide it now. | ||
3885 | */ | ||
3886 | wake_up_all(&event->waitq); | ||
3887 | } | ||
3864 | } | 3888 | } |
3865 | 3889 | ||
3866 | static void ring_buffer_wakeup(struct perf_event *event) | 3890 | static void ring_buffer_wakeup(struct perf_event *event) |
@@ -3929,7 +3953,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) | |||
3929 | { | 3953 | { |
3930 | struct perf_event *event = vma->vm_file->private_data; | 3954 | struct perf_event *event = vma->vm_file->private_data; |
3931 | 3955 | ||
3932 | struct ring_buffer *rb = event->rb; | 3956 | struct ring_buffer *rb = ring_buffer_get(event); |
3933 | struct user_struct *mmap_user = rb->mmap_user; | 3957 | struct user_struct *mmap_user = rb->mmap_user; |
3934 | int mmap_locked = rb->mmap_locked; | 3958 | int mmap_locked = rb->mmap_locked; |
3935 | unsigned long size = perf_data_size(rb); | 3959 | unsigned long size = perf_data_size(rb); |
@@ -3937,18 +3961,14 @@ static void perf_mmap_close(struct vm_area_struct *vma) | |||
3937 | atomic_dec(&rb->mmap_count); | 3961 | atomic_dec(&rb->mmap_count); |
3938 | 3962 | ||
3939 | if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) | 3963 | if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) |
3940 | return; | 3964 | goto out_put; |
3941 | 3965 | ||
3942 | /* Detach current event from the buffer. */ | 3966 | ring_buffer_attach(event, NULL); |
3943 | rcu_assign_pointer(event->rb, NULL); | ||
3944 | ring_buffer_detach(event, rb); | ||
3945 | mutex_unlock(&event->mmap_mutex); | 3967 | mutex_unlock(&event->mmap_mutex); |
3946 | 3968 | ||
3947 | /* If there's still other mmap()s of this buffer, we're done. */ | 3969 | /* If there's still other mmap()s of this buffer, we're done. */ |
3948 | if (atomic_read(&rb->mmap_count)) { | 3970 | if (atomic_read(&rb->mmap_count)) |
3949 | ring_buffer_put(rb); /* can't be last */ | 3971 | goto out_put; |
3950 | return; | ||
3951 | } | ||
3952 | 3972 | ||
3953 | /* | 3973 | /* |
3954 | * No other mmap()s, detach from all other events that might redirect | 3974 | * No other mmap()s, detach from all other events that might redirect |
@@ -3978,11 +3998,9 @@ again: | |||
3978 | * still restart the iteration to make sure we're not now | 3998 | * still restart the iteration to make sure we're not now |
3979 | * iterating the wrong list. | 3999 | * iterating the wrong list. |
3980 | */ | 4000 | */ |
3981 | if (event->rb == rb) { | 4001 | if (event->rb == rb) |
3982 | rcu_assign_pointer(event->rb, NULL); | 4002 | ring_buffer_attach(event, NULL); |
3983 | ring_buffer_detach(event, rb); | 4003 | |
3984 | ring_buffer_put(rb); /* can't be last, we still have one */ | ||
3985 | } | ||
3986 | mutex_unlock(&event->mmap_mutex); | 4004 | mutex_unlock(&event->mmap_mutex); |
3987 | put_event(event); | 4005 | put_event(event); |
3988 | 4006 | ||
@@ -4007,6 +4025,7 @@ again: | |||
4007 | vma->vm_mm->pinned_vm -= mmap_locked; | 4025 | vma->vm_mm->pinned_vm -= mmap_locked; |
4008 | free_uid(mmap_user); | 4026 | free_uid(mmap_user); |
4009 | 4027 | ||
4028 | out_put: | ||
4010 | ring_buffer_put(rb); /* could be last */ | 4029 | ring_buffer_put(rb); /* could be last */ |
4011 | } | 4030 | } |
4012 | 4031 | ||
@@ -4124,7 +4143,6 @@ again: | |||
4124 | vma->vm_mm->pinned_vm += extra; | 4143 | vma->vm_mm->pinned_vm += extra; |
4125 | 4144 | ||
4126 | ring_buffer_attach(event, rb); | 4145 | ring_buffer_attach(event, rb); |
4127 | rcu_assign_pointer(event->rb, rb); | ||
4128 | 4146 | ||
4129 | perf_event_init_userpage(event); | 4147 | perf_event_init_userpage(event); |
4130 | perf_event_update_userpage(event); | 4148 | perf_event_update_userpage(event); |
@@ -5408,6 +5426,9 @@ struct swevent_htable { | |||
5408 | 5426 | ||
5409 | /* Recursion avoidance in each contexts */ | 5427 | /* Recursion avoidance in each contexts */ |
5410 | int recursion[PERF_NR_CONTEXTS]; | 5428 | int recursion[PERF_NR_CONTEXTS]; |
5429 | |||
5430 | /* Keeps track of cpu being initialized/exited */ | ||
5431 | bool online; | ||
5411 | }; | 5432 | }; |
5412 | 5433 | ||
5413 | static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); | 5434 | static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); |
@@ -5654,8 +5675,14 @@ static int perf_swevent_add(struct perf_event *event, int flags) | |||
5654 | hwc->state = !(flags & PERF_EF_START); | 5675 | hwc->state = !(flags & PERF_EF_START); |
5655 | 5676 | ||
5656 | head = find_swevent_head(swhash, event); | 5677 | head = find_swevent_head(swhash, event); |
5657 | if (WARN_ON_ONCE(!head)) | 5678 | if (!head) { |
5679 | /* | ||
5680 | * We can race with cpu hotplug code. Do not | ||
5681 | * WARN if the cpu just got unplugged. | ||
5682 | */ | ||
5683 | WARN_ON_ONCE(swhash->online); | ||
5658 | return -EINVAL; | 5684 | return -EINVAL; |
5685 | } | ||
5659 | 5686 | ||
5660 | hlist_add_head_rcu(&event->hlist_entry, head); | 5687 | hlist_add_head_rcu(&event->hlist_entry, head); |
5661 | 5688 | ||
@@ -6914,7 +6941,7 @@ err_size: | |||
6914 | static int | 6941 | static int |
6915 | perf_event_set_output(struct perf_event *event, struct perf_event *output_event) | 6942 | perf_event_set_output(struct perf_event *event, struct perf_event *output_event) |
6916 | { | 6943 | { |
6917 | struct ring_buffer *rb = NULL, *old_rb = NULL; | 6944 | struct ring_buffer *rb = NULL; |
6918 | int ret = -EINVAL; | 6945 | int ret = -EINVAL; |
6919 | 6946 | ||
6920 | if (!output_event) | 6947 | if (!output_event) |
@@ -6942,8 +6969,6 @@ set: | |||
6942 | if (atomic_read(&event->mmap_count)) | 6969 | if (atomic_read(&event->mmap_count)) |
6943 | goto unlock; | 6970 | goto unlock; |
6944 | 6971 | ||
6945 | old_rb = event->rb; | ||
6946 | |||
6947 | if (output_event) { | 6972 | if (output_event) { |
6948 | /* get the rb we want to redirect to */ | 6973 | /* get the rb we want to redirect to */ |
6949 | rb = ring_buffer_get(output_event); | 6974 | rb = ring_buffer_get(output_event); |
@@ -6951,23 +6976,7 @@ set: | |||
6951 | goto unlock; | 6976 | goto unlock; |
6952 | } | 6977 | } |
6953 | 6978 | ||
6954 | if (old_rb) | 6979 | ring_buffer_attach(event, rb); |
6955 | ring_buffer_detach(event, old_rb); | ||
6956 | |||
6957 | if (rb) | ||
6958 | ring_buffer_attach(event, rb); | ||
6959 | |||
6960 | rcu_assign_pointer(event->rb, rb); | ||
6961 | |||
6962 | if (old_rb) { | ||
6963 | ring_buffer_put(old_rb); | ||
6964 | /* | ||
6965 | * Since we detached before setting the new rb, so that we | ||
6966 | * could attach the new rb, we could have missed a wakeup. | ||
6967 | * Provide it now. | ||
6968 | */ | ||
6969 | wake_up_all(&event->waitq); | ||
6970 | } | ||
6971 | 6980 | ||
6972 | ret = 0; | 6981 | ret = 0; |
6973 | unlock: | 6982 | unlock: |
@@ -7018,6 +7027,9 @@ SYSCALL_DEFINE5(perf_event_open, | |||
7018 | if (attr.freq) { | 7027 | if (attr.freq) { |
7019 | if (attr.sample_freq > sysctl_perf_event_sample_rate) | 7028 | if (attr.sample_freq > sysctl_perf_event_sample_rate) |
7020 | return -EINVAL; | 7029 | return -EINVAL; |
7030 | } else { | ||
7031 | if (attr.sample_period & (1ULL << 63)) | ||
7032 | return -EINVAL; | ||
7021 | } | 7033 | } |
7022 | 7034 | ||
7023 | /* | 7035 | /* |
@@ -7165,7 +7177,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
7165 | struct perf_event_context *gctx = group_leader->ctx; | 7177 | struct perf_event_context *gctx = group_leader->ctx; |
7166 | 7178 | ||
7167 | mutex_lock(&gctx->mutex); | 7179 | mutex_lock(&gctx->mutex); |
7168 | perf_remove_from_context(group_leader); | 7180 | perf_remove_from_context(group_leader, false); |
7169 | 7181 | ||
7170 | /* | 7182 | /* |
7171 | * Removing from the context ends up with disabled | 7183 | * Removing from the context ends up with disabled |
@@ -7175,7 +7187,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
7175 | perf_event__state_init(group_leader); | 7187 | perf_event__state_init(group_leader); |
7176 | list_for_each_entry(sibling, &group_leader->sibling_list, | 7188 | list_for_each_entry(sibling, &group_leader->sibling_list, |
7177 | group_entry) { | 7189 | group_entry) { |
7178 | perf_remove_from_context(sibling); | 7190 | perf_remove_from_context(sibling, false); |
7179 | perf_event__state_init(sibling); | 7191 | perf_event__state_init(sibling); |
7180 | put_ctx(gctx); | 7192 | put_ctx(gctx); |
7181 | } | 7193 | } |
@@ -7305,7 +7317,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) | |||
7305 | mutex_lock(&src_ctx->mutex); | 7317 | mutex_lock(&src_ctx->mutex); |
7306 | list_for_each_entry_safe(event, tmp, &src_ctx->event_list, | 7318 | list_for_each_entry_safe(event, tmp, &src_ctx->event_list, |
7307 | event_entry) { | 7319 | event_entry) { |
7308 | perf_remove_from_context(event); | 7320 | perf_remove_from_context(event, false); |
7309 | unaccount_event_cpu(event, src_cpu); | 7321 | unaccount_event_cpu(event, src_cpu); |
7310 | put_ctx(src_ctx); | 7322 | put_ctx(src_ctx); |
7311 | list_add(&event->migrate_entry, &events); | 7323 | list_add(&event->migrate_entry, &events); |
@@ -7367,13 +7379,7 @@ __perf_event_exit_task(struct perf_event *child_event, | |||
7367 | struct perf_event_context *child_ctx, | 7379 | struct perf_event_context *child_ctx, |
7368 | struct task_struct *child) | 7380 | struct task_struct *child) |
7369 | { | 7381 | { |
7370 | if (child_event->parent) { | 7382 | perf_remove_from_context(child_event, !!child_event->parent); |
7371 | raw_spin_lock_irq(&child_ctx->lock); | ||
7372 | perf_group_detach(child_event); | ||
7373 | raw_spin_unlock_irq(&child_ctx->lock); | ||
7374 | } | ||
7375 | |||
7376 | perf_remove_from_context(child_event); | ||
7377 | 7383 | ||
7378 | /* | 7384 | /* |
7379 | * It can happen that the parent exits first, and has events | 7385 | * It can happen that the parent exits first, and has events |
@@ -7724,6 +7730,8 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
7724 | * swapped under us. | 7730 | * swapped under us. |
7725 | */ | 7731 | */ |
7726 | parent_ctx = perf_pin_task_context(parent, ctxn); | 7732 | parent_ctx = perf_pin_task_context(parent, ctxn); |
7733 | if (!parent_ctx) | ||
7734 | return 0; | ||
7727 | 7735 | ||
7728 | /* | 7736 | /* |
7729 | * No need to check if parent_ctx != NULL here; since we saw | 7737 | * No need to check if parent_ctx != NULL here; since we saw |
@@ -7835,6 +7843,7 @@ static void perf_event_init_cpu(int cpu) | |||
7835 | struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); | 7843 | struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); |
7836 | 7844 | ||
7837 | mutex_lock(&swhash->hlist_mutex); | 7845 | mutex_lock(&swhash->hlist_mutex); |
7846 | swhash->online = true; | ||
7838 | if (swhash->hlist_refcount > 0) { | 7847 | if (swhash->hlist_refcount > 0) { |
7839 | struct swevent_hlist *hlist; | 7848 | struct swevent_hlist *hlist; |
7840 | 7849 | ||
@@ -7857,14 +7866,14 @@ static void perf_pmu_rotate_stop(struct pmu *pmu) | |||
7857 | 7866 | ||
7858 | static void __perf_event_exit_context(void *__info) | 7867 | static void __perf_event_exit_context(void *__info) |
7859 | { | 7868 | { |
7869 | struct remove_event re = { .detach_group = false }; | ||
7860 | struct perf_event_context *ctx = __info; | 7870 | struct perf_event_context *ctx = __info; |
7861 | struct perf_event *event; | ||
7862 | 7871 | ||
7863 | perf_pmu_rotate_stop(ctx->pmu); | 7872 | perf_pmu_rotate_stop(ctx->pmu); |
7864 | 7873 | ||
7865 | rcu_read_lock(); | 7874 | rcu_read_lock(); |
7866 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) | 7875 | list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry) |
7867 | __perf_remove_from_context(event); | 7876 | __perf_remove_from_context(&re); |
7868 | rcu_read_unlock(); | 7877 | rcu_read_unlock(); |
7869 | } | 7878 | } |
7870 | 7879 | ||
@@ -7892,6 +7901,7 @@ static void perf_event_exit_cpu(int cpu) | |||
7892 | perf_event_exit_cpu_context(cpu); | 7901 | perf_event_exit_cpu_context(cpu); |
7893 | 7902 | ||
7894 | mutex_lock(&swhash->hlist_mutex); | 7903 | mutex_lock(&swhash->hlist_mutex); |
7904 | swhash->online = false; | ||
7895 | swevent_hlist_release(swhash); | 7905 | swevent_hlist_release(swhash); |
7896 | mutex_unlock(&swhash->hlist_mutex); | 7906 | mutex_unlock(&swhash->hlist_mutex); |
7897 | } | 7907 | } |
diff --git a/kernel/futex.c b/kernel/futex.c index 5f589279e462..81dbe773ce4c 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -745,7 +745,8 @@ void exit_pi_state_list(struct task_struct *curr) | |||
745 | 745 | ||
746 | static int | 746 | static int |
747 | lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | 747 | lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, |
748 | union futex_key *key, struct futex_pi_state **ps) | 748 | union futex_key *key, struct futex_pi_state **ps, |
749 | struct task_struct *task) | ||
749 | { | 750 | { |
750 | struct futex_pi_state *pi_state = NULL; | 751 | struct futex_pi_state *pi_state = NULL; |
751 | struct futex_q *this, *next; | 752 | struct futex_q *this, *next; |
@@ -786,6 +787,16 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |||
786 | return -EINVAL; | 787 | return -EINVAL; |
787 | } | 788 | } |
788 | 789 | ||
790 | /* | ||
791 | * Protect against a corrupted uval. If uval | ||
792 | * is 0x80000000 then pid is 0 and the waiter | ||
793 | * bit is set. So the deadlock check in the | ||
794 | * calling code has failed and we did not fall | ||
795 | * into the check above due to !pid. | ||
796 | */ | ||
797 | if (task && pi_state->owner == task) | ||
798 | return -EDEADLK; | ||
799 | |||
789 | atomic_inc(&pi_state->refcount); | 800 | atomic_inc(&pi_state->refcount); |
790 | *ps = pi_state; | 801 | *ps = pi_state; |
791 | 802 | ||
@@ -803,6 +814,11 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |||
803 | if (!p) | 814 | if (!p) |
804 | return -ESRCH; | 815 | return -ESRCH; |
805 | 816 | ||
817 | if (!p->mm) { | ||
818 | put_task_struct(p); | ||
819 | return -EPERM; | ||
820 | } | ||
821 | |||
806 | /* | 822 | /* |
807 | * We need to look at the task state flags to figure out, | 823 | * We need to look at the task state flags to figure out, |
808 | * whether the task is exiting. To protect against the do_exit | 824 | * whether the task is exiting. To protect against the do_exit |
@@ -935,7 +951,7 @@ retry: | |||
935 | * We dont have the lock. Look up the PI state (or create it if | 951 | * We dont have the lock. Look up the PI state (or create it if |
936 | * we are the first waiter): | 952 | * we are the first waiter): |
937 | */ | 953 | */ |
938 | ret = lookup_pi_state(uval, hb, key, ps); | 954 | ret = lookup_pi_state(uval, hb, key, ps, task); |
939 | 955 | ||
940 | if (unlikely(ret)) { | 956 | if (unlikely(ret)) { |
941 | switch (ret) { | 957 | switch (ret) { |
@@ -1347,7 +1363,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, | |||
1347 | * | 1363 | * |
1348 | * Return: | 1364 | * Return: |
1349 | * 0 - failed to acquire the lock atomically; | 1365 | * 0 - failed to acquire the lock atomically; |
1350 | * 1 - acquired the lock; | 1366 | * >0 - acquired the lock, return value is vpid of the top_waiter |
1351 | * <0 - error | 1367 | * <0 - error |
1352 | */ | 1368 | */ |
1353 | static int futex_proxy_trylock_atomic(u32 __user *pifutex, | 1369 | static int futex_proxy_trylock_atomic(u32 __user *pifutex, |
@@ -1358,7 +1374,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, | |||
1358 | { | 1374 | { |
1359 | struct futex_q *top_waiter = NULL; | 1375 | struct futex_q *top_waiter = NULL; |
1360 | u32 curval; | 1376 | u32 curval; |
1361 | int ret; | 1377 | int ret, vpid; |
1362 | 1378 | ||
1363 | if (get_futex_value_locked(&curval, pifutex)) | 1379 | if (get_futex_value_locked(&curval, pifutex)) |
1364 | return -EFAULT; | 1380 | return -EFAULT; |
@@ -1386,11 +1402,13 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, | |||
1386 | * the contended case or if set_waiters is 1. The pi_state is returned | 1402 | * the contended case or if set_waiters is 1. The pi_state is returned |
1387 | * in ps in contended cases. | 1403 | * in ps in contended cases. |
1388 | */ | 1404 | */ |
1405 | vpid = task_pid_vnr(top_waiter->task); | ||
1389 | ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, | 1406 | ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, |
1390 | set_waiters); | 1407 | set_waiters); |
1391 | if (ret == 1) | 1408 | if (ret == 1) { |
1392 | requeue_pi_wake_futex(top_waiter, key2, hb2); | 1409 | requeue_pi_wake_futex(top_waiter, key2, hb2); |
1393 | 1410 | return vpid; | |
1411 | } | ||
1394 | return ret; | 1412 | return ret; |
1395 | } | 1413 | } |
1396 | 1414 | ||
@@ -1421,7 +1439,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, | |||
1421 | struct futex_pi_state *pi_state = NULL; | 1439 | struct futex_pi_state *pi_state = NULL; |
1422 | struct futex_hash_bucket *hb1, *hb2; | 1440 | struct futex_hash_bucket *hb1, *hb2; |
1423 | struct futex_q *this, *next; | 1441 | struct futex_q *this, *next; |
1424 | u32 curval2; | ||
1425 | 1442 | ||
1426 | if (requeue_pi) { | 1443 | if (requeue_pi) { |
1427 | /* | 1444 | /* |
@@ -1509,16 +1526,25 @@ retry_private: | |||
1509 | * At this point the top_waiter has either taken uaddr2 or is | 1526 | * At this point the top_waiter has either taken uaddr2 or is |
1510 | * waiting on it. If the former, then the pi_state will not | 1527 | * waiting on it. If the former, then the pi_state will not |
1511 | * exist yet, look it up one more time to ensure we have a | 1528 | * exist yet, look it up one more time to ensure we have a |
1512 | * reference to it. | 1529 | * reference to it. If the lock was taken, ret contains the |
1530 | * vpid of the top waiter task. | ||
1513 | */ | 1531 | */ |
1514 | if (ret == 1) { | 1532 | if (ret > 0) { |
1515 | WARN_ON(pi_state); | 1533 | WARN_ON(pi_state); |
1516 | drop_count++; | 1534 | drop_count++; |
1517 | task_count++; | 1535 | task_count++; |
1518 | ret = get_futex_value_locked(&curval2, uaddr2); | 1536 | /* |
1519 | if (!ret) | 1537 | * If we acquired the lock, then the user |
1520 | ret = lookup_pi_state(curval2, hb2, &key2, | 1538 | * space value of uaddr2 should be vpid. It |
1521 | &pi_state); | 1539 | * cannot be changed by the top waiter as it |
1540 | * is blocked on hb2 lock if it tries to do | ||
1541 | * so. If something fiddled with it behind our | ||
1542 | * back the pi state lookup might unearth | ||
1543 | * it. So we rather use the known value than | ||
1544 | * rereading and handing potential crap to | ||
1545 | * lookup_pi_state. | ||
1546 | */ | ||
1547 | ret = lookup_pi_state(ret, hb2, &key2, &pi_state, NULL); | ||
1522 | } | 1548 | } |
1523 | 1549 | ||
1524 | switch (ret) { | 1550 | switch (ret) { |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index d55092ceee29..e0501fe7140d 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -234,6 +234,11 @@ again: | |||
234 | goto again; | 234 | goto again; |
235 | } | 235 | } |
236 | timer->base = new_base; | 236 | timer->base = new_base; |
237 | } else { | ||
238 | if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { | ||
239 | cpu = this_cpu; | ||
240 | goto again; | ||
241 | } | ||
237 | } | 242 | } |
238 | return new_base; | 243 | return new_base; |
239 | } | 244 | } |
@@ -569,6 +574,23 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) | |||
569 | 574 | ||
570 | cpu_base->expires_next.tv64 = expires_next.tv64; | 575 | cpu_base->expires_next.tv64 = expires_next.tv64; |
571 | 576 | ||
577 | /* | ||
578 | * If a hang was detected in the last timer interrupt then we | ||
579 | * leave the hang delay active in the hardware. We want the | ||
580 | * system to make progress. That also prevents the following | ||
581 | * scenario: | ||
582 | * T1 expires 50ms from now | ||
583 | * T2 expires 5s from now | ||
584 | * | ||
585 | * T1 is removed, so this code is called and would reprogram | ||
586 | * the hardware to 5s from now. Any hrtimer_start after that | ||
587 | * will not reprogram the hardware due to hang_detected being | ||
588 | * set. So we'd effectively block all timers until the T2 event | ||
589 | * fires. | ||
590 | */ | ||
591 | if (cpu_base->hang_detected) | ||
592 | return; | ||
593 | |||
572 | if (cpu_base->expires_next.tv64 != KTIME_MAX) | 594 | if (cpu_base->expires_next.tv64 != KTIME_MAX) |
573 | tick_program_event(cpu_base->expires_next, 1); | 595 | tick_program_event(cpu_base->expires_next, 1); |
574 | } | 596 | } |
@@ -968,11 +990,8 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, | |||
968 | /* Remove an active timer from the queue: */ | 990 | /* Remove an active timer from the queue: */ |
969 | ret = remove_hrtimer(timer, base); | 991 | ret = remove_hrtimer(timer, base); |
970 | 992 | ||
971 | /* Switch the timer base, if necessary: */ | ||
972 | new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); | ||
973 | |||
974 | if (mode & HRTIMER_MODE_REL) { | 993 | if (mode & HRTIMER_MODE_REL) { |
975 | tim = ktime_add_safe(tim, new_base->get_time()); | 994 | tim = ktime_add_safe(tim, base->get_time()); |
976 | /* | 995 | /* |
977 | * CONFIG_TIME_LOW_RES is a temporary way for architectures | 996 | * CONFIG_TIME_LOW_RES is a temporary way for architectures |
978 | * to signal that they simply return xtime in | 997 | * to signal that they simply return xtime in |
@@ -987,6 +1006,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, | |||
987 | 1006 | ||
988 | hrtimer_set_expires_range_ns(timer, tim, delta_ns); | 1007 | hrtimer_set_expires_range_ns(timer, tim, delta_ns); |
989 | 1008 | ||
1009 | /* Switch the timer base, if necessary: */ | ||
1010 | new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); | ||
1011 | |||
990 | timer_stats_hrtimer_set_start_info(timer); | 1012 | timer_stats_hrtimer_set_start_info(timer); |
991 | 1013 | ||
992 | leftmost = enqueue_hrtimer(timer, new_base); | 1014 | leftmost = enqueue_hrtimer(timer, new_base); |
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index a7174617616b..bb07f2928f4b 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c | |||
@@ -363,6 +363,13 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, | |||
363 | if (from > irq) | 363 | if (from > irq) |
364 | return -EINVAL; | 364 | return -EINVAL; |
365 | from = irq; | 365 | from = irq; |
366 | } else { | ||
367 | /* | ||
368 | * For interrupts which are freely allocated the | ||
369 | * architecture can force a lower bound to the @from | ||
370 | * argument. x86 uses this to exclude the GSI space. | ||
371 | */ | ||
372 | from = arch_dynirq_lower_bound(from); | ||
366 | } | 373 | } |
367 | 374 | ||
368 | mutex_lock(&sparse_irq_lock); | 375 | mutex_lock(&sparse_irq_lock); |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 2486a4c1a710..d34131ca372b 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -180,7 +180,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, | |||
180 | struct irq_chip *chip = irq_data_get_irq_chip(data); | 180 | struct irq_chip *chip = irq_data_get_irq_chip(data); |
181 | int ret; | 181 | int ret; |
182 | 182 | ||
183 | ret = chip->irq_set_affinity(data, mask, false); | 183 | ret = chip->irq_set_affinity(data, mask, force); |
184 | switch (ret) { | 184 | switch (ret) { |
185 | case IRQ_SET_MASK_OK: | 185 | case IRQ_SET_MASK_OK: |
186 | cpumask_copy(data->affinity, mask); | 186 | cpumask_copy(data->affinity, mask); |
@@ -192,7 +192,8 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, | |||
192 | return ret; | 192 | return ret; |
193 | } | 193 | } |
194 | 194 | ||
195 | int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) | 195 | int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, |
196 | bool force) | ||
196 | { | 197 | { |
197 | struct irq_chip *chip = irq_data_get_irq_chip(data); | 198 | struct irq_chip *chip = irq_data_get_irq_chip(data); |
198 | struct irq_desc *desc = irq_data_to_desc(data); | 199 | struct irq_desc *desc = irq_data_to_desc(data); |
@@ -202,7 +203,7 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) | |||
202 | return -EINVAL; | 203 | return -EINVAL; |
203 | 204 | ||
204 | if (irq_can_move_pcntxt(data)) { | 205 | if (irq_can_move_pcntxt(data)) { |
205 | ret = irq_do_set_affinity(data, mask, false); | 206 | ret = irq_do_set_affinity(data, mask, force); |
206 | } else { | 207 | } else { |
207 | irqd_set_move_pending(data); | 208 | irqd_set_move_pending(data); |
208 | irq_copy_pending(desc, mask); | 209 | irq_copy_pending(desc, mask); |
@@ -217,13 +218,7 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) | |||
217 | return ret; | 218 | return ret; |
218 | } | 219 | } |
219 | 220 | ||
220 | /** | 221 | int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force) |
221 | * irq_set_affinity - Set the irq affinity of a given irq | ||
222 | * @irq: Interrupt to set affinity | ||
223 | * @mask: cpumask | ||
224 | * | ||
225 | */ | ||
226 | int irq_set_affinity(unsigned int irq, const struct cpumask *mask) | ||
227 | { | 222 | { |
228 | struct irq_desc *desc = irq_to_desc(irq); | 223 | struct irq_desc *desc = irq_to_desc(irq); |
229 | unsigned long flags; | 224 | unsigned long flags; |
@@ -233,7 +228,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
233 | return -EINVAL; | 228 | return -EINVAL; |
234 | 229 | ||
235 | raw_spin_lock_irqsave(&desc->lock, flags); | 230 | raw_spin_lock_irqsave(&desc->lock, flags); |
236 | ret = __irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask); | 231 | ret = irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask, force); |
237 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 232 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
238 | return ret; | 233 | return ret; |
239 | } | 234 | } |
diff --git a/kernel/kexec.c b/kernel/kexec.c index c8380ad203bc..28c57069ef68 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
@@ -1683,6 +1683,14 @@ int kernel_kexec(void) | |||
1683 | kexec_in_progress = true; | 1683 | kexec_in_progress = true; |
1684 | kernel_restart_prepare(NULL); | 1684 | kernel_restart_prepare(NULL); |
1685 | migrate_to_reboot_cpu(); | 1685 | migrate_to_reboot_cpu(); |
1686 | |||
1687 | /* | ||
1688 | * migrate_to_reboot_cpu() disables CPU hotplug assuming that | ||
1689 | * no further code needs to use CPU hotplug (which is true in | ||
1690 | * the reboot case). However, the kexec path depends on using | ||
1691 | * CPU hotplug again; so re-enable it here. | ||
1692 | */ | ||
1693 | cpu_hotplug_enable(); | ||
1686 | printk(KERN_EMERG "Starting new kernel\n"); | 1694 | printk(KERN_EMERG "Starting new kernel\n"); |
1687 | machine_shutdown(); | 1695 | machine_shutdown(); |
1688 | } | 1696 | } |
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index b0e9467922e1..d24e4339b46d 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c | |||
@@ -4188,7 +4188,7 @@ void debug_show_held_locks(struct task_struct *task) | |||
4188 | } | 4188 | } |
4189 | EXPORT_SYMBOL_GPL(debug_show_held_locks); | 4189 | EXPORT_SYMBOL_GPL(debug_show_held_locks); |
4190 | 4190 | ||
4191 | asmlinkage void lockdep_sys_exit(void) | 4191 | asmlinkage __visible void lockdep_sys_exit(void) |
4192 | { | 4192 | { |
4193 | struct task_struct *curr = current; | 4193 | struct task_struct *curr = current; |
4194 | 4194 | ||
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index e1191c996c59..5cf6731b98e9 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c | |||
@@ -71,18 +71,17 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, | |||
71 | 71 | ||
72 | void debug_mutex_unlock(struct mutex *lock) | 72 | void debug_mutex_unlock(struct mutex *lock) |
73 | { | 73 | { |
74 | if (unlikely(!debug_locks)) | 74 | if (likely(debug_locks)) { |
75 | return; | 75 | DEBUG_LOCKS_WARN_ON(lock->magic != lock); |
76 | 76 | ||
77 | DEBUG_LOCKS_WARN_ON(lock->magic != lock); | 77 | if (!lock->owner) |
78 | DEBUG_LOCKS_WARN_ON(!lock->owner); | ||
79 | else | ||
80 | DEBUG_LOCKS_WARN_ON(lock->owner != current); | ||
78 | 81 | ||
79 | if (!lock->owner) | 82 | DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); |
80 | DEBUG_LOCKS_WARN_ON(!lock->owner); | 83 | mutex_clear_owner(lock); |
81 | else | 84 | } |
82 | DEBUG_LOCKS_WARN_ON(lock->owner != current); | ||
83 | |||
84 | DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); | ||
85 | mutex_clear_owner(lock); | ||
86 | 85 | ||
87 | /* | 86 | /* |
88 | * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug | 87 | * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug |
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index aa4dff04b594..a620d4d08ca6 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c | |||
@@ -343,9 +343,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |||
343 | * top_waiter can be NULL, when we are in the deboosting | 343 | * top_waiter can be NULL, when we are in the deboosting |
344 | * mode! | 344 | * mode! |
345 | */ | 345 | */ |
346 | if (top_waiter && (!task_has_pi_waiters(task) || | 346 | if (top_waiter) { |
347 | top_waiter != task_top_pi_waiter(task))) | 347 | if (!task_has_pi_waiters(task)) |
348 | goto out_unlock_pi; | 348 | goto out_unlock_pi; |
349 | /* | ||
350 | * If deadlock detection is off, we stop here if we | ||
351 | * are not the top pi waiter of the task. | ||
352 | */ | ||
353 | if (!detect_deadlock && top_waiter != task_top_pi_waiter(task)) | ||
354 | goto out_unlock_pi; | ||
355 | } | ||
349 | 356 | ||
350 | /* | 357 | /* |
351 | * When deadlock detection is off then we check, if further | 358 | * When deadlock detection is off then we check, if further |
@@ -361,7 +368,12 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |||
361 | goto retry; | 368 | goto retry; |
362 | } | 369 | } |
363 | 370 | ||
364 | /* Deadlock detection */ | 371 | /* |
372 | * Deadlock detection. If the lock is the same as the original | ||
373 | * lock which caused us to walk the lock chain or if the | ||
374 | * current lock is owned by the task which initiated the chain | ||
375 | * walk, we detected a deadlock. | ||
376 | */ | ||
365 | if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { | 377 | if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { |
366 | debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); | 378 | debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); |
367 | raw_spin_unlock(&lock->wait_lock); | 379 | raw_spin_unlock(&lock->wait_lock); |
@@ -527,6 +539,18 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |||
527 | unsigned long flags; | 539 | unsigned long flags; |
528 | int chain_walk = 0, res; | 540 | int chain_walk = 0, res; |
529 | 541 | ||
542 | /* | ||
543 | * Early deadlock detection. We really don't want the task to | ||
544 | * enqueue on itself just to untangle the mess later. It's not | ||
545 | * only an optimization. We drop the locks, so another waiter | ||
546 | * can come in before the chain walk detects the deadlock. So | ||
547 | * the other will detect the deadlock and return -EDEADLOCK, | ||
548 | * which is wrong, as the other waiter is not in a deadlock | ||
549 | * situation. | ||
550 | */ | ||
551 | if (detect_deadlock && owner == task) | ||
552 | return -EDEADLK; | ||
553 | |||
530 | raw_spin_lock_irqsave(&task->pi_lock, flags); | 554 | raw_spin_lock_irqsave(&task->pi_lock, flags); |
531 | __rt_mutex_adjust_prio(task); | 555 | __rt_mutex_adjust_prio(task); |
532 | waiter->task = task; | 556 | waiter->task = task; |
diff --git a/kernel/module.c b/kernel/module.c index 11869408f79b..079c4615607d 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -815,9 +815,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, | |||
815 | return -EFAULT; | 815 | return -EFAULT; |
816 | name[MODULE_NAME_LEN-1] = '\0'; | 816 | name[MODULE_NAME_LEN-1] = '\0'; |
817 | 817 | ||
818 | if (!(flags & O_NONBLOCK)) | ||
819 | pr_warn("waiting module removal not supported: please upgrade\n"); | ||
820 | |||
821 | if (mutex_lock_interruptible(&module_mutex) != 0) | 818 | if (mutex_lock_interruptible(&module_mutex) != 0) |
822 | return -EINTR; | 819 | return -EINTR; |
823 | 820 | ||
@@ -3271,6 +3268,9 @@ static int load_module(struct load_info *info, const char __user *uargs, | |||
3271 | 3268 | ||
3272 | dynamic_debug_setup(info->debug, info->num_debug); | 3269 | dynamic_debug_setup(info->debug, info->num_debug); |
3273 | 3270 | ||
3271 | /* Ftrace init must be called in the MODULE_STATE_UNFORMED state */ | ||
3272 | ftrace_module_init(mod); | ||
3273 | |||
3274 | /* Finally it's fully formed, ready to start executing. */ | 3274 | /* Finally it's fully formed, ready to start executing. */ |
3275 | err = complete_formation(mod, info); | 3275 | err = complete_formation(mod, info); |
3276 | if (err) | 3276 | if (err) |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 18fb7a2fb14b..1ea328aafdc9 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -1586,7 +1586,7 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, | |||
1586 | return -ENOMEM; | 1586 | return -ENOMEM; |
1587 | } | 1587 | } |
1588 | 1588 | ||
1589 | asmlinkage int swsusp_save(void) | 1589 | asmlinkage __visible int swsusp_save(void) |
1590 | { | 1590 | { |
1591 | unsigned int nr_pages, nr_highmem; | 1591 | unsigned int nr_pages, nr_highmem; |
1592 | 1592 | ||
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index c3ad9cafe930..8233cd4047d7 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/console.h> | 15 | #include <linux/console.h> |
16 | #include <linux/cpu.h> | 16 | #include <linux/cpu.h> |
17 | #include <linux/cpuidle.h> | ||
17 | #include <linux/syscalls.h> | 18 | #include <linux/syscalls.h> |
18 | #include <linux/gfp.h> | 19 | #include <linux/gfp.h> |
19 | #include <linux/io.h> | 20 | #include <linux/io.h> |
@@ -53,7 +54,9 @@ static void freeze_begin(void) | |||
53 | 54 | ||
54 | static void freeze_enter(void) | 55 | static void freeze_enter(void) |
55 | { | 56 | { |
57 | cpuidle_resume(); | ||
56 | wait_event(suspend_freeze_wait_head, suspend_freeze_wake); | 58 | wait_event(suspend_freeze_wait_head, suspend_freeze_wake); |
59 | cpuidle_pause(); | ||
57 | } | 60 | } |
58 | 61 | ||
59 | void freeze_wake(void) | 62 | void freeze_wake(void) |
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index a45b50962295..7228258b85ec 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c | |||
@@ -1674,7 +1674,7 @@ EXPORT_SYMBOL(printk_emit); | |||
1674 | * | 1674 | * |
1675 | * See the vsnprintf() documentation for format string extensions over C99. | 1675 | * See the vsnprintf() documentation for format string extensions over C99. |
1676 | */ | 1676 | */ |
1677 | asmlinkage int printk(const char *fmt, ...) | 1677 | asmlinkage __visible int printk(const char *fmt, ...) |
1678 | { | 1678 | { |
1679 | va_list args; | 1679 | va_list args; |
1680 | int r; | 1680 | int r; |
@@ -1737,7 +1737,7 @@ void early_vprintk(const char *fmt, va_list ap) | |||
1737 | } | 1737 | } |
1738 | } | 1738 | } |
1739 | 1739 | ||
1740 | asmlinkage void early_printk(const char *fmt, ...) | 1740 | asmlinkage __visible void early_printk(const char *fmt, ...) |
1741 | { | 1741 | { |
1742 | va_list ap; | 1742 | va_list ap; |
1743 | 1743 | ||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 268a45ea238c..0a7251678982 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -2192,7 +2192,7 @@ static inline void post_schedule(struct rq *rq) | |||
2192 | * schedule_tail - first thing a freshly forked thread must call. | 2192 | * schedule_tail - first thing a freshly forked thread must call. |
2193 | * @prev: the thread we just switched away from. | 2193 | * @prev: the thread we just switched away from. |
2194 | */ | 2194 | */ |
2195 | asmlinkage void schedule_tail(struct task_struct *prev) | 2195 | asmlinkage __visible void schedule_tail(struct task_struct *prev) |
2196 | __releases(rq->lock) | 2196 | __releases(rq->lock) |
2197 | { | 2197 | { |
2198 | struct rq *rq = this_rq(); | 2198 | struct rq *rq = this_rq(); |
@@ -2592,8 +2592,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev) | |||
2592 | if (likely(prev->sched_class == class && | 2592 | if (likely(prev->sched_class == class && |
2593 | rq->nr_running == rq->cfs.h_nr_running)) { | 2593 | rq->nr_running == rq->cfs.h_nr_running)) { |
2594 | p = fair_sched_class.pick_next_task(rq, prev); | 2594 | p = fair_sched_class.pick_next_task(rq, prev); |
2595 | if (likely(p && p != RETRY_TASK)) | 2595 | if (unlikely(p == RETRY_TASK)) |
2596 | return p; | 2596 | goto again; |
2597 | |||
2598 | /* assumes fair_sched_class->next == idle_sched_class */ | ||
2599 | if (unlikely(!p)) | ||
2600 | p = idle_sched_class.pick_next_task(rq, prev); | ||
2601 | |||
2602 | return p; | ||
2597 | } | 2603 | } |
2598 | 2604 | ||
2599 | again: | 2605 | again: |
@@ -2741,7 +2747,7 @@ static inline void sched_submit_work(struct task_struct *tsk) | |||
2741 | blk_schedule_flush_plug(tsk); | 2747 | blk_schedule_flush_plug(tsk); |
2742 | } | 2748 | } |
2743 | 2749 | ||
2744 | asmlinkage void __sched schedule(void) | 2750 | asmlinkage __visible void __sched schedule(void) |
2745 | { | 2751 | { |
2746 | struct task_struct *tsk = current; | 2752 | struct task_struct *tsk = current; |
2747 | 2753 | ||
@@ -2751,7 +2757,7 @@ asmlinkage void __sched schedule(void) | |||
2751 | EXPORT_SYMBOL(schedule); | 2757 | EXPORT_SYMBOL(schedule); |
2752 | 2758 | ||
2753 | #ifdef CONFIG_CONTEXT_TRACKING | 2759 | #ifdef CONFIG_CONTEXT_TRACKING |
2754 | asmlinkage void __sched schedule_user(void) | 2760 | asmlinkage __visible void __sched schedule_user(void) |
2755 | { | 2761 | { |
2756 | /* | 2762 | /* |
2757 | * If we come here after a random call to set_need_resched(), | 2763 | * If we come here after a random call to set_need_resched(), |
@@ -2783,7 +2789,7 @@ void __sched schedule_preempt_disabled(void) | |||
2783 | * off of preempt_enable. Kernel preemptions off return from interrupt | 2789 | * off of preempt_enable. Kernel preemptions off return from interrupt |
2784 | * occur there and call schedule directly. | 2790 | * occur there and call schedule directly. |
2785 | */ | 2791 | */ |
2786 | asmlinkage void __sched notrace preempt_schedule(void) | 2792 | asmlinkage __visible void __sched notrace preempt_schedule(void) |
2787 | { | 2793 | { |
2788 | /* | 2794 | /* |
2789 | * If there is a non-zero preempt_count or interrupts are disabled, | 2795 | * If there is a non-zero preempt_count or interrupts are disabled, |
@@ -2813,7 +2819,7 @@ EXPORT_SYMBOL(preempt_schedule); | |||
2813 | * Note, that this is called and return with irqs disabled. This will | 2819 | * Note, that this is called and return with irqs disabled. This will |
2814 | * protect us against recursive calling from irq. | 2820 | * protect us against recursive calling from irq. |
2815 | */ | 2821 | */ |
2816 | asmlinkage void __sched preempt_schedule_irq(void) | 2822 | asmlinkage __visible void __sched preempt_schedule_irq(void) |
2817 | { | 2823 | { |
2818 | enum ctx_state prev_state; | 2824 | enum ctx_state prev_state; |
2819 | 2825 | ||
@@ -3124,6 +3130,7 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr) | |||
3124 | dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime); | 3130 | dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime); |
3125 | dl_se->dl_throttled = 0; | 3131 | dl_se->dl_throttled = 0; |
3126 | dl_se->dl_new = 1; | 3132 | dl_se->dl_new = 1; |
3133 | dl_se->dl_yielded = 0; | ||
3127 | } | 3134 | } |
3128 | 3135 | ||
3129 | static void __setscheduler_params(struct task_struct *p, | 3136 | static void __setscheduler_params(struct task_struct *p, |
@@ -3188,17 +3195,40 @@ __getparam_dl(struct task_struct *p, struct sched_attr *attr) | |||
3188 | * We ask for the deadline not being zero, and greater or equal | 3195 | * We ask for the deadline not being zero, and greater or equal |
3189 | * than the runtime, as well as the period of being zero or | 3196 | * than the runtime, as well as the period of being zero or |
3190 | * greater than deadline. Furthermore, we have to be sure that | 3197 | * greater than deadline. Furthermore, we have to be sure that |
3191 | * user parameters are above the internal resolution (1us); we | 3198 | * user parameters are above the internal resolution of 1us (we |
3192 | * check sched_runtime only since it is always the smaller one. | 3199 | * check sched_runtime only since it is always the smaller one) and |
3200 | * below 2^63 ns (we have to check both sched_deadline and | ||
3201 | * sched_period, as the latter can be zero). | ||
3193 | */ | 3202 | */ |
3194 | static bool | 3203 | static bool |
3195 | __checkparam_dl(const struct sched_attr *attr) | 3204 | __checkparam_dl(const struct sched_attr *attr) |
3196 | { | 3205 | { |
3197 | return attr && attr->sched_deadline != 0 && | 3206 | /* deadline != 0 */ |
3198 | (attr->sched_period == 0 || | 3207 | if (attr->sched_deadline == 0) |
3199 | (s64)(attr->sched_period - attr->sched_deadline) >= 0) && | 3208 | return false; |
3200 | (s64)(attr->sched_deadline - attr->sched_runtime ) >= 0 && | 3209 | |
3201 | attr->sched_runtime >= (2 << (DL_SCALE - 1)); | 3210 | /* |
3211 | * Since we truncate DL_SCALE bits, make sure we're at least | ||
3212 | * that big. | ||
3213 | */ | ||
3214 | if (attr->sched_runtime < (1ULL << DL_SCALE)) | ||
3215 | return false; | ||
3216 | |||
3217 | /* | ||
3218 | * Since we use the MSB for wrap-around and sign issues, make | ||
3219 | * sure it's not set (mind that period can be equal to zero). | ||
3220 | */ | ||
3221 | if (attr->sched_deadline & (1ULL << 63) || | ||
3222 | attr->sched_period & (1ULL << 63)) | ||
3223 | return false; | ||
3224 | |||
3225 | /* runtime <= deadline <= period (if period != 0) */ | ||
3226 | if ((attr->sched_period != 0 && | ||
3227 | attr->sched_period < attr->sched_deadline) || | ||
3228 | attr->sched_deadline < attr->sched_runtime) | ||
3229 | return false; | ||
3230 | |||
3231 | return true; | ||
3202 | } | 3232 | } |
3203 | 3233 | ||
3204 | /* | 3234 | /* |
@@ -3639,6 +3669,7 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) | |||
3639 | * sys_sched_setattr - same as above, but with extended sched_attr | 3669 | * sys_sched_setattr - same as above, but with extended sched_attr |
3640 | * @pid: the pid in question. | 3670 | * @pid: the pid in question. |
3641 | * @uattr: structure containing the extended parameters. | 3671 | * @uattr: structure containing the extended parameters. |
3672 | * @flags: for future extension. | ||
3642 | */ | 3673 | */ |
3643 | SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, | 3674 | SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, |
3644 | unsigned int, flags) | 3675 | unsigned int, flags) |
@@ -3650,8 +3681,12 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, | |||
3650 | if (!uattr || pid < 0 || flags) | 3681 | if (!uattr || pid < 0 || flags) |
3651 | return -EINVAL; | 3682 | return -EINVAL; |
3652 | 3683 | ||
3653 | if (sched_copy_attr(uattr, &attr)) | 3684 | retval = sched_copy_attr(uattr, &attr); |
3654 | return -EFAULT; | 3685 | if (retval) |
3686 | return retval; | ||
3687 | |||
3688 | if (attr.sched_policy < 0) | ||
3689 | return -EINVAL; | ||
3655 | 3690 | ||
3656 | rcu_read_lock(); | 3691 | rcu_read_lock(); |
3657 | retval = -ESRCH; | 3692 | retval = -ESRCH; |
@@ -3701,7 +3736,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) | |||
3701 | */ | 3736 | */ |
3702 | SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) | 3737 | SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) |
3703 | { | 3738 | { |
3704 | struct sched_param lp; | 3739 | struct sched_param lp = { .sched_priority = 0 }; |
3705 | struct task_struct *p; | 3740 | struct task_struct *p; |
3706 | int retval; | 3741 | int retval; |
3707 | 3742 | ||
@@ -3718,11 +3753,8 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) | |||
3718 | if (retval) | 3753 | if (retval) |
3719 | goto out_unlock; | 3754 | goto out_unlock; |
3720 | 3755 | ||
3721 | if (task_has_dl_policy(p)) { | 3756 | if (task_has_rt_policy(p)) |
3722 | retval = -EINVAL; | 3757 | lp.sched_priority = p->rt_priority; |
3723 | goto out_unlock; | ||
3724 | } | ||
3725 | lp.sched_priority = p->rt_priority; | ||
3726 | rcu_read_unlock(); | 3758 | rcu_read_unlock(); |
3727 | 3759 | ||
3728 | /* | 3760 | /* |
@@ -3783,6 +3815,7 @@ err_size: | |||
3783 | * @pid: the pid in question. | 3815 | * @pid: the pid in question. |
3784 | * @uattr: structure containing the extended parameters. | 3816 | * @uattr: structure containing the extended parameters. |
3785 | * @size: sizeof(attr) for fwd/bwd comp. | 3817 | * @size: sizeof(attr) for fwd/bwd comp. |
3818 | * @flags: for future extension. | ||
3786 | */ | 3819 | */ |
3787 | SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, | 3820 | SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, |
3788 | unsigned int, size, unsigned int, flags) | 3821 | unsigned int, size, unsigned int, flags) |
@@ -5043,7 +5076,6 @@ static int sched_cpu_active(struct notifier_block *nfb, | |||
5043 | unsigned long action, void *hcpu) | 5076 | unsigned long action, void *hcpu) |
5044 | { | 5077 | { |
5045 | switch (action & ~CPU_TASKS_FROZEN) { | 5078 | switch (action & ~CPU_TASKS_FROZEN) { |
5046 | case CPU_STARTING: | ||
5047 | case CPU_DOWN_FAILED: | 5079 | case CPU_DOWN_FAILED: |
5048 | set_cpu_active((long)hcpu, true); | 5080 | set_cpu_active((long)hcpu, true); |
5049 | return NOTIFY_OK; | 5081 | return NOTIFY_OK; |
@@ -6017,6 +6049,8 @@ sd_numa_init(struct sched_domain_topology_level *tl, int cpu) | |||
6017 | , | 6049 | , |
6018 | .last_balance = jiffies, | 6050 | .last_balance = jiffies, |
6019 | .balance_interval = sd_weight, | 6051 | .balance_interval = sd_weight, |
6052 | .max_newidle_lb_cost = 0, | ||
6053 | .next_decay_max_lb_cost = jiffies, | ||
6020 | }; | 6054 | }; |
6021 | SD_INIT_NAME(sd, NUMA); | 6055 | SD_INIT_NAME(sd, NUMA); |
6022 | sd->private = &tl->data; | 6056 | sd->private = &tl->data; |
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c index 5b9bb42b2d47..bd95963dae80 100644 --- a/kernel/sched/cpudeadline.c +++ b/kernel/sched/cpudeadline.c | |||
@@ -13,6 +13,7 @@ | |||
13 | 13 | ||
14 | #include <linux/gfp.h> | 14 | #include <linux/gfp.h> |
15 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
16 | #include <linux/slab.h> | ||
16 | #include "cpudeadline.h" | 17 | #include "cpudeadline.h" |
17 | 18 | ||
18 | static inline int parent(int i) | 19 | static inline int parent(int i) |
@@ -39,8 +40,10 @@ static void cpudl_exchange(struct cpudl *cp, int a, int b) | |||
39 | { | 40 | { |
40 | int cpu_a = cp->elements[a].cpu, cpu_b = cp->elements[b].cpu; | 41 | int cpu_a = cp->elements[a].cpu, cpu_b = cp->elements[b].cpu; |
41 | 42 | ||
42 | swap(cp->elements[a], cp->elements[b]); | 43 | swap(cp->elements[a].cpu, cp->elements[b].cpu); |
43 | swap(cp->cpu_to_idx[cpu_a], cp->cpu_to_idx[cpu_b]); | 44 | swap(cp->elements[a].dl , cp->elements[b].dl ); |
45 | |||
46 | swap(cp->elements[cpu_a].idx, cp->elements[cpu_b].idx); | ||
44 | } | 47 | } |
45 | 48 | ||
46 | static void cpudl_heapify(struct cpudl *cp, int idx) | 49 | static void cpudl_heapify(struct cpudl *cp, int idx) |
@@ -140,7 +143,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid) | |||
140 | WARN_ON(!cpu_present(cpu)); | 143 | WARN_ON(!cpu_present(cpu)); |
141 | 144 | ||
142 | raw_spin_lock_irqsave(&cp->lock, flags); | 145 | raw_spin_lock_irqsave(&cp->lock, flags); |
143 | old_idx = cp->cpu_to_idx[cpu]; | 146 | old_idx = cp->elements[cpu].idx; |
144 | if (!is_valid) { | 147 | if (!is_valid) { |
145 | /* remove item */ | 148 | /* remove item */ |
146 | if (old_idx == IDX_INVALID) { | 149 | if (old_idx == IDX_INVALID) { |
@@ -155,8 +158,8 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid) | |||
155 | cp->elements[old_idx].dl = cp->elements[cp->size - 1].dl; | 158 | cp->elements[old_idx].dl = cp->elements[cp->size - 1].dl; |
156 | cp->elements[old_idx].cpu = new_cpu; | 159 | cp->elements[old_idx].cpu = new_cpu; |
157 | cp->size--; | 160 | cp->size--; |
158 | cp->cpu_to_idx[new_cpu] = old_idx; | 161 | cp->elements[new_cpu].idx = old_idx; |
159 | cp->cpu_to_idx[cpu] = IDX_INVALID; | 162 | cp->elements[cpu].idx = IDX_INVALID; |
160 | while (old_idx > 0 && dl_time_before( | 163 | while (old_idx > 0 && dl_time_before( |
161 | cp->elements[parent(old_idx)].dl, | 164 | cp->elements[parent(old_idx)].dl, |
162 | cp->elements[old_idx].dl)) { | 165 | cp->elements[old_idx].dl)) { |
@@ -173,7 +176,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid) | |||
173 | cp->size++; | 176 | cp->size++; |
174 | cp->elements[cp->size - 1].dl = 0; | 177 | cp->elements[cp->size - 1].dl = 0; |
175 | cp->elements[cp->size - 1].cpu = cpu; | 178 | cp->elements[cp->size - 1].cpu = cpu; |
176 | cp->cpu_to_idx[cpu] = cp->size - 1; | 179 | cp->elements[cpu].idx = cp->size - 1; |
177 | cpudl_change_key(cp, cp->size - 1, dl); | 180 | cpudl_change_key(cp, cp->size - 1, dl); |
178 | cpumask_clear_cpu(cpu, cp->free_cpus); | 181 | cpumask_clear_cpu(cpu, cp->free_cpus); |
179 | } else { | 182 | } else { |
@@ -195,10 +198,21 @@ int cpudl_init(struct cpudl *cp) | |||
195 | memset(cp, 0, sizeof(*cp)); | 198 | memset(cp, 0, sizeof(*cp)); |
196 | raw_spin_lock_init(&cp->lock); | 199 | raw_spin_lock_init(&cp->lock); |
197 | cp->size = 0; | 200 | cp->size = 0; |
198 | for (i = 0; i < NR_CPUS; i++) | 201 | |
199 | cp->cpu_to_idx[i] = IDX_INVALID; | 202 | cp->elements = kcalloc(nr_cpu_ids, |
200 | if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) | 203 | sizeof(struct cpudl_item), |
204 | GFP_KERNEL); | ||
205 | if (!cp->elements) | ||
206 | return -ENOMEM; | ||
207 | |||
208 | if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) { | ||
209 | kfree(cp->elements); | ||
201 | return -ENOMEM; | 210 | return -ENOMEM; |
211 | } | ||
212 | |||
213 | for_each_possible_cpu(i) | ||
214 | cp->elements[i].idx = IDX_INVALID; | ||
215 | |||
202 | cpumask_setall(cp->free_cpus); | 216 | cpumask_setall(cp->free_cpus); |
203 | 217 | ||
204 | return 0; | 218 | return 0; |
@@ -210,7 +224,6 @@ int cpudl_init(struct cpudl *cp) | |||
210 | */ | 224 | */ |
211 | void cpudl_cleanup(struct cpudl *cp) | 225 | void cpudl_cleanup(struct cpudl *cp) |
212 | { | 226 | { |
213 | /* | 227 | free_cpumask_var(cp->free_cpus); |
214 | * nothing to do for the moment | 228 | kfree(cp->elements); |
215 | */ | ||
216 | } | 229 | } |
diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h index a202789a412c..538c9796ad4a 100644 --- a/kernel/sched/cpudeadline.h +++ b/kernel/sched/cpudeadline.h | |||
@@ -5,17 +5,17 @@ | |||
5 | 5 | ||
6 | #define IDX_INVALID -1 | 6 | #define IDX_INVALID -1 |
7 | 7 | ||
8 | struct array_item { | 8 | struct cpudl_item { |
9 | u64 dl; | 9 | u64 dl; |
10 | int cpu; | 10 | int cpu; |
11 | int idx; | ||
11 | }; | 12 | }; |
12 | 13 | ||
13 | struct cpudl { | 14 | struct cpudl { |
14 | raw_spinlock_t lock; | 15 | raw_spinlock_t lock; |
15 | int size; | 16 | int size; |
16 | int cpu_to_idx[NR_CPUS]; | ||
17 | struct array_item elements[NR_CPUS]; | ||
18 | cpumask_var_t free_cpus; | 17 | cpumask_var_t free_cpus; |
18 | struct cpudl_item *elements; | ||
19 | }; | 19 | }; |
20 | 20 | ||
21 | 21 | ||
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c index 8b836b376d91..8834243abee2 100644 --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/gfp.h> | 30 | #include <linux/gfp.h> |
31 | #include <linux/sched.h> | 31 | #include <linux/sched.h> |
32 | #include <linux/sched/rt.h> | 32 | #include <linux/sched/rt.h> |
33 | #include <linux/slab.h> | ||
33 | #include "cpupri.h" | 34 | #include "cpupri.h" |
34 | 35 | ||
35 | /* Convert between a 140 based task->prio, and our 102 based cpupri */ | 36 | /* Convert between a 140 based task->prio, and our 102 based cpupri */ |
@@ -70,8 +71,7 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, | |||
70 | int idx = 0; | 71 | int idx = 0; |
71 | int task_pri = convert_prio(p->prio); | 72 | int task_pri = convert_prio(p->prio); |
72 | 73 | ||
73 | if (task_pri >= MAX_RT_PRIO) | 74 | BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES); |
74 | return 0; | ||
75 | 75 | ||
76 | for (idx = 0; idx < task_pri; idx++) { | 76 | for (idx = 0; idx < task_pri; idx++) { |
77 | struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; | 77 | struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; |
@@ -219,8 +219,13 @@ int cpupri_init(struct cpupri *cp) | |||
219 | goto cleanup; | 219 | goto cleanup; |
220 | } | 220 | } |
221 | 221 | ||
222 | cp->cpu_to_pri = kcalloc(nr_cpu_ids, sizeof(int), GFP_KERNEL); | ||
223 | if (!cp->cpu_to_pri) | ||
224 | goto cleanup; | ||
225 | |||
222 | for_each_possible_cpu(i) | 226 | for_each_possible_cpu(i) |
223 | cp->cpu_to_pri[i] = CPUPRI_INVALID; | 227 | cp->cpu_to_pri[i] = CPUPRI_INVALID; |
228 | |||
224 | return 0; | 229 | return 0; |
225 | 230 | ||
226 | cleanup: | 231 | cleanup: |
@@ -237,6 +242,7 @@ void cpupri_cleanup(struct cpupri *cp) | |||
237 | { | 242 | { |
238 | int i; | 243 | int i; |
239 | 244 | ||
245 | kfree(cp->cpu_to_pri); | ||
240 | for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) | 246 | for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) |
241 | free_cpumask_var(cp->pri_to_cpu[i].mask); | 247 | free_cpumask_var(cp->pri_to_cpu[i].mask); |
242 | } | 248 | } |
diff --git a/kernel/sched/cpupri.h b/kernel/sched/cpupri.h index f6d756173491..6b033347fdfd 100644 --- a/kernel/sched/cpupri.h +++ b/kernel/sched/cpupri.h | |||
@@ -17,7 +17,7 @@ struct cpupri_vec { | |||
17 | 17 | ||
18 | struct cpupri { | 18 | struct cpupri { |
19 | struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES]; | 19 | struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES]; |
20 | int cpu_to_pri[NR_CPUS]; | 20 | int *cpu_to_pri; |
21 | }; | 21 | }; |
22 | 22 | ||
23 | #ifdef CONFIG_SMP | 23 | #ifdef CONFIG_SMP |
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index a95097cb4591..72fdf06ef865 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c | |||
@@ -332,50 +332,50 @@ out: | |||
332 | * softirq as those do not count in task exec_runtime any more. | 332 | * softirq as those do not count in task exec_runtime any more. |
333 | */ | 333 | */ |
334 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | 334 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, |
335 | struct rq *rq) | 335 | struct rq *rq, int ticks) |
336 | { | 336 | { |
337 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | 337 | cputime_t scaled = cputime_to_scaled(cputime_one_jiffy); |
338 | u64 cputime = (__force u64) cputime_one_jiffy; | ||
338 | u64 *cpustat = kcpustat_this_cpu->cpustat; | 339 | u64 *cpustat = kcpustat_this_cpu->cpustat; |
339 | 340 | ||
340 | if (steal_account_process_tick()) | 341 | if (steal_account_process_tick()) |
341 | return; | 342 | return; |
342 | 343 | ||
344 | cputime *= ticks; | ||
345 | scaled *= ticks; | ||
346 | |||
343 | if (irqtime_account_hi_update()) { | 347 | if (irqtime_account_hi_update()) { |
344 | cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy; | 348 | cpustat[CPUTIME_IRQ] += cputime; |
345 | } else if (irqtime_account_si_update()) { | 349 | } else if (irqtime_account_si_update()) { |
346 | cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy; | 350 | cpustat[CPUTIME_SOFTIRQ] += cputime; |
347 | } else if (this_cpu_ksoftirqd() == p) { | 351 | } else if (this_cpu_ksoftirqd() == p) { |
348 | /* | 352 | /* |
349 | * ksoftirqd time do not get accounted in cpu_softirq_time. | 353 | * ksoftirqd time do not get accounted in cpu_softirq_time. |
350 | * So, we have to handle it separately here. | 354 | * So, we have to handle it separately here. |
351 | * Also, p->stime needs to be updated for ksoftirqd. | 355 | * Also, p->stime needs to be updated for ksoftirqd. |
352 | */ | 356 | */ |
353 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | 357 | __account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ); |
354 | CPUTIME_SOFTIRQ); | ||
355 | } else if (user_tick) { | 358 | } else if (user_tick) { |
356 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | 359 | account_user_time(p, cputime, scaled); |
357 | } else if (p == rq->idle) { | 360 | } else if (p == rq->idle) { |
358 | account_idle_time(cputime_one_jiffy); | 361 | account_idle_time(cputime); |
359 | } else if (p->flags & PF_VCPU) { /* System time or guest time */ | 362 | } else if (p->flags & PF_VCPU) { /* System time or guest time */ |
360 | account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); | 363 | account_guest_time(p, cputime, scaled); |
361 | } else { | 364 | } else { |
362 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | 365 | __account_system_time(p, cputime, scaled, CPUTIME_SYSTEM); |
363 | CPUTIME_SYSTEM); | ||
364 | } | 366 | } |
365 | } | 367 | } |
366 | 368 | ||
367 | static void irqtime_account_idle_ticks(int ticks) | 369 | static void irqtime_account_idle_ticks(int ticks) |
368 | { | 370 | { |
369 | int i; | ||
370 | struct rq *rq = this_rq(); | 371 | struct rq *rq = this_rq(); |
371 | 372 | ||
372 | for (i = 0; i < ticks; i++) | 373 | irqtime_account_process_tick(current, 0, rq, ticks); |
373 | irqtime_account_process_tick(current, 0, rq); | ||
374 | } | 374 | } |
375 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | 375 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ |
376 | static inline void irqtime_account_idle_ticks(int ticks) {} | 376 | static inline void irqtime_account_idle_ticks(int ticks) {} |
377 | static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick, | 377 | static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick, |
378 | struct rq *rq) {} | 378 | struct rq *rq, int nr_ticks) {} |
379 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ | 379 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ |
380 | 380 | ||
381 | /* | 381 | /* |
@@ -464,7 +464,7 @@ void account_process_tick(struct task_struct *p, int user_tick) | |||
464 | return; | 464 | return; |
465 | 465 | ||
466 | if (sched_clock_irqtime) { | 466 | if (sched_clock_irqtime) { |
467 | irqtime_account_process_tick(p, user_tick, rq); | 467 | irqtime_account_process_tick(p, user_tick, rq, 1); |
468 | return; | 468 | return; |
469 | } | 469 | } |
470 | 470 | ||
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 27ef40925525..800e99b99075 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c | |||
@@ -528,6 +528,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) | |||
528 | sched_clock_tick(); | 528 | sched_clock_tick(); |
529 | update_rq_clock(rq); | 529 | update_rq_clock(rq); |
530 | dl_se->dl_throttled = 0; | 530 | dl_se->dl_throttled = 0; |
531 | dl_se->dl_yielded = 0; | ||
531 | if (p->on_rq) { | 532 | if (p->on_rq) { |
532 | enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); | 533 | enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); |
533 | if (task_has_dl_policy(rq->curr)) | 534 | if (task_has_dl_policy(rq->curr)) |
@@ -893,10 +894,10 @@ static void yield_task_dl(struct rq *rq) | |||
893 | * We make the task go to sleep until its current deadline by | 894 | * We make the task go to sleep until its current deadline by |
894 | * forcing its runtime to zero. This way, update_curr_dl() stops | 895 | * forcing its runtime to zero. This way, update_curr_dl() stops |
895 | * it and the bandwidth timer will wake it up and will give it | 896 | * it and the bandwidth timer will wake it up and will give it |
896 | * new scheduling parameters (thanks to dl_new=1). | 897 | * new scheduling parameters (thanks to dl_yielded=1). |
897 | */ | 898 | */ |
898 | if (p->dl.runtime > 0) { | 899 | if (p->dl.runtime > 0) { |
899 | rq->curr->dl.dl_new = 1; | 900 | rq->curr->dl.dl_yielded = 1; |
900 | p->dl.runtime = 0; | 901 | p->dl.runtime = 0; |
901 | } | 902 | } |
902 | update_curr_dl(rq); | 903 | update_curr_dl(rq); |
@@ -1021,8 +1022,17 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev) | |||
1021 | 1022 | ||
1022 | dl_rq = &rq->dl; | 1023 | dl_rq = &rq->dl; |
1023 | 1024 | ||
1024 | if (need_pull_dl_task(rq, prev)) | 1025 | if (need_pull_dl_task(rq, prev)) { |
1025 | pull_dl_task(rq); | 1026 | pull_dl_task(rq); |
1027 | /* | ||
1028 | * pull_rt_task() can drop (and re-acquire) rq->lock; this | ||
1029 | * means a stop task can slip in, in which case we need to | ||
1030 | * re-start task selection. | ||
1031 | */ | ||
1032 | if (rq->stop && rq->stop->on_rq) | ||
1033 | return RETRY_TASK; | ||
1034 | } | ||
1035 | |||
1026 | /* | 1036 | /* |
1027 | * When prev is DL, we may throttle it in put_prev_task(). | 1037 | * When prev is DL, we may throttle it in put_prev_task(). |
1028 | * So, we update time before we check for dl_nr_running. | 1038 | * So, we update time before we check for dl_nr_running. |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7e9bd0b1fa9e..0fdb96de81a5 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -1497,7 +1497,7 @@ static void task_numa_placement(struct task_struct *p) | |||
1497 | /* If the task is part of a group prevent parallel updates to group stats */ | 1497 | /* If the task is part of a group prevent parallel updates to group stats */ |
1498 | if (p->numa_group) { | 1498 | if (p->numa_group) { |
1499 | group_lock = &p->numa_group->lock; | 1499 | group_lock = &p->numa_group->lock; |
1500 | spin_lock(group_lock); | 1500 | spin_lock_irq(group_lock); |
1501 | } | 1501 | } |
1502 | 1502 | ||
1503 | /* Find the node with the highest number of faults */ | 1503 | /* Find the node with the highest number of faults */ |
@@ -1572,7 +1572,7 @@ static void task_numa_placement(struct task_struct *p) | |||
1572 | } | 1572 | } |
1573 | } | 1573 | } |
1574 | 1574 | ||
1575 | spin_unlock(group_lock); | 1575 | spin_unlock_irq(group_lock); |
1576 | } | 1576 | } |
1577 | 1577 | ||
1578 | /* Preferred node as the node with the most faults */ | 1578 | /* Preferred node as the node with the most faults */ |
@@ -1677,7 +1677,8 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, | |||
1677 | if (!join) | 1677 | if (!join) |
1678 | return; | 1678 | return; |
1679 | 1679 | ||
1680 | double_lock(&my_grp->lock, &grp->lock); | 1680 | BUG_ON(irqs_disabled()); |
1681 | double_lock_irq(&my_grp->lock, &grp->lock); | ||
1681 | 1682 | ||
1682 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) { | 1683 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) { |
1683 | my_grp->faults[i] -= p->numa_faults_memory[i]; | 1684 | my_grp->faults[i] -= p->numa_faults_memory[i]; |
@@ -1691,7 +1692,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, | |||
1691 | grp->nr_tasks++; | 1692 | grp->nr_tasks++; |
1692 | 1693 | ||
1693 | spin_unlock(&my_grp->lock); | 1694 | spin_unlock(&my_grp->lock); |
1694 | spin_unlock(&grp->lock); | 1695 | spin_unlock_irq(&grp->lock); |
1695 | 1696 | ||
1696 | rcu_assign_pointer(p->numa_group, grp); | 1697 | rcu_assign_pointer(p->numa_group, grp); |
1697 | 1698 | ||
@@ -1710,14 +1711,14 @@ void task_numa_free(struct task_struct *p) | |||
1710 | void *numa_faults = p->numa_faults_memory; | 1711 | void *numa_faults = p->numa_faults_memory; |
1711 | 1712 | ||
1712 | if (grp) { | 1713 | if (grp) { |
1713 | spin_lock(&grp->lock); | 1714 | spin_lock_irq(&grp->lock); |
1714 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) | 1715 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) |
1715 | grp->faults[i] -= p->numa_faults_memory[i]; | 1716 | grp->faults[i] -= p->numa_faults_memory[i]; |
1716 | grp->total_faults -= p->total_numa_faults; | 1717 | grp->total_faults -= p->total_numa_faults; |
1717 | 1718 | ||
1718 | list_del(&p->numa_entry); | 1719 | list_del(&p->numa_entry); |
1719 | grp->nr_tasks--; | 1720 | grp->nr_tasks--; |
1720 | spin_unlock(&grp->lock); | 1721 | spin_unlock_irq(&grp->lock); |
1721 | rcu_assign_pointer(p->numa_group, NULL); | 1722 | rcu_assign_pointer(p->numa_group, NULL); |
1722 | put_numa_group(grp); | 1723 | put_numa_group(grp); |
1723 | } | 1724 | } |
@@ -6652,6 +6653,7 @@ static int idle_balance(struct rq *this_rq) | |||
6652 | int this_cpu = this_rq->cpu; | 6653 | int this_cpu = this_rq->cpu; |
6653 | 6654 | ||
6654 | idle_enter_fair(this_rq); | 6655 | idle_enter_fair(this_rq); |
6656 | |||
6655 | /* | 6657 | /* |
6656 | * We must set idle_stamp _before_ calling idle_balance(), such that we | 6658 | * We must set idle_stamp _before_ calling idle_balance(), such that we |
6657 | * measure the duration of idle_balance() as idle time. | 6659 | * measure the duration of idle_balance() as idle time. |
@@ -6704,14 +6706,16 @@ static int idle_balance(struct rq *this_rq) | |||
6704 | 6706 | ||
6705 | raw_spin_lock(&this_rq->lock); | 6707 | raw_spin_lock(&this_rq->lock); |
6706 | 6708 | ||
6709 | if (curr_cost > this_rq->max_idle_balance_cost) | ||
6710 | this_rq->max_idle_balance_cost = curr_cost; | ||
6711 | |||
6707 | /* | 6712 | /* |
6708 | * While browsing the domains, we released the rq lock. | 6713 | * While browsing the domains, we released the rq lock, a task could |
6709 | * A task could have be enqueued in the meantime | 6714 | * have been enqueued in the meantime. Since we're not going idle, |
6715 | * pretend we pulled a task. | ||
6710 | */ | 6716 | */ |
6711 | if (this_rq->cfs.h_nr_running && !pulled_task) { | 6717 | if (this_rq->cfs.h_nr_running && !pulled_task) |
6712 | pulled_task = 1; | 6718 | pulled_task = 1; |
6713 | goto out; | ||
6714 | } | ||
6715 | 6719 | ||
6716 | if (pulled_task || time_after(jiffies, this_rq->next_balance)) { | 6720 | if (pulled_task || time_after(jiffies, this_rq->next_balance)) { |
6717 | /* | 6721 | /* |
@@ -6721,13 +6725,11 @@ static int idle_balance(struct rq *this_rq) | |||
6721 | this_rq->next_balance = next_balance; | 6725 | this_rq->next_balance = next_balance; |
6722 | } | 6726 | } |
6723 | 6727 | ||
6724 | if (curr_cost > this_rq->max_idle_balance_cost) | ||
6725 | this_rq->max_idle_balance_cost = curr_cost; | ||
6726 | |||
6727 | out: | 6728 | out: |
6728 | /* Is there a task of a high priority class? */ | 6729 | /* Is there a task of a high priority class? */ |
6729 | if (this_rq->nr_running != this_rq->cfs.h_nr_running && | 6730 | if (this_rq->nr_running != this_rq->cfs.h_nr_running && |
6730 | (this_rq->dl.dl_nr_running || | 6731 | ((this_rq->stop && this_rq->stop->on_rq) || |
6732 | this_rq->dl.dl_nr_running || | ||
6731 | (this_rq->rt.rt_nr_running && !rt_rq_throttled(&this_rq->rt)))) | 6733 | (this_rq->rt.rt_nr_running && !rt_rq_throttled(&this_rq->rt)))) |
6732 | pulled_task = -1; | 6734 | pulled_task = -1; |
6733 | 6735 | ||
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index d8cdf1618551..bd2267ad404f 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c | |||
@@ -1362,10 +1362,11 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev) | |||
1362 | pull_rt_task(rq); | 1362 | pull_rt_task(rq); |
1363 | /* | 1363 | /* |
1364 | * pull_rt_task() can drop (and re-acquire) rq->lock; this | 1364 | * pull_rt_task() can drop (and re-acquire) rq->lock; this |
1365 | * means a dl task can slip in, in which case we need to | 1365 | * means a dl or stop task can slip in, in which case we need |
1366 | * re-start task selection. | 1366 | * to re-start task selection. |
1367 | */ | 1367 | */ |
1368 | if (unlikely(rq->dl.dl_nr_running)) | 1368 | if (unlikely((rq->stop && rq->stop->on_rq) || |
1369 | rq->dl.dl_nr_running)) | ||
1369 | return RETRY_TASK; | 1370 | return RETRY_TASK; |
1370 | } | 1371 | } |
1371 | 1372 | ||
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c9007f28d3a2..456e492a3dca 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -1385,6 +1385,15 @@ static inline void double_lock(spinlock_t *l1, spinlock_t *l2) | |||
1385 | spin_lock_nested(l2, SINGLE_DEPTH_NESTING); | 1385 | spin_lock_nested(l2, SINGLE_DEPTH_NESTING); |
1386 | } | 1386 | } |
1387 | 1387 | ||
1388 | static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2) | ||
1389 | { | ||
1390 | if (l1 > l2) | ||
1391 | swap(l1, l2); | ||
1392 | |||
1393 | spin_lock_irq(l1); | ||
1394 | spin_lock_nested(l2, SINGLE_DEPTH_NESTING); | ||
1395 | } | ||
1396 | |||
1388 | static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2) | 1397 | static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2) |
1389 | { | 1398 | { |
1390 | if (l1 > l2) | 1399 | if (l1 > l2) |
diff --git a/kernel/seccomp.c b/kernel/seccomp.c index d8d046c0726a..b35c21503a36 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c | |||
@@ -69,18 +69,17 @@ static void populate_seccomp_data(struct seccomp_data *sd) | |||
69 | { | 69 | { |
70 | struct task_struct *task = current; | 70 | struct task_struct *task = current; |
71 | struct pt_regs *regs = task_pt_regs(task); | 71 | struct pt_regs *regs = task_pt_regs(task); |
72 | unsigned long args[6]; | ||
72 | 73 | ||
73 | sd->nr = syscall_get_nr(task, regs); | 74 | sd->nr = syscall_get_nr(task, regs); |
74 | sd->arch = syscall_get_arch(); | 75 | sd->arch = syscall_get_arch(); |
75 | 76 | syscall_get_arguments(task, regs, 0, 6, args); | |
76 | /* Unroll syscall_get_args to help gcc on arm. */ | 77 | sd->args[0] = args[0]; |
77 | syscall_get_arguments(task, regs, 0, 1, (unsigned long *) &sd->args[0]); | 78 | sd->args[1] = args[1]; |
78 | syscall_get_arguments(task, regs, 1, 1, (unsigned long *) &sd->args[1]); | 79 | sd->args[2] = args[2]; |
79 | syscall_get_arguments(task, regs, 2, 1, (unsigned long *) &sd->args[2]); | 80 | sd->args[3] = args[3]; |
80 | syscall_get_arguments(task, regs, 3, 1, (unsigned long *) &sd->args[3]); | 81 | sd->args[4] = args[4]; |
81 | syscall_get_arguments(task, regs, 4, 1, (unsigned long *) &sd->args[4]); | 82 | sd->args[5] = args[5]; |
82 | syscall_get_arguments(task, regs, 5, 1, (unsigned long *) &sd->args[5]); | ||
83 | |||
84 | sd->instruction_pointer = KSTK_EIP(task); | 83 | sd->instruction_pointer = KSTK_EIP(task); |
85 | } | 84 | } |
86 | 85 | ||
@@ -256,6 +255,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) | |||
256 | goto free_prog; | 255 | goto free_prog; |
257 | 256 | ||
258 | /* Allocate a new seccomp_filter */ | 257 | /* Allocate a new seccomp_filter */ |
258 | ret = -ENOMEM; | ||
259 | filter = kzalloc(sizeof(struct seccomp_filter) + | 259 | filter = kzalloc(sizeof(struct seccomp_filter) + |
260 | sizeof(struct sock_filter_int) * new_len, | 260 | sizeof(struct sock_filter_int) * new_len, |
261 | GFP_KERNEL|__GFP_NOWARN); | 261 | GFP_KERNEL|__GFP_NOWARN); |
@@ -265,6 +265,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) | |||
265 | ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len); | 265 | ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len); |
266 | if (ret) | 266 | if (ret) |
267 | goto free_filter; | 267 | goto free_filter; |
268 | kfree(fp); | ||
268 | 269 | ||
269 | atomic_set(&filter->usage, 1); | 270 | atomic_set(&filter->usage, 1); |
270 | filter->len = new_len; | 271 | filter->len = new_len; |
diff --git a/kernel/softirq.c b/kernel/softirq.c index b50990a5bea0..92f24f5e8d52 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -223,7 +223,7 @@ static inline bool lockdep_softirq_start(void) { return false; } | |||
223 | static inline void lockdep_softirq_end(bool in_hardirq) { } | 223 | static inline void lockdep_softirq_end(bool in_hardirq) { } |
224 | #endif | 224 | #endif |
225 | 225 | ||
226 | asmlinkage void __do_softirq(void) | 226 | asmlinkage __visible void __do_softirq(void) |
227 | { | 227 | { |
228 | unsigned long end = jiffies + MAX_SOFTIRQ_TIME; | 228 | unsigned long end = jiffies + MAX_SOFTIRQ_TIME; |
229 | unsigned long old_flags = current->flags; | 229 | unsigned long old_flags = current->flags; |
@@ -299,7 +299,7 @@ restart: | |||
299 | tsk_restore_flags(current, old_flags, PF_MEMALLOC); | 299 | tsk_restore_flags(current, old_flags, PF_MEMALLOC); |
300 | } | 300 | } |
301 | 301 | ||
302 | asmlinkage void do_softirq(void) | 302 | asmlinkage __visible void do_softirq(void) |
303 | { | 303 | { |
304 | __u32 pending; | 304 | __u32 pending; |
305 | unsigned long flags; | 305 | unsigned long flags; |
@@ -779,3 +779,8 @@ int __init __weak arch_early_irq_init(void) | |||
779 | { | 779 | { |
780 | return 0; | 780 | return 0; |
781 | } | 781 | } |
782 | |||
783 | unsigned int __weak arch_dynirq_lower_bound(unsigned int from) | ||
784 | { | ||
785 | return from; | ||
786 | } | ||
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 015661279b68..0a0608edeb26 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c | |||
@@ -276,7 +276,7 @@ static bool tick_check_preferred(struct clock_event_device *curdev, | |||
276 | bool tick_check_replacement(struct clock_event_device *curdev, | 276 | bool tick_check_replacement(struct clock_event_device *curdev, |
277 | struct clock_event_device *newdev) | 277 | struct clock_event_device *newdev) |
278 | { | 278 | { |
279 | if (tick_check_percpu(curdev, newdev, smp_processor_id())) | 279 | if (!tick_check_percpu(curdev, newdev, smp_processor_id())) |
280 | return false; | 280 | return false; |
281 | 281 | ||
282 | return tick_check_preferred(curdev, newdev); | 282 | return tick_check_preferred(curdev, newdev); |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 9f8af69c67ec..6558b7ac112d 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -84,6 +84,9 @@ static void tick_do_update_jiffies64(ktime_t now) | |||
84 | 84 | ||
85 | /* Keep the tick_next_period variable up to date */ | 85 | /* Keep the tick_next_period variable up to date */ |
86 | tick_next_period = ktime_add(last_jiffies_update, tick_period); | 86 | tick_next_period = ktime_add(last_jiffies_update, tick_period); |
87 | } else { | ||
88 | write_sequnlock(&jiffies_lock); | ||
89 | return; | ||
87 | } | 90 | } |
88 | write_sequnlock(&jiffies_lock); | 91 | write_sequnlock(&jiffies_lock); |
89 | update_wall_time(); | 92 | update_wall_time(); |
@@ -967,7 +970,7 @@ static void tick_nohz_switch_to_nohz(void) | |||
967 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 970 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
968 | ktime_t next; | 971 | ktime_t next; |
969 | 972 | ||
970 | if (!tick_nohz_active) | 973 | if (!tick_nohz_enabled) |
971 | return; | 974 | return; |
972 | 975 | ||
973 | local_irq_disable(); | 976 | local_irq_disable(); |
diff --git a/kernel/timer.c b/kernel/timer.c index 87bd529879c2..3bb01a323b2a 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -838,7 +838,7 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires) | |||
838 | 838 | ||
839 | bit = find_last_bit(&mask, BITS_PER_LONG); | 839 | bit = find_last_bit(&mask, BITS_PER_LONG); |
840 | 840 | ||
841 | mask = (1 << bit) - 1; | 841 | mask = (1UL << bit) - 1; |
842 | 842 | ||
843 | expires_limit = expires_limit & ~(mask); | 843 | expires_limit = expires_limit & ~(mask); |
844 | 844 | ||
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1fd4b9479210..4a54a25afa2f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -4330,16 +4330,11 @@ static void ftrace_init_module(struct module *mod, | |||
4330 | ftrace_process_locs(mod, start, end); | 4330 | ftrace_process_locs(mod, start, end); |
4331 | } | 4331 | } |
4332 | 4332 | ||
4333 | static int ftrace_module_notify_enter(struct notifier_block *self, | 4333 | void ftrace_module_init(struct module *mod) |
4334 | unsigned long val, void *data) | ||
4335 | { | 4334 | { |
4336 | struct module *mod = data; | 4335 | ftrace_init_module(mod, mod->ftrace_callsites, |
4337 | 4336 | mod->ftrace_callsites + | |
4338 | if (val == MODULE_STATE_COMING) | 4337 | mod->num_ftrace_callsites); |
4339 | ftrace_init_module(mod, mod->ftrace_callsites, | ||
4340 | mod->ftrace_callsites + | ||
4341 | mod->num_ftrace_callsites); | ||
4342 | return 0; | ||
4343 | } | 4338 | } |
4344 | 4339 | ||
4345 | static int ftrace_module_notify_exit(struct notifier_block *self, | 4340 | static int ftrace_module_notify_exit(struct notifier_block *self, |
@@ -4353,11 +4348,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self, | |||
4353 | return 0; | 4348 | return 0; |
4354 | } | 4349 | } |
4355 | #else | 4350 | #else |
4356 | static int ftrace_module_notify_enter(struct notifier_block *self, | ||
4357 | unsigned long val, void *data) | ||
4358 | { | ||
4359 | return 0; | ||
4360 | } | ||
4361 | static int ftrace_module_notify_exit(struct notifier_block *self, | 4351 | static int ftrace_module_notify_exit(struct notifier_block *self, |
4362 | unsigned long val, void *data) | 4352 | unsigned long val, void *data) |
4363 | { | 4353 | { |
@@ -4365,11 +4355,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self, | |||
4365 | } | 4355 | } |
4366 | #endif /* CONFIG_MODULES */ | 4356 | #endif /* CONFIG_MODULES */ |
4367 | 4357 | ||
4368 | struct notifier_block ftrace_module_enter_nb = { | ||
4369 | .notifier_call = ftrace_module_notify_enter, | ||
4370 | .priority = INT_MAX, /* Run before anything that can use kprobes */ | ||
4371 | }; | ||
4372 | |||
4373 | struct notifier_block ftrace_module_exit_nb = { | 4358 | struct notifier_block ftrace_module_exit_nb = { |
4374 | .notifier_call = ftrace_module_notify_exit, | 4359 | .notifier_call = ftrace_module_notify_exit, |
4375 | .priority = INT_MIN, /* Run after anything that can remove kprobes */ | 4360 | .priority = INT_MIN, /* Run after anything that can remove kprobes */ |
@@ -4403,10 +4388,6 @@ void __init ftrace_init(void) | |||
4403 | __start_mcount_loc, | 4388 | __start_mcount_loc, |
4404 | __stop_mcount_loc); | 4389 | __stop_mcount_loc); |
4405 | 4390 | ||
4406 | ret = register_module_notifier(&ftrace_module_enter_nb); | ||
4407 | if (ret) | ||
4408 | pr_warning("Failed to register trace ftrace module enter notifier\n"); | ||
4409 | |||
4410 | ret = register_module_notifier(&ftrace_module_exit_nb); | 4391 | ret = register_module_notifier(&ftrace_module_exit_nb); |
4411 | if (ret) | 4392 | if (ret) |
4412 | pr_warning("Failed to register trace ftrace module exit notifier\n"); | 4393 | pr_warning("Failed to register trace ftrace module exit notifier\n"); |
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 925f537f07d1..4747b476a030 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c | |||
@@ -77,7 +77,7 @@ event_triggers_call(struct ftrace_event_file *file, void *rec) | |||
77 | data->ops->func(data); | 77 | data->ops->func(data); |
78 | continue; | 78 | continue; |
79 | } | 79 | } |
80 | filter = rcu_dereference(data->filter); | 80 | filter = rcu_dereference_sched(data->filter); |
81 | if (filter && !filter_match_preds(filter, rec)) | 81 | if (filter && !filter_match_preds(filter, rec)) |
82 | continue; | 82 | continue; |
83 | if (data->cmd_ops->post_trigger) { | 83 | if (data->cmd_ops->post_trigger) { |
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 5b781d2be383..ffd56351b521 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c | |||
@@ -58,12 +58,16 @@ int ftrace_create_function_files(struct trace_array *tr, | |||
58 | { | 58 | { |
59 | int ret; | 59 | int ret; |
60 | 60 | ||
61 | /* The top level array uses the "global_ops". */ | 61 | /* |
62 | if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) { | 62 | * The top level array uses the "global_ops", and the files are |
63 | ret = allocate_ftrace_ops(tr); | 63 | * created on boot up. |
64 | if (ret) | 64 | */ |
65 | return ret; | 65 | if (tr->flags & TRACE_ARRAY_FL_GLOBAL) |
66 | } | 66 | return 0; |
67 | |||
68 | ret = allocate_ftrace_ops(tr); | ||
69 | if (ret) | ||
70 | return ret; | ||
67 | 71 | ||
68 | ftrace_create_filter_files(tr->ops, parent); | 72 | ftrace_create_filter_files(tr->ops, parent); |
69 | 73 | ||
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 930e51462dc8..c082a7441345 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c | |||
@@ -732,9 +732,15 @@ static int uprobe_buffer_enable(void) | |||
732 | 732 | ||
733 | static void uprobe_buffer_disable(void) | 733 | static void uprobe_buffer_disable(void) |
734 | { | 734 | { |
735 | int cpu; | ||
736 | |||
735 | BUG_ON(!mutex_is_locked(&event_mutex)); | 737 | BUG_ON(!mutex_is_locked(&event_mutex)); |
736 | 738 | ||
737 | if (--uprobe_buffer_refcnt == 0) { | 739 | if (--uprobe_buffer_refcnt == 0) { |
740 | for_each_possible_cpu(cpu) | ||
741 | free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, | ||
742 | cpu)->buf); | ||
743 | |||
738 | free_percpu(uprobe_cpu_buffer); | 744 | free_percpu(uprobe_cpu_buffer); |
739 | uprobe_cpu_buffer = NULL; | 745 | uprobe_cpu_buffer = NULL; |
740 | } | 746 | } |
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index ac5b23cf7212..6620e5837ce2 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c | |||
@@ -188,7 +188,6 @@ static int tracepoint_add_func(struct tracepoint *tp, | |||
188 | WARN_ON_ONCE(1); | 188 | WARN_ON_ONCE(1); |
189 | return PTR_ERR(old); | 189 | return PTR_ERR(old); |
190 | } | 190 | } |
191 | release_probes(old); | ||
192 | 191 | ||
193 | /* | 192 | /* |
194 | * rcu_assign_pointer has a smp_wmb() which makes sure that the new | 193 | * rcu_assign_pointer has a smp_wmb() which makes sure that the new |
@@ -200,6 +199,7 @@ static int tracepoint_add_func(struct tracepoint *tp, | |||
200 | rcu_assign_pointer(tp->funcs, tp_funcs); | 199 | rcu_assign_pointer(tp->funcs, tp_funcs); |
201 | if (!static_key_enabled(&tp->key)) | 200 | if (!static_key_enabled(&tp->key)) |
202 | static_key_slow_inc(&tp->key); | 201 | static_key_slow_inc(&tp->key); |
202 | release_probes(old); | ||
203 | return 0; | 203 | return 0; |
204 | } | 204 | } |
205 | 205 | ||
@@ -221,7 +221,6 @@ static int tracepoint_remove_func(struct tracepoint *tp, | |||
221 | WARN_ON_ONCE(1); | 221 | WARN_ON_ONCE(1); |
222 | return PTR_ERR(old); | 222 | return PTR_ERR(old); |
223 | } | 223 | } |
224 | release_probes(old); | ||
225 | 224 | ||
226 | if (!tp_funcs) { | 225 | if (!tp_funcs) { |
227 | /* Removed last function */ | 226 | /* Removed last function */ |
@@ -232,6 +231,7 @@ static int tracepoint_remove_func(struct tracepoint *tp, | |||
232 | static_key_slow_dec(&tp->key); | 231 | static_key_slow_dec(&tp->key); |
233 | } | 232 | } |
234 | rcu_assign_pointer(tp->funcs, tp_funcs); | 233 | rcu_assign_pointer(tp->funcs, tp_funcs); |
234 | release_probes(old); | ||
235 | return 0; | 235 | return 0; |
236 | } | 236 | } |
237 | 237 | ||
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 0d8f6023fd8d..bf71b4b2d632 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c | |||
@@ -152,7 +152,7 @@ static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count) | |||
152 | 152 | ||
153 | /* Find the matching extent */ | 153 | /* Find the matching extent */ |
154 | extents = map->nr_extents; | 154 | extents = map->nr_extents; |
155 | smp_read_barrier_depends(); | 155 | smp_rmb(); |
156 | for (idx = 0; idx < extents; idx++) { | 156 | for (idx = 0; idx < extents; idx++) { |
157 | first = map->extent[idx].first; | 157 | first = map->extent[idx].first; |
158 | last = first + map->extent[idx].count - 1; | 158 | last = first + map->extent[idx].count - 1; |
@@ -176,7 +176,7 @@ static u32 map_id_down(struct uid_gid_map *map, u32 id) | |||
176 | 176 | ||
177 | /* Find the matching extent */ | 177 | /* Find the matching extent */ |
178 | extents = map->nr_extents; | 178 | extents = map->nr_extents; |
179 | smp_read_barrier_depends(); | 179 | smp_rmb(); |
180 | for (idx = 0; idx < extents; idx++) { | 180 | for (idx = 0; idx < extents; idx++) { |
181 | first = map->extent[idx].first; | 181 | first = map->extent[idx].first; |
182 | last = first + map->extent[idx].count - 1; | 182 | last = first + map->extent[idx].count - 1; |
@@ -199,7 +199,7 @@ static u32 map_id_up(struct uid_gid_map *map, u32 id) | |||
199 | 199 | ||
200 | /* Find the matching extent */ | 200 | /* Find the matching extent */ |
201 | extents = map->nr_extents; | 201 | extents = map->nr_extents; |
202 | smp_read_barrier_depends(); | 202 | smp_rmb(); |
203 | for (idx = 0; idx < extents; idx++) { | 203 | for (idx = 0; idx < extents; idx++) { |
204 | first = map->extent[idx].lower_first; | 204 | first = map->extent[idx].lower_first; |
205 | last = first + map->extent[idx].count - 1; | 205 | last = first + map->extent[idx].count - 1; |
@@ -615,9 +615,8 @@ static ssize_t map_write(struct file *file, const char __user *buf, | |||
615 | * were written before the count of the extents. | 615 | * were written before the count of the extents. |
616 | * | 616 | * |
617 | * To achieve this smp_wmb() is used on guarantee the write | 617 | * To achieve this smp_wmb() is used on guarantee the write |
618 | * order and smp_read_barrier_depends() is guaranteed that we | 618 | * order and smp_rmb() is guaranteed that we don't have crazy |
619 | * don't have crazy architectures returning stale data. | 619 | * architectures returning stale data. |
620 | * | ||
621 | */ | 620 | */ |
622 | mutex_lock(&id_map_mutex); | 621 | mutex_lock(&id_map_mutex); |
623 | 622 | ||
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index e90089fd78e0..516203e665fc 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -138,7 +138,11 @@ static void __touch_watchdog(void) | |||
138 | 138 | ||
139 | void touch_softlockup_watchdog(void) | 139 | void touch_softlockup_watchdog(void) |
140 | { | 140 | { |
141 | __this_cpu_write(watchdog_touch_ts, 0); | 141 | /* |
142 | * Preemption can be enabled. It doesn't matter which CPU's timestamp | ||
143 | * gets zeroed here, so use the raw_ operation. | ||
144 | */ | ||
145 | raw_cpu_write(watchdog_touch_ts, 0); | ||
142 | } | 146 | } |
143 | EXPORT_SYMBOL(touch_softlockup_watchdog); | 147 | EXPORT_SYMBOL(touch_softlockup_watchdog); |
144 | 148 | ||
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0ee63af30bd1..8edc87185427 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -1916,6 +1916,12 @@ static void send_mayday(struct work_struct *work) | |||
1916 | 1916 | ||
1917 | /* mayday mayday mayday */ | 1917 | /* mayday mayday mayday */ |
1918 | if (list_empty(&pwq->mayday_node)) { | 1918 | if (list_empty(&pwq->mayday_node)) { |
1919 | /* | ||
1920 | * If @pwq is for an unbound wq, its base ref may be put at | ||
1921 | * any time due to an attribute change. Pin @pwq until the | ||
1922 | * rescuer is done with it. | ||
1923 | */ | ||
1924 | get_pwq(pwq); | ||
1919 | list_add_tail(&pwq->mayday_node, &wq->maydays); | 1925 | list_add_tail(&pwq->mayday_node, &wq->maydays); |
1920 | wake_up_process(wq->rescuer->task); | 1926 | wake_up_process(wq->rescuer->task); |
1921 | } | 1927 | } |
@@ -2398,6 +2404,7 @@ static int rescuer_thread(void *__rescuer) | |||
2398 | struct worker *rescuer = __rescuer; | 2404 | struct worker *rescuer = __rescuer; |
2399 | struct workqueue_struct *wq = rescuer->rescue_wq; | 2405 | struct workqueue_struct *wq = rescuer->rescue_wq; |
2400 | struct list_head *scheduled = &rescuer->scheduled; | 2406 | struct list_head *scheduled = &rescuer->scheduled; |
2407 | bool should_stop; | ||
2401 | 2408 | ||
2402 | set_user_nice(current, RESCUER_NICE_LEVEL); | 2409 | set_user_nice(current, RESCUER_NICE_LEVEL); |
2403 | 2410 | ||
@@ -2409,11 +2416,15 @@ static int rescuer_thread(void *__rescuer) | |||
2409 | repeat: | 2416 | repeat: |
2410 | set_current_state(TASK_INTERRUPTIBLE); | 2417 | set_current_state(TASK_INTERRUPTIBLE); |
2411 | 2418 | ||
2412 | if (kthread_should_stop()) { | 2419 | /* |
2413 | __set_current_state(TASK_RUNNING); | 2420 | * By the time the rescuer is requested to stop, the workqueue |
2414 | rescuer->task->flags &= ~PF_WQ_WORKER; | 2421 | * shouldn't have any work pending, but @wq->maydays may still have |
2415 | return 0; | 2422 | * pwq(s) queued. This can happen by non-rescuer workers consuming |
2416 | } | 2423 | * all the work items before the rescuer got to them. Go through |
2424 | * @wq->maydays processing before acting on should_stop so that the | ||
2425 | * list is always empty on exit. | ||
2426 | */ | ||
2427 | should_stop = kthread_should_stop(); | ||
2417 | 2428 | ||
2418 | /* see whether any pwq is asking for help */ | 2429 | /* see whether any pwq is asking for help */ |
2419 | spin_lock_irq(&wq_mayday_lock); | 2430 | spin_lock_irq(&wq_mayday_lock); |
@@ -2445,6 +2456,12 @@ repeat: | |||
2445 | process_scheduled_works(rescuer); | 2456 | process_scheduled_works(rescuer); |
2446 | 2457 | ||
2447 | /* | 2458 | /* |
2459 | * Put the reference grabbed by send_mayday(). @pool won't | ||
2460 | * go away while we're holding its lock. | ||
2461 | */ | ||
2462 | put_pwq(pwq); | ||
2463 | |||
2464 | /* | ||
2448 | * Leave this pool. If keep_working() is %true, notify a | 2465 | * Leave this pool. If keep_working() is %true, notify a |
2449 | * regular worker; otherwise, we end up with 0 concurrency | 2466 | * regular worker; otherwise, we end up with 0 concurrency |
2450 | * and stalling the execution. | 2467 | * and stalling the execution. |
@@ -2459,6 +2476,12 @@ repeat: | |||
2459 | 2476 | ||
2460 | spin_unlock_irq(&wq_mayday_lock); | 2477 | spin_unlock_irq(&wq_mayday_lock); |
2461 | 2478 | ||
2479 | if (should_stop) { | ||
2480 | __set_current_state(TASK_RUNNING); | ||
2481 | rescuer->task->flags &= ~PF_WQ_WORKER; | ||
2482 | return 0; | ||
2483 | } | ||
2484 | |||
2462 | /* rescuers should never participate in concurrency management */ | 2485 | /* rescuers should never participate in concurrency management */ |
2463 | WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING)); | 2486 | WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING)); |
2464 | schedule(); | 2487 | schedule(); |
@@ -4100,7 +4123,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, | |||
4100 | if (!pwq) { | 4123 | if (!pwq) { |
4101 | pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n", | 4124 | pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n", |
4102 | wq->name); | 4125 | wq->name); |
4103 | goto out_unlock; | 4126 | mutex_lock(&wq->mutex); |
4127 | goto use_dfl_pwq; | ||
4104 | } | 4128 | } |
4105 | 4129 | ||
4106 | /* | 4130 | /* |