Diffstat (limited to 'kernel')
31 files changed, 238 insertions, 182 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 7c2893602d06..47845c57eb19 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -643,13 +643,13 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) | |||
643 | if ((task_active_pid_ns(current) != &init_pid_ns)) | 643 | if ((task_active_pid_ns(current) != &init_pid_ns)) |
644 | return -EPERM; | 644 | return -EPERM; |
645 | 645 | ||
646 | if (!capable(CAP_AUDIT_CONTROL)) | 646 | if (!netlink_capable(skb, CAP_AUDIT_CONTROL)) |
647 | err = -EPERM; | 647 | err = -EPERM; |
648 | break; | 648 | break; |
649 | case AUDIT_USER: | 649 | case AUDIT_USER: |
650 | case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: | 650 | case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: |
651 | case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: | 651 | case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: |
652 | if (!capable(CAP_AUDIT_WRITE)) | 652 | if (!netlink_capable(skb, CAP_AUDIT_WRITE)) |
653 | err = -EPERM; | 653 | err = -EPERM; |
654 | break; | 654 | break; |
655 | default: /* bad msg */ | 655 | default: /* bad msg */ |
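
Both audit.c hunks change the permission test from capable(), which looks at whatever task happens to be processing the netlink message, to netlink_capable(), which ties the check to the socket that sent the request. A minimal sketch of the resulting shape, condensed from audit_netlink_ok() above (only two of the real switch's cases are shown):

    /* sketch only: condensed from audit_netlink_ok(), not the full switch */
    static int example_audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
    {
            int err = 0;

            switch (msg_type) {
            case AUDIT_SET:                 /* control operation */
                    if (!netlink_capable(skb, CAP_AUDIT_CONTROL))
                            err = -EPERM;
                    break;
            case AUDIT_USER:                /* userspace audit record */
                    if (!netlink_capable(skb, CAP_AUDIT_WRITE))
                            err = -EPERM;
                    break;
            default:                        /* bad msg */
                    err = -EINVAL;
            }
            return err;
    }

Passing the skb is what lets the capability be evaluated against the sending socket rather than against current.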
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9fcdaa705b6c..3f1ca934a237 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -348,7 +348,7 @@ struct cgrp_cset_link { | |||
348 | * reference-counted, to improve performance when child cgroups | 348 | * reference-counted, to improve performance when child cgroups |
349 | * haven't been created. | 349 | * haven't been created. |
350 | */ | 350 | */ |
351 | static struct css_set init_css_set = { | 351 | struct css_set init_css_set = { |
352 | .refcount = ATOMIC_INIT(1), | 352 | .refcount = ATOMIC_INIT(1), |
353 | .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), | 353 | .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), |
354 | .tasks = LIST_HEAD_INIT(init_css_set.tasks), | 354 | .tasks = LIST_HEAD_INIT(init_css_set.tasks), |
@@ -1495,7 +1495,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1495 | */ | 1495 | */ |
1496 | if (!use_task_css_set_links) | 1496 | if (!use_task_css_set_links) |
1497 | cgroup_enable_task_cg_lists(); | 1497 | cgroup_enable_task_cg_lists(); |
1498 | retry: | 1498 | |
1499 | mutex_lock(&cgroup_tree_mutex); | 1499 | mutex_lock(&cgroup_tree_mutex); |
1500 | mutex_lock(&cgroup_mutex); | 1500 | mutex_lock(&cgroup_mutex); |
1501 | 1501 | ||
@@ -1503,7 +1503,7 @@ retry: | |||
1503 | ret = parse_cgroupfs_options(data, &opts); | 1503 | ret = parse_cgroupfs_options(data, &opts); |
1504 | if (ret) | 1504 | if (ret) |
1505 | goto out_unlock; | 1505 | goto out_unlock; |
1506 | 1506 | retry: | |
1507 | /* look for a matching existing root */ | 1507 | /* look for a matching existing root */ |
1508 | if (!opts.subsys_mask && !opts.none && !opts.name) { | 1508 | if (!opts.subsys_mask && !opts.none && !opts.name) { |
1509 | cgrp_dfl_root_visible = true; | 1509 | cgrp_dfl_root_visible = true; |
@@ -1562,9 +1562,9 @@ retry: | |||
1562 | if (!atomic_inc_not_zero(&root->cgrp.refcnt)) { | 1562 | if (!atomic_inc_not_zero(&root->cgrp.refcnt)) { |
1563 | mutex_unlock(&cgroup_mutex); | 1563 | mutex_unlock(&cgroup_mutex); |
1564 | mutex_unlock(&cgroup_tree_mutex); | 1564 | mutex_unlock(&cgroup_tree_mutex); |
1565 | kfree(opts.release_agent); | ||
1566 | kfree(opts.name); | ||
1567 | msleep(10); | 1565 | msleep(10); |
1566 | mutex_lock(&cgroup_tree_mutex); | ||
1567 | mutex_lock(&cgroup_mutex); | ||
1568 | goto retry; | 1568 | goto retry; |
1569 | } | 1569 | } |
1570 | 1570 | ||
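
The cgroup_mount() hunks move the retry: label below option parsing and make the retry path re-acquire both mutexes, rather than freeing the parsed options and looping back with the locks already dropped. Roughly, the retry shape after this change is (error handling and the root lookup elided):

    	mutex_lock(&cgroup_tree_mutex);
    	mutex_lock(&cgroup_mutex);

    	ret = parse_cgroupfs_options(data, &opts);
    	if (ret)
    		goto out_unlock;
    retry:
    	/* ... look for a matching existing root ... */
    	if (!atomic_inc_not_zero(&root->cgrp.refcnt)) {
    		/* root is being torn down: wait and retry, opts stays intact */
    		mutex_unlock(&cgroup_mutex);
    		mutex_unlock(&cgroup_tree_mutex);
    		msleep(10);
    		mutex_lock(&cgroup_tree_mutex);
    		mutex_lock(&cgroup_mutex);
    		goto retry;
    	}

Because opts is no longer rebuilt on every pass, the old kfree(opts.release_agent)/kfree(opts.name) calls in the retry path are dropped.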
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 2bc4a2256444..345628c78b5b 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/uaccess.h> | 21 | #include <linux/uaccess.h> |
22 | #include <linux/freezer.h> | 22 | #include <linux/freezer.h> |
23 | #include <linux/seq_file.h> | 23 | #include <linux/seq_file.h> |
24 | #include <linux/mutex.h> | ||
24 | 25 | ||
25 | /* | 26 | /* |
26 | * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is | 27 | * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is |
@@ -42,9 +43,10 @@ enum freezer_state_flags { | |||
42 | struct freezer { | 43 | struct freezer { |
43 | struct cgroup_subsys_state css; | 44 | struct cgroup_subsys_state css; |
44 | unsigned int state; | 45 | unsigned int state; |
45 | spinlock_t lock; | ||
46 | }; | 46 | }; |
47 | 47 | ||
48 | static DEFINE_MUTEX(freezer_mutex); | ||
49 | |||
48 | static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) | 50 | static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) |
49 | { | 51 | { |
50 | return css ? container_of(css, struct freezer, css) : NULL; | 52 | return css ? container_of(css, struct freezer, css) : NULL; |
@@ -93,7 +95,6 @@ freezer_css_alloc(struct cgroup_subsys_state *parent_css) | |||
93 | if (!freezer) | 95 | if (!freezer) |
94 | return ERR_PTR(-ENOMEM); | 96 | return ERR_PTR(-ENOMEM); |
95 | 97 | ||
96 | spin_lock_init(&freezer->lock); | ||
97 | return &freezer->css; | 98 | return &freezer->css; |
98 | } | 99 | } |
99 | 100 | ||
@@ -110,14 +111,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css) | |||
110 | struct freezer *freezer = css_freezer(css); | 111 | struct freezer *freezer = css_freezer(css); |
111 | struct freezer *parent = parent_freezer(freezer); | 112 | struct freezer *parent = parent_freezer(freezer); |
112 | 113 | ||
113 | /* | 114 | mutex_lock(&freezer_mutex); |
114 | * The following double locking and freezing state inheritance | ||
115 | * guarantee that @cgroup can never escape ancestors' freezing | ||
116 | * states. See css_for_each_descendant_pre() for details. | ||
117 | */ | ||
118 | if (parent) | ||
119 | spin_lock_irq(&parent->lock); | ||
120 | spin_lock_nested(&freezer->lock, SINGLE_DEPTH_NESTING); | ||
121 | 115 | ||
122 | freezer->state |= CGROUP_FREEZER_ONLINE; | 116 | freezer->state |= CGROUP_FREEZER_ONLINE; |
123 | 117 | ||
@@ -126,10 +120,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css) | |||
126 | atomic_inc(&system_freezing_cnt); | 120 | atomic_inc(&system_freezing_cnt); |
127 | } | 121 | } |
128 | 122 | ||
129 | spin_unlock(&freezer->lock); | 123 | mutex_unlock(&freezer_mutex); |
130 | if (parent) | ||
131 | spin_unlock_irq(&parent->lock); | ||
132 | |||
133 | return 0; | 124 | return 0; |
134 | } | 125 | } |
135 | 126 | ||
@@ -144,14 +135,14 @@ static void freezer_css_offline(struct cgroup_subsys_state *css) | |||
144 | { | 135 | { |
145 | struct freezer *freezer = css_freezer(css); | 136 | struct freezer *freezer = css_freezer(css); |
146 | 137 | ||
147 | spin_lock_irq(&freezer->lock); | 138 | mutex_lock(&freezer_mutex); |
148 | 139 | ||
149 | if (freezer->state & CGROUP_FREEZING) | 140 | if (freezer->state & CGROUP_FREEZING) |
150 | atomic_dec(&system_freezing_cnt); | 141 | atomic_dec(&system_freezing_cnt); |
151 | 142 | ||
152 | freezer->state = 0; | 143 | freezer->state = 0; |
153 | 144 | ||
154 | spin_unlock_irq(&freezer->lock); | 145 | mutex_unlock(&freezer_mutex); |
155 | } | 146 | } |
156 | 147 | ||
157 | static void freezer_css_free(struct cgroup_subsys_state *css) | 148 | static void freezer_css_free(struct cgroup_subsys_state *css) |
@@ -175,7 +166,7 @@ static void freezer_attach(struct cgroup_subsys_state *new_css, | |||
175 | struct task_struct *task; | 166 | struct task_struct *task; |
176 | bool clear_frozen = false; | 167 | bool clear_frozen = false; |
177 | 168 | ||
178 | spin_lock_irq(&freezer->lock); | 169 | mutex_lock(&freezer_mutex); |
179 | 170 | ||
180 | /* | 171 | /* |
181 | * Make the new tasks conform to the current state of @new_css. | 172 | * Make the new tasks conform to the current state of @new_css. |
@@ -197,21 +188,13 @@ static void freezer_attach(struct cgroup_subsys_state *new_css, | |||
197 | } | 188 | } |
198 | } | 189 | } |
199 | 190 | ||
200 | spin_unlock_irq(&freezer->lock); | 191 | /* propagate FROZEN clearing upwards */ |
201 | |||
202 | /* | ||
203 | * Propagate FROZEN clearing upwards. We may race with | ||
204 | * update_if_frozen(), but as long as both work bottom-up, either | ||
205 | * update_if_frozen() sees child's FROZEN cleared or we clear the | ||
206 | * parent's FROZEN later. No parent w/ !FROZEN children can be | ||
207 | * left FROZEN. | ||
208 | */ | ||
209 | while (clear_frozen && (freezer = parent_freezer(freezer))) { | 192 | while (clear_frozen && (freezer = parent_freezer(freezer))) { |
210 | spin_lock_irq(&freezer->lock); | ||
211 | freezer->state &= ~CGROUP_FROZEN; | 193 | freezer->state &= ~CGROUP_FROZEN; |
212 | clear_frozen = freezer->state & CGROUP_FREEZING; | 194 | clear_frozen = freezer->state & CGROUP_FREEZING; |
213 | spin_unlock_irq(&freezer->lock); | ||
214 | } | 195 | } |
196 | |||
197 | mutex_unlock(&freezer_mutex); | ||
215 | } | 198 | } |
216 | 199 | ||
217 | /** | 200 | /** |
@@ -228,9 +211,6 @@ static void freezer_fork(struct task_struct *task) | |||
228 | { | 211 | { |
229 | struct freezer *freezer; | 212 | struct freezer *freezer; |
230 | 213 | ||
231 | rcu_read_lock(); | ||
232 | freezer = task_freezer(task); | ||
233 | |||
234 | /* | 214 | /* |
235 | * The root cgroup is non-freezable, so we can skip locking the | 215 | * The root cgroup is non-freezable, so we can skip locking the |
236 | * freezer. This is safe regardless of race with task migration. | 216 | * freezer. This is safe regardless of race with task migration. |
@@ -238,24 +218,18 @@ static void freezer_fork(struct task_struct *task) | |||
238 | * to do. If we lost and root is the new cgroup, noop is still the | 218 | * to do. If we lost and root is the new cgroup, noop is still the |
239 | * right thing to do. | 219 | * right thing to do. |
240 | */ | 220 | */ |
241 | if (!parent_freezer(freezer)) | 221 | if (task_css_is_root(task, freezer_cgrp_id)) |
242 | goto out; | 222 | return; |
243 | 223 | ||
244 | /* | 224 | mutex_lock(&freezer_mutex); |
245 | * Grab @freezer->lock and freeze @task after verifying @task still | 225 | rcu_read_lock(); |
246 | * belongs to @freezer and it's freezing. The former is for the | 226 | |
247 | * case where we have raced against task migration and lost and | 227 | freezer = task_freezer(task); |
248 | * @task is already in a different cgroup which may not be frozen. | 228 | if (freezer->state & CGROUP_FREEZING) |
249 | * This isn't strictly necessary as freeze_task() is allowed to be | ||
250 | * called spuriously but let's do it anyway for, if nothing else, | ||
251 | * documentation. | ||
252 | */ | ||
253 | spin_lock_irq(&freezer->lock); | ||
254 | if (freezer == task_freezer(task) && (freezer->state & CGROUP_FREEZING)) | ||
255 | freeze_task(task); | 229 | freeze_task(task); |
256 | spin_unlock_irq(&freezer->lock); | 230 | |
257 | out: | ||
258 | rcu_read_unlock(); | 231 | rcu_read_unlock(); |
232 | mutex_unlock(&freezer_mutex); | ||
259 | } | 233 | } |
260 | 234 | ||
261 | /** | 235 | /** |
@@ -281,22 +255,24 @@ static void update_if_frozen(struct cgroup_subsys_state *css) | |||
281 | struct css_task_iter it; | 255 | struct css_task_iter it; |
282 | struct task_struct *task; | 256 | struct task_struct *task; |
283 | 257 | ||
284 | WARN_ON_ONCE(!rcu_read_lock_held()); | 258 | lockdep_assert_held(&freezer_mutex); |
285 | |||
286 | spin_lock_irq(&freezer->lock); | ||
287 | 259 | ||
288 | if (!(freezer->state & CGROUP_FREEZING) || | 260 | if (!(freezer->state & CGROUP_FREEZING) || |
289 | (freezer->state & CGROUP_FROZEN)) | 261 | (freezer->state & CGROUP_FROZEN)) |
290 | goto out_unlock; | 262 | return; |
291 | 263 | ||
292 | /* are all (live) children frozen? */ | 264 | /* are all (live) children frozen? */ |
265 | rcu_read_lock(); | ||
293 | css_for_each_child(pos, css) { | 266 | css_for_each_child(pos, css) { |
294 | struct freezer *child = css_freezer(pos); | 267 | struct freezer *child = css_freezer(pos); |
295 | 268 | ||
296 | if ((child->state & CGROUP_FREEZER_ONLINE) && | 269 | if ((child->state & CGROUP_FREEZER_ONLINE) && |
297 | !(child->state & CGROUP_FROZEN)) | 270 | !(child->state & CGROUP_FROZEN)) { |
298 | goto out_unlock; | 271 | rcu_read_unlock(); |
272 | return; | ||
273 | } | ||
299 | } | 274 | } |
275 | rcu_read_unlock(); | ||
300 | 276 | ||
301 | /* are all tasks frozen? */ | 277 | /* are all tasks frozen? */ |
302 | css_task_iter_start(css, &it); | 278 | css_task_iter_start(css, &it); |
@@ -317,21 +293,29 @@ static void update_if_frozen(struct cgroup_subsys_state *css) | |||
317 | freezer->state |= CGROUP_FROZEN; | 293 | freezer->state |= CGROUP_FROZEN; |
318 | out_iter_end: | 294 | out_iter_end: |
319 | css_task_iter_end(&it); | 295 | css_task_iter_end(&it); |
320 | out_unlock: | ||
321 | spin_unlock_irq(&freezer->lock); | ||
322 | } | 296 | } |
323 | 297 | ||
324 | static int freezer_read(struct seq_file *m, void *v) | 298 | static int freezer_read(struct seq_file *m, void *v) |
325 | { | 299 | { |
326 | struct cgroup_subsys_state *css = seq_css(m), *pos; | 300 | struct cgroup_subsys_state *css = seq_css(m), *pos; |
327 | 301 | ||
302 | mutex_lock(&freezer_mutex); | ||
328 | rcu_read_lock(); | 303 | rcu_read_lock(); |
329 | 304 | ||
330 | /* update states bottom-up */ | 305 | /* update states bottom-up */ |
331 | css_for_each_descendant_post(pos, css) | 306 | css_for_each_descendant_post(pos, css) { |
307 | if (!css_tryget(pos)) | ||
308 | continue; | ||
309 | rcu_read_unlock(); | ||
310 | |||
332 | update_if_frozen(pos); | 311 | update_if_frozen(pos); |
333 | 312 | ||
313 | rcu_read_lock(); | ||
314 | css_put(pos); | ||
315 | } | ||
316 | |||
334 | rcu_read_unlock(); | 317 | rcu_read_unlock(); |
318 | mutex_unlock(&freezer_mutex); | ||
335 | 319 | ||
336 | seq_puts(m, freezer_state_strs(css_freezer(css)->state)); | 320 | seq_puts(m, freezer_state_strs(css_freezer(css)->state)); |
337 | seq_putc(m, '\n'); | 321 | seq_putc(m, '\n'); |
@@ -373,7 +357,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze, | |||
373 | unsigned int state) | 357 | unsigned int state) |
374 | { | 358 | { |
375 | /* also synchronizes against task migration, see freezer_attach() */ | 359 | /* also synchronizes against task migration, see freezer_attach() */ |
376 | lockdep_assert_held(&freezer->lock); | 360 | lockdep_assert_held(&freezer_mutex); |
377 | 361 | ||
378 | if (!(freezer->state & CGROUP_FREEZER_ONLINE)) | 362 | if (!(freezer->state & CGROUP_FREEZER_ONLINE)) |
379 | return; | 363 | return; |
@@ -414,31 +398,29 @@ static void freezer_change_state(struct freezer *freezer, bool freeze) | |||
414 | * descendant will try to inherit its parent's FREEZING state as | 398 | * descendant will try to inherit its parent's FREEZING state as |
415 | * CGROUP_FREEZING_PARENT. | 399 | * CGROUP_FREEZING_PARENT. |
416 | */ | 400 | */ |
401 | mutex_lock(&freezer_mutex); | ||
417 | rcu_read_lock(); | 402 | rcu_read_lock(); |
418 | css_for_each_descendant_pre(pos, &freezer->css) { | 403 | css_for_each_descendant_pre(pos, &freezer->css) { |
419 | struct freezer *pos_f = css_freezer(pos); | 404 | struct freezer *pos_f = css_freezer(pos); |
420 | struct freezer *parent = parent_freezer(pos_f); | 405 | struct freezer *parent = parent_freezer(pos_f); |
421 | 406 | ||
422 | spin_lock_irq(&pos_f->lock); | 407 | if (!css_tryget(pos)) |
408 | continue; | ||
409 | rcu_read_unlock(); | ||
423 | 410 | ||
424 | if (pos_f == freezer) { | 411 | if (pos_f == freezer) |
425 | freezer_apply_state(pos_f, freeze, | 412 | freezer_apply_state(pos_f, freeze, |
426 | CGROUP_FREEZING_SELF); | 413 | CGROUP_FREEZING_SELF); |
427 | } else { | 414 | else |
428 | /* | ||
429 | * Our update to @parent->state is already visible | ||
430 | * which is all we need. No need to lock @parent. | ||
431 | * For more info on synchronization, see | ||
432 | * freezer_post_create(). | ||
433 | */ | ||
434 | freezer_apply_state(pos_f, | 415 | freezer_apply_state(pos_f, |
435 | parent->state & CGROUP_FREEZING, | 416 | parent->state & CGROUP_FREEZING, |
436 | CGROUP_FREEZING_PARENT); | 417 | CGROUP_FREEZING_PARENT); |
437 | } | ||
438 | 418 | ||
439 | spin_unlock_irq(&pos_f->lock); | 419 | rcu_read_lock(); |
420 | css_put(pos); | ||
440 | } | 421 | } |
441 | rcu_read_unlock(); | 422 | rcu_read_unlock(); |
423 | mutex_unlock(&freezer_mutex); | ||
442 | } | 424 | } |
443 | 425 | ||
444 | static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft, | 426 | static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft, |
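
Taken together, the cgroup_freezer.c hunks drop the per-freezer spinlock in favour of a single freezer_mutex, and the descendant walks now pin each css with css_tryget() so the RCU read lock can be released while the freezer work runs under the mutex alone. A minimal sketch of that walk pattern as used in freezer_read()/freezer_change_state() above, where do_work() stands in for update_if_frozen() or freezer_apply_state():

    	mutex_lock(&freezer_mutex);
    	rcu_read_lock();
    	css_for_each_descendant_pre(pos, &freezer->css) {
    		if (!css_tryget(pos))		/* skip css'es already going away */
    			continue;
    		rcu_read_unlock();

    		do_work(css_freezer(pos));	/* protected by freezer_mutex only */

    		rcu_read_lock();
    		css_put(pos);
    	}
    	rcu_read_unlock();
    	mutex_unlock(&freezer_mutex);

With one global mutex there is no parent/child lock nesting left to get right, which is what the removed SINGLE_DEPTH_NESTING dance in freezer_css_online() existed for.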
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 6cb20d2e7ee0..019d45008448 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -120,7 +120,7 @@ void context_tracking_user_enter(void) | |||
120 | * instead of preempt_schedule() to exit user context if needed before | 120 | * instead of preempt_schedule() to exit user context if needed before |
121 | * calling the scheduler. | 121 | * calling the scheduler. |
122 | */ | 122 | */ |
123 | asmlinkage void __sched notrace preempt_schedule_context(void) | 123 | asmlinkage __visible void __sched notrace preempt_schedule_context(void) |
124 | { | 124 | { |
125 | enum ctx_state prev_ctx; | 125 | enum ctx_state prev_ctx; |
126 | 126 | ||
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index d55092ceee29..e0501fe7140d 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -234,6 +234,11 @@ again: | |||
234 | goto again; | 234 | goto again; |
235 | } | 235 | } |
236 | timer->base = new_base; | 236 | timer->base = new_base; |
237 | } else { | ||
238 | if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { | ||
239 | cpu = this_cpu; | ||
240 | goto again; | ||
241 | } | ||
237 | } | 242 | } |
238 | return new_base; | 243 | return new_base; |
239 | } | 244 | } |
@@ -569,6 +574,23 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) | |||
569 | 574 | ||
570 | cpu_base->expires_next.tv64 = expires_next.tv64; | 575 | cpu_base->expires_next.tv64 = expires_next.tv64; |
571 | 576 | ||
577 | /* | ||
578 | * If a hang was detected in the last timer interrupt then we | ||
579 | * leave the hang delay active in the hardware. We want the | ||
580 | * system to make progress. That also prevents the following | ||
581 | * scenario: | ||
582 | * T1 expires 50ms from now | ||
583 | * T2 expires 5s from now | ||
584 | * | ||
585 | * T1 is removed, so this code is called and would reprogram | ||
586 | * the hardware to 5s from now. Any hrtimer_start after that | ||
587 | * will not reprogram the hardware due to hang_detected being | ||
588 | * set. So we'd effectivly block all timers until the T2 event | ||
589 | * fires. | ||
590 | */ | ||
591 | if (cpu_base->hang_detected) | ||
592 | return; | ||
593 | |||
572 | if (cpu_base->expires_next.tv64 != KTIME_MAX) | 594 | if (cpu_base->expires_next.tv64 != KTIME_MAX) |
573 | tick_program_event(cpu_base->expires_next, 1); | 595 | tick_program_event(cpu_base->expires_next, 1); |
574 | } | 596 | } |
@@ -968,11 +990,8 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, | |||
968 | /* Remove an active timer from the queue: */ | 990 | /* Remove an active timer from the queue: */ |
969 | ret = remove_hrtimer(timer, base); | 991 | ret = remove_hrtimer(timer, base); |
970 | 992 | ||
971 | /* Switch the timer base, if necessary: */ | ||
972 | new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); | ||
973 | |||
974 | if (mode & HRTIMER_MODE_REL) { | 993 | if (mode & HRTIMER_MODE_REL) { |
975 | tim = ktime_add_safe(tim, new_base->get_time()); | 994 | tim = ktime_add_safe(tim, base->get_time()); |
976 | /* | 995 | /* |
977 | * CONFIG_TIME_LOW_RES is a temporary way for architectures | 996 | * CONFIG_TIME_LOW_RES is a temporary way for architectures |
978 | * to signal that they simply return xtime in | 997 | * to signal that they simply return xtime in |
@@ -987,6 +1006,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, | |||
987 | 1006 | ||
988 | hrtimer_set_expires_range_ns(timer, tim, delta_ns); | 1007 | hrtimer_set_expires_range_ns(timer, tim, delta_ns); |
989 | 1008 | ||
1009 | /* Switch the timer base, if necessary: */ | ||
1010 | new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); | ||
1011 | |||
990 | timer_stats_hrtimer_set_start_info(timer); | 1012 | timer_stats_hrtimer_set_start_info(timer); |
991 | 1013 | ||
992 | leftmost = enqueue_hrtimer(timer, new_base); | 1014 | leftmost = enqueue_hrtimer(timer, new_base); |
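
Two of the hrtimer.c hunks are connected: base switching relies on hrtimer_check_target(), which compares the timer's expiry against the target CPU's next scheduled event, so __hrtimer_start_range_ns() has to have the expiry set before it calls switch_hrtimer_base(). Condensed, and as a sketch only, the start path after these hunks reads roughly:

    	ret = remove_hrtimer(timer, base);

    	if (mode & HRTIMER_MODE_REL)	/* relative time added on the current base */
    		tim = ktime_add_safe(tim, base->get_time());

    	hrtimer_set_expires_range_ns(timer, tim, delta_ns);

    	/* only now pick (and possibly migrate to) the target base */
    	new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);

    	leftmost = enqueue_hrtimer(timer, new_base);

The hang_detected hunk is independent: it keeps the hang-recovery delay programmed instead of reprogramming the hardware to a far-away event that would then block all earlier timers.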
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index a7174617616b..bb07f2928f4b 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -363,6 +363,13 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, | |||
363 | if (from > irq) | 363 | if (from > irq) |
364 | return -EINVAL; | 364 | return -EINVAL; |
365 | from = irq; | 365 | from = irq; |
366 | } else { | ||
367 | /* | ||
368 | * For interrupts which are freely allocated the | ||
369 | * architecture can force a lower bound to the @from | ||
370 | * argument. x86 uses this to exclude the GSI space. | ||
371 | */ | ||
372 | from = arch_dynirq_lower_bound(from); | ||
366 | } | 373 | } |
367 | 374 | ||
368 | mutex_lock(&sparse_irq_lock); | 375 | mutex_lock(&sparse_irq_lock); |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 2486a4c1a710..d34131ca372b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -180,7 +180,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, | |||
180 | struct irq_chip *chip = irq_data_get_irq_chip(data); | 180 | struct irq_chip *chip = irq_data_get_irq_chip(data); |
181 | int ret; | 181 | int ret; |
182 | 182 | ||
183 | ret = chip->irq_set_affinity(data, mask, false); | 183 | ret = chip->irq_set_affinity(data, mask, force); |
184 | switch (ret) { | 184 | switch (ret) { |
185 | case IRQ_SET_MASK_OK: | 185 | case IRQ_SET_MASK_OK: |
186 | cpumask_copy(data->affinity, mask); | 186 | cpumask_copy(data->affinity, mask); |
@@ -192,7 +192,8 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, | |||
192 | return ret; | 192 | return ret; |
193 | } | 193 | } |
194 | 194 | ||
195 | int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) | 195 | int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, |
196 | bool force) | ||
196 | { | 197 | { |
197 | struct irq_chip *chip = irq_data_get_irq_chip(data); | 198 | struct irq_chip *chip = irq_data_get_irq_chip(data); |
198 | struct irq_desc *desc = irq_data_to_desc(data); | 199 | struct irq_desc *desc = irq_data_to_desc(data); |
@@ -202,7 +203,7 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) | |||
202 | return -EINVAL; | 203 | return -EINVAL; |
203 | 204 | ||
204 | if (irq_can_move_pcntxt(data)) { | 205 | if (irq_can_move_pcntxt(data)) { |
205 | ret = irq_do_set_affinity(data, mask, false); | 206 | ret = irq_do_set_affinity(data, mask, force); |
206 | } else { | 207 | } else { |
207 | irqd_set_move_pending(data); | 208 | irqd_set_move_pending(data); |
208 | irq_copy_pending(desc, mask); | 209 | irq_copy_pending(desc, mask); |
@@ -217,13 +218,7 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) | |||
217 | return ret; | 218 | return ret; |
218 | } | 219 | } |
219 | 220 | ||
220 | /** | 221 | int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force) |
221 | * irq_set_affinity - Set the irq affinity of a given irq | ||
222 | * @irq: Interrupt to set affinity | ||
223 | * @mask: cpumask | ||
224 | * | ||
225 | */ | ||
226 | int irq_set_affinity(unsigned int irq, const struct cpumask *mask) | ||
227 | { | 222 | { |
228 | struct irq_desc *desc = irq_to_desc(irq); | 223 | struct irq_desc *desc = irq_to_desc(irq); |
229 | unsigned long flags; | 224 | unsigned long flags; |
@@ -233,7 +228,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
233 | return -EINVAL; | 228 | return -EINVAL; |
234 | 229 | ||
235 | raw_spin_lock_irqsave(&desc->lock, flags); | 230 | raw_spin_lock_irqsave(&desc->lock, flags); |
236 | ret = __irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask); | 231 | ret = irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask, force); |
237 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 232 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
238 | return ret; | 233 | return ret; |
239 | } | 234 | } |
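
These hunks plumb the force argument through the affinity path instead of hard-coding false, so irq_do_set_affinity() hands the caller's value to the irq chip, and the renamed irq_set_affinity_locked() and __irq_set_affinity() both take it explicitly. The kernel-doc block for irq_set_affinity() disappears here because that name presumably becomes a thin wrapper in the interrupt header, which is outside this kernel/-only diff; under that assumption the callers would look along these lines:

    	/* assumed wrappers in <linux/interrupt.h>; not shown in this diffstat */
    	static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
    	{
    		return __irq_set_affinity(irq, m, false);	/* previous behaviour */
    	}

    	static inline int irq_force_affinity(unsigned int irq, const struct cpumask *m)
    	{
    		return __irq_set_affinity(irq, m, true);	/* pass force=true to the chip */
    	}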
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index b0e9467922e1..d24e4339b46d 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4188,7 +4188,7 @@ void debug_show_held_locks(struct task_struct *task) | |||
4188 | } | 4188 | } |
4189 | EXPORT_SYMBOL_GPL(debug_show_held_locks); | 4189 | EXPORT_SYMBOL_GPL(debug_show_held_locks); |
4190 | 4190 | ||
4191 | asmlinkage void lockdep_sys_exit(void) | 4191 | asmlinkage __visible void lockdep_sys_exit(void) |
4192 | { | 4192 | { |
4193 | struct task_struct *curr = current; | 4193 | struct task_struct *curr = current; |
4194 | 4194 | ||
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index e1191c996c59..5cf6731b98e9 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -71,18 +71,17 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, | |||
71 | 71 | ||
72 | void debug_mutex_unlock(struct mutex *lock) | 72 | void debug_mutex_unlock(struct mutex *lock) |
73 | { | 73 | { |
74 | if (unlikely(!debug_locks)) | 74 | if (likely(debug_locks)) { |
75 | return; | 75 | DEBUG_LOCKS_WARN_ON(lock->magic != lock); |
76 | 76 | ||
77 | DEBUG_LOCKS_WARN_ON(lock->magic != lock); | 77 | if (!lock->owner) |
78 | DEBUG_LOCKS_WARN_ON(!lock->owner); | ||
79 | else | ||
80 | DEBUG_LOCKS_WARN_ON(lock->owner != current); | ||
78 | 81 | ||
79 | if (!lock->owner) | 82 | DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); |
80 | DEBUG_LOCKS_WARN_ON(!lock->owner); | 83 | mutex_clear_owner(lock); |
81 | else | 84 | } |
82 | DEBUG_LOCKS_WARN_ON(lock->owner != current); | ||
83 | |||
84 | DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); | ||
85 | mutex_clear_owner(lock); | ||
86 | 85 | ||
87 | /* | 86 | /* |
88 | * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug | 87 | * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug |
diff --git a/kernel/module.c b/kernel/module.c
index 11869408f79b..079c4615607d 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -815,9 +815,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, | |||
815 | return -EFAULT; | 815 | return -EFAULT; |
816 | name[MODULE_NAME_LEN-1] = '\0'; | 816 | name[MODULE_NAME_LEN-1] = '\0'; |
817 | 817 | ||
818 | if (!(flags & O_NONBLOCK)) | ||
819 | pr_warn("waiting module removal not supported: please upgrade\n"); | ||
820 | |||
821 | if (mutex_lock_interruptible(&module_mutex) != 0) | 818 | if (mutex_lock_interruptible(&module_mutex) != 0) |
822 | return -EINTR; | 819 | return -EINTR; |
823 | 820 | ||
@@ -3271,6 +3268,9 @@ static int load_module(struct load_info *info, const char __user *uargs, | |||
3271 | 3268 | ||
3272 | dynamic_debug_setup(info->debug, info->num_debug); | 3269 | dynamic_debug_setup(info->debug, info->num_debug); |
3273 | 3270 | ||
3271 | /* Ftrace init must be called in the MODULE_STATE_UNFORMED state */ | ||
3272 | ftrace_module_init(mod); | ||
3273 | |||
3274 | /* Finally it's fully formed, ready to start executing. */ | 3274 | /* Finally it's fully formed, ready to start executing. */ |
3275 | err = complete_formation(mod, info); | 3275 | err = complete_formation(mod, info); |
3276 | if (err) | 3276 | if (err) |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 18fb7a2fb14b..1ea328aafdc9 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1586,7 +1586,7 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, | |||
1586 | return -ENOMEM; | 1586 | return -ENOMEM; |
1587 | } | 1587 | } |
1588 | 1588 | ||
1589 | asmlinkage int swsusp_save(void) | 1589 | asmlinkage __visible int swsusp_save(void) |
1590 | { | 1590 | { |
1591 | unsigned int nr_pages, nr_highmem; | 1591 | unsigned int nr_pages, nr_highmem; |
1592 | 1592 | ||
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index c3ad9cafe930..8233cd4047d7 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/console.h> | 15 | #include <linux/console.h> |
16 | #include <linux/cpu.h> | 16 | #include <linux/cpu.h> |
17 | #include <linux/cpuidle.h> | ||
17 | #include <linux/syscalls.h> | 18 | #include <linux/syscalls.h> |
18 | #include <linux/gfp.h> | 19 | #include <linux/gfp.h> |
19 | #include <linux/io.h> | 20 | #include <linux/io.h> |
@@ -53,7 +54,9 @@ static void freeze_begin(void) | |||
53 | 54 | ||
54 | static void freeze_enter(void) | 55 | static void freeze_enter(void) |
55 | { | 56 | { |
57 | cpuidle_resume(); | ||
56 | wait_event(suspend_freeze_wait_head, suspend_freeze_wake); | 58 | wait_event(suspend_freeze_wait_head, suspend_freeze_wake); |
59 | cpuidle_pause(); | ||
57 | } | 60 | } |
58 | 61 | ||
59 | void freeze_wake(void) | 62 | void freeze_wake(void) |
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index a45b50962295..7228258b85ec 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1674,7 +1674,7 @@ EXPORT_SYMBOL(printk_emit); | |||
1674 | * | 1674 | * |
1675 | * See the vsnprintf() documentation for format string extensions over C99. | 1675 | * See the vsnprintf() documentation for format string extensions over C99. |
1676 | */ | 1676 | */ |
1677 | asmlinkage int printk(const char *fmt, ...) | 1677 | asmlinkage __visible int printk(const char *fmt, ...) |
1678 | { | 1678 | { |
1679 | va_list args; | 1679 | va_list args; |
1680 | int r; | 1680 | int r; |
@@ -1737,7 +1737,7 @@ void early_vprintk(const char *fmt, va_list ap) | |||
1737 | } | 1737 | } |
1738 | } | 1738 | } |
1739 | 1739 | ||
1740 | asmlinkage void early_printk(const char *fmt, ...) | 1740 | asmlinkage __visible void early_printk(const char *fmt, ...) |
1741 | { | 1741 | { |
1742 | va_list ap; | 1742 | va_list ap; |
1743 | 1743 | ||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 268a45ea238c..d9d8ece46a15 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2192,7 +2192,7 @@ static inline void post_schedule(struct rq *rq) | |||
2192 | * schedule_tail - first thing a freshly forked thread must call. | 2192 | * schedule_tail - first thing a freshly forked thread must call. |
2193 | * @prev: the thread we just switched away from. | 2193 | * @prev: the thread we just switched away from. |
2194 | */ | 2194 | */ |
2195 | asmlinkage void schedule_tail(struct task_struct *prev) | 2195 | asmlinkage __visible void schedule_tail(struct task_struct *prev) |
2196 | __releases(rq->lock) | 2196 | __releases(rq->lock) |
2197 | { | 2197 | { |
2198 | struct rq *rq = this_rq(); | 2198 | struct rq *rq = this_rq(); |
@@ -2741,7 +2741,7 @@ static inline void sched_submit_work(struct task_struct *tsk) | |||
2741 | blk_schedule_flush_plug(tsk); | 2741 | blk_schedule_flush_plug(tsk); |
2742 | } | 2742 | } |
2743 | 2743 | ||
2744 | asmlinkage void __sched schedule(void) | 2744 | asmlinkage __visible void __sched schedule(void) |
2745 | { | 2745 | { |
2746 | struct task_struct *tsk = current; | 2746 | struct task_struct *tsk = current; |
2747 | 2747 | ||
@@ -2751,7 +2751,7 @@ asmlinkage void __sched schedule(void) | |||
2751 | EXPORT_SYMBOL(schedule); | 2751 | EXPORT_SYMBOL(schedule); |
2752 | 2752 | ||
2753 | #ifdef CONFIG_CONTEXT_TRACKING | 2753 | #ifdef CONFIG_CONTEXT_TRACKING |
2754 | asmlinkage void __sched schedule_user(void) | 2754 | asmlinkage __visible void __sched schedule_user(void) |
2755 | { | 2755 | { |
2756 | /* | 2756 | /* |
2757 | * If we come here after a random call to set_need_resched(), | 2757 | * If we come here after a random call to set_need_resched(), |
@@ -2783,7 +2783,7 @@ void __sched schedule_preempt_disabled(void) | |||
2783 | * off of preempt_enable. Kernel preemptions off return from interrupt | 2783 | * off of preempt_enable. Kernel preemptions off return from interrupt |
2784 | * occur there and call schedule directly. | 2784 | * occur there and call schedule directly. |
2785 | */ | 2785 | */ |
2786 | asmlinkage void __sched notrace preempt_schedule(void) | 2786 | asmlinkage __visible void __sched notrace preempt_schedule(void) |
2787 | { | 2787 | { |
2788 | /* | 2788 | /* |
2789 | * If there is a non-zero preempt_count or interrupts are disabled, | 2789 | * If there is a non-zero preempt_count or interrupts are disabled, |
@@ -2813,7 +2813,7 @@ EXPORT_SYMBOL(preempt_schedule); | |||
2813 | * Note, that this is called and return with irqs disabled. This will | 2813 | * Note, that this is called and return with irqs disabled. This will |
2814 | * protect us against recursive calling from irq. | 2814 | * protect us against recursive calling from irq. |
2815 | */ | 2815 | */ |
2816 | asmlinkage void __sched preempt_schedule_irq(void) | 2816 | asmlinkage __visible void __sched preempt_schedule_irq(void) |
2817 | { | 2817 | { |
2818 | enum ctx_state prev_state; | 2818 | enum ctx_state prev_state; |
2819 | 2819 | ||
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 27ef40925525..b08095786cb8 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1021,8 +1021,17 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev) | |||
1021 | 1021 | ||
1022 | dl_rq = &rq->dl; | 1022 | dl_rq = &rq->dl; |
1023 | 1023 | ||
1024 | if (need_pull_dl_task(rq, prev)) | 1024 | if (need_pull_dl_task(rq, prev)) { |
1025 | pull_dl_task(rq); | 1025 | pull_dl_task(rq); |
1026 | /* | ||
1027 | * pull_rt_task() can drop (and re-acquire) rq->lock; this | ||
1028 | * means a stop task can slip in, in which case we need to | ||
1029 | * re-start task selection. | ||
1030 | */ | ||
1031 | if (rq->stop && rq->stop->on_rq) | ||
1032 | return RETRY_TASK; | ||
1033 | } | ||
1034 | |||
1026 | /* | 1035 | /* |
1027 | * When prev is DL, we may throttle it in put_prev_task(). | 1036 | * When prev is DL, we may throttle it in put_prev_task(). |
1028 | * So, we update time before we check for dl_nr_running. | 1037 | * So, we update time before we check for dl_nr_running. |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7e9bd0b1fa9e..7570dd969c28 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1497,7 +1497,7 @@ static void task_numa_placement(struct task_struct *p) | |||
1497 | /* If the task is part of a group prevent parallel updates to group stats */ | 1497 | /* If the task is part of a group prevent parallel updates to group stats */ |
1498 | if (p->numa_group) { | 1498 | if (p->numa_group) { |
1499 | group_lock = &p->numa_group->lock; | 1499 | group_lock = &p->numa_group->lock; |
1500 | spin_lock(group_lock); | 1500 | spin_lock_irq(group_lock); |
1501 | } | 1501 | } |
1502 | 1502 | ||
1503 | /* Find the node with the highest number of faults */ | 1503 | /* Find the node with the highest number of faults */ |
@@ -1572,7 +1572,7 @@ static void task_numa_placement(struct task_struct *p) | |||
1572 | } | 1572 | } |
1573 | } | 1573 | } |
1574 | 1574 | ||
1575 | spin_unlock(group_lock); | 1575 | spin_unlock_irq(group_lock); |
1576 | } | 1576 | } |
1577 | 1577 | ||
1578 | /* Preferred node as the node with the most faults */ | 1578 | /* Preferred node as the node with the most faults */ |
@@ -1677,7 +1677,8 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, | |||
1677 | if (!join) | 1677 | if (!join) |
1678 | return; | 1678 | return; |
1679 | 1679 | ||
1680 | double_lock(&my_grp->lock, &grp->lock); | 1680 | BUG_ON(irqs_disabled()); |
1681 | double_lock_irq(&my_grp->lock, &grp->lock); | ||
1681 | 1682 | ||
1682 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) { | 1683 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) { |
1683 | my_grp->faults[i] -= p->numa_faults_memory[i]; | 1684 | my_grp->faults[i] -= p->numa_faults_memory[i]; |
@@ -1691,7 +1692,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, | |||
1691 | grp->nr_tasks++; | 1692 | grp->nr_tasks++; |
1692 | 1693 | ||
1693 | spin_unlock(&my_grp->lock); | 1694 | spin_unlock(&my_grp->lock); |
1694 | spin_unlock(&grp->lock); | 1695 | spin_unlock_irq(&grp->lock); |
1695 | 1696 | ||
1696 | rcu_assign_pointer(p->numa_group, grp); | 1697 | rcu_assign_pointer(p->numa_group, grp); |
1697 | 1698 | ||
@@ -1710,14 +1711,14 @@ void task_numa_free(struct task_struct *p) | |||
1710 | void *numa_faults = p->numa_faults_memory; | 1711 | void *numa_faults = p->numa_faults_memory; |
1711 | 1712 | ||
1712 | if (grp) { | 1713 | if (grp) { |
1713 | spin_lock(&grp->lock); | 1714 | spin_lock_irq(&grp->lock); |
1714 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) | 1715 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) |
1715 | grp->faults[i] -= p->numa_faults_memory[i]; | 1716 | grp->faults[i] -= p->numa_faults_memory[i]; |
1716 | grp->total_faults -= p->total_numa_faults; | 1717 | grp->total_faults -= p->total_numa_faults; |
1717 | 1718 | ||
1718 | list_del(&p->numa_entry); | 1719 | list_del(&p->numa_entry); |
1719 | grp->nr_tasks--; | 1720 | grp->nr_tasks--; |
1720 | spin_unlock(&grp->lock); | 1721 | spin_unlock_irq(&grp->lock); |
1721 | rcu_assign_pointer(p->numa_group, NULL); | 1722 | rcu_assign_pointer(p->numa_group, NULL); |
1722 | put_numa_group(grp); | 1723 | put_numa_group(grp); |
1723 | } | 1724 | } |
@@ -6727,7 +6728,8 @@ static int idle_balance(struct rq *this_rq) | |||
6727 | out: | 6728 | out: |
6728 | /* Is there a task of a high priority class? */ | 6729 | /* Is there a task of a high priority class? */ |
6729 | if (this_rq->nr_running != this_rq->cfs.h_nr_running && | 6730 | if (this_rq->nr_running != this_rq->cfs.h_nr_running && |
6730 | (this_rq->dl.dl_nr_running || | 6731 | ((this_rq->stop && this_rq->stop->on_rq) || |
6732 | this_rq->dl.dl_nr_running || | ||
6731 | (this_rq->rt.rt_nr_running && !rt_rq_throttled(&this_rq->rt)))) | 6733 | (this_rq->rt.rt_nr_running && !rt_rq_throttled(&this_rq->rt)))) |
6732 | pulled_task = -1; | 6734 | pulled_task = -1; |
6733 | 6735 | ||
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index d8cdf1618551..bd2267ad404f 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1362,10 +1362,11 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev) | |||
1362 | pull_rt_task(rq); | 1362 | pull_rt_task(rq); |
1363 | /* | 1363 | /* |
1364 | * pull_rt_task() can drop (and re-acquire) rq->lock; this | 1364 | * pull_rt_task() can drop (and re-acquire) rq->lock; this |
1365 | * means a dl task can slip in, in which case we need to | 1365 | * means a dl or stop task can slip in, in which case we need |
1366 | * re-start task selection. | 1366 | * to re-start task selection. |
1367 | */ | 1367 | */ |
1368 | if (unlikely(rq->dl.dl_nr_running)) | 1368 | if (unlikely((rq->stop && rq->stop->on_rq) || |
1369 | rq->dl.dl_nr_running)) | ||
1369 | return RETRY_TASK; | 1370 | return RETRY_TASK; |
1370 | } | 1371 | } |
1371 | 1372 | ||
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c9007f28d3a2..456e492a3dca 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1385,6 +1385,15 @@ static inline void double_lock(spinlock_t *l1, spinlock_t *l2) | |||
1385 | spin_lock_nested(l2, SINGLE_DEPTH_NESTING); | 1385 | spin_lock_nested(l2, SINGLE_DEPTH_NESTING); |
1386 | } | 1386 | } |
1387 | 1387 | ||
1388 | static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2) | ||
1389 | { | ||
1390 | if (l1 > l2) | ||
1391 | swap(l1, l2); | ||
1392 | |||
1393 | spin_lock_irq(l1); | ||
1394 | spin_lock_nested(l2, SINGLE_DEPTH_NESTING); | ||
1395 | } | ||
1396 | |||
1388 | static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2) | 1397 | static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2) |
1389 | { | 1398 | { |
1390 | if (l1 > l2) | 1399 | if (l1 > l2) |
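
double_lock_irq() follows the same convention as double_lock() above it: order the two locks by address so any two callers locking the same pair always nest them the same way (avoiding ABBA deadlocks), and disable interrupts while taking the first one. The task_numa_group() hunk in fair.c is the intended user; schematically:

    	BUG_ON(irqs_disabled());		/* caller expects to be the one disabling irqs */
    	double_lock_irq(&my_grp->lock, &grp->lock);

    	/* ... move the NUMA fault counts from my_grp to grp ... */

    	spin_unlock(&my_grp->lock);
    	spin_unlock_irq(&grp->lock);		/* the last unlock re-enables interrupts */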
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d8d046c0726a..b35c21503a36 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -69,18 +69,17 @@ static void populate_seccomp_data(struct seccomp_data *sd) | |||
69 | { | 69 | { |
70 | struct task_struct *task = current; | 70 | struct task_struct *task = current; |
71 | struct pt_regs *regs = task_pt_regs(task); | 71 | struct pt_regs *regs = task_pt_regs(task); |
72 | unsigned long args[6]; | ||
72 | 73 | ||
73 | sd->nr = syscall_get_nr(task, regs); | 74 | sd->nr = syscall_get_nr(task, regs); |
74 | sd->arch = syscall_get_arch(); | 75 | sd->arch = syscall_get_arch(); |
75 | 76 | syscall_get_arguments(task, regs, 0, 6, args); | |
76 | /* Unroll syscall_get_args to help gcc on arm. */ | 77 | sd->args[0] = args[0]; |
77 | syscall_get_arguments(task, regs, 0, 1, (unsigned long *) &sd->args[0]); | 78 | sd->args[1] = args[1]; |
78 | syscall_get_arguments(task, regs, 1, 1, (unsigned long *) &sd->args[1]); | 79 | sd->args[2] = args[2]; |
79 | syscall_get_arguments(task, regs, 2, 1, (unsigned long *) &sd->args[2]); | 80 | sd->args[3] = args[3]; |
80 | syscall_get_arguments(task, regs, 3, 1, (unsigned long *) &sd->args[3]); | 81 | sd->args[4] = args[4]; |
81 | syscall_get_arguments(task, regs, 4, 1, (unsigned long *) &sd->args[4]); | 82 | sd->args[5] = args[5]; |
82 | syscall_get_arguments(task, regs, 5, 1, (unsigned long *) &sd->args[5]); | ||
83 | |||
84 | sd->instruction_pointer = KSTK_EIP(task); | 83 | sd->instruction_pointer = KSTK_EIP(task); |
85 | } | 84 | } |
86 | 85 | ||
@@ -256,6 +255,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) | |||
256 | goto free_prog; | 255 | goto free_prog; |
257 | 256 | ||
258 | /* Allocate a new seccomp_filter */ | 257 | /* Allocate a new seccomp_filter */ |
258 | ret = -ENOMEM; | ||
259 | filter = kzalloc(sizeof(struct seccomp_filter) + | 259 | filter = kzalloc(sizeof(struct seccomp_filter) + |
260 | sizeof(struct sock_filter_int) * new_len, | 260 | sizeof(struct sock_filter_int) * new_len, |
261 | GFP_KERNEL|__GFP_NOWARN); | 261 | GFP_KERNEL|__GFP_NOWARN); |
@@ -265,6 +265,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) | |||
265 | ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len); | 265 | ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len); |
266 | if (ret) | 266 | if (ret) |
267 | goto free_filter; | 267 | goto free_filter; |
268 | kfree(fp); | ||
268 | 269 | ||
269 | atomic_set(&filter->usage, 1); | 270 | atomic_set(&filter->usage, 1); |
270 | filter->len = new_len; | 271 | filter->len = new_len; |
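
The populate_seccomp_data() hunk fetches all six syscall arguments with one syscall_get_arguments() call into a correctly typed unsigned long array and then assigns them into the u64 args[] of struct seccomp_data; the old unrolled version cast &sd->args[i] to unsigned long *, which stores the wrong width when unsigned long is 32-bit but args[] is 64-bit. The copy is equivalent to this sketch (a loop in place of the unrolled assignments above):

    	unsigned long args[6];
    	int i;

    	syscall_get_arguments(task, regs, 0, 6, args);
    	for (i = 0; i < 6; i++)
    		sd->args[i] = args[i];	/* each unsigned long is widened into a u64 slot */

The two seccomp_attach_filter() hunks are separate fixes: ret is set to -ENOMEM before the kzalloc() so an allocation failure returns an error rather than the previous ret value, and kfree(fp) releases the copied classic filter once sk_convert_filter() has produced the internal program.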
diff --git a/kernel/softirq.c b/kernel/softirq.c
index b50990a5bea0..92f24f5e8d52 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -223,7 +223,7 @@ static inline bool lockdep_softirq_start(void) { return false; } | |||
223 | static inline void lockdep_softirq_end(bool in_hardirq) { } | 223 | static inline void lockdep_softirq_end(bool in_hardirq) { } |
224 | #endif | 224 | #endif |
225 | 225 | ||
226 | asmlinkage void __do_softirq(void) | 226 | asmlinkage __visible void __do_softirq(void) |
227 | { | 227 | { |
228 | unsigned long end = jiffies + MAX_SOFTIRQ_TIME; | 228 | unsigned long end = jiffies + MAX_SOFTIRQ_TIME; |
229 | unsigned long old_flags = current->flags; | 229 | unsigned long old_flags = current->flags; |
@@ -299,7 +299,7 @@ restart: | |||
299 | tsk_restore_flags(current, old_flags, PF_MEMALLOC); | 299 | tsk_restore_flags(current, old_flags, PF_MEMALLOC); |
300 | } | 300 | } |
301 | 301 | ||
302 | asmlinkage void do_softirq(void) | 302 | asmlinkage __visible void do_softirq(void) |
303 | { | 303 | { |
304 | __u32 pending; | 304 | __u32 pending; |
305 | unsigned long flags; | 305 | unsigned long flags; |
@@ -779,3 +779,8 @@ int __init __weak arch_early_irq_init(void) | |||
779 | { | 779 | { |
780 | return 0; | 780 | return 0; |
781 | } | 781 | } |
782 | |||
783 | unsigned int __weak arch_dynirq_lower_bound(unsigned int from) | ||
784 | { | ||
785 | return from; | ||
786 | } | ||
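
The new __weak arch_dynirq_lower_bound() simply echoes its argument, so nothing changes unless an architecture overrides it; __irq_alloc_descs() (irqdesc.c hunk above) consults it only for freely allocated descriptors. A purely hypothetical override, for an architecture that wants to keep its first reserved_irq_count numbers out of the dynamic pool (illustrative only, not the real x86 implementation):

    	unsigned int reserved_irq_count;	/* hypothetical: end of the fixed IRQ range */

    	unsigned int arch_dynirq_lower_bound(unsigned int from)
    	{
    		/* never hand out dynamically allocated IRQ numbers below the reserved range */
    		return max(from, reserved_irq_count);
    	}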
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 015661279b68..0a0608edeb26 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -276,7 +276,7 @@ static bool tick_check_preferred(struct clock_event_device *curdev, | |||
276 | bool tick_check_replacement(struct clock_event_device *curdev, | 276 | bool tick_check_replacement(struct clock_event_device *curdev, |
277 | struct clock_event_device *newdev) | 277 | struct clock_event_device *newdev) |
278 | { | 278 | { |
279 | if (tick_check_percpu(curdev, newdev, smp_processor_id())) | 279 | if (!tick_check_percpu(curdev, newdev, smp_processor_id())) |
280 | return false; | 280 | return false; |
281 | 281 | ||
282 | return tick_check_preferred(curdev, newdev); | 282 | return tick_check_preferred(curdev, newdev); |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 9f8af69c67ec..6558b7ac112d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -84,6 +84,9 @@ static void tick_do_update_jiffies64(ktime_t now) | |||
84 | 84 | ||
85 | /* Keep the tick_next_period variable up to date */ | 85 | /* Keep the tick_next_period variable up to date */ |
86 | tick_next_period = ktime_add(last_jiffies_update, tick_period); | 86 | tick_next_period = ktime_add(last_jiffies_update, tick_period); |
87 | } else { | ||
88 | write_sequnlock(&jiffies_lock); | ||
89 | return; | ||
87 | } | 90 | } |
88 | write_sequnlock(&jiffies_lock); | 91 | write_sequnlock(&jiffies_lock); |
89 | update_wall_time(); | 92 | update_wall_time(); |
@@ -967,7 +970,7 @@ static void tick_nohz_switch_to_nohz(void) | |||
967 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 970 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
968 | ktime_t next; | 971 | ktime_t next; |
969 | 972 | ||
970 | if (!tick_nohz_active) | 973 | if (!tick_nohz_enabled) |
971 | return; | 974 | return; |
972 | 975 | ||
973 | local_irq_disable(); | 976 | local_irq_disable(); |
diff --git a/kernel/timer.c b/kernel/timer.c
index 87bd529879c2..3bb01a323b2a 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -838,7 +838,7 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires) | |||
838 | 838 | ||
839 | bit = find_last_bit(&mask, BITS_PER_LONG); | 839 | bit = find_last_bit(&mask, BITS_PER_LONG); |
840 | 840 | ||
841 | mask = (1 << bit) - 1; | 841 | mask = (1UL << bit) - 1; |
842 | 842 | ||
843 | expires_limit = expires_limit & ~(mask); | 843 | expires_limit = expires_limit & ~(mask); |
844 | 844 | ||
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1fd4b9479210..4a54a25afa2f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4330,16 +4330,11 @@ static void ftrace_init_module(struct module *mod, | |||
4330 | ftrace_process_locs(mod, start, end); | 4330 | ftrace_process_locs(mod, start, end); |
4331 | } | 4331 | } |
4332 | 4332 | ||
4333 | static int ftrace_module_notify_enter(struct notifier_block *self, | 4333 | void ftrace_module_init(struct module *mod) |
4334 | unsigned long val, void *data) | ||
4335 | { | 4334 | { |
4336 | struct module *mod = data; | 4335 | ftrace_init_module(mod, mod->ftrace_callsites, |
4337 | 4336 | mod->ftrace_callsites + | |
4338 | if (val == MODULE_STATE_COMING) | 4337 | mod->num_ftrace_callsites); |
4339 | ftrace_init_module(mod, mod->ftrace_callsites, | ||
4340 | mod->ftrace_callsites + | ||
4341 | mod->num_ftrace_callsites); | ||
4342 | return 0; | ||
4343 | } | 4338 | } |
4344 | 4339 | ||
4345 | static int ftrace_module_notify_exit(struct notifier_block *self, | 4340 | static int ftrace_module_notify_exit(struct notifier_block *self, |
@@ -4353,11 +4348,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self, | |||
4353 | return 0; | 4348 | return 0; |
4354 | } | 4349 | } |
4355 | #else | 4350 | #else |
4356 | static int ftrace_module_notify_enter(struct notifier_block *self, | ||
4357 | unsigned long val, void *data) | ||
4358 | { | ||
4359 | return 0; | ||
4360 | } | ||
4361 | static int ftrace_module_notify_exit(struct notifier_block *self, | 4351 | static int ftrace_module_notify_exit(struct notifier_block *self, |
4362 | unsigned long val, void *data) | 4352 | unsigned long val, void *data) |
4363 | { | 4353 | { |
@@ -4365,11 +4355,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self, | |||
4365 | } | 4355 | } |
4366 | #endif /* CONFIG_MODULES */ | 4356 | #endif /* CONFIG_MODULES */ |
4367 | 4357 | ||
4368 | struct notifier_block ftrace_module_enter_nb = { | ||
4369 | .notifier_call = ftrace_module_notify_enter, | ||
4370 | .priority = INT_MAX, /* Run before anything that can use kprobes */ | ||
4371 | }; | ||
4372 | |||
4373 | struct notifier_block ftrace_module_exit_nb = { | 4358 | struct notifier_block ftrace_module_exit_nb = { |
4374 | .notifier_call = ftrace_module_notify_exit, | 4359 | .notifier_call = ftrace_module_notify_exit, |
4375 | .priority = INT_MIN, /* Run after anything that can remove kprobes */ | 4360 | .priority = INT_MIN, /* Run after anything that can remove kprobes */ |
@@ -4403,10 +4388,6 @@ void __init ftrace_init(void) | |||
4403 | __start_mcount_loc, | 4388 | __start_mcount_loc, |
4404 | __stop_mcount_loc); | 4389 | __stop_mcount_loc); |
4405 | 4390 | ||
4406 | ret = register_module_notifier(&ftrace_module_enter_nb); | ||
4407 | if (ret) | ||
4408 | pr_warning("Failed to register trace ftrace module enter notifier\n"); | ||
4409 | |||
4410 | ret = register_module_notifier(&ftrace_module_exit_nb); | 4391 | ret = register_module_notifier(&ftrace_module_exit_nb); |
4411 | if (ret) | 4392 | if (ret) |
4412 | pr_warning("Failed to register trace ftrace module exit notifier\n"); | 4393 | pr_warning("Failed to register trace ftrace module exit notifier\n"); |
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 925f537f07d1..4747b476a030 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -77,7 +77,7 @@ event_triggers_call(struct ftrace_event_file *file, void *rec) | |||
77 | data->ops->func(data); | 77 | data->ops->func(data); |
78 | continue; | 78 | continue; |
79 | } | 79 | } |
80 | filter = rcu_dereference(data->filter); | 80 | filter = rcu_dereference_sched(data->filter); |
81 | if (filter && !filter_match_preds(filter, rec)) | 81 | if (filter && !filter_match_preds(filter, rec)) |
82 | continue; | 82 | continue; |
83 | if (data->cmd_ops->post_trigger) { | 83 | if (data->cmd_ops->post_trigger) { |
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 5b781d2be383..ffd56351b521 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -58,12 +58,16 @@ int ftrace_create_function_files(struct trace_array *tr, | |||
58 | { | 58 | { |
59 | int ret; | 59 | int ret; |
60 | 60 | ||
61 | /* The top level array uses the "global_ops". */ | 61 | /* |
62 | if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) { | 62 | * The top level array uses the "global_ops", and the files are |
63 | ret = allocate_ftrace_ops(tr); | 63 | * created on boot up. |
64 | if (ret) | 64 | */ |
65 | return ret; | 65 | if (tr->flags & TRACE_ARRAY_FL_GLOBAL) |
66 | } | 66 | return 0; |
67 | |||
68 | ret = allocate_ftrace_ops(tr); | ||
69 | if (ret) | ||
70 | return ret; | ||
67 | 71 | ||
68 | ftrace_create_filter_files(tr->ops, parent); | 72 | ftrace_create_filter_files(tr->ops, parent); |
69 | 73 | ||
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 930e51462dc8..c082a7441345 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -732,9 +732,15 @@ static int uprobe_buffer_enable(void) | |||
732 | 732 | ||
733 | static void uprobe_buffer_disable(void) | 733 | static void uprobe_buffer_disable(void) |
734 | { | 734 | { |
735 | int cpu; | ||
736 | |||
735 | BUG_ON(!mutex_is_locked(&event_mutex)); | 737 | BUG_ON(!mutex_is_locked(&event_mutex)); |
736 | 738 | ||
737 | if (--uprobe_buffer_refcnt == 0) { | 739 | if (--uprobe_buffer_refcnt == 0) { |
740 | for_each_possible_cpu(cpu) | ||
741 | free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, | ||
742 | cpu)->buf); | ||
743 | |||
738 | free_percpu(uprobe_cpu_buffer); | 744 | free_percpu(uprobe_cpu_buffer); |
739 | uprobe_cpu_buffer = NULL; | 745 | uprobe_cpu_buffer = NULL; |
740 | } | 746 | } |
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index ac5b23cf7212..6620e5837ce2 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -188,7 +188,6 @@ static int tracepoint_add_func(struct tracepoint *tp, | |||
188 | WARN_ON_ONCE(1); | 188 | WARN_ON_ONCE(1); |
189 | return PTR_ERR(old); | 189 | return PTR_ERR(old); |
190 | } | 190 | } |
191 | release_probes(old); | ||
192 | 191 | ||
193 | /* | 192 | /* |
194 | * rcu_assign_pointer has a smp_wmb() which makes sure that the new | 193 | * rcu_assign_pointer has a smp_wmb() which makes sure that the new |
@@ -200,6 +199,7 @@ static int tracepoint_add_func(struct tracepoint *tp, | |||
200 | rcu_assign_pointer(tp->funcs, tp_funcs); | 199 | rcu_assign_pointer(tp->funcs, tp_funcs); |
201 | if (!static_key_enabled(&tp->key)) | 200 | if (!static_key_enabled(&tp->key)) |
202 | static_key_slow_inc(&tp->key); | 201 | static_key_slow_inc(&tp->key); |
202 | release_probes(old); | ||
203 | return 0; | 203 | return 0; |
204 | } | 204 | } |
205 | 205 | ||
@@ -221,7 +221,6 @@ static int tracepoint_remove_func(struct tracepoint *tp, | |||
221 | WARN_ON_ONCE(1); | 221 | WARN_ON_ONCE(1); |
222 | return PTR_ERR(old); | 222 | return PTR_ERR(old); |
223 | } | 223 | } |
224 | release_probes(old); | ||
225 | 224 | ||
226 | if (!tp_funcs) { | 225 | if (!tp_funcs) { |
227 | /* Removed last function */ | 226 | /* Removed last function */ |
@@ -232,6 +231,7 @@ static int tracepoint_remove_func(struct tracepoint *tp, | |||
232 | static_key_slow_dec(&tp->key); | 231 | static_key_slow_dec(&tp->key); |
233 | } | 232 | } |
234 | rcu_assign_pointer(tp->funcs, tp_funcs); | 233 | rcu_assign_pointer(tp->funcs, tp_funcs); |
234 | release_probes(old); | ||
235 | return 0; | 235 | return 0; |
236 | } | 236 | } |
237 | 237 | ||
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 0d8f6023fd8d..bf71b4b2d632 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -152,7 +152,7 @@ static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count) | |||
152 | 152 | ||
153 | /* Find the matching extent */ | 153 | /* Find the matching extent */ |
154 | extents = map->nr_extents; | 154 | extents = map->nr_extents; |
155 | smp_read_barrier_depends(); | 155 | smp_rmb(); |
156 | for (idx = 0; idx < extents; idx++) { | 156 | for (idx = 0; idx < extents; idx++) { |
157 | first = map->extent[idx].first; | 157 | first = map->extent[idx].first; |
158 | last = first + map->extent[idx].count - 1; | 158 | last = first + map->extent[idx].count - 1; |
@@ -176,7 +176,7 @@ static u32 map_id_down(struct uid_gid_map *map, u32 id) | |||
176 | 176 | ||
177 | /* Find the matching extent */ | 177 | /* Find the matching extent */ |
178 | extents = map->nr_extents; | 178 | extents = map->nr_extents; |
179 | smp_read_barrier_depends(); | 179 | smp_rmb(); |
180 | for (idx = 0; idx < extents; idx++) { | 180 | for (idx = 0; idx < extents; idx++) { |
181 | first = map->extent[idx].first; | 181 | first = map->extent[idx].first; |
182 | last = first + map->extent[idx].count - 1; | 182 | last = first + map->extent[idx].count - 1; |
@@ -199,7 +199,7 @@ static u32 map_id_up(struct uid_gid_map *map, u32 id) | |||
199 | 199 | ||
200 | /* Find the matching extent */ | 200 | /* Find the matching extent */ |
201 | extents = map->nr_extents; | 201 | extents = map->nr_extents; |
202 | smp_read_barrier_depends(); | 202 | smp_rmb(); |
203 | for (idx = 0; idx < extents; idx++) { | 203 | for (idx = 0; idx < extents; idx++) { |
204 | first = map->extent[idx].lower_first; | 204 | first = map->extent[idx].lower_first; |
205 | last = first + map->extent[idx].count - 1; | 205 | last = first + map->extent[idx].count - 1; |
@@ -615,9 +615,8 @@ static ssize_t map_write(struct file *file, const char __user *buf, | |||
615 | * were written before the count of the extents. | 615 | * were written before the count of the extents. |
616 | * | 616 | * |
617 | * To achieve this smp_wmb() is used on guarantee the write | 617 | * To achieve this smp_wmb() is used on guarantee the write |
618 | * order and smp_read_barrier_depends() is guaranteed that we | 618 | * order and smp_rmb() is guaranteed that we don't have crazy |
619 | * don't have crazy architectures returning stale data. | 619 | * architectures returning stale data. |
620 | * | ||
621 | */ | 620 | */ |
622 | mutex_lock(&id_map_mutex); | 621 | mutex_lock(&id_map_mutex); |
623 | 622 | ||
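
The user_namespace.c hunks swap smp_read_barrier_depends() for smp_rmb(): the readers load map->nr_extents and then read the extent array itself, and because the second access is not a pointer dereference that depends on the first value, a dependency barrier orders nothing here; a real read barrier is needed to pair with the writer's smp_wmb(). Schematically, paraphrasing map_write() and map_id_down() above:

    	/* writer, under id_map_mutex: publish the extents before the count */
    	map->extent[i] = new_extent;
    	smp_wmb();
    	map->nr_extents = new_nr_extents;

    	/* reader: observe the count, then the extents it covers */
    	extents = map->nr_extents;
    	smp_rmb();
    	for (idx = 0; idx < extents; idx++)
    		/* map->extent[idx] is fully initialised here */ ;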
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index e90089fd78e0..516203e665fc 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -138,7 +138,11 @@ static void __touch_watchdog(void) | |||
138 | 138 | ||
139 | void touch_softlockup_watchdog(void) | 139 | void touch_softlockup_watchdog(void) |
140 | { | 140 | { |
141 | __this_cpu_write(watchdog_touch_ts, 0); | 141 | /* |
142 | * Preemption can be enabled. It doesn't matter which CPU's timestamp | ||
143 | * gets zeroed here, so use the raw_ operation. | ||
144 | */ | ||
145 | raw_cpu_write(watchdog_touch_ts, 0); | ||
142 | } | 146 | } |
143 | EXPORT_SYMBOL(touch_softlockup_watchdog); | 147 | EXPORT_SYMBOL(touch_softlockup_watchdog); |
144 | 148 | ||
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 0ee63af30bd1..8edc87185427 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1916,6 +1916,12 @@ static void send_mayday(struct work_struct *work) | |||
1916 | 1916 | ||
1917 | /* mayday mayday mayday */ | 1917 | /* mayday mayday mayday */ |
1918 | if (list_empty(&pwq->mayday_node)) { | 1918 | if (list_empty(&pwq->mayday_node)) { |
1919 | /* | ||
1920 | * If @pwq is for an unbound wq, its base ref may be put at | ||
1921 | * any time due to an attribute change. Pin @pwq until the | ||
1922 | * rescuer is done with it. | ||
1923 | */ | ||
1924 | get_pwq(pwq); | ||
1919 | list_add_tail(&pwq->mayday_node, &wq->maydays); | 1925 | list_add_tail(&pwq->mayday_node, &wq->maydays); |
1920 | wake_up_process(wq->rescuer->task); | 1926 | wake_up_process(wq->rescuer->task); |
1921 | } | 1927 | } |
@@ -2398,6 +2404,7 @@ static int rescuer_thread(void *__rescuer) | |||
2398 | struct worker *rescuer = __rescuer; | 2404 | struct worker *rescuer = __rescuer; |
2399 | struct workqueue_struct *wq = rescuer->rescue_wq; | 2405 | struct workqueue_struct *wq = rescuer->rescue_wq; |
2400 | struct list_head *scheduled = &rescuer->scheduled; | 2406 | struct list_head *scheduled = &rescuer->scheduled; |
2407 | bool should_stop; | ||
2401 | 2408 | ||
2402 | set_user_nice(current, RESCUER_NICE_LEVEL); | 2409 | set_user_nice(current, RESCUER_NICE_LEVEL); |
2403 | 2410 | ||
@@ -2409,11 +2416,15 @@ static int rescuer_thread(void *__rescuer) | |||
2409 | repeat: | 2416 | repeat: |
2410 | set_current_state(TASK_INTERRUPTIBLE); | 2417 | set_current_state(TASK_INTERRUPTIBLE); |
2411 | 2418 | ||
2412 | if (kthread_should_stop()) { | 2419 | /* |
2413 | __set_current_state(TASK_RUNNING); | 2420 | * By the time the rescuer is requested to stop, the workqueue |
2414 | rescuer->task->flags &= ~PF_WQ_WORKER; | 2421 | * shouldn't have any work pending, but @wq->maydays may still have |
2415 | return 0; | 2422 | * pwq(s) queued. This can happen by non-rescuer workers consuming |
2416 | } | 2423 | * all the work items before the rescuer got to them. Go through |
2424 | * @wq->maydays processing before acting on should_stop so that the | ||
2425 | * list is always empty on exit. | ||
2426 | */ | ||
2427 | should_stop = kthread_should_stop(); | ||
2417 | 2428 | ||
2418 | /* see whether any pwq is asking for help */ | 2429 | /* see whether any pwq is asking for help */ |
2419 | spin_lock_irq(&wq_mayday_lock); | 2430 | spin_lock_irq(&wq_mayday_lock); |
@@ -2445,6 +2456,12 @@ repeat: | |||
2445 | process_scheduled_works(rescuer); | 2456 | process_scheduled_works(rescuer); |
2446 | 2457 | ||
2447 | /* | 2458 | /* |
2459 | * Put the reference grabbed by send_mayday(). @pool won't | ||
2460 | * go away while we're holding its lock. | ||
2461 | */ | ||
2462 | put_pwq(pwq); | ||
2463 | |||
2464 | /* | ||
2448 | * Leave this pool. If keep_working() is %true, notify a | 2465 | * Leave this pool. If keep_working() is %true, notify a |
2449 | * regular worker; otherwise, we end up with 0 concurrency | 2466 | * regular worker; otherwise, we end up with 0 concurrency |
2450 | * and stalling the execution. | 2467 | * and stalling the execution. |
@@ -2459,6 +2476,12 @@ repeat: | |||
2459 | 2476 | ||
2460 | spin_unlock_irq(&wq_mayday_lock); | 2477 | spin_unlock_irq(&wq_mayday_lock); |
2461 | 2478 | ||
2479 | if (should_stop) { | ||
2480 | __set_current_state(TASK_RUNNING); | ||
2481 | rescuer->task->flags &= ~PF_WQ_WORKER; | ||
2482 | return 0; | ||
2483 | } | ||
2484 | |||
2462 | /* rescuers should never participate in concurrency management */ | 2485 | /* rescuers should never participate in concurrency management */ |
2463 | WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING)); | 2486 | WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING)); |
2464 | schedule(); | 2487 | schedule(); |
@@ -4100,7 +4123,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, | |||
4100 | if (!pwq) { | 4123 | if (!pwq) { |
4101 | pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n", | 4124 | pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n", |
4102 | wq->name); | 4125 | wq->name); |
4103 | goto out_unlock; | 4126 | mutex_lock(&wq->mutex); |
4127 | goto use_dfl_pwq; | ||
4104 | } | 4128 | } |
4105 | 4129 | ||
4106 | /* | 4130 | /* |