| author | Ingo Molnar <mingo@kernel.org> | 2014-06-06 01:55:06 -0400 |
| committer | Ingo Molnar <mingo@kernel.org> | 2014-06-06 01:55:06 -0400 |
| commit | ec00010972a0971b2c1da4fbe4e5c7d8ed1ecb05 (patch) | |
| tree | c28975d7daf6d8a3aa23afe8f42837b71105b269 /kernel | |
| parent | 8c6e549a447c51f4f8c0ba7f1e444469f75a354a (diff) | |
| parent | e041e328c4b41e1db79bfe5ba9992c2ed771ad19 (diff) | |
Merge branch 'perf/urgent' into perf/core, to resolve conflict and to prepare for new patches
Conflicts:
arch/x86/kernel/traps.c
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/cgroup.c | 10 |
| -rw-r--r-- | kernel/cgroup_freezer.c | 116 |
| -rw-r--r-- | kernel/context_tracking.c | 2 |
| -rw-r--r-- | kernel/cpu.c | 6 |
| -rw-r--r-- | kernel/events/core.c | 153 |
| -rw-r--r-- | kernel/futex.c | 52 |
| -rw-r--r-- | kernel/hrtimer.c | 8 |
| -rw-r--r-- | kernel/kexec.c | 8 |
| -rw-r--r-- | kernel/locking/lockdep.c | 2 |
| -rw-r--r-- | kernel/locking/rtmutex.c | 32 |
| -rw-r--r-- | kernel/power/snapshot.c | 2 |
| -rw-r--r-- | kernel/printk/printk.c | 4 |
| -rw-r--r-- | kernel/sched/core.c | 80 |
| -rw-r--r-- | kernel/sched/cpudeadline.c | 37 |
| -rw-r--r-- | kernel/sched/cpudeadline.h | 6 |
| -rw-r--r-- | kernel/sched/cpupri.c | 10 |
| -rw-r--r-- | kernel/sched/cpupri.h | 2 |
| -rw-r--r-- | kernel/sched/cputime.c | 32 |
| -rw-r--r-- | kernel/sched/deadline.c | 5 |
| -rw-r--r-- | kernel/sched/fair.c | 16 |
| -rw-r--r-- | kernel/softirq.c | 4 |
| -rw-r--r-- | kernel/tracepoint.c | 4 |
| -rw-r--r-- | kernel/workqueue.c | 36 |
23 files changed, 377 insertions, 250 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9fcdaa705b6c..3f1ca934a237 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
| @@ -348,7 +348,7 @@ struct cgrp_cset_link { | |||
| 348 | * reference-counted, to improve performance when child cgroups | 348 | * reference-counted, to improve performance when child cgroups |
| 349 | * haven't been created. | 349 | * haven't been created. |
| 350 | */ | 350 | */ |
| 351 | static struct css_set init_css_set = { | 351 | struct css_set init_css_set = { |
| 352 | .refcount = ATOMIC_INIT(1), | 352 | .refcount = ATOMIC_INIT(1), |
| 353 | .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), | 353 | .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), |
| 354 | .tasks = LIST_HEAD_INIT(init_css_set.tasks), | 354 | .tasks = LIST_HEAD_INIT(init_css_set.tasks), |
| @@ -1495,7 +1495,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1495 | */ | 1495 | */ |
| 1496 | if (!use_task_css_set_links) | 1496 | if (!use_task_css_set_links) |
| 1497 | cgroup_enable_task_cg_lists(); | 1497 | cgroup_enable_task_cg_lists(); |
| 1498 | retry: | 1498 | |
| 1499 | mutex_lock(&cgroup_tree_mutex); | 1499 | mutex_lock(&cgroup_tree_mutex); |
| 1500 | mutex_lock(&cgroup_mutex); | 1500 | mutex_lock(&cgroup_mutex); |
| 1501 | 1501 | ||
| @@ -1503,7 +1503,7 @@ retry: | |||
| 1503 | ret = parse_cgroupfs_options(data, &opts); | 1503 | ret = parse_cgroupfs_options(data, &opts); |
| 1504 | if (ret) | 1504 | if (ret) |
| 1505 | goto out_unlock; | 1505 | goto out_unlock; |
| 1506 | 1506 | retry: | |
| 1507 | /* look for a matching existing root */ | 1507 | /* look for a matching existing root */ |
| 1508 | if (!opts.subsys_mask && !opts.none && !opts.name) { | 1508 | if (!opts.subsys_mask && !opts.none && !opts.name) { |
| 1509 | cgrp_dfl_root_visible = true; | 1509 | cgrp_dfl_root_visible = true; |
| @@ -1562,9 +1562,9 @@ retry: | |||
| 1562 | if (!atomic_inc_not_zero(&root->cgrp.refcnt)) { | 1562 | if (!atomic_inc_not_zero(&root->cgrp.refcnt)) { |
| 1563 | mutex_unlock(&cgroup_mutex); | 1563 | mutex_unlock(&cgroup_mutex); |
| 1564 | mutex_unlock(&cgroup_tree_mutex); | 1564 | mutex_unlock(&cgroup_tree_mutex); |
| 1565 | kfree(opts.release_agent); | ||
| 1566 | kfree(opts.name); | ||
| 1567 | msleep(10); | 1565 | msleep(10); |
| 1566 | mutex_lock(&cgroup_tree_mutex); | ||
| 1567 | mutex_lock(&cgroup_mutex); | ||
| 1568 | goto retry; | 1568 | goto retry; |
| 1569 | } | 1569 | } |
| 1570 | 1570 | ||
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 2bc4a2256444..345628c78b5b 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
| @@ -21,6 +21,7 @@ | |||
| 21 | #include <linux/uaccess.h> | 21 | #include <linux/uaccess.h> |
| 22 | #include <linux/freezer.h> | 22 | #include <linux/freezer.h> |
| 23 | #include <linux/seq_file.h> | 23 | #include <linux/seq_file.h> |
| 24 | #include <linux/mutex.h> | ||
| 24 | 25 | ||
| 25 | /* | 26 | /* |
| 26 | * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is | 27 | * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is |
| @@ -42,9 +43,10 @@ enum freezer_state_flags { | |||
| 42 | struct freezer { | 43 | struct freezer { |
| 43 | struct cgroup_subsys_state css; | 44 | struct cgroup_subsys_state css; |
| 44 | unsigned int state; | 45 | unsigned int state; |
| 45 | spinlock_t lock; | ||
| 46 | }; | 46 | }; |
| 47 | 47 | ||
| 48 | static DEFINE_MUTEX(freezer_mutex); | ||
| 49 | |||
| 48 | static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) | 50 | static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) |
| 49 | { | 51 | { |
| 50 | return css ? container_of(css, struct freezer, css) : NULL; | 52 | return css ? container_of(css, struct freezer, css) : NULL; |
| @@ -93,7 +95,6 @@ freezer_css_alloc(struct cgroup_subsys_state *parent_css) | |||
| 93 | if (!freezer) | 95 | if (!freezer) |
| 94 | return ERR_PTR(-ENOMEM); | 96 | return ERR_PTR(-ENOMEM); |
| 95 | 97 | ||
| 96 | spin_lock_init(&freezer->lock); | ||
| 97 | return &freezer->css; | 98 | return &freezer->css; |
| 98 | } | 99 | } |
| 99 | 100 | ||
| @@ -110,14 +111,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css) | |||
| 110 | struct freezer *freezer = css_freezer(css); | 111 | struct freezer *freezer = css_freezer(css); |
| 111 | struct freezer *parent = parent_freezer(freezer); | 112 | struct freezer *parent = parent_freezer(freezer); |
| 112 | 113 | ||
| 113 | /* | 114 | mutex_lock(&freezer_mutex); |
| 114 | * The following double locking and freezing state inheritance | ||
| 115 | * guarantee that @cgroup can never escape ancestors' freezing | ||
| 116 | * states. See css_for_each_descendant_pre() for details. | ||
| 117 | */ | ||
| 118 | if (parent) | ||
| 119 | spin_lock_irq(&parent->lock); | ||
| 120 | spin_lock_nested(&freezer->lock, SINGLE_DEPTH_NESTING); | ||
| 121 | 115 | ||
| 122 | freezer->state |= CGROUP_FREEZER_ONLINE; | 116 | freezer->state |= CGROUP_FREEZER_ONLINE; |
| 123 | 117 | ||
| @@ -126,10 +120,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css) | |||
| 126 | atomic_inc(&system_freezing_cnt); | 120 | atomic_inc(&system_freezing_cnt); |
| 127 | } | 121 | } |
| 128 | 122 | ||
| 129 | spin_unlock(&freezer->lock); | 123 | mutex_unlock(&freezer_mutex); |
| 130 | if (parent) | ||
| 131 | spin_unlock_irq(&parent->lock); | ||
| 132 | |||
| 133 | return 0; | 124 | return 0; |
| 134 | } | 125 | } |
| 135 | 126 | ||
| @@ -144,14 +135,14 @@ static void freezer_css_offline(struct cgroup_subsys_state *css) | |||
| 144 | { | 135 | { |
| 145 | struct freezer *freezer = css_freezer(css); | 136 | struct freezer *freezer = css_freezer(css); |
| 146 | 137 | ||
| 147 | spin_lock_irq(&freezer->lock); | 138 | mutex_lock(&freezer_mutex); |
| 148 | 139 | ||
| 149 | if (freezer->state & CGROUP_FREEZING) | 140 | if (freezer->state & CGROUP_FREEZING) |
| 150 | atomic_dec(&system_freezing_cnt); | 141 | atomic_dec(&system_freezing_cnt); |
| 151 | 142 | ||
| 152 | freezer->state = 0; | 143 | freezer->state = 0; |
| 153 | 144 | ||
| 154 | spin_unlock_irq(&freezer->lock); | 145 | mutex_unlock(&freezer_mutex); |
| 155 | } | 146 | } |
| 156 | 147 | ||
| 157 | static void freezer_css_free(struct cgroup_subsys_state *css) | 148 | static void freezer_css_free(struct cgroup_subsys_state *css) |
| @@ -175,7 +166,7 @@ static void freezer_attach(struct cgroup_subsys_state *new_css, | |||
| 175 | struct task_struct *task; | 166 | struct task_struct *task; |
| 176 | bool clear_frozen = false; | 167 | bool clear_frozen = false; |
| 177 | 168 | ||
| 178 | spin_lock_irq(&freezer->lock); | 169 | mutex_lock(&freezer_mutex); |
| 179 | 170 | ||
| 180 | /* | 171 | /* |
| 181 | * Make the new tasks conform to the current state of @new_css. | 172 | * Make the new tasks conform to the current state of @new_css. |
| @@ -197,21 +188,13 @@ static void freezer_attach(struct cgroup_subsys_state *new_css, | |||
| 197 | } | 188 | } |
| 198 | } | 189 | } |
| 199 | 190 | ||
| 200 | spin_unlock_irq(&freezer->lock); | 191 | /* propagate FROZEN clearing upwards */ |
| 201 | |||
| 202 | /* | ||
| 203 | * Propagate FROZEN clearing upwards. We may race with | ||
| 204 | * update_if_frozen(), but as long as both work bottom-up, either | ||
| 205 | * update_if_frozen() sees child's FROZEN cleared or we clear the | ||
| 206 | * parent's FROZEN later. No parent w/ !FROZEN children can be | ||
| 207 | * left FROZEN. | ||
| 208 | */ | ||
| 209 | while (clear_frozen && (freezer = parent_freezer(freezer))) { | 192 | while (clear_frozen && (freezer = parent_freezer(freezer))) { |
| 210 | spin_lock_irq(&freezer->lock); | ||
| 211 | freezer->state &= ~CGROUP_FROZEN; | 193 | freezer->state &= ~CGROUP_FROZEN; |
| 212 | clear_frozen = freezer->state & CGROUP_FREEZING; | 194 | clear_frozen = freezer->state & CGROUP_FREEZING; |
| 213 | spin_unlock_irq(&freezer->lock); | ||
| 214 | } | 195 | } |
| 196 | |||
| 197 | mutex_unlock(&freezer_mutex); | ||
| 215 | } | 198 | } |
| 216 | 199 | ||
| 217 | /** | 200 | /** |
| @@ -228,9 +211,6 @@ static void freezer_fork(struct task_struct *task) | |||
| 228 | { | 211 | { |
| 229 | struct freezer *freezer; | 212 | struct freezer *freezer; |
| 230 | 213 | ||
| 231 | rcu_read_lock(); | ||
| 232 | freezer = task_freezer(task); | ||
| 233 | |||
| 234 | /* | 214 | /* |
| 235 | * The root cgroup is non-freezable, so we can skip locking the | 215 | * The root cgroup is non-freezable, so we can skip locking the |
| 236 | * freezer. This is safe regardless of race with task migration. | 216 | * freezer. This is safe regardless of race with task migration. |
| @@ -238,24 +218,18 @@ static void freezer_fork(struct task_struct *task) | |||
| 238 | * to do. If we lost and root is the new cgroup, noop is still the | 218 | * to do. If we lost and root is the new cgroup, noop is still the |
| 239 | * right thing to do. | 219 | * right thing to do. |
| 240 | */ | 220 | */ |
| 241 | if (!parent_freezer(freezer)) | 221 | if (task_css_is_root(task, freezer_cgrp_id)) |
| 242 | goto out; | 222 | return; |
| 243 | 223 | ||
| 244 | /* | 224 | mutex_lock(&freezer_mutex); |
| 245 | * Grab @freezer->lock and freeze @task after verifying @task still | 225 | rcu_read_lock(); |
| 246 | * belongs to @freezer and it's freezing. The former is for the | 226 | |
| 247 | * case where we have raced against task migration and lost and | 227 | freezer = task_freezer(task); |
| 248 | * @task is already in a different cgroup which may not be frozen. | 228 | if (freezer->state & CGROUP_FREEZING) |
| 249 | * This isn't strictly necessary as freeze_task() is allowed to be | ||
| 250 | * called spuriously but let's do it anyway for, if nothing else, | ||
| 251 | * documentation. | ||
| 252 | */ | ||
| 253 | spin_lock_irq(&freezer->lock); | ||
| 254 | if (freezer == task_freezer(task) && (freezer->state & CGROUP_FREEZING)) | ||
| 255 | freeze_task(task); | 229 | freeze_task(task); |
| 256 | spin_unlock_irq(&freezer->lock); | 230 | |
| 257 | out: | ||
| 258 | rcu_read_unlock(); | 231 | rcu_read_unlock(); |
| 232 | mutex_unlock(&freezer_mutex); | ||
| 259 | } | 233 | } |
| 260 | 234 | ||
| 261 | /** | 235 | /** |
| @@ -281,22 +255,24 @@ static void update_if_frozen(struct cgroup_subsys_state *css) | |||
| 281 | struct css_task_iter it; | 255 | struct css_task_iter it; |
| 282 | struct task_struct *task; | 256 | struct task_struct *task; |
| 283 | 257 | ||
| 284 | WARN_ON_ONCE(!rcu_read_lock_held()); | 258 | lockdep_assert_held(&freezer_mutex); |
| 285 | |||
| 286 | spin_lock_irq(&freezer->lock); | ||
| 287 | 259 | ||
| 288 | if (!(freezer->state & CGROUP_FREEZING) || | 260 | if (!(freezer->state & CGROUP_FREEZING) || |
| 289 | (freezer->state & CGROUP_FROZEN)) | 261 | (freezer->state & CGROUP_FROZEN)) |
| 290 | goto out_unlock; | 262 | return; |
| 291 | 263 | ||
| 292 | /* are all (live) children frozen? */ | 264 | /* are all (live) children frozen? */ |
| 265 | rcu_read_lock(); | ||
| 293 | css_for_each_child(pos, css) { | 266 | css_for_each_child(pos, css) { |
| 294 | struct freezer *child = css_freezer(pos); | 267 | struct freezer *child = css_freezer(pos); |
| 295 | 268 | ||
| 296 | if ((child->state & CGROUP_FREEZER_ONLINE) && | 269 | if ((child->state & CGROUP_FREEZER_ONLINE) && |
| 297 | !(child->state & CGROUP_FROZEN)) | 270 | !(child->state & CGROUP_FROZEN)) { |
| 298 | goto out_unlock; | 271 | rcu_read_unlock(); |
| 272 | return; | ||
| 273 | } | ||
| 299 | } | 274 | } |
| 275 | rcu_read_unlock(); | ||
| 300 | 276 | ||
| 301 | /* are all tasks frozen? */ | 277 | /* are all tasks frozen? */ |
| 302 | css_task_iter_start(css, &it); | 278 | css_task_iter_start(css, &it); |
| @@ -317,21 +293,29 @@ static void update_if_frozen(struct cgroup_subsys_state *css) | |||
| 317 | freezer->state |= CGROUP_FROZEN; | 293 | freezer->state |= CGROUP_FROZEN; |
| 318 | out_iter_end: | 294 | out_iter_end: |
| 319 | css_task_iter_end(&it); | 295 | css_task_iter_end(&it); |
| 320 | out_unlock: | ||
| 321 | spin_unlock_irq(&freezer->lock); | ||
| 322 | } | 296 | } |
| 323 | 297 | ||
| 324 | static int freezer_read(struct seq_file *m, void *v) | 298 | static int freezer_read(struct seq_file *m, void *v) |
| 325 | { | 299 | { |
| 326 | struct cgroup_subsys_state *css = seq_css(m), *pos; | 300 | struct cgroup_subsys_state *css = seq_css(m), *pos; |
| 327 | 301 | ||
| 302 | mutex_lock(&freezer_mutex); | ||
| 328 | rcu_read_lock(); | 303 | rcu_read_lock(); |
| 329 | 304 | ||
| 330 | /* update states bottom-up */ | 305 | /* update states bottom-up */ |
| 331 | css_for_each_descendant_post(pos, css) | 306 | css_for_each_descendant_post(pos, css) { |
| 307 | if (!css_tryget(pos)) | ||
| 308 | continue; | ||
| 309 | rcu_read_unlock(); | ||
| 310 | |||
| 332 | update_if_frozen(pos); | 311 | update_if_frozen(pos); |
| 333 | 312 | ||
| 313 | rcu_read_lock(); | ||
| 314 | css_put(pos); | ||
| 315 | } | ||
| 316 | |||
| 334 | rcu_read_unlock(); | 317 | rcu_read_unlock(); |
| 318 | mutex_unlock(&freezer_mutex); | ||
| 335 | 319 | ||
| 336 | seq_puts(m, freezer_state_strs(css_freezer(css)->state)); | 320 | seq_puts(m, freezer_state_strs(css_freezer(css)->state)); |
| 337 | seq_putc(m, '\n'); | 321 | seq_putc(m, '\n'); |
| @@ -373,7 +357,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze, | |||
| 373 | unsigned int state) | 357 | unsigned int state) |
| 374 | { | 358 | { |
| 375 | /* also synchronizes against task migration, see freezer_attach() */ | 359 | /* also synchronizes against task migration, see freezer_attach() */ |
| 376 | lockdep_assert_held(&freezer->lock); | 360 | lockdep_assert_held(&freezer_mutex); |
| 377 | 361 | ||
| 378 | if (!(freezer->state & CGROUP_FREEZER_ONLINE)) | 362 | if (!(freezer->state & CGROUP_FREEZER_ONLINE)) |
| 379 | return; | 363 | return; |
| @@ -414,31 +398,29 @@ static void freezer_change_state(struct freezer *freezer, bool freeze) | |||
| 414 | * descendant will try to inherit its parent's FREEZING state as | 398 | * descendant will try to inherit its parent's FREEZING state as |
| 415 | * CGROUP_FREEZING_PARENT. | 399 | * CGROUP_FREEZING_PARENT. |
| 416 | */ | 400 | */ |
| 401 | mutex_lock(&freezer_mutex); | ||
| 417 | rcu_read_lock(); | 402 | rcu_read_lock(); |
| 418 | css_for_each_descendant_pre(pos, &freezer->css) { | 403 | css_for_each_descendant_pre(pos, &freezer->css) { |
| 419 | struct freezer *pos_f = css_freezer(pos); | 404 | struct freezer *pos_f = css_freezer(pos); |
| 420 | struct freezer *parent = parent_freezer(pos_f); | 405 | struct freezer *parent = parent_freezer(pos_f); |
| 421 | 406 | ||
| 422 | spin_lock_irq(&pos_f->lock); | 407 | if (!css_tryget(pos)) |
| 408 | continue; | ||
| 409 | rcu_read_unlock(); | ||
| 423 | 410 | ||
| 424 | if (pos_f == freezer) { | 411 | if (pos_f == freezer) |
| 425 | freezer_apply_state(pos_f, freeze, | 412 | freezer_apply_state(pos_f, freeze, |
| 426 | CGROUP_FREEZING_SELF); | 413 | CGROUP_FREEZING_SELF); |
| 427 | } else { | 414 | else |
| 428 | /* | ||
| 429 | * Our update to @parent->state is already visible | ||
| 430 | * which is all we need. No need to lock @parent. | ||
| 431 | * For more info on synchronization, see | ||
| 432 | * freezer_post_create(). | ||
| 433 | */ | ||
| 434 | freezer_apply_state(pos_f, | 415 | freezer_apply_state(pos_f, |
| 435 | parent->state & CGROUP_FREEZING, | 416 | parent->state & CGROUP_FREEZING, |
| 436 | CGROUP_FREEZING_PARENT); | 417 | CGROUP_FREEZING_PARENT); |
| 437 | } | ||
| 438 | 418 | ||
| 439 | spin_unlock_irq(&pos_f->lock); | 419 | rcu_read_lock(); |
| 420 | css_put(pos); | ||
| 440 | } | 421 | } |
| 441 | rcu_read_unlock(); | 422 | rcu_read_unlock(); |
| 423 | mutex_unlock(&freezer_mutex); | ||
| 442 | } | 424 | } |
| 443 | 425 | ||
| 444 | static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft, | 426 | static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft, |
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 6cb20d2e7ee0..019d45008448 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
| @@ -120,7 +120,7 @@ void context_tracking_user_enter(void) | |||
| 120 | * instead of preempt_schedule() to exit user context if needed before | 120 | * instead of preempt_schedule() to exit user context if needed before |
| 121 | * calling the scheduler. | 121 | * calling the scheduler. |
| 122 | */ | 122 | */ |
| 123 | asmlinkage void __sched notrace preempt_schedule_context(void) | 123 | asmlinkage __visible void __sched notrace preempt_schedule_context(void) |
| 124 | { | 124 | { |
| 125 | enum ctx_state prev_ctx; | 125 | enum ctx_state prev_ctx; |
| 126 | 126 | ||
diff --git a/kernel/cpu.c b/kernel/cpu.c
index a9e710eef0e2..247979a1b815 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
| @@ -726,10 +726,12 @@ void set_cpu_present(unsigned int cpu, bool present) | |||
| 726 | 726 | ||
| 727 | void set_cpu_online(unsigned int cpu, bool online) | 727 | void set_cpu_online(unsigned int cpu, bool online) |
| 728 | { | 728 | { |
| 729 | if (online) | 729 | if (online) { |
| 730 | cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits)); | 730 | cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits)); |
| 731 | else | 731 | cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits)); |
| 732 | } else { | ||
| 732 | cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits)); | 733 | cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits)); |
| 734 | } | ||
| 733 | } | 735 | } |
| 734 | 736 | ||
| 735 | void set_cpu_active(unsigned int cpu, bool active) | 737 | void set_cpu_active(unsigned int cpu, bool active) |
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e9ef0c6646af..8fac2056d51e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
| @@ -2973,6 +2973,22 @@ out: | |||
| 2973 | local_irq_restore(flags); | 2973 | local_irq_restore(flags); |
| 2974 | } | 2974 | } |
| 2975 | 2975 | ||
| 2976 | void perf_event_exec(void) | ||
| 2977 | { | ||
| 2978 | struct perf_event_context *ctx; | ||
| 2979 | int ctxn; | ||
| 2980 | |||
| 2981 | rcu_read_lock(); | ||
| 2982 | for_each_task_context_nr(ctxn) { | ||
| 2983 | ctx = current->perf_event_ctxp[ctxn]; | ||
| 2984 | if (!ctx) | ||
| 2985 | continue; | ||
| 2986 | |||
| 2987 | perf_event_enable_on_exec(ctx); | ||
| 2988 | } | ||
| 2989 | rcu_read_unlock(); | ||
| 2990 | } | ||
| 2991 | |||
| 2976 | /* | 2992 | /* |
| 2977 | * Cross CPU call to read the hardware event | 2993 | * Cross CPU call to read the hardware event |
| 2978 | */ | 2994 | */ |
| @@ -3195,7 +3211,8 @@ static void free_event_rcu(struct rcu_head *head) | |||
| 3195 | } | 3211 | } |
| 3196 | 3212 | ||
| 3197 | static void ring_buffer_put(struct ring_buffer *rb); | 3213 | static void ring_buffer_put(struct ring_buffer *rb); |
| 3198 | static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb); | 3214 | static void ring_buffer_attach(struct perf_event *event, |
| 3215 | struct ring_buffer *rb); | ||
| 3199 | 3216 | ||
| 3200 | static void unaccount_event_cpu(struct perf_event *event, int cpu) | 3217 | static void unaccount_event_cpu(struct perf_event *event, int cpu) |
| 3201 | { | 3218 | { |
| @@ -3259,8 +3276,6 @@ static void _free_event(struct perf_event *event) | |||
| 3259 | unaccount_event(event); | 3276 | unaccount_event(event); |
| 3260 | 3277 | ||
| 3261 | if (event->rb) { | 3278 | if (event->rb) { |
| 3262 | struct ring_buffer *rb; | ||
| 3263 | |||
| 3264 | /* | 3279 | /* |
| 3265 | * Can happen when we close an event with re-directed output. | 3280 | * Can happen when we close an event with re-directed output. |
| 3266 | * | 3281 | * |
| @@ -3268,12 +3283,7 @@ static void _free_event(struct perf_event *event) | |||
| 3268 | * over us; possibly making our ring_buffer_put() the last. | 3283 | * over us; possibly making our ring_buffer_put() the last. |
| 3269 | */ | 3284 | */ |
| 3270 | mutex_lock(&event->mmap_mutex); | 3285 | mutex_lock(&event->mmap_mutex); |
| 3271 | rb = event->rb; | 3286 | ring_buffer_attach(event, NULL); |
| 3272 | if (rb) { | ||
| 3273 | rcu_assign_pointer(event->rb, NULL); | ||
| 3274 | ring_buffer_detach(event, rb); | ||
| 3275 | ring_buffer_put(rb); /* could be last */ | ||
| 3276 | } | ||
| 3277 | mutex_unlock(&event->mmap_mutex); | 3287 | mutex_unlock(&event->mmap_mutex); |
| 3278 | } | 3288 | } |
| 3279 | 3289 | ||
| @@ -3870,28 +3880,47 @@ unlock: | |||
| 3870 | static void ring_buffer_attach(struct perf_event *event, | 3880 | static void ring_buffer_attach(struct perf_event *event, |
| 3871 | struct ring_buffer *rb) | 3881 | struct ring_buffer *rb) |
| 3872 | { | 3882 | { |
| 3883 | struct ring_buffer *old_rb = NULL; | ||
| 3873 | unsigned long flags; | 3884 | unsigned long flags; |
| 3874 | 3885 | ||
| 3875 | if (!list_empty(&event->rb_entry)) | 3886 | if (event->rb) { |
| 3876 | return; | 3887 | /* |
| 3888 | * Should be impossible, we set this when removing | ||
| 3889 | * event->rb_entry and wait/clear when adding event->rb_entry. | ||
| 3890 | */ | ||
| 3891 | WARN_ON_ONCE(event->rcu_pending); | ||
| 3877 | 3892 | ||
| 3878 | spin_lock_irqsave(&rb->event_lock, flags); | 3893 | old_rb = event->rb; |
| 3879 | if (list_empty(&event->rb_entry)) | 3894 | event->rcu_batches = get_state_synchronize_rcu(); |
| 3880 | list_add(&event->rb_entry, &rb->event_list); | 3895 | event->rcu_pending = 1; |
| 3881 | spin_unlock_irqrestore(&rb->event_lock, flags); | ||
| 3882 | } | ||
| 3883 | 3896 | ||
| 3884 | static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb) | 3897 | spin_lock_irqsave(&old_rb->event_lock, flags); |
| 3885 | { | 3898 | list_del_rcu(&event->rb_entry); |
| 3886 | unsigned long flags; | 3899 | spin_unlock_irqrestore(&old_rb->event_lock, flags); |
| 3900 | } | ||
| 3887 | 3901 | ||
| 3888 | if (list_empty(&event->rb_entry)) | 3902 | if (event->rcu_pending && rb) { |
| 3889 | return; | 3903 | cond_synchronize_rcu(event->rcu_batches); |
| 3904 | event->rcu_pending = 0; | ||
| 3905 | } | ||
| 3906 | |||
| 3907 | if (rb) { | ||
| 3908 | spin_lock_irqsave(&rb->event_lock, flags); | ||
| 3909 | list_add_rcu(&event->rb_entry, &rb->event_list); | ||
| 3910 | spin_unlock_irqrestore(&rb->event_lock, flags); | ||
| 3911 | } | ||
| 3912 | |||
| 3913 | rcu_assign_pointer(event->rb, rb); | ||
| 3890 | 3914 | ||
| 3891 | spin_lock_irqsave(&rb->event_lock, flags); | 3915 | if (old_rb) { |
| 3892 | list_del_init(&event->rb_entry); | 3916 | ring_buffer_put(old_rb); |
| 3893 | wake_up_all(&event->waitq); | 3917 | /* |
| 3894 | spin_unlock_irqrestore(&rb->event_lock, flags); | 3918 | * Since we detached before setting the new rb, so that we |
| 3919 | * could attach the new rb, we could have missed a wakeup. | ||
| 3920 | * Provide it now. | ||
| 3921 | */ | ||
| 3922 | wake_up_all(&event->waitq); | ||
| 3923 | } | ||
| 3895 | } | 3924 | } |
| 3896 | 3925 | ||
| 3897 | static void ring_buffer_wakeup(struct perf_event *event) | 3926 | static void ring_buffer_wakeup(struct perf_event *event) |
| @@ -3960,7 +3989,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) | |||
| 3960 | { | 3989 | { |
| 3961 | struct perf_event *event = vma->vm_file->private_data; | 3990 | struct perf_event *event = vma->vm_file->private_data; |
| 3962 | 3991 | ||
| 3963 | struct ring_buffer *rb = event->rb; | 3992 | struct ring_buffer *rb = ring_buffer_get(event); |
| 3964 | struct user_struct *mmap_user = rb->mmap_user; | 3993 | struct user_struct *mmap_user = rb->mmap_user; |
| 3965 | int mmap_locked = rb->mmap_locked; | 3994 | int mmap_locked = rb->mmap_locked; |
| 3966 | unsigned long size = perf_data_size(rb); | 3995 | unsigned long size = perf_data_size(rb); |
| @@ -3968,18 +3997,14 @@ static void perf_mmap_close(struct vm_area_struct *vma) | |||
| 3968 | atomic_dec(&rb->mmap_count); | 3997 | atomic_dec(&rb->mmap_count); |
| 3969 | 3998 | ||
| 3970 | if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) | 3999 | if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) |
| 3971 | return; | 4000 | goto out_put; |
| 3972 | 4001 | ||
| 3973 | /* Detach current event from the buffer. */ | 4002 | ring_buffer_attach(event, NULL); |
| 3974 | rcu_assign_pointer(event->rb, NULL); | ||
| 3975 | ring_buffer_detach(event, rb); | ||
| 3976 | mutex_unlock(&event->mmap_mutex); | 4003 | mutex_unlock(&event->mmap_mutex); |
| 3977 | 4004 | ||
| 3978 | /* If there's still other mmap()s of this buffer, we're done. */ | 4005 | /* If there's still other mmap()s of this buffer, we're done. */ |
| 3979 | if (atomic_read(&rb->mmap_count)) { | 4006 | if (atomic_read(&rb->mmap_count)) |
| 3980 | ring_buffer_put(rb); /* can't be last */ | 4007 | goto out_put; |
| 3981 | return; | ||
| 3982 | } | ||
| 3983 | 4008 | ||
| 3984 | /* | 4009 | /* |
| 3985 | * No other mmap()s, detach from all other events that might redirect | 4010 | * No other mmap()s, detach from all other events that might redirect |
| @@ -4009,11 +4034,9 @@ again: | |||
| 4009 | * still restart the iteration to make sure we're not now | 4034 | * still restart the iteration to make sure we're not now |
| 4010 | * iterating the wrong list. | 4035 | * iterating the wrong list. |
| 4011 | */ | 4036 | */ |
| 4012 | if (event->rb == rb) { | 4037 | if (event->rb == rb) |
| 4013 | rcu_assign_pointer(event->rb, NULL); | 4038 | ring_buffer_attach(event, NULL); |
| 4014 | ring_buffer_detach(event, rb); | 4039 | |
| 4015 | ring_buffer_put(rb); /* can't be last, we still have one */ | ||
| 4016 | } | ||
| 4017 | mutex_unlock(&event->mmap_mutex); | 4040 | mutex_unlock(&event->mmap_mutex); |
| 4018 | put_event(event); | 4041 | put_event(event); |
| 4019 | 4042 | ||
| @@ -4038,6 +4061,7 @@ again: | |||
| 4038 | vma->vm_mm->pinned_vm -= mmap_locked; | 4061 | vma->vm_mm->pinned_vm -= mmap_locked; |
| 4039 | free_uid(mmap_user); | 4062 | free_uid(mmap_user); |
| 4040 | 4063 | ||
| 4064 | out_put: | ||
| 4041 | ring_buffer_put(rb); /* could be last */ | 4065 | ring_buffer_put(rb); /* could be last */ |
| 4042 | } | 4066 | } |
| 4043 | 4067 | ||
| @@ -4155,7 +4179,6 @@ again: | |||
| 4155 | vma->vm_mm->pinned_vm += extra; | 4179 | vma->vm_mm->pinned_vm += extra; |
| 4156 | 4180 | ||
| 4157 | ring_buffer_attach(event, rb); | 4181 | ring_buffer_attach(event, rb); |
| 4158 | rcu_assign_pointer(event->rb, rb); | ||
| 4159 | 4182 | ||
| 4160 | perf_event_init_userpage(event); | 4183 | perf_event_init_userpage(event); |
| 4161 | perf_event_update_userpage(event); | 4184 | perf_event_update_userpage(event); |
| @@ -5070,18 +5093,6 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
| 5070 | void perf_event_comm(struct task_struct *task) | 5093 | void perf_event_comm(struct task_struct *task) |
| 5071 | { | 5094 | { |
| 5072 | struct perf_comm_event comm_event; | 5095 | struct perf_comm_event comm_event; |
| 5073 | struct perf_event_context *ctx; | ||
| 5074 | int ctxn; | ||
| 5075 | |||
| 5076 | rcu_read_lock(); | ||
| 5077 | for_each_task_context_nr(ctxn) { | ||
| 5078 | ctx = task->perf_event_ctxp[ctxn]; | ||
| 5079 | if (!ctx) | ||
| 5080 | continue; | ||
| 5081 | |||
| 5082 | perf_event_enable_on_exec(ctx); | ||
| 5083 | } | ||
| 5084 | rcu_read_unlock(); | ||
| 5085 | 5096 | ||
| 5086 | if (!atomic_read(&nr_comm_events)) | 5097 | if (!atomic_read(&nr_comm_events)) |
| 5087 | return; | 5098 | return; |
| @@ -5439,6 +5450,9 @@ struct swevent_htable { | |||
| 5439 | 5450 | ||
| 5440 | /* Recursion avoidance in each contexts */ | 5451 | /* Recursion avoidance in each contexts */ |
| 5441 | int recursion[PERF_NR_CONTEXTS]; | 5452 | int recursion[PERF_NR_CONTEXTS]; |
| 5453 | |||
| 5454 | /* Keeps track of cpu being initialized/exited */ | ||
| 5455 | bool online; | ||
| 5442 | }; | 5456 | }; |
| 5443 | 5457 | ||
| 5444 | static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); | 5458 | static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); |
| @@ -5685,8 +5699,14 @@ static int perf_swevent_add(struct perf_event *event, int flags) | |||
| 5685 | hwc->state = !(flags & PERF_EF_START); | 5699 | hwc->state = !(flags & PERF_EF_START); |
| 5686 | 5700 | ||
| 5687 | head = find_swevent_head(swhash, event); | 5701 | head = find_swevent_head(swhash, event); |
| 5688 | if (WARN_ON_ONCE(!head)) | 5702 | if (!head) { |
| 5703 | /* | ||
| 5704 | * We can race with cpu hotplug code. Do not | ||
| 5705 | * WARN if the cpu just got unplugged. | ||
| 5706 | */ | ||
| 5707 | WARN_ON_ONCE(swhash->online); | ||
| 5689 | return -EINVAL; | 5708 | return -EINVAL; |
| 5709 | } | ||
| 5690 | 5710 | ||
| 5691 | hlist_add_head_rcu(&event->hlist_entry, head); | 5711 | hlist_add_head_rcu(&event->hlist_entry, head); |
| 5692 | 5712 | ||
| @@ -6956,7 +6976,7 @@ err_size: | |||
| 6956 | static int | 6976 | static int |
| 6957 | perf_event_set_output(struct perf_event *event, struct perf_event *output_event) | 6977 | perf_event_set_output(struct perf_event *event, struct perf_event *output_event) |
| 6958 | { | 6978 | { |
| 6959 | struct ring_buffer *rb = NULL, *old_rb = NULL; | 6979 | struct ring_buffer *rb = NULL; |
| 6960 | int ret = -EINVAL; | 6980 | int ret = -EINVAL; |
| 6961 | 6981 | ||
| 6962 | if (!output_event) | 6982 | if (!output_event) |
| @@ -6984,8 +7004,6 @@ set: | |||
| 6984 | if (atomic_read(&event->mmap_count)) | 7004 | if (atomic_read(&event->mmap_count)) |
| 6985 | goto unlock; | 7005 | goto unlock; |
| 6986 | 7006 | ||
| 6987 | old_rb = event->rb; | ||
| 6988 | |||
| 6989 | if (output_event) { | 7007 | if (output_event) { |
| 6990 | /* get the rb we want to redirect to */ | 7008 | /* get the rb we want to redirect to */ |
| 6991 | rb = ring_buffer_get(output_event); | 7009 | rb = ring_buffer_get(output_event); |
| @@ -6993,23 +7011,7 @@ set: | |||
| 6993 | goto unlock; | 7011 | goto unlock; |
| 6994 | } | 7012 | } |
| 6995 | 7013 | ||
| 6996 | if (old_rb) | 7014 | ring_buffer_attach(event, rb); |
| 6997 | ring_buffer_detach(event, old_rb); | ||
| 6998 | |||
| 6999 | if (rb) | ||
| 7000 | ring_buffer_attach(event, rb); | ||
| 7001 | |||
| 7002 | rcu_assign_pointer(event->rb, rb); | ||
| 7003 | |||
| 7004 | if (old_rb) { | ||
| 7005 | ring_buffer_put(old_rb); | ||
| 7006 | /* | ||
| 7007 | * Since we detached before setting the new rb, so that we | ||
| 7008 | * could attach the new rb, we could have missed a wakeup. | ||
| 7009 | * Provide it now. | ||
| 7010 | */ | ||
| 7011 | wake_up_all(&event->waitq); | ||
| 7012 | } | ||
| 7013 | 7015 | ||
| 7014 | ret = 0; | 7016 | ret = 0; |
| 7015 | unlock: | 7017 | unlock: |
| @@ -7060,6 +7062,9 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 7060 | if (attr.freq) { | 7062 | if (attr.freq) { |
| 7061 | if (attr.sample_freq > sysctl_perf_event_sample_rate) | 7063 | if (attr.sample_freq > sysctl_perf_event_sample_rate) |
| 7062 | return -EINVAL; | 7064 | return -EINVAL; |
| 7065 | } else { | ||
| 7066 | if (attr.sample_period & (1ULL << 63)) | ||
| 7067 | return -EINVAL; | ||
| 7063 | } | 7068 | } |
| 7064 | 7069 | ||
| 7065 | /* | 7070 | /* |
| @@ -7872,6 +7877,7 @@ static void perf_event_init_cpu(int cpu) | |||
| 7872 | struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); | 7877 | struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); |
| 7873 | 7878 | ||
| 7874 | mutex_lock(&swhash->hlist_mutex); | 7879 | mutex_lock(&swhash->hlist_mutex); |
| 7880 | swhash->online = true; | ||
| 7875 | if (swhash->hlist_refcount > 0) { | 7881 | if (swhash->hlist_refcount > 0) { |
| 7876 | struct swevent_hlist *hlist; | 7882 | struct swevent_hlist *hlist; |
| 7877 | 7883 | ||
| @@ -7929,6 +7935,7 @@ static void perf_event_exit_cpu(int cpu) | |||
| 7929 | perf_event_exit_cpu_context(cpu); | 7935 | perf_event_exit_cpu_context(cpu); |
| 7930 | 7936 | ||
| 7931 | mutex_lock(&swhash->hlist_mutex); | 7937 | mutex_lock(&swhash->hlist_mutex); |
| 7938 | swhash->online = false; | ||
| 7932 | swevent_hlist_release(swhash); | 7939 | swevent_hlist_release(swhash); |
| 7933 | mutex_unlock(&swhash->hlist_mutex); | 7940 | mutex_unlock(&swhash->hlist_mutex); |
| 7934 | } | 7941 | } |
diff --git a/kernel/futex.c b/kernel/futex.c
index 5f589279e462..81dbe773ce4c 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
| @@ -745,7 +745,8 @@ void exit_pi_state_list(struct task_struct *curr) | |||
| 745 | 745 | ||
| 746 | static int | 746 | static int |
| 747 | lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | 747 | lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, |
| 748 | union futex_key *key, struct futex_pi_state **ps) | 748 | union futex_key *key, struct futex_pi_state **ps, |
| 749 | struct task_struct *task) | ||
| 749 | { | 750 | { |
| 750 | struct futex_pi_state *pi_state = NULL; | 751 | struct futex_pi_state *pi_state = NULL; |
| 751 | struct futex_q *this, *next; | 752 | struct futex_q *this, *next; |
| @@ -786,6 +787,16 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |||
| 786 | return -EINVAL; | 787 | return -EINVAL; |
| 787 | } | 788 | } |
| 788 | 789 | ||
| 790 | /* | ||
| 791 | * Protect against a corrupted uval. If uval | ||
| 792 | * is 0x80000000 then pid is 0 and the waiter | ||
| 793 | * bit is set. So the deadlock check in the | ||
| 794 | * calling code has failed and we did not fall | ||
| 795 | * into the check above due to !pid. | ||
| 796 | */ | ||
| 797 | if (task && pi_state->owner == task) | ||
| 798 | return -EDEADLK; | ||
| 799 | |||
| 789 | atomic_inc(&pi_state->refcount); | 800 | atomic_inc(&pi_state->refcount); |
| 790 | *ps = pi_state; | 801 | *ps = pi_state; |
| 791 | 802 | ||
| @@ -803,6 +814,11 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |||
| 803 | if (!p) | 814 | if (!p) |
| 804 | return -ESRCH; | 815 | return -ESRCH; |
| 805 | 816 | ||
| 817 | if (!p->mm) { | ||
| 818 | put_task_struct(p); | ||
| 819 | return -EPERM; | ||
| 820 | } | ||
| 821 | |||
| 806 | /* | 822 | /* |
| 807 | * We need to look at the task state flags to figure out, | 823 | * We need to look at the task state flags to figure out, |
| 808 | * whether the task is exiting. To protect against the do_exit | 824 | * whether the task is exiting. To protect against the do_exit |
| @@ -935,7 +951,7 @@ retry: | |||
| 935 | * We dont have the lock. Look up the PI state (or create it if | 951 | * We dont have the lock. Look up the PI state (or create it if |
| 936 | * we are the first waiter): | 952 | * we are the first waiter): |
| 937 | */ | 953 | */ |
| 938 | ret = lookup_pi_state(uval, hb, key, ps); | 954 | ret = lookup_pi_state(uval, hb, key, ps, task); |
| 939 | 955 | ||
| 940 | if (unlikely(ret)) { | 956 | if (unlikely(ret)) { |
| 941 | switch (ret) { | 957 | switch (ret) { |
| @@ -1347,7 +1363,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, | |||
| 1347 | * | 1363 | * |
| 1348 | * Return: | 1364 | * Return: |
| 1349 | * 0 - failed to acquire the lock atomically; | 1365 | * 0 - failed to acquire the lock atomically; |
| 1350 | * 1 - acquired the lock; | 1366 | * >0 - acquired the lock, return value is vpid of the top_waiter |
| 1351 | * <0 - error | 1367 | * <0 - error |
| 1352 | */ | 1368 | */ |
| 1353 | static int futex_proxy_trylock_atomic(u32 __user *pifutex, | 1369 | static int futex_proxy_trylock_atomic(u32 __user *pifutex, |
| @@ -1358,7 +1374,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, | |||
| 1358 | { | 1374 | { |
| 1359 | struct futex_q *top_waiter = NULL; | 1375 | struct futex_q *top_waiter = NULL; |
| 1360 | u32 curval; | 1376 | u32 curval; |
| 1361 | int ret; | 1377 | int ret, vpid; |
| 1362 | 1378 | ||
| 1363 | if (get_futex_value_locked(&curval, pifutex)) | 1379 | if (get_futex_value_locked(&curval, pifutex)) |
| 1364 | return -EFAULT; | 1380 | return -EFAULT; |
| @@ -1386,11 +1402,13 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, | |||
| 1386 | * the contended case or if set_waiters is 1. The pi_state is returned | 1402 | * the contended case or if set_waiters is 1. The pi_state is returned |
| 1387 | * in ps in contended cases. | 1403 | * in ps in contended cases. |
| 1388 | */ | 1404 | */ |
| 1405 | vpid = task_pid_vnr(top_waiter->task); | ||
| 1389 | ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, | 1406 | ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, |
| 1390 | set_waiters); | 1407 | set_waiters); |
| 1391 | if (ret == 1) | 1408 | if (ret == 1) { |
| 1392 | requeue_pi_wake_futex(top_waiter, key2, hb2); | 1409 | requeue_pi_wake_futex(top_waiter, key2, hb2); |
| 1393 | 1410 | return vpid; | |
| 1411 | } | ||
| 1394 | return ret; | 1412 | return ret; |
| 1395 | } | 1413 | } |
| 1396 | 1414 | ||
| @@ -1421,7 +1439,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, | |||
| 1421 | struct futex_pi_state *pi_state = NULL; | 1439 | struct futex_pi_state *pi_state = NULL; |
| 1422 | struct futex_hash_bucket *hb1, *hb2; | 1440 | struct futex_hash_bucket *hb1, *hb2; |
| 1423 | struct futex_q *this, *next; | 1441 | struct futex_q *this, *next; |
| 1424 | u32 curval2; | ||
| 1425 | 1442 | ||
| 1426 | if (requeue_pi) { | 1443 | if (requeue_pi) { |
| 1427 | /* | 1444 | /* |
| @@ -1509,16 +1526,25 @@ retry_private: | |||
| 1509 | * At this point the top_waiter has either taken uaddr2 or is | 1526 | * At this point the top_waiter has either taken uaddr2 or is |
| 1510 | * waiting on it. If the former, then the pi_state will not | 1527 | * waiting on it. If the former, then the pi_state will not |
| 1511 | * exist yet, look it up one more time to ensure we have a | 1528 | * exist yet, look it up one more time to ensure we have a |
| 1512 | * reference to it. | 1529 | * reference to it. If the lock was taken, ret contains the |
| 1530 | * vpid of the top waiter task. | ||
| 1513 | */ | 1531 | */ |
| 1514 | if (ret == 1) { | 1532 | if (ret > 0) { |
| 1515 | WARN_ON(pi_state); | 1533 | WARN_ON(pi_state); |
| 1516 | drop_count++; | 1534 | drop_count++; |
| 1517 | task_count++; | 1535 | task_count++; |
| 1518 | ret = get_futex_value_locked(&curval2, uaddr2); | 1536 | /* |
| 1519 | if (!ret) | 1537 | * If we acquired the lock, then the user |
| 1520 | ret = lookup_pi_state(curval2, hb2, &key2, | 1538 | * space value of uaddr2 should be vpid. It |
| 1521 | &pi_state); | 1539 | * cannot be changed by the top waiter as it |
| 1540 | * is blocked on hb2 lock if it tries to do | ||
| 1541 | * so. If something fiddled with it behind our | ||
| 1542 | * back the pi state lookup might unearth | ||
| 1543 | * it. So we rather use the known value than | ||
| 1544 | * rereading and handing potential crap to | ||
| 1545 | * lookup_pi_state. | ||
| 1546 | */ | ||
| 1547 | ret = lookup_pi_state(ret, hb2, &key2, &pi_state, NULL); | ||
| 1522 | } | 1548 | } |
| 1523 | 1549 | ||
| 1524 | switch (ret) { | 1550 | switch (ret) { |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index d10eba8089d1..3ab28993f6e0 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
| @@ -990,11 +990,8 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, | |||
| 990 | /* Remove an active timer from the queue: */ | 990 | /* Remove an active timer from the queue: */ |
| 991 | ret = remove_hrtimer(timer, base); | 991 | ret = remove_hrtimer(timer, base); |
| 992 | 992 | ||
| 993 | /* Switch the timer base, if necessary: */ | ||
| 994 | new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); | ||
| 995 | |||
| 996 | if (mode & HRTIMER_MODE_REL) { | 993 | if (mode & HRTIMER_MODE_REL) { |
| 997 | tim = ktime_add_safe(tim, new_base->get_time()); | 994 | tim = ktime_add_safe(tim, base->get_time()); |
| 998 | /* | 995 | /* |
| 999 | * CONFIG_TIME_LOW_RES is a temporary way for architectures | 996 | * CONFIG_TIME_LOW_RES is a temporary way for architectures |
| 1000 | * to signal that they simply return xtime in | 997 | * to signal that they simply return xtime in |
| @@ -1009,6 +1006,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, | |||
| 1009 | 1006 | ||
| 1010 | hrtimer_set_expires_range_ns(timer, tim, delta_ns); | 1007 | hrtimer_set_expires_range_ns(timer, tim, delta_ns); |
| 1011 | 1008 | ||
| 1009 | /* Switch the timer base, if necessary: */ | ||
| 1010 | new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); | ||
| 1011 | |||
| 1012 | timer_stats_hrtimer_set_start_info(timer); | 1012 | timer_stats_hrtimer_set_start_info(timer); |
| 1013 | 1013 | ||
| 1014 | leftmost = enqueue_hrtimer(timer, new_base); | 1014 | leftmost = enqueue_hrtimer(timer, new_base); |
diff --git a/kernel/kexec.c b/kernel/kexec.c
index c8380ad203bc..28c57069ef68 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
| @@ -1683,6 +1683,14 @@ int kernel_kexec(void) | |||
| 1683 | kexec_in_progress = true; | 1683 | kexec_in_progress = true; |
| 1684 | kernel_restart_prepare(NULL); | 1684 | kernel_restart_prepare(NULL); |
| 1685 | migrate_to_reboot_cpu(); | 1685 | migrate_to_reboot_cpu(); |
| 1686 | |||
| 1687 | /* | ||
| 1688 | * migrate_to_reboot_cpu() disables CPU hotplug assuming that | ||
| 1689 | * no further code needs to use CPU hotplug (which is true in | ||
| 1690 | * the reboot case). However, the kexec path depends on using | ||
| 1691 | * CPU hotplug again; so re-enable it here. | ||
| 1692 | */ | ||
| 1693 | cpu_hotplug_enable(); | ||
| 1686 | printk(KERN_EMERG "Starting new kernel\n"); | 1694 | printk(KERN_EMERG "Starting new kernel\n"); |
| 1687 | machine_shutdown(); | 1695 | machine_shutdown(); |
| 1688 | } | 1696 | } |
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index b0e9467922e1..d24e4339b46d 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
| @@ -4188,7 +4188,7 @@ void debug_show_held_locks(struct task_struct *task) | |||
| 4188 | } | 4188 | } |
| 4189 | EXPORT_SYMBOL_GPL(debug_show_held_locks); | 4189 | EXPORT_SYMBOL_GPL(debug_show_held_locks); |
| 4190 | 4190 | ||
| 4191 | asmlinkage void lockdep_sys_exit(void) | 4191 | asmlinkage __visible void lockdep_sys_exit(void) |
| 4192 | { | 4192 | { |
| 4193 | struct task_struct *curr = current; | 4193 | struct task_struct *curr = current; |
| 4194 | 4194 | ||
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index aa4dff04b594..a620d4d08ca6 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
| @@ -343,9 +343,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |||
| 343 | * top_waiter can be NULL, when we are in the deboosting | 343 | * top_waiter can be NULL, when we are in the deboosting |
| 344 | * mode! | 344 | * mode! |
| 345 | */ | 345 | */ |
| 346 | if (top_waiter && (!task_has_pi_waiters(task) || | 346 | if (top_waiter) { |
| 347 | top_waiter != task_top_pi_waiter(task))) | 347 | if (!task_has_pi_waiters(task)) |
| 348 | goto out_unlock_pi; | 348 | goto out_unlock_pi; |
| 349 | /* | ||
| 350 | * If deadlock detection is off, we stop here if we | ||
| 351 | * are not the top pi waiter of the task. | ||
| 352 | */ | ||
| 353 | if (!detect_deadlock && top_waiter != task_top_pi_waiter(task)) | ||
| 354 | goto out_unlock_pi; | ||
| 355 | } | ||
| 349 | 356 | ||
| 350 | /* | 357 | /* |
| 351 | * When deadlock detection is off then we check, if further | 358 | * When deadlock detection is off then we check, if further |
| @@ -361,7 +368,12 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |||
| 361 | goto retry; | 368 | goto retry; |
| 362 | } | 369 | } |
| 363 | 370 | ||
| 364 | /* Deadlock detection */ | 371 | /* |
| 372 | * Deadlock detection. If the lock is the same as the original | ||
| 373 | * lock which caused us to walk the lock chain or if the | ||
| 374 | * current lock is owned by the task which initiated the chain | ||
| 375 | * walk, we detected a deadlock. | ||
| 376 | */ | ||
| 365 | if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { | 377 | if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { |
| 366 | debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); | 378 | debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); |
| 367 | raw_spin_unlock(&lock->wait_lock); | 379 | raw_spin_unlock(&lock->wait_lock); |
| @@ -527,6 +539,18 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |||
| 527 | unsigned long flags; | 539 | unsigned long flags; |
| 528 | int chain_walk = 0, res; | 540 | int chain_walk = 0, res; |
| 529 | 541 | ||
| 542 | /* | ||
| 543 | * Early deadlock detection. We really don't want the task to | ||
| 544 | * enqueue on itself just to untangle the mess later. It's not | ||
| 545 | * only an optimization. We drop the locks, so another waiter | ||
| 546 | * can come in before the chain walk detects the deadlock. So | ||
| 547 | * the other will detect the deadlock and return -EDEADLOCK, | ||
| 548 | * which is wrong, as the other waiter is not in a deadlock | ||
| 549 | * situation. | ||
| 550 | */ | ||
| 551 | if (detect_deadlock && owner == task) | ||
| 552 | return -EDEADLK; | ||
| 553 | |||
| 530 | raw_spin_lock_irqsave(&task->pi_lock, flags); | 554 | raw_spin_lock_irqsave(&task->pi_lock, flags); |
| 531 | __rt_mutex_adjust_prio(task); | 555 | __rt_mutex_adjust_prio(task); |
| 532 | waiter->task = task; | 556 | waiter->task = task; |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 18fb7a2fb14b..1ea328aafdc9 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
| @@ -1586,7 +1586,7 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, | |||
| 1586 | return -ENOMEM; | 1586 | return -ENOMEM; |
| 1587 | } | 1587 | } |
| 1588 | 1588 | ||
| 1589 | asmlinkage int swsusp_save(void) | 1589 | asmlinkage __visible int swsusp_save(void) |
| 1590 | { | 1590 | { |
| 1591 | unsigned int nr_pages, nr_highmem; | 1591 | unsigned int nr_pages, nr_highmem; |
| 1592 | 1592 | ||
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index a45b50962295..7228258b85ec 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
| @@ -1674,7 +1674,7 @@ EXPORT_SYMBOL(printk_emit); | |||
| 1674 | * | 1674 | * |
| 1675 | * See the vsnprintf() documentation for format string extensions over C99. | 1675 | * See the vsnprintf() documentation for format string extensions over C99. |
| 1676 | */ | 1676 | */ |
| 1677 | asmlinkage int printk(const char *fmt, ...) | 1677 | asmlinkage __visible int printk(const char *fmt, ...) |
| 1678 | { | 1678 | { |
| 1679 | va_list args; | 1679 | va_list args; |
| 1680 | int r; | 1680 | int r; |
| @@ -1737,7 +1737,7 @@ void early_vprintk(const char *fmt, va_list ap) | |||
| 1737 | } | 1737 | } |
| 1738 | } | 1738 | } |
| 1739 | 1739 | ||
| 1740 | asmlinkage void early_printk(const char *fmt, ...) | 1740 | asmlinkage __visible void early_printk(const char *fmt, ...) |
| 1741 | { | 1741 | { |
| 1742 | va_list ap; | 1742 | va_list ap; |
| 1743 | 1743 | ||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 00781cc38047..a8c0fde25e4a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
| @@ -2192,7 +2192,7 @@ static inline void post_schedule(struct rq *rq) | |||
| 2192 | * schedule_tail - first thing a freshly forked thread must call. | 2192 | * schedule_tail - first thing a freshly forked thread must call. |
| 2193 | * @prev: the thread we just switched away from. | 2193 | * @prev: the thread we just switched away from. |
| 2194 | */ | 2194 | */ |
| 2195 | asmlinkage void schedule_tail(struct task_struct *prev) | 2195 | asmlinkage __visible void schedule_tail(struct task_struct *prev) |
| 2196 | __releases(rq->lock) | 2196 | __releases(rq->lock) |
| 2197 | { | 2197 | { |
| 2198 | struct rq *rq = this_rq(); | 2198 | struct rq *rq = this_rq(); |
| @@ -2594,8 +2594,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev) | |||
| 2594 | if (likely(prev->sched_class == class && | 2594 | if (likely(prev->sched_class == class && |
| 2595 | rq->nr_running == rq->cfs.h_nr_running)) { | 2595 | rq->nr_running == rq->cfs.h_nr_running)) { |
| 2596 | p = fair_sched_class.pick_next_task(rq, prev); | 2596 | p = fair_sched_class.pick_next_task(rq, prev); |
| 2597 | if (likely(p && p != RETRY_TASK)) | 2597 | if (unlikely(p == RETRY_TASK)) |
| 2598 | return p; | 2598 | goto again; |
| 2599 | |||
| 2600 | /* assumes fair_sched_class->next == idle_sched_class */ | ||
| 2601 | if (unlikely(!p)) | ||
| 2602 | p = idle_sched_class.pick_next_task(rq, prev); | ||
| 2603 | |||
| 2604 | return p; | ||
| 2599 | } | 2605 | } |
| 2600 | 2606 | ||
| 2601 | again: | 2607 | again: |
| @@ -2743,7 +2749,7 @@ static inline void sched_submit_work(struct task_struct *tsk) | |||
| 2743 | blk_schedule_flush_plug(tsk); | 2749 | blk_schedule_flush_plug(tsk); |
| 2744 | } | 2750 | } |
| 2745 | 2751 | ||
| 2746 | asmlinkage void __sched schedule(void) | 2752 | asmlinkage __visible void __sched schedule(void) |
| 2747 | { | 2753 | { |
| 2748 | struct task_struct *tsk = current; | 2754 | struct task_struct *tsk = current; |
| 2749 | 2755 | ||
| @@ -2753,7 +2759,7 @@ asmlinkage void __sched schedule(void) | |||
| 2753 | EXPORT_SYMBOL(schedule); | 2759 | EXPORT_SYMBOL(schedule); |
| 2754 | 2760 | ||
| 2755 | #ifdef CONFIG_CONTEXT_TRACKING | 2761 | #ifdef CONFIG_CONTEXT_TRACKING |
| 2756 | asmlinkage void __sched schedule_user(void) | 2762 | asmlinkage __visible void __sched schedule_user(void) |
| 2757 | { | 2763 | { |
| 2758 | /* | 2764 | /* |
| 2759 | * If we come here after a random call to set_need_resched(), | 2765 | * If we come here after a random call to set_need_resched(), |
| @@ -2785,7 +2791,7 @@ void __sched schedule_preempt_disabled(void) | |||
| 2785 | * off of preempt_enable. Kernel preemptions off return from interrupt | 2791 | * off of preempt_enable. Kernel preemptions off return from interrupt |
| 2786 | * occur there and call schedule directly. | 2792 | * occur there and call schedule directly. |
| 2787 | */ | 2793 | */ |
| 2788 | asmlinkage void __sched notrace preempt_schedule(void) | 2794 | asmlinkage __visible void __sched notrace preempt_schedule(void) |
| 2789 | { | 2795 | { |
| 2790 | /* | 2796 | /* |
| 2791 | * If there is a non-zero preempt_count or interrupts are disabled, | 2797 | * If there is a non-zero preempt_count or interrupts are disabled, |
| @@ -2816,7 +2822,7 @@ EXPORT_SYMBOL(preempt_schedule); | |||
| 2816 | * Note, that this is called and return with irqs disabled. This will | 2822 | * Note, that this is called and return with irqs disabled. This will |
| 2817 | * protect us against recursive calling from irq. | 2823 | * protect us against recursive calling from irq. |
| 2818 | */ | 2824 | */ |
| 2819 | asmlinkage void __sched preempt_schedule_irq(void) | 2825 | asmlinkage __visible void __sched preempt_schedule_irq(void) |
| 2820 | { | 2826 | { |
| 2821 | enum ctx_state prev_state; | 2827 | enum ctx_state prev_state; |
| 2822 | 2828 | ||
| @@ -3127,6 +3133,7 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr) | |||
| 3127 | dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime); | 3133 | dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime); |
| 3128 | dl_se->dl_throttled = 0; | 3134 | dl_se->dl_throttled = 0; |
| 3129 | dl_se->dl_new = 1; | 3135 | dl_se->dl_new = 1; |
| 3136 | dl_se->dl_yielded = 0; | ||
| 3130 | } | 3137 | } |
| 3131 | 3138 | ||
| 3132 | static void __setscheduler_params(struct task_struct *p, | 3139 | static void __setscheduler_params(struct task_struct *p, |
| @@ -3191,17 +3198,40 @@ __getparam_dl(struct task_struct *p, struct sched_attr *attr) | |||
| 3191 | * We ask for the deadline not being zero, and greater or equal | 3198 | * We ask for the deadline not being zero, and greater or equal |
| 3192 | * than the runtime, as well as the period of being zero or | 3199 | * than the runtime, as well as the period of being zero or |
| 3193 | * greater than deadline. Furthermore, we have to be sure that | 3200 | * greater than deadline. Furthermore, we have to be sure that |
| 3194 | * user parameters are above the internal resolution (1us); we | 3201 | * user parameters are above the internal resolution of 1us (we |
| 3195 | * check sched_runtime only since it is always the smaller one. | 3202 | * check sched_runtime only since it is always the smaller one) and |
| 3203 | * below 2^63 ns (we have to check both sched_deadline and | ||
| 3204 | * sched_period, as the latter can be zero). | ||
| 3196 | */ | 3205 | */ |
| 3197 | static bool | 3206 | static bool |
| 3198 | __checkparam_dl(const struct sched_attr *attr) | 3207 | __checkparam_dl(const struct sched_attr *attr) |
| 3199 | { | 3208 | { |
| 3200 | return attr && attr->sched_deadline != 0 && | 3209 | /* deadline != 0 */ |
| 3201 | (attr->sched_period == 0 || | 3210 | if (attr->sched_deadline == 0) |
| 3202 | (s64)(attr->sched_period - attr->sched_deadline) >= 0) && | 3211 | return false; |
| 3203 | (s64)(attr->sched_deadline - attr->sched_runtime ) >= 0 && | 3212 | |
| 3204 | attr->sched_runtime >= (2 << (DL_SCALE - 1)); | 3213 | /* |
| 3214 | * Since we truncate DL_SCALE bits, make sure we're at least | ||
| 3215 | * that big. | ||
| 3216 | */ | ||
| 3217 | if (attr->sched_runtime < (1ULL << DL_SCALE)) | ||
| 3218 | return false; | ||
| 3219 | |||
| 3220 | /* | ||
| 3221 | * Since we use the MSB for wrap-around and sign issues, make | ||
| 3222 | * sure it's not set (mind that period can be equal to zero). | ||
| 3223 | */ | ||
| 3224 | if (attr->sched_deadline & (1ULL << 63) || | ||
| 3225 | attr->sched_period & (1ULL << 63)) | ||
| 3226 | return false; | ||
| 3227 | |||
| 3228 | /* runtime <= deadline <= period (if period != 0) */ | ||
| 3229 | if ((attr->sched_period != 0 && | ||
| 3230 | attr->sched_period < attr->sched_deadline) || | ||
| 3231 | attr->sched_deadline < attr->sched_runtime) | ||
| 3232 | return false; | ||
| 3233 | |||
| 3234 | return true; | ||
| 3205 | } | 3235 | } |
| 3206 | 3236 | ||
| 3207 | /* | 3237 | /* |
| @@ -3642,6 +3672,7 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) | |||
| 3642 | * sys_sched_setattr - same as above, but with extended sched_attr | 3672 | * sys_sched_setattr - same as above, but with extended sched_attr |
| 3643 | * @pid: the pid in question. | 3673 | * @pid: the pid in question. |
| 3644 | * @uattr: structure containing the extended parameters. | 3674 | * @uattr: structure containing the extended parameters. |
| 3675 | * @flags: for future extension. | ||
| 3645 | */ | 3676 | */ |
| 3646 | SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, | 3677 | SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, |
| 3647 | unsigned int, flags) | 3678 | unsigned int, flags) |
| @@ -3653,8 +3684,12 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, | |||
| 3653 | if (!uattr || pid < 0 || flags) | 3684 | if (!uattr || pid < 0 || flags) |
| 3654 | return -EINVAL; | 3685 | return -EINVAL; |
| 3655 | 3686 | ||
| 3656 | if (sched_copy_attr(uattr, &attr)) | 3687 | retval = sched_copy_attr(uattr, &attr); |
| 3657 | return -EFAULT; | 3688 | if (retval) |
| 3689 | return retval; | ||
| 3690 | |||
| 3691 | if (attr.sched_policy < 0) | ||
| 3692 | return -EINVAL; | ||
| 3658 | 3693 | ||
| 3659 | rcu_read_lock(); | 3694 | rcu_read_lock(); |
| 3660 | retval = -ESRCH; | 3695 | retval = -ESRCH; |
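
With this hunk, sched_setattr() propagates the exact error from sched_copy_attr() and rejects a negative sched_policy up front. A minimal userspace caller is sketched below; it assumes a kernel and headers new enough to define __NR_sched_setattr (glibc provides no wrapper), and typically needs CAP_SYS_NICE to succeed.

```c
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Userspace copy of the extended attributes (layout as of this kernel). */
struct sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	/* SCHED_DEADLINE parameters, in nanoseconds */
	uint64_t sched_runtime;
	uint64_t sched_deadline;
	uint64_t sched_period;
};

#define SCHED_DEADLINE 6

int main(void)
{
	struct sched_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size           = sizeof(attr);
	attr.sched_policy   = SCHED_DEADLINE;
	attr.sched_runtime  = 10 * 1000 * 1000;	/* 10ms */
	attr.sched_deadline = 30 * 1000 * 1000;	/* 30ms */
	attr.sched_period   = 30 * 1000 * 1000;	/* 30ms */

	/* pid 0 = calling thread; with this patch a bogus (negative) policy
	 * now fails with EINVAL instead of being silently misinterpreted. */
	if (syscall(__NR_sched_setattr, 0, &attr, 0) != 0) {
		perror("sched_setattr");
		return 1;
	}
	return 0;
}
```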
| @@ -3704,7 +3739,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) | |||
| 3704 | */ | 3739 | */ |
| 3705 | SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) | 3740 | SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) |
| 3706 | { | 3741 | { |
| 3707 | struct sched_param lp; | 3742 | struct sched_param lp = { .sched_priority = 0 }; |
| 3708 | struct task_struct *p; | 3743 | struct task_struct *p; |
| 3709 | int retval; | 3744 | int retval; |
| 3710 | 3745 | ||
| @@ -3721,11 +3756,8 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) | |||
| 3721 | if (retval) | 3756 | if (retval) |
| 3722 | goto out_unlock; | 3757 | goto out_unlock; |
| 3723 | 3758 | ||
| 3724 | if (task_has_dl_policy(p)) { | 3759 | if (task_has_rt_policy(p)) |
| 3725 | retval = -EINVAL; | 3760 | lp.sched_priority = p->rt_priority; |
| 3726 | goto out_unlock; | ||
| 3727 | } | ||
| 3728 | lp.sched_priority = p->rt_priority; | ||
| 3729 | rcu_read_unlock(); | 3761 | rcu_read_unlock(); |
| 3730 | 3762 | ||
| 3731 | /* | 3763 | /* |
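
After the sched_getparam() change above, querying a SCHED_DEADLINE (or SCHED_NORMAL) task no longer fails with -EINVAL; the call simply reports a static priority of 0, and only SCHED_FIFO/SCHED_RR tasks report their rt_priority. A small caller illustrating the new behaviour, as a sketch:

```c
#include <sched.h>
#include <stdio.h>
#include <sys/types.h>

/* Prints the static priority of @pid; 0 for any non-RT policy. */
int print_static_priority(pid_t pid)
{
	struct sched_param sp;

	if (sched_getparam(pid, &sp) != 0) {
		perror("sched_getparam");
		return -1;
	}
	printf("pid %d: sched_priority = %d\n", pid, sp.sched_priority);
	return 0;
}
```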
| @@ -3786,6 +3818,7 @@ err_size: | |||
| 3786 | * @pid: the pid in question. | 3818 | * @pid: the pid in question. |
| 3787 | * @uattr: structure containing the extended parameters. | 3819 | * @uattr: structure containing the extended parameters. |
| 3788 | * @size: sizeof(attr) for fwd/bwd comp. | 3820 | * @size: sizeof(attr) for fwd/bwd comp. |
| 3821 | * @flags: for future extension. | ||
| 3789 | */ | 3822 | */ |
| 3790 | SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, | 3823 | SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, |
| 3791 | unsigned int, size, unsigned int, flags) | 3824 | unsigned int, size, unsigned int, flags) |
| @@ -5046,7 +5079,6 @@ static int sched_cpu_active(struct notifier_block *nfb, | |||
| 5046 | unsigned long action, void *hcpu) | 5079 | unsigned long action, void *hcpu) |
| 5047 | { | 5080 | { |
| 5048 | switch (action & ~CPU_TASKS_FROZEN) { | 5081 | switch (action & ~CPU_TASKS_FROZEN) { |
| 5049 | case CPU_STARTING: | ||
| 5050 | case CPU_DOWN_FAILED: | 5082 | case CPU_DOWN_FAILED: |
| 5051 | set_cpu_active((long)hcpu, true); | 5083 | set_cpu_active((long)hcpu, true); |
| 5052 | return NOTIFY_OK; | 5084 | return NOTIFY_OK; |
| @@ -6020,6 +6052,8 @@ sd_numa_init(struct sched_domain_topology_level *tl, int cpu) | |||
| 6020 | , | 6052 | , |
| 6021 | .last_balance = jiffies, | 6053 | .last_balance = jiffies, |
| 6022 | .balance_interval = sd_weight, | 6054 | .balance_interval = sd_weight, |
| 6055 | .max_newidle_lb_cost = 0, | ||
| 6056 | .next_decay_max_lb_cost = jiffies, | ||
| 6023 | }; | 6057 | }; |
| 6024 | SD_INIT_NAME(sd, NUMA); | 6058 | SD_INIT_NAME(sd, NUMA); |
| 6025 | sd->private = &tl->data; | 6059 | sd->private = &tl->data; |
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c index 5b9bb42b2d47..bd95963dae80 100644 --- a/kernel/sched/cpudeadline.c +++ b/kernel/sched/cpudeadline.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | 13 | ||
| 14 | #include <linux/gfp.h> | 14 | #include <linux/gfp.h> |
| 15 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
| 16 | #include <linux/slab.h> | ||
| 16 | #include "cpudeadline.h" | 17 | #include "cpudeadline.h" |
| 17 | 18 | ||
| 18 | static inline int parent(int i) | 19 | static inline int parent(int i) |
| @@ -39,8 +40,10 @@ static void cpudl_exchange(struct cpudl *cp, int a, int b) | |||
| 39 | { | 40 | { |
| 40 | int cpu_a = cp->elements[a].cpu, cpu_b = cp->elements[b].cpu; | 41 | int cpu_a = cp->elements[a].cpu, cpu_b = cp->elements[b].cpu; |
| 41 | 42 | ||
| 42 | swap(cp->elements[a], cp->elements[b]); | 43 | swap(cp->elements[a].cpu, cp->elements[b].cpu); |
| 43 | swap(cp->cpu_to_idx[cpu_a], cp->cpu_to_idx[cpu_b]); | 44 | swap(cp->elements[a].dl , cp->elements[b].dl ); |
| 45 | |||
| 46 | swap(cp->elements[cpu_a].idx, cp->elements[cpu_b].idx); | ||
| 44 | } | 47 | } |
| 45 | 48 | ||
| 46 | static void cpudl_heapify(struct cpudl *cp, int idx) | 49 | static void cpudl_heapify(struct cpudl *cp, int idx) |
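
With idx folded into the per-CPU element, exchanging two heap slots means swapping their cpu/dl payloads and then repointing the two affected CPUs' back-pointers; the idx field of slots a and b belongs to other CPUs, since it is indexed by CPU number rather than heap position. The freestanding sketch below restates that invariant with simplified types (not the kernel code):

```c
#include <stdint.h>

/* Simplified stand-in for struct cpudl_item after this patch. */
struct item {
	uint64_t dl;	/* earliest deadline queued on this CPU     */
	int	 cpu;	/* CPU owning this heap slot                */
	int	 idx;	/* heap slot holding this CPU (by CPU index) */
};

#define SWAP(a, b) do { __typeof__(a) _t = (a); (a) = (b); (b) = _t; } while (0)

/* Exchange heap slots a and b while keeping elements[cpu].idx consistent. */
static void heap_exchange(struct item *elements, int a, int b)
{
	int cpu_a = elements[a].cpu, cpu_b = elements[b].cpu;

	/* swap only the payload of the two heap slots */
	SWAP(elements[a].cpu, elements[b].cpu);
	SWAP(elements[a].dl,  elements[b].dl);

	/* then repoint the two affected CPUs at their new slots */
	SWAP(elements[cpu_a].idx, elements[cpu_b].idx);
}
```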
| @@ -140,7 +143,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid) | |||
| 140 | WARN_ON(!cpu_present(cpu)); | 143 | WARN_ON(!cpu_present(cpu)); |
| 141 | 144 | ||
| 142 | raw_spin_lock_irqsave(&cp->lock, flags); | 145 | raw_spin_lock_irqsave(&cp->lock, flags); |
| 143 | old_idx = cp->cpu_to_idx[cpu]; | 146 | old_idx = cp->elements[cpu].idx; |
| 144 | if (!is_valid) { | 147 | if (!is_valid) { |
| 145 | /* remove item */ | 148 | /* remove item */ |
| 146 | if (old_idx == IDX_INVALID) { | 149 | if (old_idx == IDX_INVALID) { |
| @@ -155,8 +158,8 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid) | |||
| 155 | cp->elements[old_idx].dl = cp->elements[cp->size - 1].dl; | 158 | cp->elements[old_idx].dl = cp->elements[cp->size - 1].dl; |
| 156 | cp->elements[old_idx].cpu = new_cpu; | 159 | cp->elements[old_idx].cpu = new_cpu; |
| 157 | cp->size--; | 160 | cp->size--; |
| 158 | cp->cpu_to_idx[new_cpu] = old_idx; | 161 | cp->elements[new_cpu].idx = old_idx; |
| 159 | cp->cpu_to_idx[cpu] = IDX_INVALID; | 162 | cp->elements[cpu].idx = IDX_INVALID; |
| 160 | while (old_idx > 0 && dl_time_before( | 163 | while (old_idx > 0 && dl_time_before( |
| 161 | cp->elements[parent(old_idx)].dl, | 164 | cp->elements[parent(old_idx)].dl, |
| 162 | cp->elements[old_idx].dl)) { | 165 | cp->elements[old_idx].dl)) { |
| @@ -173,7 +176,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid) | |||
| 173 | cp->size++; | 176 | cp->size++; |
| 174 | cp->elements[cp->size - 1].dl = 0; | 177 | cp->elements[cp->size - 1].dl = 0; |
| 175 | cp->elements[cp->size - 1].cpu = cpu; | 178 | cp->elements[cp->size - 1].cpu = cpu; |
| 176 | cp->cpu_to_idx[cpu] = cp->size - 1; | 179 | cp->elements[cpu].idx = cp->size - 1; |
| 177 | cpudl_change_key(cp, cp->size - 1, dl); | 180 | cpudl_change_key(cp, cp->size - 1, dl); |
| 178 | cpumask_clear_cpu(cpu, cp->free_cpus); | 181 | cpumask_clear_cpu(cpu, cp->free_cpus); |
| 179 | } else { | 182 | } else { |
| @@ -195,10 +198,21 @@ int cpudl_init(struct cpudl *cp) | |||
| 195 | memset(cp, 0, sizeof(*cp)); | 198 | memset(cp, 0, sizeof(*cp)); |
| 196 | raw_spin_lock_init(&cp->lock); | 199 | raw_spin_lock_init(&cp->lock); |
| 197 | cp->size = 0; | 200 | cp->size = 0; |
| 198 | for (i = 0; i < NR_CPUS; i++) | 201 | |
| 199 | cp->cpu_to_idx[i] = IDX_INVALID; | 202 | cp->elements = kcalloc(nr_cpu_ids, |
| 200 | if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) | 203 | sizeof(struct cpudl_item), |
| 204 | GFP_KERNEL); | ||
| 205 | if (!cp->elements) | ||
| 206 | return -ENOMEM; | ||
| 207 | |||
| 208 | if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) { | ||
| 209 | kfree(cp->elements); | ||
| 201 | return -ENOMEM; | 210 | return -ENOMEM; |
| 211 | } | ||
| 212 | |||
| 213 | for_each_possible_cpu(i) | ||
| 214 | cp->elements[i].idx = IDX_INVALID; | ||
| 215 | |||
| 202 | cpumask_setall(cp->free_cpus); | 216 | cpumask_setall(cp->free_cpus); |
| 203 | 217 | ||
| 204 | return 0; | 218 | return 0; |
| @@ -210,7 +224,6 @@ int cpudl_init(struct cpudl *cp) | |||
| 210 | */ | 224 | */ |
| 211 | void cpudl_cleanup(struct cpudl *cp) | 225 | void cpudl_cleanup(struct cpudl *cp) |
| 212 | { | 226 | { |
| 213 | /* | 227 | free_cpumask_var(cp->free_cpus); |
| 214 | * nothing to do for the moment | 228 | kfree(cp->elements); |
| 215 | */ | ||
| 216 | } | 229 | } |
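
cpudl_init() now sizes the element array at runtime from nr_cpu_ids instead of carrying a compile-time NR_CPUS worst case, which is why cpudl_cleanup() gains a real body. A sketch of the same init/teardown pairing with plain calloc/free, under the assumption that callers always pair the two:

```c
#include <stdint.h>
#include <stdlib.h>

#define IDX_INVALID -1

struct item {
	uint64_t dl;
	int cpu;
	int idx;
};

struct heap {
	int size;
	struct item *elements;	/* runtime-sized, one entry per possible CPU */
};

/* Mirrors cpudl_init(): allocate, then mark every CPU as not in the heap. */
static int heap_init(struct heap *h, int nr_cpus)
{
	h->size = 0;
	h->elements = calloc(nr_cpus, sizeof(*h->elements));
	if (!h->elements)
		return -1;
	for (int i = 0; i < nr_cpus; i++)
		h->elements[i].idx = IDX_INVALID;
	return 0;
}

/* Mirrors cpudl_cleanup(): the runtime allocation must be undone explicitly. */
static void heap_cleanup(struct heap *h)
{
	free(h->elements);
	h->elements = NULL;
}
```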
diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h index a202789a412c..538c9796ad4a 100644 --- a/kernel/sched/cpudeadline.h +++ b/kernel/sched/cpudeadline.h | |||
| @@ -5,17 +5,17 @@ | |||
| 5 | 5 | ||
| 6 | #define IDX_INVALID -1 | 6 | #define IDX_INVALID -1 |
| 7 | 7 | ||
| 8 | struct array_item { | 8 | struct cpudl_item { |
| 9 | u64 dl; | 9 | u64 dl; |
| 10 | int cpu; | 10 | int cpu; |
| 11 | int idx; | ||
| 11 | }; | 12 | }; |
| 12 | 13 | ||
| 13 | struct cpudl { | 14 | struct cpudl { |
| 14 | raw_spinlock_t lock; | 15 | raw_spinlock_t lock; |
| 15 | int size; | 16 | int size; |
| 16 | int cpu_to_idx[NR_CPUS]; | ||
| 17 | struct array_item elements[NR_CPUS]; | ||
| 18 | cpumask_var_t free_cpus; | 17 | cpumask_var_t free_cpus; |
| 18 | struct cpudl_item *elements; | ||
| 19 | }; | 19 | }; |
| 20 | 20 | ||
| 21 | 21 | ||
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c index 8b836b376d91..8834243abee2 100644 --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #include <linux/gfp.h> | 30 | #include <linux/gfp.h> |
| 31 | #include <linux/sched.h> | 31 | #include <linux/sched.h> |
| 32 | #include <linux/sched/rt.h> | 32 | #include <linux/sched/rt.h> |
| 33 | #include <linux/slab.h> | ||
| 33 | #include "cpupri.h" | 34 | #include "cpupri.h" |
| 34 | 35 | ||
| 35 | /* Convert between a 140 based task->prio, and our 102 based cpupri */ | 36 | /* Convert between a 140 based task->prio, and our 102 based cpupri */ |
| @@ -70,8 +71,7 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, | |||
| 70 | int idx = 0; | 71 | int idx = 0; |
| 71 | int task_pri = convert_prio(p->prio); | 72 | int task_pri = convert_prio(p->prio); |
| 72 | 73 | ||
| 73 | if (task_pri >= MAX_RT_PRIO) | 74 | BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES); |
| 74 | return 0; | ||
| 75 | 75 | ||
| 76 | for (idx = 0; idx < task_pri; idx++) { | 76 | for (idx = 0; idx < task_pri; idx++) { |
| 77 | struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; | 77 | struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; |
| @@ -219,8 +219,13 @@ int cpupri_init(struct cpupri *cp) | |||
| 219 | goto cleanup; | 219 | goto cleanup; |
| 220 | } | 220 | } |
| 221 | 221 | ||
| 222 | cp->cpu_to_pri = kcalloc(nr_cpu_ids, sizeof(int), GFP_KERNEL); | ||
| 223 | if (!cp->cpu_to_pri) | ||
| 224 | goto cleanup; | ||
| 225 | |||
| 222 | for_each_possible_cpu(i) | 226 | for_each_possible_cpu(i) |
| 223 | cp->cpu_to_pri[i] = CPUPRI_INVALID; | 227 | cp->cpu_to_pri[i] = CPUPRI_INVALID; |
| 228 | |||
| 224 | return 0; | 229 | return 0; |
| 225 | 230 | ||
| 226 | cleanup: | 231 | cleanup: |
| @@ -237,6 +242,7 @@ void cpupri_cleanup(struct cpupri *cp) | |||
| 237 | { | 242 | { |
| 238 | int i; | 243 | int i; |
| 239 | 244 | ||
| 245 | kfree(cp->cpu_to_pri); | ||
| 240 | for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) | 246 | for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) |
| 241 | free_cpumask_var(cp->pri_to_cpu[i].mask); | 247 | free_cpumask_var(cp->pri_to_cpu[i].mask); |
| 242 | } | 248 | } |
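
cpupri_init() gains the same nr_cpu_ids-sized allocation and reuses the existing cleanup label, so a failed allocation unwinds the per-priority masks that were already set up. The sketch below shows that allocate-then-unwind shape with hypothetical names and plain calloc:

```c
#include <stdlib.h>

#define NR_PRIORITIES 102	/* mirrors CPUPRI_NR_PRIORITIES */

struct prio_map {
	unsigned long *pri_to_cpu[NR_PRIORITIES];	/* one CPU mask per priority */
	int *cpu_to_pri;				/* runtime-sized, per CPU    */
};

/* Mirrors cpupri_init(): any failure frees everything allocated so far. */
static int prio_map_init(struct prio_map *m, int nr_cpus, int mask_longs)
{
	int i;

	for (i = 0; i < NR_PRIORITIES; i++) {
		m->pri_to_cpu[i] = calloc(mask_longs, sizeof(unsigned long));
		if (!m->pri_to_cpu[i])
			goto cleanup;
	}

	m->cpu_to_pri = calloc(nr_cpus, sizeof(int));
	if (!m->cpu_to_pri)
		goto cleanup;

	return 0;

cleanup:
	while (--i >= 0)
		free(m->pri_to_cpu[i]);
	return -1;
}
```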
diff --git a/kernel/sched/cpupri.h b/kernel/sched/cpupri.h index f6d756173491..6b033347fdfd 100644 --- a/kernel/sched/cpupri.h +++ b/kernel/sched/cpupri.h | |||
| @@ -17,7 +17,7 @@ struct cpupri_vec { | |||
| 17 | 17 | ||
| 18 | struct cpupri { | 18 | struct cpupri { |
| 19 | struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES]; | 19 | struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES]; |
| 20 | int cpu_to_pri[NR_CPUS]; | 20 | int *cpu_to_pri; |
| 21 | }; | 21 | }; |
| 22 | 22 | ||
| 23 | #ifdef CONFIG_SMP | 23 | #ifdef CONFIG_SMP |
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index a95097cb4591..72fdf06ef865 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c | |||
| @@ -332,50 +332,50 @@ out: | |||
| 332 | * softirq as those do not count in task exec_runtime any more. | 332 | * softirq as those do not count in task exec_runtime any more. |
| 333 | */ | 333 | */ |
| 334 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | 334 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, |
| 335 | struct rq *rq) | 335 | struct rq *rq, int ticks) |
| 336 | { | 336 | { |
| 337 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | 337 | cputime_t scaled = cputime_to_scaled(cputime_one_jiffy); |
| 338 | u64 cputime = (__force u64) cputime_one_jiffy; | ||
| 338 | u64 *cpustat = kcpustat_this_cpu->cpustat; | 339 | u64 *cpustat = kcpustat_this_cpu->cpustat; |
| 339 | 340 | ||
| 340 | if (steal_account_process_tick()) | 341 | if (steal_account_process_tick()) |
| 341 | return; | 342 | return; |
| 342 | 343 | ||
| 344 | cputime *= ticks; | ||
| 345 | scaled *= ticks; | ||
| 346 | |||
| 343 | if (irqtime_account_hi_update()) { | 347 | if (irqtime_account_hi_update()) { |
| 344 | cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy; | 348 | cpustat[CPUTIME_IRQ] += cputime; |
| 345 | } else if (irqtime_account_si_update()) { | 349 | } else if (irqtime_account_si_update()) { |
| 346 | cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy; | 350 | cpustat[CPUTIME_SOFTIRQ] += cputime; |
| 347 | } else if (this_cpu_ksoftirqd() == p) { | 351 | } else if (this_cpu_ksoftirqd() == p) { |
| 348 | /* | 352 | /* |
| 349 | * ksoftirqd time do not get accounted in cpu_softirq_time. | 353 | * ksoftirqd time do not get accounted in cpu_softirq_time. |
| 350 | * So, we have to handle it separately here. | 354 | * So, we have to handle it separately here. |
| 351 | * Also, p->stime needs to be updated for ksoftirqd. | 355 | * Also, p->stime needs to be updated for ksoftirqd. |
| 352 | */ | 356 | */ |
| 353 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | 357 | __account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ); |
| 354 | CPUTIME_SOFTIRQ); | ||
| 355 | } else if (user_tick) { | 358 | } else if (user_tick) { |
| 356 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | 359 | account_user_time(p, cputime, scaled); |
| 357 | } else if (p == rq->idle) { | 360 | } else if (p == rq->idle) { |
| 358 | account_idle_time(cputime_one_jiffy); | 361 | account_idle_time(cputime); |
| 359 | } else if (p->flags & PF_VCPU) { /* System time or guest time */ | 362 | } else if (p->flags & PF_VCPU) { /* System time or guest time */ |
| 360 | account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); | 363 | account_guest_time(p, cputime, scaled); |
| 361 | } else { | 364 | } else { |
| 362 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | 365 | __account_system_time(p, cputime, scaled, CPUTIME_SYSTEM); |
| 363 | CPUTIME_SYSTEM); | ||
| 364 | } | 366 | } |
| 365 | } | 367 | } |
| 366 | 368 | ||
| 367 | static void irqtime_account_idle_ticks(int ticks) | 369 | static void irqtime_account_idle_ticks(int ticks) |
| 368 | { | 370 | { |
| 369 | int i; | ||
| 370 | struct rq *rq = this_rq(); | 371 | struct rq *rq = this_rq(); |
| 371 | 372 | ||
| 372 | for (i = 0; i < ticks; i++) | 373 | irqtime_account_process_tick(current, 0, rq, ticks); |
| 373 | irqtime_account_process_tick(current, 0, rq); | ||
| 374 | } | 374 | } |
| 375 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | 375 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ |
| 376 | static inline void irqtime_account_idle_ticks(int ticks) {} | 376 | static inline void irqtime_account_idle_ticks(int ticks) {} |
| 377 | static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick, | 377 | static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick, |
| 378 | struct rq *rq) {} | 378 | struct rq *rq, int nr_ticks) {} |
| 379 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ | 379 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ |
| 380 | 380 | ||
| 381 | /* | 381 | /* |
| @@ -464,7 +464,7 @@ void account_process_tick(struct task_struct *p, int user_tick) | |||
| 464 | return; | 464 | return; |
| 465 | 465 | ||
| 466 | if (sched_clock_irqtime) { | 466 | if (sched_clock_irqtime) { |
| 467 | irqtime_account_process_tick(p, user_tick, rq); | 467 | irqtime_account_process_tick(p, user_tick, rq, 1); |
| 468 | return; | 468 | return; |
| 469 | } | 469 | } |
| 470 | 470 | ||
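
The cputime change teaches irqtime_account_process_tick() to take a tick count and scale the per-jiffy amount once, so irqtime_account_idle_ticks() no longer calls it in a loop. A toy sketch of that batching idea, with hypothetical counters and an assumed HZ of 250:

```c
#include <stdint.h>

/* Hypothetical per-CPU counters standing in for the kcpustat buckets. */
static uint64_t stat_user_ns, stat_system_ns, stat_idle_ns;

#define TICK_NSEC (1000000000ULL / 250)	/* one jiffy at HZ=250 (assumption) */

/*
 * Shape of the patched helper: scale the per-jiffy amount by 'ticks' once,
 * then classify it, instead of being invoked once per tick.
 */
static void account_ticks(int user_tick, int idle, int ticks)
{
	uint64_t delta = (uint64_t)ticks * TICK_NSEC;

	if (idle)
		stat_idle_ns += delta;
	else if (user_tick)
		stat_user_ns += delta;
	else
		stat_system_ns += delta;
}

/* The idle path now makes a single call for the whole backlog of ticks. */
static void account_idle_ticks(int ticks)
{
	account_ticks(0, 1, ticks);
}
```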
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index b08095786cb8..800e99b99075 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c | |||
| @@ -528,6 +528,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) | |||
| 528 | sched_clock_tick(); | 528 | sched_clock_tick(); |
| 529 | update_rq_clock(rq); | 529 | update_rq_clock(rq); |
| 530 | dl_se->dl_throttled = 0; | 530 | dl_se->dl_throttled = 0; |
| 531 | dl_se->dl_yielded = 0; | ||
| 531 | if (p->on_rq) { | 532 | if (p->on_rq) { |
| 532 | enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); | 533 | enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); |
| 533 | if (task_has_dl_policy(rq->curr)) | 534 | if (task_has_dl_policy(rq->curr)) |
| @@ -893,10 +894,10 @@ static void yield_task_dl(struct rq *rq) | |||
| 893 | * We make the task go to sleep until its current deadline by | 894 | * We make the task go to sleep until its current deadline by |
| 894 | * forcing its runtime to zero. This way, update_curr_dl() stops | 895 | * forcing its runtime to zero. This way, update_curr_dl() stops |
| 895 | * it and the bandwidth timer will wake it up and will give it | 896 | * it and the bandwidth timer will wake it up and will give it |
| 896 | * new scheduling parameters (thanks to dl_new=1). | 897 | * new scheduling parameters (thanks to dl_yielded=1). |
| 897 | */ | 898 | */ |
| 898 | if (p->dl.runtime > 0) { | 899 | if (p->dl.runtime > 0) { |
| 899 | rq->curr->dl.dl_new = 1; | 900 | rq->curr->dl.dl_yielded = 1; |
| 900 | p->dl.runtime = 0; | 901 | p->dl.runtime = 0; |
| 901 | } | 902 | } |
| 902 | update_curr_dl(rq); | 903 | update_curr_dl(rq); |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7570dd969c28..0fdb96de81a5 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
| @@ -6653,6 +6653,7 @@ static int idle_balance(struct rq *this_rq) | |||
| 6653 | int this_cpu = this_rq->cpu; | 6653 | int this_cpu = this_rq->cpu; |
| 6654 | 6654 | ||
| 6655 | idle_enter_fair(this_rq); | 6655 | idle_enter_fair(this_rq); |
| 6656 | |||
| 6656 | /* | 6657 | /* |
| 6657 | * We must set idle_stamp _before_ calling idle_balance(), such that we | 6658 | * We must set idle_stamp _before_ calling idle_balance(), such that we |
| 6658 | * measure the duration of idle_balance() as idle time. | 6659 | * measure the duration of idle_balance() as idle time. |
| @@ -6705,14 +6706,16 @@ static int idle_balance(struct rq *this_rq) | |||
| 6705 | 6706 | ||
| 6706 | raw_spin_lock(&this_rq->lock); | 6707 | raw_spin_lock(&this_rq->lock); |
| 6707 | 6708 | ||
| 6709 | if (curr_cost > this_rq->max_idle_balance_cost) | ||
| 6710 | this_rq->max_idle_balance_cost = curr_cost; | ||
| 6711 | |||
| 6708 | /* | 6712 | /* |
| 6709 | * While browsing the domains, we released the rq lock. | 6713 | * While browsing the domains, we released the rq lock, a task could |
| 6710 | * A task could have be enqueued in the meantime | 6714 | * have been enqueued in the meantime. Since we're not going idle, |
| 6715 | * pretend we pulled a task. | ||
| 6711 | */ | 6716 | */ |
| 6712 | if (this_rq->cfs.h_nr_running && !pulled_task) { | 6717 | if (this_rq->cfs.h_nr_running && !pulled_task) |
| 6713 | pulled_task = 1; | 6718 | pulled_task = 1; |
| 6714 | goto out; | ||
| 6715 | } | ||
| 6716 | 6719 | ||
| 6717 | if (pulled_task || time_after(jiffies, this_rq->next_balance)) { | 6720 | if (pulled_task || time_after(jiffies, this_rq->next_balance)) { |
| 6718 | /* | 6721 | /* |
| @@ -6722,9 +6725,6 @@ static int idle_balance(struct rq *this_rq) | |||
| 6722 | this_rq->next_balance = next_balance; | 6725 | this_rq->next_balance = next_balance; |
| 6723 | } | 6726 | } |
| 6724 | 6727 | ||
| 6725 | if (curr_cost > this_rq->max_idle_balance_cost) | ||
| 6726 | this_rq->max_idle_balance_cost = curr_cost; | ||
| 6727 | |||
| 6728 | out: | 6728 | out: |
| 6729 | /* Is there a task of a high priority class? */ | 6729 | /* Is there a task of a high priority class? */ |
| 6730 | if (this_rq->nr_running != this_rq->cfs.h_nr_running && | 6730 | if (this_rq->nr_running != this_rq->cfs.h_nr_running && |
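
In the idle_balance() hunks above, the max_idle_balance_cost update moves ahead of the "task appeared while the lock was dropped" check, and that case now pretends a task was pulled instead of jumping straight to out, so next_balance is still refreshed. A simplified sketch of the resulting tail of the function, with stand-in field names:

```c
/* Simplified stand-ins for the runqueue fields touched by this hunk. */
struct runqueue {
	unsigned int  nr_queued;	/* cfs.h_nr_running            */
	unsigned long max_balance_cost;	/* max_idle_balance_cost       */
	unsigned long next_balance;
};

static int idle_balance_tail(struct runqueue *rq, int pulled_task,
			     unsigned long curr_cost, unsigned long now,
			     unsigned long next_balance)
{
	/* cost bookkeeping now happens unconditionally */
	if (curr_cost > rq->max_balance_cost)
		rq->max_balance_cost = curr_cost;

	/* a task was enqueued while the rq lock was dropped: don't go idle */
	if (rq->nr_queued && !pulled_task)
		pulled_task = 1;

	/* next_balance is refreshed in this case too, unlike the old goto */
	if (pulled_task || now > rq->next_balance)
		rq->next_balance = next_balance;

	return pulled_task;
}
```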
diff --git a/kernel/softirq.c b/kernel/softirq.c index 33e4648ae0e7..92f24f5e8d52 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -223,7 +223,7 @@ static inline bool lockdep_softirq_start(void) { return false; } | |||
| 223 | static inline void lockdep_softirq_end(bool in_hardirq) { } | 223 | static inline void lockdep_softirq_end(bool in_hardirq) { } |
| 224 | #endif | 224 | #endif |
| 225 | 225 | ||
| 226 | asmlinkage void __do_softirq(void) | 226 | asmlinkage __visible void __do_softirq(void) |
| 227 | { | 227 | { |
| 228 | unsigned long end = jiffies + MAX_SOFTIRQ_TIME; | 228 | unsigned long end = jiffies + MAX_SOFTIRQ_TIME; |
| 229 | unsigned long old_flags = current->flags; | 229 | unsigned long old_flags = current->flags; |
| @@ -299,7 +299,7 @@ restart: | |||
| 299 | tsk_restore_flags(current, old_flags, PF_MEMALLOC); | 299 | tsk_restore_flags(current, old_flags, PF_MEMALLOC); |
| 300 | } | 300 | } |
| 301 | 301 | ||
| 302 | asmlinkage void do_softirq(void) | 302 | asmlinkage __visible void do_softirq(void) |
| 303 | { | 303 | { |
| 304 | __u32 pending; | 304 | __u32 pending; |
| 305 | unsigned long flags; | 305 | unsigned long flags; |
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index ac5b23cf7212..6620e5837ce2 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c | |||
| @@ -188,7 +188,6 @@ static int tracepoint_add_func(struct tracepoint *tp, | |||
| 188 | WARN_ON_ONCE(1); | 188 | WARN_ON_ONCE(1); |
| 189 | return PTR_ERR(old); | 189 | return PTR_ERR(old); |
| 190 | } | 190 | } |
| 191 | release_probes(old); | ||
| 192 | 191 | ||
| 193 | /* | 192 | /* |
| 194 | * rcu_assign_pointer has a smp_wmb() which makes sure that the new | 193 | * rcu_assign_pointer has a smp_wmb() which makes sure that the new |
| @@ -200,6 +199,7 @@ static int tracepoint_add_func(struct tracepoint *tp, | |||
| 200 | rcu_assign_pointer(tp->funcs, tp_funcs); | 199 | rcu_assign_pointer(tp->funcs, tp_funcs); |
| 201 | if (!static_key_enabled(&tp->key)) | 200 | if (!static_key_enabled(&tp->key)) |
| 202 | static_key_slow_inc(&tp->key); | 201 | static_key_slow_inc(&tp->key); |
| 202 | release_probes(old); | ||
| 203 | return 0; | 203 | return 0; |
| 204 | } | 204 | } |
| 205 | 205 | ||
| @@ -221,7 +221,6 @@ static int tracepoint_remove_func(struct tracepoint *tp, | |||
| 221 | WARN_ON_ONCE(1); | 221 | WARN_ON_ONCE(1); |
| 222 | return PTR_ERR(old); | 222 | return PTR_ERR(old); |
| 223 | } | 223 | } |
| 224 | release_probes(old); | ||
| 225 | 224 | ||
| 226 | if (!tp_funcs) { | 225 | if (!tp_funcs) { |
| 227 | /* Removed last function */ | 226 | /* Removed last function */ |
| @@ -232,6 +231,7 @@ static int tracepoint_remove_func(struct tracepoint *tp, | |||
| 232 | static_key_slow_dec(&tp->key); | 231 | static_key_slow_dec(&tp->key); |
| 233 | } | 232 | } |
| 234 | rcu_assign_pointer(tp->funcs, tp_funcs); | 233 | rcu_assign_pointer(tp->funcs, tp_funcs); |
| 234 | release_probes(old); | ||
| 235 | return 0; | 235 | return 0; |
| 236 | } | 236 | } |
| 237 | 237 | ||
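
Both tracepoint paths now publish the new callback array before release_probes() queues the old one for freeing, so the old array can never be reclaimed while tp->funcs still points at it. The sketch below shows only the ordering the patch enforces; publish() and retire() are plain stubs standing in for rcu_assign_pointer() and the grace-period-deferred free, not the kernel primitives.

```c
#include <stdlib.h>

struct probe { void (*fn)(void *); void *data; };

static struct probe *active_probes;	/* stands in for tp->funcs */

static void publish(struct probe *new_arr)
{
	/* stands in for rcu_assign_pointer(): release-store the new array */
	__atomic_store_n(&active_probes, new_arr, __ATOMIC_RELEASE);
}

static void retire(struct probe *old)
{
	/* in the kernel this waits for an RCU grace period before freeing */
	free(old);
}

static void replace_probes(struct probe *new_arr, struct probe *old)
{
	/*
	 * Order matters (this is what the patch fixes): readers must be able
	 * to find the new array before the old one is queued for freeing,
	 * otherwise a grace period could elapse and free memory that the
	 * still-published pointer continues to reference.
	 */
	publish(new_arr);	/* rcu_assign_pointer(tp->funcs, tp_funcs) */
	retire(old);		/* release_probes(old) now comes afterwards */
}
```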
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0ee63af30bd1..8edc87185427 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -1916,6 +1916,12 @@ static void send_mayday(struct work_struct *work) | |||
| 1916 | 1916 | ||
| 1917 | /* mayday mayday mayday */ | 1917 | /* mayday mayday mayday */ |
| 1918 | if (list_empty(&pwq->mayday_node)) { | 1918 | if (list_empty(&pwq->mayday_node)) { |
| 1919 | /* | ||
| 1920 | * If @pwq is for an unbound wq, its base ref may be put at | ||
| 1921 | * any time due to an attribute change. Pin @pwq until the | ||
| 1922 | * rescuer is done with it. | ||
| 1923 | */ | ||
| 1924 | get_pwq(pwq); | ||
| 1919 | list_add_tail(&pwq->mayday_node, &wq->maydays); | 1925 | list_add_tail(&pwq->mayday_node, &wq->maydays); |
| 1920 | wake_up_process(wq->rescuer->task); | 1926 | wake_up_process(wq->rescuer->task); |
| 1921 | } | 1927 | } |
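
Because an unbound pwq's base reference can be dropped by an attribute change at any time, send_mayday() now pins it with get_pwq() for as long as it sits on wq->maydays; the rescuer drops that reference once it is done (the put_pwq() added further down). A tiny sketch of the pin-while-queued pattern with a plain refcount and hypothetical types:

```c
#include <stdbool.h>

struct pinned_work {
	int  refcnt;
	bool on_mayday_list;
};

/* Pin the object for as long as it is queued; mirrors get_pwq() in send_mayday(). */
static void mayday_queue(struct pinned_work *w)
{
	if (!w->on_mayday_list) {
		w->refcnt++;
		w->on_mayday_list = true;
	}
}

/* The rescuer drops the pin only after it has finished with the object. */
static void mayday_done(struct pinned_work *w)
{
	w->on_mayday_list = false;
	w->refcnt--;		/* mirrors the added put_pwq() */
}
```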
| @@ -2398,6 +2404,7 @@ static int rescuer_thread(void *__rescuer) | |||
| 2398 | struct worker *rescuer = __rescuer; | 2404 | struct worker *rescuer = __rescuer; |
| 2399 | struct workqueue_struct *wq = rescuer->rescue_wq; | 2405 | struct workqueue_struct *wq = rescuer->rescue_wq; |
| 2400 | struct list_head *scheduled = &rescuer->scheduled; | 2406 | struct list_head *scheduled = &rescuer->scheduled; |
| 2407 | bool should_stop; | ||
| 2401 | 2408 | ||
| 2402 | set_user_nice(current, RESCUER_NICE_LEVEL); | 2409 | set_user_nice(current, RESCUER_NICE_LEVEL); |
| 2403 | 2410 | ||
| @@ -2409,11 +2416,15 @@ static int rescuer_thread(void *__rescuer) | |||
| 2409 | repeat: | 2416 | repeat: |
| 2410 | set_current_state(TASK_INTERRUPTIBLE); | 2417 | set_current_state(TASK_INTERRUPTIBLE); |
| 2411 | 2418 | ||
| 2412 | if (kthread_should_stop()) { | 2419 | /* |
| 2413 | __set_current_state(TASK_RUNNING); | 2420 | * By the time the rescuer is requested to stop, the workqueue |
| 2414 | rescuer->task->flags &= ~PF_WQ_WORKER; | 2421 | * shouldn't have any work pending, but @wq->maydays may still have |
| 2415 | return 0; | 2422 | * pwq(s) queued. This can happen by non-rescuer workers consuming |
| 2416 | } | 2423 | * all the work items before the rescuer got to them. Go through |
| 2424 | * @wq->maydays processing before acting on should_stop so that the | ||
| 2425 | * list is always empty on exit. | ||
| 2426 | */ | ||
| 2427 | should_stop = kthread_should_stop(); | ||
| 2417 | 2428 | ||
| 2418 | /* see whether any pwq is asking for help */ | 2429 | /* see whether any pwq is asking for help */ |
| 2419 | spin_lock_irq(&wq_mayday_lock); | 2430 | spin_lock_irq(&wq_mayday_lock); |
| @@ -2445,6 +2456,12 @@ repeat: | |||
| 2445 | process_scheduled_works(rescuer); | 2456 | process_scheduled_works(rescuer); |
| 2446 | 2457 | ||
| 2447 | /* | 2458 | /* |
| 2459 | * Put the reference grabbed by send_mayday(). @pool won't | ||
| 2460 | * go away while we're holding its lock. | ||
| 2461 | */ | ||
| 2462 | put_pwq(pwq); | ||
| 2463 | |||
| 2464 | /* | ||
| 2448 | * Leave this pool. If keep_working() is %true, notify a | 2465 | * Leave this pool. If keep_working() is %true, notify a |
| 2449 | * regular worker; otherwise, we end up with 0 concurrency | 2466 | * regular worker; otherwise, we end up with 0 concurrency |
| 2450 | * and stalling the execution. | 2467 | * and stalling the execution. |
| @@ -2459,6 +2476,12 @@ repeat: | |||
| 2459 | 2476 | ||
| 2460 | spin_unlock_irq(&wq_mayday_lock); | 2477 | spin_unlock_irq(&wq_mayday_lock); |
| 2461 | 2478 | ||
| 2479 | if (should_stop) { | ||
| 2480 | __set_current_state(TASK_RUNNING); | ||
| 2481 | rescuer->task->flags &= ~PF_WQ_WORKER; | ||
| 2482 | return 0; | ||
| 2483 | } | ||
| 2484 | |||
| 2462 | /* rescuers should never participate in concurrency management */ | 2485 | /* rescuers should never participate in concurrency management */ |
| 2463 | WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING)); | 2486 | WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING)); |
| 2464 | schedule(); | 2487 | schedule(); |
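
The rescuer now samples kthread_should_stop() first, drains wq->maydays, and only then acts on the saved flag, so the mayday list is guaranteed empty when the thread exits even if non-rescuer workers consumed the actual work items. A sketch of that check-then-drain-then-exit loop shape, with trivial stand-ins for the kernel helpers:

```c
#include <stdbool.h>

/* Tiny stand-ins for kthread_should_stop() and the wq->maydays list. */
static bool stop_flag;
static int  pending;			/* queued mayday requests */

static bool stop_requested(void) { return stop_flag; }
static bool pop_pending(void)    { return pending > 0 ? (pending--, true) : false; }
static void handle_one(void)     { /* process one rescued work item */ }

/*
 * Shape of the patched rescuer loop: sample the stop flag before draining,
 * and act on the saved value only after the queue is empty.
 */
static int rescuer_loop(void)
{
	for (;;) {
		bool should_stop = stop_requested();

		while (pop_pending())
			handle_one();

		if (should_stop)
			return 0;

		/* in the kernel: schedule() until woken by send_mayday() */
	}
}
```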
| @@ -4100,7 +4123,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, | |||
| 4100 | if (!pwq) { | 4123 | if (!pwq) { |
| 4101 | pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n", | 4124 | pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n", |
| 4102 | wq->name); | 4125 | wq->name); |
| 4103 | goto out_unlock; | 4126 | mutex_lock(&wq->mutex); |
| 4127 | goto use_dfl_pwq; | ||
| 4104 | } | 4128 | } |
| 4105 | 4129 | ||
| 4106 | /* | 4130 | /* |
