Diffstat (limited to 'kernel')
 -rw-r--r--  kernel/audit_tree.c             19
 -rw-r--r--  kernel/events/core.c            64
 -rw-r--r--  kernel/events/hw_breakpoint.c   11
 -rw-r--r--  kernel/fork.c                    4
 -rw-r--r--  kernel/pid_namespace.c           6
 -rw-r--r--  kernel/power/suspend.c           3
 -rw-r--r--  kernel/printk.c                  2
 -rw-r--r--  kernel/sched/core.c            108
 -rw-r--r--  kernel/sched/fair.c             48
 -rw-r--r--  kernel/sched/rt.c               14
 -rw-r--r--  kernel/sched/sched.h             9
 -rw-r--r--  kernel/sched/stop_task.c        22
 -rw-r--r--  kernel/task_work.c               1
 -rw-r--r--  kernel/time/tick-sched.c         1
 -rw-r--r--  kernel/time/timekeeping.c       58
 -rw-r--r--  kernel/timer.c                   9
 -rw-r--r--  kernel/trace/trace_syscalls.c    4
 -rw-r--r--  kernel/watchdog.c               21
 -rw-r--r--  kernel/workqueue.c             147
19 files changed, 333 insertions, 218 deletions
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 3a5ca582ba1e..ed206fd88cca 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
| @@ -250,7 +250,6 @@ static void untag_chunk(struct node *p) | |||
| 250 | spin_unlock(&hash_lock); | 250 | spin_unlock(&hash_lock); |
| 251 | spin_unlock(&entry->lock); | 251 | spin_unlock(&entry->lock); |
| 252 | fsnotify_destroy_mark(entry); | 252 | fsnotify_destroy_mark(entry); |
| 253 | fsnotify_put_mark(entry); | ||
| 254 | goto out; | 253 | goto out; |
| 255 | } | 254 | } |
| 256 | 255 | ||
| @@ -259,7 +258,7 @@ static void untag_chunk(struct node *p) | |||
| 259 | 258 | ||
| 260 | fsnotify_duplicate_mark(&new->mark, entry); | 259 | fsnotify_duplicate_mark(&new->mark, entry); |
| 261 | if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) { | 260 | if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) { |
| 262 | free_chunk(new); | 261 | fsnotify_put_mark(&new->mark); |
| 263 | goto Fallback; | 262 | goto Fallback; |
| 264 | } | 263 | } |
| 265 | 264 | ||
| @@ -293,7 +292,7 @@ static void untag_chunk(struct node *p) | |||
| 293 | spin_unlock(&hash_lock); | 292 | spin_unlock(&hash_lock); |
| 294 | spin_unlock(&entry->lock); | 293 | spin_unlock(&entry->lock); |
| 295 | fsnotify_destroy_mark(entry); | 294 | fsnotify_destroy_mark(entry); |
| 296 | fsnotify_put_mark(entry); | 295 | fsnotify_put_mark(&new->mark); /* drop initial reference */ |
| 297 | goto out; | 296 | goto out; |
| 298 | 297 | ||
| 299 | Fallback: | 298 | Fallback: |
| @@ -322,7 +321,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) | |||
| 322 | 321 | ||
| 323 | entry = &chunk->mark; | 322 | entry = &chunk->mark; |
| 324 | if (fsnotify_add_mark(entry, audit_tree_group, inode, NULL, 0)) { | 323 | if (fsnotify_add_mark(entry, audit_tree_group, inode, NULL, 0)) { |
| 325 | free_chunk(chunk); | 324 | fsnotify_put_mark(entry); |
| 326 | return -ENOSPC; | 325 | return -ENOSPC; |
| 327 | } | 326 | } |
| 328 | 327 | ||
| @@ -347,6 +346,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) | |||
| 347 | insert_hash(chunk); | 346 | insert_hash(chunk); |
| 348 | spin_unlock(&hash_lock); | 347 | spin_unlock(&hash_lock); |
| 349 | spin_unlock(&entry->lock); | 348 | spin_unlock(&entry->lock); |
| 349 | fsnotify_put_mark(entry); /* drop initial reference */ | ||
| 350 | return 0; | 350 | return 0; |
| 351 | } | 351 | } |
| 352 | 352 | ||
| @@ -396,7 +396,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) | |||
| 396 | fsnotify_duplicate_mark(chunk_entry, old_entry); | 396 | fsnotify_duplicate_mark(chunk_entry, old_entry); |
| 397 | if (fsnotify_add_mark(chunk_entry, chunk_entry->group, chunk_entry->i.inode, NULL, 1)) { | 397 | if (fsnotify_add_mark(chunk_entry, chunk_entry->group, chunk_entry->i.inode, NULL, 1)) { |
| 398 | spin_unlock(&old_entry->lock); | 398 | spin_unlock(&old_entry->lock); |
| 399 | free_chunk(chunk); | 399 | fsnotify_put_mark(chunk_entry); |
| 400 | fsnotify_put_mark(old_entry); | 400 | fsnotify_put_mark(old_entry); |
| 401 | return -ENOSPC; | 401 | return -ENOSPC; |
| 402 | } | 402 | } |
| @@ -444,8 +444,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) | |||
| 444 | spin_unlock(&chunk_entry->lock); | 444 | spin_unlock(&chunk_entry->lock); |
| 445 | spin_unlock(&old_entry->lock); | 445 | spin_unlock(&old_entry->lock); |
| 446 | fsnotify_destroy_mark(old_entry); | 446 | fsnotify_destroy_mark(old_entry); |
| 447 | fsnotify_put_mark(chunk_entry); /* drop initial reference */ | ||
| 447 | fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */ | 448 | fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */ |
| 448 | fsnotify_put_mark(old_entry); /* and kill it */ | ||
| 449 | return 0; | 449 | return 0; |
| 450 | } | 450 | } |
| 451 | 451 | ||
| @@ -916,7 +916,12 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify | |||
| 916 | struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark); | 916 | struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark); |
| 917 | 917 | ||
| 918 | evict_chunk(chunk); | 918 | evict_chunk(chunk); |
| 919 | fsnotify_put_mark(entry); | 919 | |
| 920 | /* | ||
| 921 | * We are guaranteed to have at least one reference to the mark from | ||
| 922 | * either the inode or the caller of fsnotify_destroy_mark(). | ||
| 923 | */ | ||
| 924 | BUG_ON(atomic_read(&entry->refcnt) < 1); | ||
| 920 | } | 925 | } |
| 921 | 926 | ||
| 922 | static bool audit_tree_send_event(struct fsnotify_group *group, struct inode *inode, | 927 | static bool audit_tree_send_event(struct fsnotify_group *group, struct inode *inode, |
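Editor's note: the audit_tree hunks above replace direct free_chunk() calls on error paths with fsnotify_put_mark(), so the mark's reference count alone decides when the chunk is freed, and callers drop their initial reference once the mark is in place. Below is a minimal user-space sketch of that convention; the mark_alloc/mark_attach/mark_put names are made up and are not the fsnotify API.

/*
 * Illustrative sketch only (user-space toy): once an object is reference
 * counted, failure paths drop their reference with a "put" instead of
 * freeing directly, so the last holder frees it exactly once.
 */
#include <stdlib.h>

struct mark {
	int refcnt;			/* creator starts with one reference */
};

static struct mark *mark_alloc(void)
{
	struct mark *m = calloc(1, sizeof(*m));

	if (m)
		m->refcnt = 1;
	return m;
}

static void mark_put(struct mark *m)
{
	if (--m->refcnt == 0)		/* last put frees the object */
		free(m);
}

/* hypothetical attach step; takes no extra reference when it fails */
static int mark_attach(struct mark *m, int should_fail)
{
	if (should_fail)
		return -1;
	m->refcnt++;			/* e.g. the inode now holds a reference */
	return 0;
}

int main(void)
{
	struct mark *m = mark_alloc();

	if (!m)
		return 1;
	if (mark_attach(m, 1) != 0) {
		mark_put(m);		/* not free(m): drop our reference */
		return 1;
	}
	mark_put(m);			/* drop the initial reference */
	return 0;
}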
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b7935fcec7d9..7fee567153f0 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
| @@ -1253,7 +1253,7 @@ retry: | |||
| 1253 | /* | 1253 | /* |
| 1254 | * Cross CPU call to disable a performance event | 1254 | * Cross CPU call to disable a performance event |
| 1255 | */ | 1255 | */ |
| 1256 | static int __perf_event_disable(void *info) | 1256 | int __perf_event_disable(void *info) |
| 1257 | { | 1257 | { |
| 1258 | struct perf_event *event = info; | 1258 | struct perf_event *event = info; |
| 1259 | struct perf_event_context *ctx = event->ctx; | 1259 | struct perf_event_context *ctx = event->ctx; |
| @@ -2935,12 +2935,12 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel); | |||
| 2935 | /* | 2935 | /* |
| 2936 | * Called when the last reference to the file is gone. | 2936 | * Called when the last reference to the file is gone. |
| 2937 | */ | 2937 | */ |
| 2938 | static int perf_release(struct inode *inode, struct file *file) | 2938 | static void put_event(struct perf_event *event) |
| 2939 | { | 2939 | { |
| 2940 | struct perf_event *event = file->private_data; | ||
| 2941 | struct task_struct *owner; | 2940 | struct task_struct *owner; |
| 2942 | 2941 | ||
| 2943 | file->private_data = NULL; | 2942 | if (!atomic_long_dec_and_test(&event->refcount)) |
| 2943 | return; | ||
| 2944 | 2944 | ||
| 2945 | rcu_read_lock(); | 2945 | rcu_read_lock(); |
| 2946 | owner = ACCESS_ONCE(event->owner); | 2946 | owner = ACCESS_ONCE(event->owner); |
| @@ -2975,7 +2975,13 @@ static int perf_release(struct inode *inode, struct file *file) | |||
| 2975 | put_task_struct(owner); | 2975 | put_task_struct(owner); |
| 2976 | } | 2976 | } |
| 2977 | 2977 | ||
| 2978 | return perf_event_release_kernel(event); | 2978 | perf_event_release_kernel(event); |
| 2979 | } | ||
| 2980 | |||
| 2981 | static int perf_release(struct inode *inode, struct file *file) | ||
| 2982 | { | ||
| 2983 | put_event(file->private_data); | ||
| 2984 | return 0; | ||
| 2979 | } | 2985 | } |
| 2980 | 2986 | ||
| 2981 | u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) | 2987 | u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) |
| @@ -3227,7 +3233,7 @@ unlock: | |||
| 3227 | 3233 | ||
| 3228 | static const struct file_operations perf_fops; | 3234 | static const struct file_operations perf_fops; |
| 3229 | 3235 | ||
| 3230 | static struct perf_event *perf_fget_light(int fd, int *fput_needed) | 3236 | static struct file *perf_fget_light(int fd, int *fput_needed) |
| 3231 | { | 3237 | { |
| 3232 | struct file *file; | 3238 | struct file *file; |
| 3233 | 3239 | ||
| @@ -3241,7 +3247,7 @@ static struct perf_event *perf_fget_light(int fd, int *fput_needed) | |||
| 3241 | return ERR_PTR(-EBADF); | 3247 | return ERR_PTR(-EBADF); |
| 3242 | } | 3248 | } |
| 3243 | 3249 | ||
| 3244 | return file->private_data; | 3250 | return file; |
| 3245 | } | 3251 | } |
| 3246 | 3252 | ||
| 3247 | static int perf_event_set_output(struct perf_event *event, | 3253 | static int perf_event_set_output(struct perf_event *event, |
| @@ -3273,19 +3279,21 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
| 3273 | 3279 | ||
| 3274 | case PERF_EVENT_IOC_SET_OUTPUT: | 3280 | case PERF_EVENT_IOC_SET_OUTPUT: |
| 3275 | { | 3281 | { |
| 3282 | struct file *output_file = NULL; | ||
| 3276 | struct perf_event *output_event = NULL; | 3283 | struct perf_event *output_event = NULL; |
| 3277 | int fput_needed = 0; | 3284 | int fput_needed = 0; |
| 3278 | int ret; | 3285 | int ret; |
| 3279 | 3286 | ||
| 3280 | if (arg != -1) { | 3287 | if (arg != -1) { |
| 3281 | output_event = perf_fget_light(arg, &fput_needed); | 3288 | output_file = perf_fget_light(arg, &fput_needed); |
| 3282 | if (IS_ERR(output_event)) | 3289 | if (IS_ERR(output_file)) |
| 3283 | return PTR_ERR(output_event); | 3290 | return PTR_ERR(output_file); |
| 3291 | output_event = output_file->private_data; | ||
| 3284 | } | 3292 | } |
| 3285 | 3293 | ||
| 3286 | ret = perf_event_set_output(event, output_event); | 3294 | ret = perf_event_set_output(event, output_event); |
| 3287 | if (output_event) | 3295 | if (output_event) |
| 3288 | fput_light(output_event->filp, fput_needed); | 3296 | fput_light(output_file, fput_needed); |
| 3289 | 3297 | ||
| 3290 | return ret; | 3298 | return ret; |
| 3291 | } | 3299 | } |
| @@ -5950,6 +5958,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
| 5950 | 5958 | ||
| 5951 | mutex_init(&event->mmap_mutex); | 5959 | mutex_init(&event->mmap_mutex); |
| 5952 | 5960 | ||
| 5961 | atomic_long_set(&event->refcount, 1); | ||
| 5953 | event->cpu = cpu; | 5962 | event->cpu = cpu; |
| 5954 | event->attr = *attr; | 5963 | event->attr = *attr; |
| 5955 | event->group_leader = group_leader; | 5964 | event->group_leader = group_leader; |
| @@ -6260,12 +6269,12 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 6260 | return event_fd; | 6269 | return event_fd; |
| 6261 | 6270 | ||
| 6262 | if (group_fd != -1) { | 6271 | if (group_fd != -1) { |
| 6263 | group_leader = perf_fget_light(group_fd, &fput_needed); | 6272 | group_file = perf_fget_light(group_fd, &fput_needed); |
| 6264 | if (IS_ERR(group_leader)) { | 6273 | if (IS_ERR(group_file)) { |
| 6265 | err = PTR_ERR(group_leader); | 6274 | err = PTR_ERR(group_file); |
| 6266 | goto err_fd; | 6275 | goto err_fd; |
| 6267 | } | 6276 | } |
| 6268 | group_file = group_leader->filp; | 6277 | group_leader = group_file->private_data; |
| 6269 | if (flags & PERF_FLAG_FD_OUTPUT) | 6278 | if (flags & PERF_FLAG_FD_OUTPUT) |
| 6270 | output_event = group_leader; | 6279 | output_event = group_leader; |
| 6271 | if (flags & PERF_FLAG_FD_NO_GROUP) | 6280 | if (flags & PERF_FLAG_FD_NO_GROUP) |
| @@ -6402,7 +6411,6 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 6402 | put_ctx(gctx); | 6411 | put_ctx(gctx); |
| 6403 | } | 6412 | } |
| 6404 | 6413 | ||
| 6405 | event->filp = event_file; | ||
| 6406 | WARN_ON_ONCE(ctx->parent_ctx); | 6414 | WARN_ON_ONCE(ctx->parent_ctx); |
| 6407 | mutex_lock(&ctx->mutex); | 6415 | mutex_lock(&ctx->mutex); |
| 6408 | 6416 | ||
| @@ -6496,7 +6504,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
| 6496 | goto err_free; | 6504 | goto err_free; |
| 6497 | } | 6505 | } |
| 6498 | 6506 | ||
| 6499 | event->filp = NULL; | ||
| 6500 | WARN_ON_ONCE(ctx->parent_ctx); | 6507 | WARN_ON_ONCE(ctx->parent_ctx); |
| 6501 | mutex_lock(&ctx->mutex); | 6508 | mutex_lock(&ctx->mutex); |
| 6502 | perf_install_in_context(ctx, event, cpu); | 6509 | perf_install_in_context(ctx, event, cpu); |
| @@ -6578,7 +6585,7 @@ static void sync_child_event(struct perf_event *child_event, | |||
| 6578 | * Release the parent event, if this was the last | 6585 | * Release the parent event, if this was the last |
| 6579 | * reference to it. | 6586 | * reference to it. |
| 6580 | */ | 6587 | */ |
| 6581 | fput(parent_event->filp); | 6588 | put_event(parent_event); |
| 6582 | } | 6589 | } |
| 6583 | 6590 | ||
| 6584 | static void | 6591 | static void |
| @@ -6654,9 +6661,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) | |||
| 6654 | * | 6661 | * |
| 6655 | * __perf_event_exit_task() | 6662 | * __perf_event_exit_task() |
| 6656 | * sync_child_event() | 6663 | * sync_child_event() |
| 6657 | * fput(parent_event->filp) | 6664 | * put_event() |
| 6658 | * perf_release() | 6665 | * mutex_lock(&ctx->mutex) |
| 6659 | * mutex_lock(&ctx->mutex) | ||
| 6660 | * | 6666 | * |
| 6661 | * But since its the parent context it won't be the same instance. | 6667 | * But since its the parent context it won't be the same instance. |
| 6662 | */ | 6668 | */ |
| @@ -6724,7 +6730,7 @@ static void perf_free_event(struct perf_event *event, | |||
| 6724 | list_del_init(&event->child_list); | 6730 | list_del_init(&event->child_list); |
| 6725 | mutex_unlock(&parent->child_mutex); | 6731 | mutex_unlock(&parent->child_mutex); |
| 6726 | 6732 | ||
| 6727 | fput(parent->filp); | 6733 | put_event(parent); |
| 6728 | 6734 | ||
| 6729 | perf_group_detach(event); | 6735 | perf_group_detach(event); |
| 6730 | list_del_event(event, ctx); | 6736 | list_del_event(event, ctx); |
| @@ -6804,6 +6810,12 @@ inherit_event(struct perf_event *parent_event, | |||
| 6804 | NULL, NULL); | 6810 | NULL, NULL); |
| 6805 | if (IS_ERR(child_event)) | 6811 | if (IS_ERR(child_event)) |
| 6806 | return child_event; | 6812 | return child_event; |
| 6813 | |||
| 6814 | if (!atomic_long_inc_not_zero(&parent_event->refcount)) { | ||
| 6815 | free_event(child_event); | ||
| 6816 | return NULL; | ||
| 6817 | } | ||
| 6818 | |||
| 6807 | get_ctx(child_ctx); | 6819 | get_ctx(child_ctx); |
| 6808 | 6820 | ||
| 6809 | /* | 6821 | /* |
| @@ -6845,14 +6857,6 @@ inherit_event(struct perf_event *parent_event, | |||
| 6845 | raw_spin_unlock_irqrestore(&child_ctx->lock, flags); | 6857 | raw_spin_unlock_irqrestore(&child_ctx->lock, flags); |
| 6846 | 6858 | ||
| 6847 | /* | 6859 | /* |
| 6848 | * Get a reference to the parent filp - we will fput it | ||
| 6849 | * when the child event exits. This is safe to do because | ||
| 6850 | * we are in the parent and we know that the filp still | ||
| 6851 | * exists and has a nonzero count: | ||
| 6852 | */ | ||
| 6853 | atomic_long_inc(&parent_event->filp->f_count); | ||
| 6854 | |||
| 6855 | /* | ||
| 6856 | * Link this into the parent event's child list | 6860 | * Link this into the parent event's child list |
| 6857 | */ | 6861 | */ |
| 6858 | WARN_ON_ONCE(parent_event->ctx->parent_ctx); | 6862 | WARN_ON_ONCE(parent_event->ctx->parent_ctx); |
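Editor's note: the events/core.c changes above give each perf_event its own refcount (initialized to 1 in perf_event_alloc), drop it through put_event(), and let inherit_event() take a reference only while the count is still non-zero, instead of pinning the parent's struct file. Here is a minimal sketch of that acquire-if-nonzero idiom, written with C11 atomics rather than the kernel's atomic_long_t helpers; event_alloc/get_event/put_event are illustrative names.

/*
 * Illustrative sketch only: take a reference only if the object is still
 * live, and free it on the 1 -> 0 transition.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct event {
	atomic_long refcount;
};

static struct event *event_alloc(void)
{
	struct event *e = malloc(sizeof(*e));

	if (e)
		atomic_init(&e->refcount, 1);	/* creator holds one reference */
	return e;
}

static void put_event(struct event *e)
{
	if (atomic_fetch_sub(&e->refcount, 1) == 1)	/* 1 -> 0: free */
		free(e);
}

static bool get_event(struct event *e)
{
	long old = atomic_load(&e->refcount);

	/* refuse to resurrect an object already on its way to being freed */
	while (old != 0)
		if (atomic_compare_exchange_weak(&e->refcount, &old, old + 1))
			return true;
	return false;
}

int main(void)
{
	struct event *e = event_alloc();

	if (!e)
		return 1;
	if (get_event(e))	/* a child takes its own reference */
		put_event(e);	/* ...and drops it when it exits */
	put_event(e);		/* creator's reference; object freed here */
	printf("done\n");
	return 0;
}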
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index bb38c4d3ee12..9a7b487c6fe2 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
| @@ -453,7 +453,16 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att | |||
| 453 | int old_type = bp->attr.bp_type; | 453 | int old_type = bp->attr.bp_type; |
| 454 | int err = 0; | 454 | int err = 0; |
| 455 | 455 | ||
| 456 | perf_event_disable(bp); | 456 | /* |
| 457 | * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it | ||
| 458 | * will not be possible to raise IPIs that invoke __perf_event_disable. | ||
| 459 | * So call the function directly after making sure we are targeting the | ||
| 460 | * current task. | ||
| 461 | */ | ||
| 462 | if (irqs_disabled() && bp->ctx && bp->ctx->task == current) | ||
| 463 | __perf_event_disable(bp); | ||
| 464 | else | ||
| 465 | perf_event_disable(bp); | ||
| 457 | 466 | ||
| 458 | bp->attr.bp_addr = attr->bp_addr; | 467 | bp->attr.bp_addr = attr->bp_addr; |
| 459 | bp->attr.bp_type = attr->bp_type; | 468 | bp->attr.bp_type = attr->bp_type; |
diff --git a/kernel/fork.c b/kernel/fork.c
index 3bd2280d79f6..2c8857e12855 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
| @@ -455,8 +455,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
| 455 | if (retval) | 455 | if (retval) |
| 456 | goto out; | 456 | goto out; |
| 457 | 457 | ||
| 458 | if (file && uprobe_mmap(tmp)) | 458 | if (file) |
| 459 | goto out; | 459 | uprobe_mmap(tmp); |
| 460 | } | 460 | } |
| 461 | /* a new mm has just been created */ | 461 | /* a new mm has just been created */ |
| 462 | arch_dup_mmap(oldmm, mm); | 462 | arch_dup_mmap(oldmm, mm); |
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index b3c7fd554250..6144bab8fd8e 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
| @@ -232,15 +232,19 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write, | |||
| 232 | */ | 232 | */ |
| 233 | 233 | ||
| 234 | tmp.data = ¤t->nsproxy->pid_ns->last_pid; | 234 | tmp.data = ¤t->nsproxy->pid_ns->last_pid; |
| 235 | return proc_dointvec(&tmp, write, buffer, lenp, ppos); | 235 | return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); |
| 236 | } | 236 | } |
| 237 | 237 | ||
| 238 | extern int pid_max; | ||
| 239 | static int zero = 0; | ||
| 238 | static struct ctl_table pid_ns_ctl_table[] = { | 240 | static struct ctl_table pid_ns_ctl_table[] = { |
| 239 | { | 241 | { |
| 240 | .procname = "ns_last_pid", | 242 | .procname = "ns_last_pid", |
| 241 | .maxlen = sizeof(int), | 243 | .maxlen = sizeof(int), |
| 242 | .mode = 0666, /* permissions are checked in the handler */ | 244 | .mode = 0666, /* permissions are checked in the handler */ |
| 243 | .proc_handler = pid_ns_ctl_handler, | 245 | .proc_handler = pid_ns_ctl_handler, |
| 246 | .extra1 = &zero, | ||
| 247 | .extra2 = &pid_max, | ||
| 244 | }, | 248 | }, |
| 245 | { } | 249 | { } |
| 246 | }; | 250 | }; |
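Editor's note: the pid_namespace.c hunk above switches ns_last_pid to proc_dointvec_minmax and supplies .extra1/.extra2 bounds so out-of-range writes are rejected with -EINVAL rather than stored. A sketch of the same sysctl pattern follows, assuming a kernel build context; the example_knob entry and its bounds are made up, and the module/registration boilerplate is omitted.

/*
 * Illustrative sketch only: an integer sysctl bounded the same way
 * ns_last_pid is above.
 */
#include <linux/sysctl.h>

static int example_knob;
static int example_min;		/* lower bound, like the "zero" above */
static int example_max = 100;	/* upper bound, like pid_max above */

static struct ctl_table example_table[] = {
	{
		.procname	= "example_knob",
		.data		= &example_knob,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &example_min,	/* writes below this fail with -EINVAL */
		.extra2		= &example_max,	/* writes above this fail with -EINVAL */
	},
	{ }
};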
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 1da39ea248fd..c8b7446b27df 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
| @@ -178,9 +178,6 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) | |||
| 178 | arch_suspend_enable_irqs(); | 178 | arch_suspend_enable_irqs(); |
| 179 | BUG_ON(irqs_disabled()); | 179 | BUG_ON(irqs_disabled()); |
| 180 | 180 | ||
| 181 | /* Kick the lockup detector */ | ||
| 182 | lockup_detector_bootcpu_resume(); | ||
| 183 | |||
| 184 | Enable_cpus: | 181 | Enable_cpus: |
| 185 | enable_nonboot_cpus(); | 182 | enable_nonboot_cpus(); |
| 186 | 183 | ||
diff --git a/kernel/printk.c b/kernel/printk.c
index 6a76ab9d4476..66a2ea37b576 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
| @@ -1034,6 +1034,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |||
| 1034 | struct log *msg = log_from_idx(idx); | 1034 | struct log *msg = log_from_idx(idx); |
| 1035 | 1035 | ||
| 1036 | len += msg_print_text(msg, prev, true, NULL, 0); | 1036 | len += msg_print_text(msg, prev, true, NULL, 0); |
| 1037 | prev = msg->flags; | ||
| 1037 | idx = log_next(idx); | 1038 | idx = log_next(idx); |
| 1038 | seq++; | 1039 | seq++; |
| 1039 | } | 1040 | } |
| @@ -1046,6 +1047,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |||
| 1046 | struct log *msg = log_from_idx(idx); | 1047 | struct log *msg = log_from_idx(idx); |
| 1047 | 1048 | ||
| 1048 | len -= msg_print_text(msg, prev, true, NULL, 0); | 1049 | len -= msg_print_text(msg, prev, true, NULL, 0); |
| 1050 | prev = msg->flags; | ||
| 1049 | idx = log_next(idx); | 1051 | idx = log_next(idx); |
| 1050 | seq++; | 1052 | seq++; |
| 1051 | } | 1053 | } |
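Editor's note: both printk.c hunks add the same line - after measuring a record, remember its flags so the next record is sized relative to its predecessor (continuation handling). Below is a small user-space sketch of that carry-the-previous-flags bookkeeping; the record layout and LOG_CONT value are made up.

/*
 * Illustrative sketch only: each record's length depends on the flags of
 * the record before it, so prev must be updated every iteration.
 */
#include <stdio.h>
#include <string.h>

#define LOG_CONT 1	/* hypothetical "continues the previous record" flag */

struct record {
	int flags;
	const char *text;
};

/* a continued line only needs a prefix when it starts a new message */
static size_t record_len(const struct record *r, int prev_flags)
{
	size_t prefix = (prev_flags & LOG_CONT) && (r->flags & LOG_CONT) ? 0 : 16;

	return prefix + strlen(r->text);
}

int main(void)
{
	const struct record log[] = {
		{ LOG_CONT, "part one " },
		{ LOG_CONT, "part two" },
		{ 0,        "separate line" },
	};
	size_t len = 0;
	int prev = 0;

	for (size_t i = 0; i < sizeof(log) / sizeof(log[0]); i++) {
		len += record_len(&log[i], prev);
		prev = log[i].flags;	/* the fix: remember flags for the next record */
	}
	printf("%zu\n", len);
	return 0;
}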
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 82ad284f823b..649c9f876cb1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
| @@ -3142,6 +3142,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | |||
| 3142 | # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) | 3142 | # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) |
| 3143 | #endif | 3143 | #endif |
| 3144 | 3144 | ||
| 3145 | static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) | ||
| 3146 | { | ||
| 3147 | u64 temp = (__force u64) rtime; | ||
| 3148 | |||
| 3149 | temp *= (__force u64) utime; | ||
| 3150 | |||
| 3151 | if (sizeof(cputime_t) == 4) | ||
| 3152 | temp = div_u64(temp, (__force u32) total); | ||
| 3153 | else | ||
| 3154 | temp = div64_u64(temp, (__force u64) total); | ||
| 3155 | |||
| 3156 | return (__force cputime_t) temp; | ||
| 3157 | } | ||
| 3158 | |||
| 3145 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | 3159 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) |
| 3146 | { | 3160 | { |
| 3147 | cputime_t rtime, utime = p->utime, total = utime + p->stime; | 3161 | cputime_t rtime, utime = p->utime, total = utime + p->stime; |
| @@ -3151,13 +3165,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | |||
| 3151 | */ | 3165 | */ |
| 3152 | rtime = nsecs_to_cputime(p->se.sum_exec_runtime); | 3166 | rtime = nsecs_to_cputime(p->se.sum_exec_runtime); |
| 3153 | 3167 | ||
| 3154 | if (total) { | 3168 | if (total) |
| 3155 | u64 temp = (__force u64) rtime; | 3169 | utime = scale_utime(utime, rtime, total); |
| 3156 | 3170 | else | |
| 3157 | temp *= (__force u64) utime; | ||
| 3158 | do_div(temp, (__force u32) total); | ||
| 3159 | utime = (__force cputime_t) temp; | ||
| 3160 | } else | ||
| 3161 | utime = rtime; | 3171 | utime = rtime; |
| 3162 | 3172 | ||
| 3163 | /* | 3173 | /* |
| @@ -3184,13 +3194,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | |||
| 3184 | total = cputime.utime + cputime.stime; | 3194 | total = cputime.utime + cputime.stime; |
| 3185 | rtime = nsecs_to_cputime(cputime.sum_exec_runtime); | 3195 | rtime = nsecs_to_cputime(cputime.sum_exec_runtime); |
| 3186 | 3196 | ||
| 3187 | if (total) { | 3197 | if (total) |
| 3188 | u64 temp = (__force u64) rtime; | 3198 | utime = scale_utime(cputime.utime, rtime, total); |
| 3189 | 3199 | else | |
| 3190 | temp *= (__force u64) cputime.utime; | ||
| 3191 | do_div(temp, (__force u32) total); | ||
| 3192 | utime = (__force cputime_t) temp; | ||
| 3193 | } else | ||
| 3194 | utime = rtime; | 3200 | utime = rtime; |
| 3195 | 3201 | ||
| 3196 | sig->prev_utime = max(sig->prev_utime, utime); | 3202 | sig->prev_utime = max(sig->prev_utime, utime); |
| @@ -5298,27 +5304,17 @@ void idle_task_exit(void) | |||
| 5298 | } | 5304 | } |
| 5299 | 5305 | ||
| 5300 | /* | 5306 | /* |
| 5301 | * While a dead CPU has no uninterruptible tasks queued at this point, | 5307 | * Since this CPU is going 'away' for a while, fold any nr_active delta |
| 5302 | * it might still have a nonzero ->nr_uninterruptible counter, because | 5308 | * we might have. Assumes we're called after migrate_tasks() so that the |
| 5303 | * for performance reasons the counter is not stricly tracking tasks to | 5309 | * nr_active count is stable. |
| 5304 | * their home CPUs. So we just add the counter to another CPU's counter, | 5310 | * |
| 5305 | * to keep the global sum constant after CPU-down: | 5311 | * Also see the comment "Global load-average calculations". |
| 5306 | */ | ||
| 5307 | static void migrate_nr_uninterruptible(struct rq *rq_src) | ||
| 5308 | { | ||
| 5309 | struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask)); | ||
| 5310 | |||
| 5311 | rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible; | ||
| 5312 | rq_src->nr_uninterruptible = 0; | ||
| 5313 | } | ||
| 5314 | |||
| 5315 | /* | ||
| 5316 | * remove the tasks which were accounted by rq from calc_load_tasks. | ||
| 5317 | */ | 5312 | */ |
| 5318 | static void calc_global_load_remove(struct rq *rq) | 5313 | static void calc_load_migrate(struct rq *rq) |
| 5319 | { | 5314 | { |
| 5320 | atomic_long_sub(rq->calc_load_active, &calc_load_tasks); | 5315 | long delta = calc_load_fold_active(rq); |
| 5321 | rq->calc_load_active = 0; | 5316 | if (delta) |
| 5317 | atomic_long_add(delta, &calc_load_tasks); | ||
| 5322 | } | 5318 | } |
| 5323 | 5319 | ||
| 5324 | /* | 5320 | /* |
| @@ -5346,9 +5342,6 @@ static void migrate_tasks(unsigned int dead_cpu) | |||
| 5346 | */ | 5342 | */ |
| 5347 | rq->stop = NULL; | 5343 | rq->stop = NULL; |
| 5348 | 5344 | ||
| 5349 | /* Ensure any throttled groups are reachable by pick_next_task */ | ||
| 5350 | unthrottle_offline_cfs_rqs(rq); | ||
| 5351 | |||
| 5352 | for ( ; ; ) { | 5345 | for ( ; ; ) { |
| 5353 | /* | 5346 | /* |
| 5354 | * There's this thread running, bail when that's the only | 5347 | * There's this thread running, bail when that's the only |
| @@ -5612,8 +5605,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 5612 | BUG_ON(rq->nr_running != 1); /* the migration thread */ | 5605 | BUG_ON(rq->nr_running != 1); /* the migration thread */ |
| 5613 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 5606 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
| 5614 | 5607 | ||
| 5615 | migrate_nr_uninterruptible(rq); | 5608 | calc_load_migrate(rq); |
| 5616 | calc_global_load_remove(rq); | ||
| 5617 | break; | 5609 | break; |
| 5618 | #endif | 5610 | #endif |
| 5619 | } | 5611 | } |
| @@ -6022,11 +6014,6 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu) | |||
| 6022 | * SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this | 6014 | * SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this |
| 6023 | * allows us to avoid some pointer chasing select_idle_sibling(). | 6015 | * allows us to avoid some pointer chasing select_idle_sibling(). |
| 6024 | * | 6016 | * |
| 6025 | * Iterate domains and sched_groups downward, assigning CPUs to be | ||
| 6026 | * select_idle_sibling() hw buddy. Cross-wiring hw makes bouncing | ||
| 6027 | * due to random perturbation self canceling, ie sw buddies pull | ||
| 6028 | * their counterpart to their CPU's hw counterpart. | ||
| 6029 | * | ||
| 6030 | * Also keep a unique ID per domain (we use the first cpu number in | 6017 | * Also keep a unique ID per domain (we use the first cpu number in |
| 6031 | * the cpumask of the domain), this allows us to quickly tell if | 6018 | * the cpumask of the domain), this allows us to quickly tell if |
| 6032 | * two cpus are in the same cache domain, see cpus_share_cache(). | 6019 | * two cpus are in the same cache domain, see cpus_share_cache(). |
| @@ -6040,40 +6027,8 @@ static void update_top_cache_domain(int cpu) | |||
| 6040 | int id = cpu; | 6027 | int id = cpu; |
| 6041 | 6028 | ||
| 6042 | sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES); | 6029 | sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES); |
| 6043 | if (sd) { | 6030 | if (sd) |
| 6044 | struct sched_domain *tmp = sd; | ||
| 6045 | struct sched_group *sg, *prev; | ||
| 6046 | bool right; | ||
| 6047 | |||
| 6048 | /* | ||
| 6049 | * Traverse to first CPU in group, and count hops | ||
| 6050 | * to cpu from there, switching direction on each | ||
| 6051 | * hop, never ever pointing the last CPU rightward. | ||
| 6052 | */ | ||
| 6053 | do { | ||
| 6054 | id = cpumask_first(sched_domain_span(tmp)); | ||
| 6055 | prev = sg = tmp->groups; | ||
| 6056 | right = 1; | ||
| 6057 | |||
| 6058 | while (cpumask_first(sched_group_cpus(sg)) != id) | ||
| 6059 | sg = sg->next; | ||
| 6060 | |||
| 6061 | while (!cpumask_test_cpu(cpu, sched_group_cpus(sg))) { | ||
| 6062 | prev = sg; | ||
| 6063 | sg = sg->next; | ||
| 6064 | right = !right; | ||
| 6065 | } | ||
| 6066 | |||
| 6067 | /* A CPU went down, never point back to domain start. */ | ||
| 6068 | if (right && cpumask_first(sched_group_cpus(sg->next)) == id) | ||
| 6069 | right = false; | ||
| 6070 | |||
| 6071 | sg = right ? sg->next : prev; | ||
| 6072 | tmp->idle_buddy = cpumask_first(sched_group_cpus(sg)); | ||
| 6073 | } while ((tmp = tmp->child)); | ||
| 6074 | |||
| 6075 | id = cpumask_first(sched_domain_span(sd)); | 6031 | id = cpumask_first(sched_domain_span(sd)); |
| 6076 | } | ||
| 6077 | 6032 | ||
| 6078 | rcu_assign_pointer(per_cpu(sd_llc, cpu), sd); | 6033 | rcu_assign_pointer(per_cpu(sd_llc, cpu), sd); |
| 6079 | per_cpu(sd_llc_id, cpu) = id; | 6034 | per_cpu(sd_llc_id, cpu) = id; |
| @@ -7246,6 +7201,7 @@ int in_sched_functions(unsigned long addr) | |||
| 7246 | 7201 | ||
| 7247 | #ifdef CONFIG_CGROUP_SCHED | 7202 | #ifdef CONFIG_CGROUP_SCHED |
| 7248 | struct task_group root_task_group; | 7203 | struct task_group root_task_group; |
| 7204 | LIST_HEAD(task_groups); | ||
| 7249 | #endif | 7205 | #endif |
| 7250 | 7206 | ||
| 7251 | DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask); | 7207 | DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask); |
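Editor's note: the new scale_utime() helper above computes utime * rtime / total in a 64-bit intermediate and picks div_u64() or div64_u64() based on the width of cputime_t, because the old do_div()-based code truncated the divisor to 32 bits. The standalone sketch below shows why the widened multiply matters; the values are chosen only to force 32-bit overflow.

/*
 * Illustrative sketch only: scale utime by rtime/total without letting
 * the product overflow a 32-bit type.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t scale32(uint32_t utime, uint32_t rtime, uint32_t total)
{
	uint64_t temp = (uint64_t)rtime * utime;	/* widen before multiplying */

	return (uint32_t)(temp / total);
}

int main(void)
{
	/* 100000 * 70000 overflows 32 bits; the 64-bit product does not */
	printf("%u\n", scale32(70000, 100000, 100000));	/* prints 70000 */
	return 0;
}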
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d0cc03b3e70b..96e2b18b6283 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
| @@ -2052,7 +2052,7 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) | |||
| 2052 | hrtimer_cancel(&cfs_b->slack_timer); | 2052 | hrtimer_cancel(&cfs_b->slack_timer); |
| 2053 | } | 2053 | } |
| 2054 | 2054 | ||
| 2055 | void unthrottle_offline_cfs_rqs(struct rq *rq) | 2055 | static void unthrottle_offline_cfs_rqs(struct rq *rq) |
| 2056 | { | 2056 | { |
| 2057 | struct cfs_rq *cfs_rq; | 2057 | struct cfs_rq *cfs_rq; |
| 2058 | 2058 | ||
| @@ -2106,7 +2106,7 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) | |||
| 2106 | return NULL; | 2106 | return NULL; |
| 2107 | } | 2107 | } |
| 2108 | static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {} | 2108 | static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {} |
| 2109 | void unthrottle_offline_cfs_rqs(struct rq *rq) {} | 2109 | static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {} |
| 2110 | 2110 | ||
| 2111 | #endif /* CONFIG_CFS_BANDWIDTH */ | 2111 | #endif /* CONFIG_CFS_BANDWIDTH */ |
| 2112 | 2112 | ||
| @@ -2637,6 +2637,8 @@ static int select_idle_sibling(struct task_struct *p, int target) | |||
| 2637 | int cpu = smp_processor_id(); | 2637 | int cpu = smp_processor_id(); |
| 2638 | int prev_cpu = task_cpu(p); | 2638 | int prev_cpu = task_cpu(p); |
| 2639 | struct sched_domain *sd; | 2639 | struct sched_domain *sd; |
| 2640 | struct sched_group *sg; | ||
| 2641 | int i; | ||
| 2640 | 2642 | ||
| 2641 | /* | 2643 | /* |
| 2642 | * If the task is going to be woken-up on this cpu and if it is | 2644 | * If the task is going to be woken-up on this cpu and if it is |
| @@ -2653,17 +2655,29 @@ static int select_idle_sibling(struct task_struct *p, int target) | |||
| 2653 | return prev_cpu; | 2655 | return prev_cpu; |
| 2654 | 2656 | ||
| 2655 | /* | 2657 | /* |
| 2656 | * Otherwise, check assigned siblings to find an elegible idle cpu. | 2658 | * Otherwise, iterate the domains and find an elegible idle cpu. |
| 2657 | */ | 2659 | */ |
| 2658 | sd = rcu_dereference(per_cpu(sd_llc, target)); | 2660 | sd = rcu_dereference(per_cpu(sd_llc, target)); |
| 2659 | |||
| 2660 | for_each_lower_domain(sd) { | 2661 | for_each_lower_domain(sd) { |
| 2661 | if (!cpumask_test_cpu(sd->idle_buddy, tsk_cpus_allowed(p))) | 2662 | sg = sd->groups; |
| 2662 | continue; | 2663 | do { |
| 2663 | if (idle_cpu(sd->idle_buddy)) | 2664 | if (!cpumask_intersects(sched_group_cpus(sg), |
| 2664 | return sd->idle_buddy; | 2665 | tsk_cpus_allowed(p))) |
| 2665 | } | 2666 | goto next; |
| 2666 | 2667 | ||
| 2668 | for_each_cpu(i, sched_group_cpus(sg)) { | ||
| 2669 | if (!idle_cpu(i)) | ||
| 2670 | goto next; | ||
| 2671 | } | ||
| 2672 | |||
| 2673 | target = cpumask_first_and(sched_group_cpus(sg), | ||
| 2674 | tsk_cpus_allowed(p)); | ||
| 2675 | goto done; | ||
| 2676 | next: | ||
| 2677 | sg = sg->next; | ||
| 2678 | } while (sg != sd->groups); | ||
| 2679 | } | ||
| 2680 | done: | ||
| 2667 | return target; | 2681 | return target; |
| 2668 | } | 2682 | } |
| 2669 | 2683 | ||
| @@ -3387,6 +3401,14 @@ static int tg_load_down(struct task_group *tg, void *data) | |||
| 3387 | 3401 | ||
| 3388 | static void update_h_load(long cpu) | 3402 | static void update_h_load(long cpu) |
| 3389 | { | 3403 | { |
| 3404 | struct rq *rq = cpu_rq(cpu); | ||
| 3405 | unsigned long now = jiffies; | ||
| 3406 | |||
| 3407 | if (rq->h_load_throttle == now) | ||
| 3408 | return; | ||
| 3409 | |||
| 3410 | rq->h_load_throttle = now; | ||
| 3411 | |||
| 3390 | rcu_read_lock(); | 3412 | rcu_read_lock(); |
| 3391 | walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); | 3413 | walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); |
| 3392 | rcu_read_unlock(); | 3414 | rcu_read_unlock(); |
| @@ -3650,7 +3672,6 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group) | |||
| 3650 | * @group: sched_group whose statistics are to be updated. | 3672 | * @group: sched_group whose statistics are to be updated. |
| 3651 | * @load_idx: Load index of sched_domain of this_cpu for load calc. | 3673 | * @load_idx: Load index of sched_domain of this_cpu for load calc. |
| 3652 | * @local_group: Does group contain this_cpu. | 3674 | * @local_group: Does group contain this_cpu. |
| 3653 | * @cpus: Set of cpus considered for load balancing. | ||
| 3654 | * @balance: Should we balance. | 3675 | * @balance: Should we balance. |
| 3655 | * @sgs: variable to hold the statistics for this group. | 3676 | * @sgs: variable to hold the statistics for this group. |
| 3656 | */ | 3677 | */ |
| @@ -3797,7 +3818,6 @@ static bool update_sd_pick_busiest(struct lb_env *env, | |||
| 3797 | /** | 3818 | /** |
| 3798 | * update_sd_lb_stats - Update sched_domain's statistics for load balancing. | 3819 | * update_sd_lb_stats - Update sched_domain's statistics for load balancing. |
| 3799 | * @env: The load balancing environment. | 3820 | * @env: The load balancing environment. |
| 3800 | * @cpus: Set of cpus considered for load balancing. | ||
| 3801 | * @balance: Should we balance. | 3821 | * @balance: Should we balance. |
| 3802 | * @sds: variable to hold the statistics for this sched_domain. | 3822 | * @sds: variable to hold the statistics for this sched_domain. |
| 3803 | */ | 3823 | */ |
| @@ -4293,11 +4313,10 @@ redo: | |||
| 4293 | env.src_rq = busiest; | 4313 | env.src_rq = busiest; |
| 4294 | env.loop_max = min(sysctl_sched_nr_migrate, busiest->nr_running); | 4314 | env.loop_max = min(sysctl_sched_nr_migrate, busiest->nr_running); |
| 4295 | 4315 | ||
| 4316 | update_h_load(env.src_cpu); | ||
| 4296 | more_balance: | 4317 | more_balance: |
| 4297 | local_irq_save(flags); | 4318 | local_irq_save(flags); |
| 4298 | double_rq_lock(this_rq, busiest); | 4319 | double_rq_lock(this_rq, busiest); |
| 4299 | if (!env.loop) | ||
| 4300 | update_h_load(env.src_cpu); | ||
| 4301 | 4320 | ||
| 4302 | /* | 4321 | /* |
| 4303 | * cur_ld_moved - load moved in current iteration | 4322 | * cur_ld_moved - load moved in current iteration |
| @@ -4949,6 +4968,9 @@ static void rq_online_fair(struct rq *rq) | |||
| 4949 | static void rq_offline_fair(struct rq *rq) | 4968 | static void rq_offline_fair(struct rq *rq) |
| 4950 | { | 4969 | { |
| 4951 | update_sysctl(); | 4970 | update_sysctl(); |
| 4971 | |||
| 4972 | /* Ensure any throttled groups are reachable by pick_next_task */ | ||
| 4973 | unthrottle_offline_cfs_rqs(rq); | ||
| 4952 | } | 4974 | } |
| 4953 | 4975 | ||
| 4954 | #endif /* CONFIG_SMP */ | 4976 | #endif /* CONFIG_SMP */ |
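Editor's note: update_h_load() above is rate-limited with rq->h_load_throttle so the tg_load_down walk runs at most once per jiffy per runqueue, and load_balance() now calls it once before the retry loop instead of inside it. A user-space sketch of that once-per-tick throttle follows; the names and the tick source are made up.

/*
 * Illustrative sketch only: skip the expensive refresh if it already ran
 * during the current tick.
 */
#include <stdio.h>

static unsigned long last_update;	/* plays the role of rq->h_load_throttle */
static int update_count;

static void expensive_update(void) { update_count++; }

static void maybe_update(unsigned long now)
{
	if (last_update == now)		/* already refreshed this tick */
		return;
	last_update = now;
	expensive_update();
}

int main(void)
{
	for (int i = 0; i < 1000; i++)
		maybe_update(1 + i / 100);	/* 1000 calls spread over 10 "ticks" */
	printf("%d updates\n", update_count);	/* prints 10 */
	return 0;
}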
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 573e1ca01102..e0b7ba9c040f 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
| @@ -691,6 +691,7 @@ balanced: | |||
| 691 | * runtime - in which case borrowing doesn't make sense. | 691 | * runtime - in which case borrowing doesn't make sense. |
| 692 | */ | 692 | */ |
| 693 | rt_rq->rt_runtime = RUNTIME_INF; | 693 | rt_rq->rt_runtime = RUNTIME_INF; |
| 694 | rt_rq->rt_throttled = 0; | ||
| 694 | raw_spin_unlock(&rt_rq->rt_runtime_lock); | 695 | raw_spin_unlock(&rt_rq->rt_runtime_lock); |
| 695 | raw_spin_unlock(&rt_b->rt_runtime_lock); | 696 | raw_spin_unlock(&rt_b->rt_runtime_lock); |
| 696 | } | 697 | } |
| @@ -788,6 +789,19 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | |||
| 788 | const struct cpumask *span; | 789 | const struct cpumask *span; |
| 789 | 790 | ||
| 790 | span = sched_rt_period_mask(); | 791 | span = sched_rt_period_mask(); |
| 792 | #ifdef CONFIG_RT_GROUP_SCHED | ||
| 793 | /* | ||
| 794 | * FIXME: isolated CPUs should really leave the root task group, | ||
| 795 | * whether they are isolcpus or were isolated via cpusets, lest | ||
| 796 | * the timer run on a CPU which does not service all runqueues, | ||
| 797 | * potentially leaving other CPUs indefinitely throttled. If | ||
| 798 | * isolation is really required, the user will turn the throttle | ||
| 799 | * off to kill the perturbations it causes anyway. Meanwhile, | ||
| 800 | * this maintains functionality for boot and/or troubleshooting. | ||
| 801 | */ | ||
| 802 | if (rt_b == &root_task_group.rt_bandwidth) | ||
| 803 | span = cpu_online_mask; | ||
| 804 | #endif | ||
| 791 | for_each_cpu(i, span) { | 805 | for_each_cpu(i, span) { |
| 792 | int enqueue = 0; | 806 | int enqueue = 0; |
| 793 | struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); | 807 | struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c35a1a7dd4d6..0848fa36c383 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
| @@ -80,7 +80,7 @@ extern struct mutex sched_domains_mutex; | |||
| 80 | struct cfs_rq; | 80 | struct cfs_rq; |
| 81 | struct rt_rq; | 81 | struct rt_rq; |
| 82 | 82 | ||
| 83 | static LIST_HEAD(task_groups); | 83 | extern struct list_head task_groups; |
| 84 | 84 | ||
| 85 | struct cfs_bandwidth { | 85 | struct cfs_bandwidth { |
| 86 | #ifdef CONFIG_CFS_BANDWIDTH | 86 | #ifdef CONFIG_CFS_BANDWIDTH |
| @@ -374,7 +374,11 @@ struct rq { | |||
| 374 | #ifdef CONFIG_FAIR_GROUP_SCHED | 374 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 375 | /* list of leaf cfs_rq on this cpu: */ | 375 | /* list of leaf cfs_rq on this cpu: */ |
| 376 | struct list_head leaf_cfs_rq_list; | 376 | struct list_head leaf_cfs_rq_list; |
| 377 | #endif | 377 | #ifdef CONFIG_SMP |
| 378 | unsigned long h_load_throttle; | ||
| 379 | #endif /* CONFIG_SMP */ | ||
| 380 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | ||
| 381 | |||
| 378 | #ifdef CONFIG_RT_GROUP_SCHED | 382 | #ifdef CONFIG_RT_GROUP_SCHED |
| 379 | struct list_head leaf_rt_rq_list; | 383 | struct list_head leaf_rt_rq_list; |
| 380 | #endif | 384 | #endif |
| @@ -1140,7 +1144,6 @@ extern void print_rt_stats(struct seq_file *m, int cpu); | |||
| 1140 | 1144 | ||
| 1141 | extern void init_cfs_rq(struct cfs_rq *cfs_rq); | 1145 | extern void init_cfs_rq(struct cfs_rq *cfs_rq); |
| 1142 | extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq); | 1146 | extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq); |
| 1143 | extern void unthrottle_offline_cfs_rqs(struct rq *rq); | ||
| 1144 | 1147 | ||
| 1145 | extern void account_cfs_bandwidth_used(int enabled, int was_enabled); | 1148 | extern void account_cfs_bandwidth_used(int enabled, int was_enabled); |
| 1146 | 1149 | ||
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 7b386e86fd23..da5eb5bed84a 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
| @@ -27,8 +27,10 @@ static struct task_struct *pick_next_task_stop(struct rq *rq) | |||
| 27 | { | 27 | { |
| 28 | struct task_struct *stop = rq->stop; | 28 | struct task_struct *stop = rq->stop; |
| 29 | 29 | ||
| 30 | if (stop && stop->on_rq) | 30 | if (stop && stop->on_rq) { |
| 31 | stop->se.exec_start = rq->clock_task; | ||
| 31 | return stop; | 32 | return stop; |
| 33 | } | ||
| 32 | 34 | ||
| 33 | return NULL; | 35 | return NULL; |
| 34 | } | 36 | } |
| @@ -52,6 +54,21 @@ static void yield_task_stop(struct rq *rq) | |||
| 52 | 54 | ||
| 53 | static void put_prev_task_stop(struct rq *rq, struct task_struct *prev) | 55 | static void put_prev_task_stop(struct rq *rq, struct task_struct *prev) |
| 54 | { | 56 | { |
| 57 | struct task_struct *curr = rq->curr; | ||
| 58 | u64 delta_exec; | ||
| 59 | |||
| 60 | delta_exec = rq->clock_task - curr->se.exec_start; | ||
| 61 | if (unlikely((s64)delta_exec < 0)) | ||
| 62 | delta_exec = 0; | ||
| 63 | |||
| 64 | schedstat_set(curr->se.statistics.exec_max, | ||
| 65 | max(curr->se.statistics.exec_max, delta_exec)); | ||
| 66 | |||
| 67 | curr->se.sum_exec_runtime += delta_exec; | ||
| 68 | account_group_exec_runtime(curr, delta_exec); | ||
| 69 | |||
| 70 | curr->se.exec_start = rq->clock_task; | ||
| 71 | cpuacct_charge(curr, delta_exec); | ||
| 55 | } | 72 | } |
| 56 | 73 | ||
| 57 | static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued) | 74 | static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued) |
| @@ -60,6 +77,9 @@ static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued) | |||
| 60 | 77 | ||
| 61 | static void set_curr_task_stop(struct rq *rq) | 78 | static void set_curr_task_stop(struct rq *rq) |
| 62 | { | 79 | { |
| 80 | struct task_struct *stop = rq->stop; | ||
| 81 | |||
| 82 | stop->se.exec_start = rq->clock_task; | ||
| 63 | } | 83 | } |
| 64 | 84 | ||
| 65 | static void switched_to_stop(struct rq *rq, struct task_struct *p) | 85 | static void switched_to_stop(struct rq *rq, struct task_struct *p) |
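Editor's note: the stop_task.c changes above make the stop scheduling class account its own runtime - exec_start is stamped when the stop task goes on the CPU and the elapsed delta is folded into sum_exec_runtime when it comes off, with negative deltas clamped to zero. Below is a minimal sketch of that start/delta accounting pattern.

/*
 * Illustrative sketch only: stamp a start time going on CPU, fold the
 * elapsed delta into total runtime coming off.
 */
#include <stdint.h>
#include <stdio.h>

struct task {
	uint64_t exec_start;		/* clock when last put on CPU */
	uint64_t sum_exec_runtime;	/* total accounted runtime */
};

static void on_cpu(struct task *t, uint64_t clock)
{
	t->exec_start = clock;
}

static void off_cpu(struct task *t, uint64_t clock)
{
	int64_t delta = (int64_t)(clock - t->exec_start);

	if (delta < 0)			/* guard against the clock going backwards */
		delta = 0;
	t->sum_exec_runtime += (uint64_t)delta;
	t->exec_start = clock;
}

int main(void)
{
	struct task t = { 0, 0 };

	on_cpu(&t, 100);
	off_cpu(&t, 175);
	printf("%llu\n", (unsigned long long)t.sum_exec_runtime);	/* prints 75 */
	return 0;
}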
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 91d4e1742a0c..d320d44903bd 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
| @@ -75,6 +75,7 @@ void task_work_run(void) | |||
| 75 | p = q->next; | 75 | p = q->next; |
| 76 | q->func(q); | 76 | q->func(q); |
| 77 | q = p; | 77 | q = p; |
| 78 | cond_resched(); | ||
| 78 | } | 79 | } |
| 79 | } | 80 | } |
| 80 | } | 81 | } |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 024540f97f74..3a9e5d5c1091 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
| @@ -573,6 +573,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) | |||
| 573 | tick_do_update_jiffies64(now); | 573 | tick_do_update_jiffies64(now); |
| 574 | update_cpu_load_nohz(); | 574 | update_cpu_load_nohz(); |
| 575 | 575 | ||
| 576 | calc_load_exit_idle(); | ||
| 576 | touch_softlockup_watchdog(); | 577 | touch_softlockup_watchdog(); |
| 577 | /* | 578 | /* |
| 578 | * Cancel the scheduled timer and restore the tick | 579 | * Cancel the scheduled timer and restore the tick |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index e16af197a2bc..d3b91e75cecd 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
| @@ -115,6 +115,7 @@ static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts) | |||
| 115 | { | 115 | { |
| 116 | tk->xtime_sec += ts->tv_sec; | 116 | tk->xtime_sec += ts->tv_sec; |
| 117 | tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift; | 117 | tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift; |
| 118 | tk_normalize_xtime(tk); | ||
| 118 | } | 119 | } |
| 119 | 120 | ||
| 120 | static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm) | 121 | static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm) |
| @@ -276,7 +277,7 @@ static void timekeeping_forward_now(struct timekeeper *tk) | |||
| 276 | tk->xtime_nsec += cycle_delta * tk->mult; | 277 | tk->xtime_nsec += cycle_delta * tk->mult; |
| 277 | 278 | ||
| 278 | /* If arch requires, add in gettimeoffset() */ | 279 | /* If arch requires, add in gettimeoffset() */ |
| 279 | tk->xtime_nsec += arch_gettimeoffset() << tk->shift; | 280 | tk->xtime_nsec += (u64)arch_gettimeoffset() << tk->shift; |
| 280 | 281 | ||
| 281 | tk_normalize_xtime(tk); | 282 | tk_normalize_xtime(tk); |
| 282 | 283 | ||
| @@ -302,10 +303,11 @@ void getnstimeofday(struct timespec *ts) | |||
| 302 | seq = read_seqbegin(&tk->lock); | 303 | seq = read_seqbegin(&tk->lock); |
| 303 | 304 | ||
| 304 | ts->tv_sec = tk->xtime_sec; | 305 | ts->tv_sec = tk->xtime_sec; |
| 305 | ts->tv_nsec = timekeeping_get_ns(tk); | 306 | nsecs = timekeeping_get_ns(tk); |
| 306 | 307 | ||
| 307 | } while (read_seqretry(&tk->lock, seq)); | 308 | } while (read_seqretry(&tk->lock, seq)); |
| 308 | 309 | ||
| 310 | ts->tv_nsec = 0; | ||
| 309 | timespec_add_ns(ts, nsecs); | 311 | timespec_add_ns(ts, nsecs); |
| 310 | } | 312 | } |
| 311 | EXPORT_SYMBOL(getnstimeofday); | 313 | EXPORT_SYMBOL(getnstimeofday); |
| @@ -344,6 +346,7 @@ void ktime_get_ts(struct timespec *ts) | |||
| 344 | { | 346 | { |
| 345 | struct timekeeper *tk = &timekeeper; | 347 | struct timekeeper *tk = &timekeeper; |
| 346 | struct timespec tomono; | 348 | struct timespec tomono; |
| 349 | s64 nsec; | ||
| 347 | unsigned int seq; | 350 | unsigned int seq; |
| 348 | 351 | ||
| 349 | WARN_ON(timekeeping_suspended); | 352 | WARN_ON(timekeeping_suspended); |
| @@ -351,13 +354,14 @@ void ktime_get_ts(struct timespec *ts) | |||
| 351 | do { | 354 | do { |
| 352 | seq = read_seqbegin(&tk->lock); | 355 | seq = read_seqbegin(&tk->lock); |
| 353 | ts->tv_sec = tk->xtime_sec; | 356 | ts->tv_sec = tk->xtime_sec; |
| 354 | ts->tv_nsec = timekeeping_get_ns(tk); | 357 | nsec = timekeeping_get_ns(tk); |
| 355 | tomono = tk->wall_to_monotonic; | 358 | tomono = tk->wall_to_monotonic; |
| 356 | 359 | ||
| 357 | } while (read_seqretry(&tk->lock, seq)); | 360 | } while (read_seqretry(&tk->lock, seq)); |
| 358 | 361 | ||
| 359 | set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec, | 362 | ts->tv_sec += tomono.tv_sec; |
| 360 | ts->tv_nsec + tomono.tv_nsec); | 363 | ts->tv_nsec = 0; |
| 364 | timespec_add_ns(ts, nsec + tomono.tv_nsec); | ||
| 361 | } | 365 | } |
| 362 | EXPORT_SYMBOL_GPL(ktime_get_ts); | 366 | EXPORT_SYMBOL_GPL(ktime_get_ts); |
| 363 | 367 | ||
| @@ -427,7 +431,7 @@ int do_settimeofday(const struct timespec *tv) | |||
| 427 | struct timespec ts_delta, xt; | 431 | struct timespec ts_delta, xt; |
| 428 | unsigned long flags; | 432 | unsigned long flags; |
| 429 | 433 | ||
| 430 | if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) | 434 | if (!timespec_valid_strict(tv)) |
| 431 | return -EINVAL; | 435 | return -EINVAL; |
| 432 | 436 | ||
| 433 | write_seqlock_irqsave(&tk->lock, flags); | 437 | write_seqlock_irqsave(&tk->lock, flags); |
| @@ -463,6 +467,8 @@ int timekeeping_inject_offset(struct timespec *ts) | |||
| 463 | { | 467 | { |
| 464 | struct timekeeper *tk = &timekeeper; | 468 | struct timekeeper *tk = &timekeeper; |
| 465 | unsigned long flags; | 469 | unsigned long flags; |
| 470 | struct timespec tmp; | ||
| 471 | int ret = 0; | ||
| 466 | 472 | ||
| 467 | if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) | 473 | if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) |
| 468 | return -EINVAL; | 474 | return -EINVAL; |
| @@ -471,10 +477,17 @@ int timekeeping_inject_offset(struct timespec *ts) | |||
| 471 | 477 | ||
| 472 | timekeeping_forward_now(tk); | 478 | timekeeping_forward_now(tk); |
| 473 | 479 | ||
| 480 | /* Make sure the proposed value is valid */ | ||
| 481 | tmp = timespec_add(tk_xtime(tk), *ts); | ||
| 482 | if (!timespec_valid_strict(&tmp)) { | ||
| 483 | ret = -EINVAL; | ||
| 484 | goto error; | ||
| 485 | } | ||
| 474 | 486 | ||
| 475 | tk_xtime_add(tk, ts); | 487 | tk_xtime_add(tk, ts); |
| 476 | tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts)); | 488 | tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts)); |
| 477 | 489 | ||
| 490 | error: /* even if we error out, we forwarded the time, so call update */ | ||
| 478 | timekeeping_update(tk, true); | 491 | timekeeping_update(tk, true); |
| 479 | 492 | ||
| 480 | write_sequnlock_irqrestore(&tk->lock, flags); | 493 | write_sequnlock_irqrestore(&tk->lock, flags); |
| @@ -482,7 +495,7 @@ int timekeeping_inject_offset(struct timespec *ts) | |||
| 482 | /* signal hrtimers about time change */ | 495 | /* signal hrtimers about time change */ |
| 483 | clock_was_set(); | 496 | clock_was_set(); |
| 484 | 497 | ||
| 485 | return 0; | 498 | return ret; |
| 486 | } | 499 | } |
| 487 | EXPORT_SYMBOL(timekeeping_inject_offset); | 500 | EXPORT_SYMBOL(timekeeping_inject_offset); |
| 488 | 501 | ||
| @@ -649,7 +662,20 @@ void __init timekeeping_init(void) | |||
| 649 | struct timespec now, boot, tmp; | 662 | struct timespec now, boot, tmp; |
| 650 | 663 | ||
| 651 | read_persistent_clock(&now); | 664 | read_persistent_clock(&now); |
| 665 | if (!timespec_valid_strict(&now)) { | ||
| 666 | pr_warn("WARNING: Persistent clock returned invalid value!\n" | ||
| 667 | " Check your CMOS/BIOS settings.\n"); | ||
| 668 | now.tv_sec = 0; | ||
| 669 | now.tv_nsec = 0; | ||
| 670 | } | ||
| 671 | |||
| 652 | read_boot_clock(&boot); | 672 | read_boot_clock(&boot); |
| 673 | if (!timespec_valid_strict(&boot)) { | ||
| 674 | pr_warn("WARNING: Boot clock returned invalid value!\n" | ||
| 675 | " Check your CMOS/BIOS settings.\n"); | ||
| 676 | boot.tv_sec = 0; | ||
| 677 | boot.tv_nsec = 0; | ||
| 678 | } | ||
| 653 | 679 | ||
| 654 | seqlock_init(&tk->lock); | 680 | seqlock_init(&tk->lock); |
| 655 | 681 | ||
| @@ -690,7 +716,7 @@ static struct timespec timekeeping_suspend_time; | |||
| 690 | static void __timekeeping_inject_sleeptime(struct timekeeper *tk, | 716 | static void __timekeeping_inject_sleeptime(struct timekeeper *tk, |
| 691 | struct timespec *delta) | 717 | struct timespec *delta) |
| 692 | { | 718 | { |
| 693 | if (!timespec_valid(delta)) { | 719 | if (!timespec_valid_strict(delta)) { |
| 694 | printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " | 720 | printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " |
| 695 | "sleep delta value!\n"); | 721 | "sleep delta value!\n"); |
| 696 | return; | 722 | return; |
| @@ -1129,6 +1155,10 @@ static void update_wall_time(void) | |||
| 1129 | offset = (clock->read(clock) - clock->cycle_last) & clock->mask; | 1155 | offset = (clock->read(clock) - clock->cycle_last) & clock->mask; |
| 1130 | #endif | 1156 | #endif |
| 1131 | 1157 | ||
| 1158 | /* Check if there's really nothing to do */ | ||
| 1159 | if (offset < tk->cycle_interval) | ||
| 1160 | goto out; | ||
| 1161 | |||
| 1132 | /* | 1162 | /* |
| 1133 | * With NO_HZ we may have to accumulate many cycle_intervals | 1163 | * With NO_HZ we may have to accumulate many cycle_intervals |
| 1134 | * (think "ticks") worth of time at once. To do this efficiently, | 1164 | * (think "ticks") worth of time at once. To do this efficiently, |
| @@ -1161,9 +1191,9 @@ static void update_wall_time(void) | |||
| 1161 | * the vsyscall implementations are converted to use xtime_nsec | 1191 | * the vsyscall implementations are converted to use xtime_nsec |
| 1162 | * (shifted nanoseconds), this can be killed. | 1192 | * (shifted nanoseconds), this can be killed. |
| 1163 | */ | 1193 | */ |
| 1164 | remainder = tk->xtime_nsec & ((1 << tk->shift) - 1); | 1194 | remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1); |
| 1165 | tk->xtime_nsec -= remainder; | 1195 | tk->xtime_nsec -= remainder; |
| 1166 | tk->xtime_nsec += 1 << tk->shift; | 1196 | tk->xtime_nsec += 1ULL << tk->shift; |
| 1167 | tk->ntp_error += remainder << tk->ntp_error_shift; | 1197 | tk->ntp_error += remainder << tk->ntp_error_shift; |
| 1168 | 1198 | ||
| 1169 | /* | 1199 | /* |
| @@ -1217,6 +1247,7 @@ void get_monotonic_boottime(struct timespec *ts) | |||
| 1217 | { | 1247 | { |
| 1218 | struct timekeeper *tk = &timekeeper; | 1248 | struct timekeeper *tk = &timekeeper; |
| 1219 | struct timespec tomono, sleep; | 1249 | struct timespec tomono, sleep; |
| 1250 | s64 nsec; | ||
| 1220 | unsigned int seq; | 1251 | unsigned int seq; |
| 1221 | 1252 | ||
| 1222 | WARN_ON(timekeeping_suspended); | 1253 | WARN_ON(timekeeping_suspended); |
| @@ -1224,14 +1255,15 @@ void get_monotonic_boottime(struct timespec *ts) | |||
| 1224 | do { | 1255 | do { |
| 1225 | seq = read_seqbegin(&tk->lock); | 1256 | seq = read_seqbegin(&tk->lock); |
| 1226 | ts->tv_sec = tk->xtime_sec; | 1257 | ts->tv_sec = tk->xtime_sec; |
| 1227 | ts->tv_nsec = timekeeping_get_ns(tk); | 1258 | nsec = timekeeping_get_ns(tk); |
| 1228 | tomono = tk->wall_to_monotonic; | 1259 | tomono = tk->wall_to_monotonic; |
| 1229 | sleep = tk->total_sleep_time; | 1260 | sleep = tk->total_sleep_time; |
| 1230 | 1261 | ||
| 1231 | } while (read_seqretry(&tk->lock, seq)); | 1262 | } while (read_seqretry(&tk->lock, seq)); |
| 1232 | 1263 | ||
| 1233 | set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec, | 1264 | ts->tv_sec += tomono.tv_sec + sleep.tv_sec; |
| 1234 | ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec); | 1265 | ts->tv_nsec = 0; |
| 1266 | timespec_add_ns(ts, nsec + tomono.tv_nsec + sleep.tv_nsec); | ||
| 1235 | } | 1267 | } |
| 1236 | EXPORT_SYMBOL_GPL(get_monotonic_boottime); | 1268 | EXPORT_SYMBOL_GPL(get_monotonic_boottime); |
| 1237 | 1269 | ||
diff --git a/kernel/timer.c b/kernel/timer.c
index a61c09374eba..8c5e7b908c68 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
| @@ -1407,13 +1407,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, seconds) | |||
| 1407 | 1407 | ||
| 1408 | #endif | 1408 | #endif |
| 1409 | 1409 | ||
| 1410 | #ifndef __alpha__ | ||
| 1411 | |||
| 1412 | /* | ||
| 1413 | * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this | ||
| 1414 | * should be moved into arch/i386 instead? | ||
| 1415 | */ | ||
| 1416 | |||
| 1417 | /** | 1410 | /** |
| 1418 | * sys_getpid - return the thread group id of the current process | 1411 | * sys_getpid - return the thread group id of the current process |
| 1419 | * | 1412 | * |
| @@ -1469,8 +1462,6 @@ SYSCALL_DEFINE0(getegid) | |||
| 1469 | return from_kgid_munged(current_user_ns(), current_egid()); | 1462 | return from_kgid_munged(current_user_ns(), current_egid()); |
| 1470 | } | 1463 | } |
| 1471 | 1464 | ||
| 1472 | #endif | ||
| 1473 | |||
| 1474 | static void process_timeout(unsigned long __data) | 1465 | static void process_timeout(unsigned long __data) |
| 1475 | { | 1466 | { |
| 1476 | wake_up_process((struct task_struct *)__data); | 1467 | wake_up_process((struct task_struct *)__data); |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 60e4d7875672..6b245f64c8dd 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
| @@ -506,6 +506,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) | |||
| 506 | int size; | 506 | int size; |
| 507 | 507 | ||
| 508 | syscall_nr = syscall_get_nr(current, regs); | 508 | syscall_nr = syscall_get_nr(current, regs); |
| 509 | if (syscall_nr < 0) | ||
| 510 | return; | ||
| 509 | if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) | 511 | if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) |
| 510 | return; | 512 | return; |
| 511 | 513 | ||
| @@ -580,6 +582,8 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) | |||
| 580 | int size; | 582 | int size; |
| 581 | 583 | ||
| 582 | syscall_nr = syscall_get_nr(current, regs); | 584 | syscall_nr = syscall_get_nr(current, regs); |
| 585 | if (syscall_nr < 0) | ||
| 586 | return; | ||
| 583 | if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) | 587 | if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) |
| 584 | return; | 588 | return; |
| 585 | 589 | ||
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 69add8a9da68..4b1dfba70f7c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
| @@ -575,7 +575,7 @@ out: | |||
| 575 | /* | 575 | /* |
| 576 | * Create/destroy watchdog threads as CPUs come and go: | 576 | * Create/destroy watchdog threads as CPUs come and go: |
| 577 | */ | 577 | */ |
| 578 | static int | 578 | static int __cpuinit |
| 579 | cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | 579 | cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) |
| 580 | { | 580 | { |
| 581 | int hotcpu = (unsigned long)hcpu; | 581 | int hotcpu = (unsigned long)hcpu; |
| @@ -610,27 +610,10 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 610 | return NOTIFY_OK; | 610 | return NOTIFY_OK; |
| 611 | } | 611 | } |
| 612 | 612 | ||
| 613 | static struct notifier_block cpu_nfb = { | 613 | static struct notifier_block __cpuinitdata cpu_nfb = { |
| 614 | .notifier_call = cpu_callback | 614 | .notifier_call = cpu_callback |
| 615 | }; | 615 | }; |
| 616 | 616 | ||
| 617 | #ifdef CONFIG_SUSPEND | ||
| 618 | /* | ||
| 619 | * On exit from suspend we force an offline->online transition on the boot CPU | ||
| 620 | * so that the PMU state that was lost while in suspended state gets set up | ||
| 621 | * properly for the boot CPU. This information is required for restarting the | ||
| 622 | * NMI watchdog. | ||
| 623 | */ | ||
| 624 | void lockup_detector_bootcpu_resume(void) | ||
| 625 | { | ||
| 626 | void *cpu = (void *)(long)smp_processor_id(); | ||
| 627 | |||
| 628 | cpu_callback(&cpu_nfb, CPU_DEAD_FROZEN, cpu); | ||
| 629 | cpu_callback(&cpu_nfb, CPU_UP_PREPARE_FROZEN, cpu); | ||
| 630 | cpu_callback(&cpu_nfb, CPU_ONLINE_FROZEN, cpu); | ||
| 631 | } | ||
| 632 | #endif | ||
| 633 | |||
| 634 | void __init lockup_detector_init(void) | 617 | void __init lockup_detector_init(void) |
| 635 | { | 618 | { |
| 636 | void *cpu = (void *)(long)smp_processor_id(); | 619 | void *cpu = (void *)(long)smp_processor_id(); |
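Apart from removing lockup_detector_bootcpu_resume() and its CONFIG_SUSPEND block, the watchdog.c hunks annotate the hotplug callback with __cpuinit and its notifier_block with __cpuinitdata, so both can be discarded after boot on kernels built without CPU hotplug support. The sketch below shows the general shape of such an annotated notifier; the function and variable names are hypothetical:

#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/printk.h>

static int __cpuinit
example_cpu_callback(struct notifier_block *nfb, unsigned long action,
                     void *hcpu)
{
        int cpu = (unsigned long)hcpu;

        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
                pr_info("example: preparing cpu %d\n", cpu);
                break;
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                pr_info("example: cpu %d is online\n", cpu);
                break;
        }
        return NOTIFY_OK;
}

/* Registered from an __init function via register_cpu_notifier(). */
static struct notifier_block __cpuinitdata example_cpu_nfb = {
        .notifier_call = example_cpu_callback,
};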
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 692d97628a10..3c5a79e2134c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -66,6 +66,7 @@ enum { | |||
| 66 | 66 | ||
| 67 | /* pool flags */ | 67 | /* pool flags */ |
| 68 | POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ | 68 | POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ |
| 69 | POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */ | ||
| 69 | 70 | ||
| 70 | /* worker flags */ | 71 | /* worker flags */ |
| 71 | WORKER_STARTED = 1 << 0, /* started */ | 72 | WORKER_STARTED = 1 << 0, /* started */ |
| @@ -652,7 +653,7 @@ static bool need_to_manage_workers(struct worker_pool *pool) | |||
| 652 | /* Do we have too many workers and should some go away? */ | 653 | /* Do we have too many workers and should some go away? */ |
| 653 | static bool too_many_workers(struct worker_pool *pool) | 654 | static bool too_many_workers(struct worker_pool *pool) |
| 654 | { | 655 | { |
| 655 | bool managing = mutex_is_locked(&pool->manager_mutex); | 656 | bool managing = pool->flags & POOL_MANAGING_WORKERS; |
| 656 | int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ | 657 | int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ |
| 657 | int nr_busy = pool->nr_workers - nr_idle; | 658 | int nr_busy = pool->nr_workers - nr_idle; |
| 658 | 659 | ||
| @@ -1326,6 +1327,15 @@ static void idle_worker_rebind(struct worker *worker) | |||
| 1326 | 1327 | ||
| 1327 | /* we did our part, wait for rebind_workers() to finish up */ | 1328 | /* we did our part, wait for rebind_workers() to finish up */ |
| 1328 | wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND)); | 1329 | wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND)); |
| 1330 | |||
| 1331 | /* | ||
| 1332 | * rebind_workers() shouldn't finish until all workers have passed | ||
| 1333 | * the above WORKER_REBIND wait. Tell it when done. | ||
| 1334 | */ | ||
| 1335 | spin_lock_irq(&worker->pool->gcwq->lock); | ||
| 1336 | if (!--worker->idle_rebind->cnt) | ||
| 1337 | complete(&worker->idle_rebind->done); | ||
| 1338 | spin_unlock_irq(&worker->pool->gcwq->lock); | ||
| 1329 | } | 1339 | } |
| 1330 | 1340 | ||
| 1331 | /* | 1341 | /* |
| @@ -1339,8 +1349,16 @@ static void busy_worker_rebind_fn(struct work_struct *work) | |||
| 1339 | struct worker *worker = container_of(work, struct worker, rebind_work); | 1349 | struct worker *worker = container_of(work, struct worker, rebind_work); |
| 1340 | struct global_cwq *gcwq = worker->pool->gcwq; | 1350 | struct global_cwq *gcwq = worker->pool->gcwq; |
| 1341 | 1351 | ||
| 1342 | if (worker_maybe_bind_and_lock(worker)) | 1352 | worker_maybe_bind_and_lock(worker); |
| 1343 | worker_clr_flags(worker, WORKER_REBIND); | 1353 | |
| 1354 | /* | ||
| 1355 | * %WORKER_REBIND must be cleared even if the above binding failed; | ||
| 1356 | * otherwise, we may confuse the next CPU_UP cycle or oops / get | ||
| 1357 | * stuck by calling idle_worker_rebind() prematurely. If CPU went | ||
| 1358 | * down again in between, %WORKER_UNBOUND would be set, so clearing | ||
| 1359 | * %WORKER_REBIND is always safe. | ||
| 1360 | */ | ||
| 1361 | worker_clr_flags(worker, WORKER_REBIND); | ||
| 1344 | 1362 | ||
| 1345 | spin_unlock_irq(&gcwq->lock); | 1363 | spin_unlock_irq(&gcwq->lock); |
| 1346 | } | 1364 | } |
| @@ -1396,12 +1414,15 @@ retry: | |||
| 1396 | /* set REBIND and kick idle ones, we'll wait for these later */ | 1414 | /* set REBIND and kick idle ones, we'll wait for these later */ |
| 1397 | for_each_worker_pool(pool, gcwq) { | 1415 | for_each_worker_pool(pool, gcwq) { |
| 1398 | list_for_each_entry(worker, &pool->idle_list, entry) { | 1416 | list_for_each_entry(worker, &pool->idle_list, entry) { |
| 1417 | unsigned long worker_flags = worker->flags; | ||
| 1418 | |||
| 1399 | if (worker->flags & WORKER_REBIND) | 1419 | if (worker->flags & WORKER_REBIND) |
| 1400 | continue; | 1420 | continue; |
| 1401 | 1421 | ||
| 1402 | /* morph UNBOUND to REBIND */ | 1422 | /* morph UNBOUND to REBIND atomically */ |
| 1403 | worker->flags &= ~WORKER_UNBOUND; | 1423 | worker_flags &= ~WORKER_UNBOUND; |
| 1404 | worker->flags |= WORKER_REBIND; | 1424 | worker_flags |= WORKER_REBIND; |
| 1425 | ACCESS_ONCE(worker->flags) = worker_flags; | ||
| 1405 | 1426 | ||
| 1406 | idle_rebind.cnt++; | 1427 | idle_rebind.cnt++; |
| 1407 | worker->idle_rebind = &idle_rebind; | 1428 | worker->idle_rebind = &idle_rebind; |
| @@ -1419,25 +1440,15 @@ retry: | |||
| 1419 | goto retry; | 1440 | goto retry; |
| 1420 | } | 1441 | } |
| 1421 | 1442 | ||
| 1422 | /* | 1443 | /* all idle workers are rebound, rebind busy workers */ |
| 1423 | * All idle workers are rebound and waiting for %WORKER_REBIND to | ||
| 1424 | * be cleared inside idle_worker_rebind(). Clear and release. | ||
| 1425 | * Clearing %WORKER_REBIND from this foreign context is safe | ||
| 1426 | * because these workers are still guaranteed to be idle. | ||
| 1427 | */ | ||
| 1428 | for_each_worker_pool(pool, gcwq) | ||
| 1429 | list_for_each_entry(worker, &pool->idle_list, entry) | ||
| 1430 | worker->flags &= ~WORKER_REBIND; | ||
| 1431 | |||
| 1432 | wake_up_all(&gcwq->rebind_hold); | ||
| 1433 | |||
| 1434 | /* rebind busy workers */ | ||
| 1435 | for_each_busy_worker(worker, i, pos, gcwq) { | 1444 | for_each_busy_worker(worker, i, pos, gcwq) { |
| 1436 | struct work_struct *rebind_work = &worker->rebind_work; | 1445 | struct work_struct *rebind_work = &worker->rebind_work; |
| 1446 | unsigned long worker_flags = worker->flags; | ||
| 1437 | 1447 | ||
| 1438 | /* morph UNBOUND to REBIND */ | 1448 | /* morph UNBOUND to REBIND atomically */ |
| 1439 | worker->flags &= ~WORKER_UNBOUND; | 1449 | worker_flags &= ~WORKER_UNBOUND; |
| 1440 | worker->flags |= WORKER_REBIND; | 1450 | worker_flags |= WORKER_REBIND; |
| 1451 | ACCESS_ONCE(worker->flags) = worker_flags; | ||
| 1441 | 1452 | ||
| 1442 | if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, | 1453 | if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, |
| 1443 | work_data_bits(rebind_work))) | 1454 | work_data_bits(rebind_work))) |
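Both rebind loops now compose the new flags word in a local variable and publish it with a single ACCESS_ONCE() store, as the "morph UNBOUND to REBIND atomically" comments say: the old open-coded &= / |= pair was two separate read-modify-write accesses to worker->flags, leaving a window in which a lockless reader could see neither WORKER_UNBOUND nor WORKER_REBIND set. The helper below is an illustrative restatement of that pattern; the function name is invented:

/* Illustrative helper; the caller is assumed to hold gcwq->lock. */
static void morph_unbound_to_rebind(struct worker *worker)
{
        unsigned long worker_flags = worker->flags;

        worker_flags &= ~WORKER_UNBOUND;           /* modify the local copy ...   */
        worker_flags |= WORKER_REBIND;
        ACCESS_ONCE(worker->flags) = worker_flags; /* ... publish with one store  */
}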
| @@ -1449,6 +1460,34 @@ retry: | |||
| 1449 | worker->scheduled.next, | 1460 | worker->scheduled.next, |
| 1450 | work_color_to_flags(WORK_NO_COLOR)); | 1461 | work_color_to_flags(WORK_NO_COLOR)); |
| 1451 | } | 1462 | } |
| 1463 | |||
| 1464 | /* | ||
| 1465 | * All idle workers are rebound and waiting for %WORKER_REBIND to | ||
| 1466 | * be cleared inside idle_worker_rebind(). Clear and release. | ||
| 1467 | * Clearing %WORKER_REBIND from this foreign context is safe | ||
| 1468 | * because these workers are still guaranteed to be idle. | ||
| 1469 | * | ||
| 1470 | * We need to make sure all idle workers have passed the WORKER_REBIND | ||
| 1471 | * wait in idle_worker_rebind() before returning; otherwise, workers | ||
| 1472 | * can get stuck at that wait if the hotplug cycle repeats. | ||
| 1473 | */ | ||
| 1474 | idle_rebind.cnt = 1; | ||
| 1475 | INIT_COMPLETION(idle_rebind.done); | ||
| 1476 | |||
| 1477 | for_each_worker_pool(pool, gcwq) { | ||
| 1478 | list_for_each_entry(worker, &pool->idle_list, entry) { | ||
| 1479 | worker->flags &= ~WORKER_REBIND; | ||
| 1480 | idle_rebind.cnt++; | ||
| 1481 | } | ||
| 1482 | } | ||
| 1483 | |||
| 1484 | wake_up_all(&gcwq->rebind_hold); | ||
| 1485 | |||
| 1486 | if (--idle_rebind.cnt) { | ||
| 1487 | spin_unlock_irq(&gcwq->lock); | ||
| 1488 | wait_for_completion(&idle_rebind.done); | ||
| 1489 | spin_lock_irq(&gcwq->lock); | ||
| 1490 | } | ||
| 1452 | } | 1491 | } |
| 1453 | 1492 | ||
| 1454 | static struct worker *alloc_worker(void) | 1493 | static struct worker *alloc_worker(void) |
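The block appended to rebind_workers() pairs with the decrement added to idle_worker_rebind() earlier in this diff: the coordinator re-arms idle_rebind with one reference for itself plus one per idle worker, clears WORKER_REBIND, wakes everyone on rebind_hold, and drops gcwq->lock to sleep on the completion only if at least one worker has not yet passed its wait. Stripped of the workqueue specifics, this is a counted-completion barrier; the sketch below uses invented type and function names and a bare spinlock in place of gcwq->lock:

#include <linux/completion.h>
#include <linux/spinlock.h>

struct counted_barrier {
        spinlock_t lock;                /* initialise with spin_lock_init() */
        int cnt;                        /* references, protected by @lock   */
        struct completion done;
};

/* Waiter side: drop one reference; the last reference signals @done. */
static void barrier_put(struct counted_barrier *b)
{
        spin_lock_irq(&b->lock);
        if (!--b->cnt)
                complete(&b->done);
        spin_unlock_irq(&b->lock);
}

/* Coordinator side: take one reference per waiter plus one for itself,
 * kick the waiters, then sleep only if somebody is still in flight. */
static void barrier_sync(struct counted_barrier *b, int nr_waiters)
{
        spin_lock_irq(&b->lock);
        b->cnt = nr_waiters + 1;
        init_completion(&b->done);

        /* ... wake the nr_waiters tasks here; each calls barrier_put() ... */

        if (--b->cnt) {
                spin_unlock_irq(&b->lock);
                wait_for_completion(&b->done);
        } else {
                spin_unlock_irq(&b->lock);
        }
}

Re-arming the counter and the completion before the wake-up is what makes repeated hotplug cycles safe: a worker that passes its wait late still finds a valid counter to decrement.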
| @@ -1794,9 +1833,45 @@ static bool manage_workers(struct worker *worker) | |||
| 1794 | struct worker_pool *pool = worker->pool; | 1833 | struct worker_pool *pool = worker->pool; |
| 1795 | bool ret = false; | 1834 | bool ret = false; |
| 1796 | 1835 | ||
| 1797 | if (!mutex_trylock(&pool->manager_mutex)) | 1836 | if (pool->flags & POOL_MANAGING_WORKERS) |
| 1798 | return ret; | 1837 | return ret; |
| 1799 | 1838 | ||
| 1839 | pool->flags |= POOL_MANAGING_WORKERS; | ||
| 1840 | |||
| 1841 | /* | ||
| 1842 | * To simplify both worker management and CPU hotplug, hold off | ||
| 1843 | * management while hotplug is in progress. CPU hotplug path can't | ||
| 1844 | * grab %POOL_MANAGING_WORKERS to achieve this because that can | ||
| 1845 | * lead to idle worker depletion (all become busy thinking someone | ||
| 1846 | * else is managing) which in turn can result in deadlock under | ||
| 1847 | * extreme circumstances. Use @pool->manager_mutex to synchronize | ||
| 1848 | * manager against CPU hotplug. | ||
| 1849 | * | ||
| 1850 | * manager_mutex would always be free unless CPU hotplug is in | ||
| 1851 | * progress. trylock first without dropping @gcwq->lock. | ||
| 1852 | */ | ||
| 1853 | if (unlikely(!mutex_trylock(&pool->manager_mutex))) { | ||
| 1854 | spin_unlock_irq(&pool->gcwq->lock); | ||
| 1855 | mutex_lock(&pool->manager_mutex); | ||
| 1856 | /* | ||
| 1857 | * CPU hotplug could have happened while we were waiting | ||
| 1858 | * for manager_mutex. Hotplug itself can't handle us | ||
| 1859 | * because manager isn't either on idle or busy list, and | ||
| 1860 | * @gcwq's state and ours could have deviated. | ||
| 1861 | * | ||
| 1862 | * As hotplug is now excluded via manager_mutex, we can | ||
| 1863 | * simply try to bind. It will succeed or fail depending | ||
| 1864 | * on @gcwq's current state. Try it and adjust | ||
| 1865 | * %WORKER_UNBOUND accordingly. | ||
| 1866 | */ | ||
| 1867 | if (worker_maybe_bind_and_lock(worker)) | ||
| 1868 | worker->flags &= ~WORKER_UNBOUND; | ||
| 1869 | else | ||
| 1870 | worker->flags |= WORKER_UNBOUND; | ||
| 1871 | |||
| 1872 | ret = true; | ||
| 1873 | } | ||
| 1874 | |||
| 1800 | pool->flags &= ~POOL_MANAGE_WORKERS; | 1875 | pool->flags &= ~POOL_MANAGE_WORKERS; |
| 1801 | 1876 | ||
| 1802 | /* | 1877 | /* |
| @@ -1806,6 +1881,7 @@ static bool manage_workers(struct worker *worker) | |||
| 1806 | ret |= maybe_destroy_workers(pool); | 1881 | ret |= maybe_destroy_workers(pool); |
| 1807 | ret |= maybe_create_worker(pool); | 1882 | ret |= maybe_create_worker(pool); |
| 1808 | 1883 | ||
| 1884 | pool->flags &= ~POOL_MANAGING_WORKERS; | ||
| 1809 | mutex_unlock(&pool->manager_mutex); | 1885 | mutex_unlock(&pool->manager_mutex); |
| 1810 | return ret; | 1886 | return ret; |
| 1811 | } | 1887 | } |
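manage_workers() now distinguishes two things the old mutex_trylock() conflated: POOL_MANAGING_WORKERS is the per-pool "a manager already exists" flag (which too_many_workers() above also checks, so the manager still counts as idle), while manager_mutex is only there to exclude CPU hotplug. Because the slow path must release gcwq->lock to sleep on the mutex, it re-synchronises the manager's binding afterwards via worker_maybe_bind_and_lock(). A condensed sketch of that control flow, with an invented function name and the actual management calls elided:

/* Sketch; called with pool->gcwq->lock held, may drop and re-take it. */
static bool example_manage(struct worker_pool *pool, struct worker *self)
{
        bool ret = false;

        if (pool->flags & POOL_MANAGING_WORKERS)
                return false;                   /* someone else is managing */
        pool->flags |= POOL_MANAGING_WORKERS;

        if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
                /* Hotplug holds the mutex: sleep for it, then adjust
                 * WORKER_UNBOUND to whatever the gcwq state now is. */
                spin_unlock_irq(&pool->gcwq->lock);
                mutex_lock(&pool->manager_mutex);
                if (worker_maybe_bind_and_lock(self))   /* re-takes gcwq->lock */
                        self->flags &= ~WORKER_UNBOUND;
                else
                        self->flags |= WORKER_UNBOUND;
                ret = true;
        }

        pool->flags &= ~POOL_MANAGE_WORKERS;

        /* ... maybe_destroy_workers() / maybe_create_worker() ... */

        pool->flags &= ~POOL_MANAGING_WORKERS;
        mutex_unlock(&pool->manager_mutex);
        return ret;
}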
| @@ -3500,18 +3576,17 @@ static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb, | |||
| 3500 | #ifdef CONFIG_SMP | 3576 | #ifdef CONFIG_SMP |
| 3501 | 3577 | ||
| 3502 | struct work_for_cpu { | 3578 | struct work_for_cpu { |
| 3503 | struct completion completion; | 3579 | struct work_struct work; |
| 3504 | long (*fn)(void *); | 3580 | long (*fn)(void *); |
| 3505 | void *arg; | 3581 | void *arg; |
| 3506 | long ret; | 3582 | long ret; |
| 3507 | }; | 3583 | }; |
| 3508 | 3584 | ||
| 3509 | static int do_work_for_cpu(void *_wfc) | 3585 | static void work_for_cpu_fn(struct work_struct *work) |
| 3510 | { | 3586 | { |
| 3511 | struct work_for_cpu *wfc = _wfc; | 3587 | struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work); |
| 3588 | |||
| 3512 | wfc->ret = wfc->fn(wfc->arg); | 3589 | wfc->ret = wfc->fn(wfc->arg); |
| 3513 | complete(&wfc->completion); | ||
| 3514 | return 0; | ||
| 3515 | } | 3590 | } |
| 3516 | 3591 | ||
| 3517 | /** | 3592 | /** |
| @@ -3526,19 +3601,11 @@ static int do_work_for_cpu(void *_wfc) | |||
| 3526 | */ | 3601 | */ |
| 3527 | long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) | 3602 | long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) |
| 3528 | { | 3603 | { |
| 3529 | struct task_struct *sub_thread; | 3604 | struct work_for_cpu wfc = { .fn = fn, .arg = arg }; |
| 3530 | struct work_for_cpu wfc = { | ||
| 3531 | .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion), | ||
| 3532 | .fn = fn, | ||
| 3533 | .arg = arg, | ||
| 3534 | }; | ||
| 3535 | 3605 | ||
| 3536 | sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu"); | 3606 | INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn); |
| 3537 | if (IS_ERR(sub_thread)) | 3607 | schedule_work_on(cpu, &wfc.work); |
| 3538 | return PTR_ERR(sub_thread); | 3608 | flush_work(&wfc.work); |
| 3539 | kthread_bind(sub_thread, cpu); | ||
| 3540 | wake_up_process(sub_thread); | ||
| 3541 | wait_for_completion(&wfc.completion); | ||
| 3542 | return wfc.ret; | 3609 | return wfc.ret; |
| 3543 | } | 3610 | } |
| 3544 | EXPORT_SYMBOL_GPL(work_on_cpu); | 3611 | EXPORT_SYMBOL_GPL(work_on_cpu); |
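The rewritten work_on_cpu() rides on the workqueue itself instead of creating, binding and waking a throwaway kthread: the on-stack work item is queued on the requested CPU with schedule_work_on(), and flush_work() serves as the wait for the result. The signature is unchanged, so callers still pass a long (*fn)(void *) and get its return value back. A hypothetical caller is sketched below; both function names are invented for illustration, and work_on_cpu() can sleep, so this must run in process context:

#include <linux/printk.h>
#include <linux/smp.h>
#include <linux/workqueue.h>

/* Runs inside a workqueue worker bound to the CPU passed to work_on_cpu(). */
static long report_cpu(void *unused)
{
        return smp_processor_id();
}

static void example_caller(void)
{
        long ran_on = work_on_cpu(1, report_cpu, NULL);

        pr_info("report_cpu() ran on cpu %ld\n", ran_on);
}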
