diff options
author | David S. Miller <davem@davemloft.net> | 2012-09-15 11:43:53 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-09-15 11:43:53 -0400 |
commit | b48b63a1f6e26b0dec2c9f1690396ed4bcb66903 (patch) | |
tree | 8d9ad227c3a7d35cd78d40ecaf9bf59375dbd21a /kernel | |
parent | 7f2e6a5d8608d0353b017a0fe15502307593734e (diff) | |
parent | 3f0c3c8fe30c725c1264fb6db8cc4b69db3a658a (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Conflicts:
net/netfilter/nfnetlink_log.c
net/netfilter/xt_LOG.c
Rather easy conflict resolution, the 'net' tree had bug fixes to make
sure we checked if a socket is a time-wait one or not and elide the
logging code if so.
Whereas on the 'net-next' side we are calculating the UID and GID from
the creds using different interfaces due to the user namespace changes
from Eric Biederman.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/events/core.c | 64 | ||||
-rw-r--r-- | kernel/events/hw_breakpoint.c | 11 | ||||
-rw-r--r-- | kernel/fork.c | 4 | ||||
-rw-r--r-- | kernel/sched/core.c | 34 | ||||
-rw-r--r-- | kernel/sched/fair.c | 9 | ||||
-rw-r--r-- | kernel/sched/rt.c | 1 | ||||
-rw-r--r-- | kernel/sched/sched.h | 1 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 1 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 39 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 4 | ||||
-rw-r--r-- | kernel/workqueue.c | 110 |
11 files changed, 189 insertions, 89 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index b7935fcec7d9..7fee567153f0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -1253,7 +1253,7 @@ retry: | |||
1253 | /* | 1253 | /* |
1254 | * Cross CPU call to disable a performance event | 1254 | * Cross CPU call to disable a performance event |
1255 | */ | 1255 | */ |
1256 | static int __perf_event_disable(void *info) | 1256 | int __perf_event_disable(void *info) |
1257 | { | 1257 | { |
1258 | struct perf_event *event = info; | 1258 | struct perf_event *event = info; |
1259 | struct perf_event_context *ctx = event->ctx; | 1259 | struct perf_event_context *ctx = event->ctx; |
@@ -2935,12 +2935,12 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel); | |||
2935 | /* | 2935 | /* |
2936 | * Called when the last reference to the file is gone. | 2936 | * Called when the last reference to the file is gone. |
2937 | */ | 2937 | */ |
2938 | static int perf_release(struct inode *inode, struct file *file) | 2938 | static void put_event(struct perf_event *event) |
2939 | { | 2939 | { |
2940 | struct perf_event *event = file->private_data; | ||
2941 | struct task_struct *owner; | 2940 | struct task_struct *owner; |
2942 | 2941 | ||
2943 | file->private_data = NULL; | 2942 | if (!atomic_long_dec_and_test(&event->refcount)) |
2943 | return; | ||
2944 | 2944 | ||
2945 | rcu_read_lock(); | 2945 | rcu_read_lock(); |
2946 | owner = ACCESS_ONCE(event->owner); | 2946 | owner = ACCESS_ONCE(event->owner); |
@@ -2975,7 +2975,13 @@ static int perf_release(struct inode *inode, struct file *file) | |||
2975 | put_task_struct(owner); | 2975 | put_task_struct(owner); |
2976 | } | 2976 | } |
2977 | 2977 | ||
2978 | return perf_event_release_kernel(event); | 2978 | perf_event_release_kernel(event); |
2979 | } | ||
2980 | |||
2981 | static int perf_release(struct inode *inode, struct file *file) | ||
2982 | { | ||
2983 | put_event(file->private_data); | ||
2984 | return 0; | ||
2979 | } | 2985 | } |
2980 | 2986 | ||
2981 | u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) | 2987 | u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) |
@@ -3227,7 +3233,7 @@ unlock: | |||
3227 | 3233 | ||
3228 | static const struct file_operations perf_fops; | 3234 | static const struct file_operations perf_fops; |
3229 | 3235 | ||
3230 | static struct perf_event *perf_fget_light(int fd, int *fput_needed) | 3236 | static struct file *perf_fget_light(int fd, int *fput_needed) |
3231 | { | 3237 | { |
3232 | struct file *file; | 3238 | struct file *file; |
3233 | 3239 | ||
@@ -3241,7 +3247,7 @@ static struct perf_event *perf_fget_light(int fd, int *fput_needed) | |||
3241 | return ERR_PTR(-EBADF); | 3247 | return ERR_PTR(-EBADF); |
3242 | } | 3248 | } |
3243 | 3249 | ||
3244 | return file->private_data; | 3250 | return file; |
3245 | } | 3251 | } |
3246 | 3252 | ||
3247 | static int perf_event_set_output(struct perf_event *event, | 3253 | static int perf_event_set_output(struct perf_event *event, |
@@ -3273,19 +3279,21 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
3273 | 3279 | ||
3274 | case PERF_EVENT_IOC_SET_OUTPUT: | 3280 | case PERF_EVENT_IOC_SET_OUTPUT: |
3275 | { | 3281 | { |
3282 | struct file *output_file = NULL; | ||
3276 | struct perf_event *output_event = NULL; | 3283 | struct perf_event *output_event = NULL; |
3277 | int fput_needed = 0; | 3284 | int fput_needed = 0; |
3278 | int ret; | 3285 | int ret; |
3279 | 3286 | ||
3280 | if (arg != -1) { | 3287 | if (arg != -1) { |
3281 | output_event = perf_fget_light(arg, &fput_needed); | 3288 | output_file = perf_fget_light(arg, &fput_needed); |
3282 | if (IS_ERR(output_event)) | 3289 | if (IS_ERR(output_file)) |
3283 | return PTR_ERR(output_event); | 3290 | return PTR_ERR(output_file); |
3291 | output_event = output_file->private_data; | ||
3284 | } | 3292 | } |
3285 | 3293 | ||
3286 | ret = perf_event_set_output(event, output_event); | 3294 | ret = perf_event_set_output(event, output_event); |
3287 | if (output_event) | 3295 | if (output_event) |
3288 | fput_light(output_event->filp, fput_needed); | 3296 | fput_light(output_file, fput_needed); |
3289 | 3297 | ||
3290 | return ret; | 3298 | return ret; |
3291 | } | 3299 | } |
@@ -5950,6 +5958,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
5950 | 5958 | ||
5951 | mutex_init(&event->mmap_mutex); | 5959 | mutex_init(&event->mmap_mutex); |
5952 | 5960 | ||
5961 | atomic_long_set(&event->refcount, 1); | ||
5953 | event->cpu = cpu; | 5962 | event->cpu = cpu; |
5954 | event->attr = *attr; | 5963 | event->attr = *attr; |
5955 | event->group_leader = group_leader; | 5964 | event->group_leader = group_leader; |
@@ -6260,12 +6269,12 @@ SYSCALL_DEFINE5(perf_event_open, | |||
6260 | return event_fd; | 6269 | return event_fd; |
6261 | 6270 | ||
6262 | if (group_fd != -1) { | 6271 | if (group_fd != -1) { |
6263 | group_leader = perf_fget_light(group_fd, &fput_needed); | 6272 | group_file = perf_fget_light(group_fd, &fput_needed); |
6264 | if (IS_ERR(group_leader)) { | 6273 | if (IS_ERR(group_file)) { |
6265 | err = PTR_ERR(group_leader); | 6274 | err = PTR_ERR(group_file); |
6266 | goto err_fd; | 6275 | goto err_fd; |
6267 | } | 6276 | } |
6268 | group_file = group_leader->filp; | 6277 | group_leader = group_file->private_data; |
6269 | if (flags & PERF_FLAG_FD_OUTPUT) | 6278 | if (flags & PERF_FLAG_FD_OUTPUT) |
6270 | output_event = group_leader; | 6279 | output_event = group_leader; |
6271 | if (flags & PERF_FLAG_FD_NO_GROUP) | 6280 | if (flags & PERF_FLAG_FD_NO_GROUP) |
@@ -6402,7 +6411,6 @@ SYSCALL_DEFINE5(perf_event_open, | |||
6402 | put_ctx(gctx); | 6411 | put_ctx(gctx); |
6403 | } | 6412 | } |
6404 | 6413 | ||
6405 | event->filp = event_file; | ||
6406 | WARN_ON_ONCE(ctx->parent_ctx); | 6414 | WARN_ON_ONCE(ctx->parent_ctx); |
6407 | mutex_lock(&ctx->mutex); | 6415 | mutex_lock(&ctx->mutex); |
6408 | 6416 | ||
@@ -6496,7 +6504,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
6496 | goto err_free; | 6504 | goto err_free; |
6497 | } | 6505 | } |
6498 | 6506 | ||
6499 | event->filp = NULL; | ||
6500 | WARN_ON_ONCE(ctx->parent_ctx); | 6507 | WARN_ON_ONCE(ctx->parent_ctx); |
6501 | mutex_lock(&ctx->mutex); | 6508 | mutex_lock(&ctx->mutex); |
6502 | perf_install_in_context(ctx, event, cpu); | 6509 | perf_install_in_context(ctx, event, cpu); |
@@ -6578,7 +6585,7 @@ static void sync_child_event(struct perf_event *child_event, | |||
6578 | * Release the parent event, if this was the last | 6585 | * Release the parent event, if this was the last |
6579 | * reference to it. | 6586 | * reference to it. |
6580 | */ | 6587 | */ |
6581 | fput(parent_event->filp); | 6588 | put_event(parent_event); |
6582 | } | 6589 | } |
6583 | 6590 | ||
6584 | static void | 6591 | static void |
@@ -6654,9 +6661,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) | |||
6654 | * | 6661 | * |
6655 | * __perf_event_exit_task() | 6662 | * __perf_event_exit_task() |
6656 | * sync_child_event() | 6663 | * sync_child_event() |
6657 | * fput(parent_event->filp) | 6664 | * put_event() |
6658 | * perf_release() | 6665 | * mutex_lock(&ctx->mutex) |
6659 | * mutex_lock(&ctx->mutex) | ||
6660 | * | 6666 | * |
6661 | * But since its the parent context it won't be the same instance. | 6667 | * But since its the parent context it won't be the same instance. |
6662 | */ | 6668 | */ |
@@ -6724,7 +6730,7 @@ static void perf_free_event(struct perf_event *event, | |||
6724 | list_del_init(&event->child_list); | 6730 | list_del_init(&event->child_list); |
6725 | mutex_unlock(&parent->child_mutex); | 6731 | mutex_unlock(&parent->child_mutex); |
6726 | 6732 | ||
6727 | fput(parent->filp); | 6733 | put_event(parent); |
6728 | 6734 | ||
6729 | perf_group_detach(event); | 6735 | perf_group_detach(event); |
6730 | list_del_event(event, ctx); | 6736 | list_del_event(event, ctx); |
@@ -6804,6 +6810,12 @@ inherit_event(struct perf_event *parent_event, | |||
6804 | NULL, NULL); | 6810 | NULL, NULL); |
6805 | if (IS_ERR(child_event)) | 6811 | if (IS_ERR(child_event)) |
6806 | return child_event; | 6812 | return child_event; |
6813 | |||
6814 | if (!atomic_long_inc_not_zero(&parent_event->refcount)) { | ||
6815 | free_event(child_event); | ||
6816 | return NULL; | ||
6817 | } | ||
6818 | |||
6807 | get_ctx(child_ctx); | 6819 | get_ctx(child_ctx); |
6808 | 6820 | ||
6809 | /* | 6821 | /* |
@@ -6845,14 +6857,6 @@ inherit_event(struct perf_event *parent_event, | |||
6845 | raw_spin_unlock_irqrestore(&child_ctx->lock, flags); | 6857 | raw_spin_unlock_irqrestore(&child_ctx->lock, flags); |
6846 | 6858 | ||
6847 | /* | 6859 | /* |
6848 | * Get a reference to the parent filp - we will fput it | ||
6849 | * when the child event exits. This is safe to do because | ||
6850 | * we are in the parent and we know that the filp still | ||
6851 | * exists and has a nonzero count: | ||
6852 | */ | ||
6853 | atomic_long_inc(&parent_event->filp->f_count); | ||
6854 | |||
6855 | /* | ||
6856 | * Link this into the parent event's child list | 6860 | * Link this into the parent event's child list |
6857 | */ | 6861 | */ |
6858 | WARN_ON_ONCE(parent_event->ctx->parent_ctx); | 6862 | WARN_ON_ONCE(parent_event->ctx->parent_ctx); |
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index bb38c4d3ee12..9a7b487c6fe2 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c | |||
@@ -453,7 +453,16 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att | |||
453 | int old_type = bp->attr.bp_type; | 453 | int old_type = bp->attr.bp_type; |
454 | int err = 0; | 454 | int err = 0; |
455 | 455 | ||
456 | perf_event_disable(bp); | 456 | /* |
457 | * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it | ||
458 | * will not be possible to raise IPIs that invoke __perf_event_disable. | ||
459 | * So call the function directly after making sure we are targeting the | ||
460 | * current task. | ||
461 | */ | ||
462 | if (irqs_disabled() && bp->ctx && bp->ctx->task == current) | ||
463 | __perf_event_disable(bp); | ||
464 | else | ||
465 | perf_event_disable(bp); | ||
457 | 466 | ||
458 | bp->attr.bp_addr = attr->bp_addr; | 467 | bp->attr.bp_addr = attr->bp_addr; |
459 | bp->attr.bp_type = attr->bp_type; | 468 | bp->attr.bp_type = attr->bp_type; |
diff --git a/kernel/fork.c b/kernel/fork.c index 3bd2280d79f6..2c8857e12855 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -455,8 +455,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
455 | if (retval) | 455 | if (retval) |
456 | goto out; | 456 | goto out; |
457 | 457 | ||
458 | if (file && uprobe_mmap(tmp)) | 458 | if (file) |
459 | goto out; | 459 | uprobe_mmap(tmp); |
460 | } | 460 | } |
461 | /* a new mm has just been created */ | 461 | /* a new mm has just been created */ |
462 | arch_dup_mmap(oldmm, mm); | 462 | arch_dup_mmap(oldmm, mm); |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fbf1fd098dc6..a4ea245f3d85 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -5304,27 +5304,17 @@ void idle_task_exit(void) | |||
5304 | } | 5304 | } |
5305 | 5305 | ||
5306 | /* | 5306 | /* |
5307 | * While a dead CPU has no uninterruptible tasks queued at this point, | 5307 | * Since this CPU is going 'away' for a while, fold any nr_active delta |
5308 | * it might still have a nonzero ->nr_uninterruptible counter, because | 5308 | * we might have. Assumes we're called after migrate_tasks() so that the |
5309 | * for performance reasons the counter is not stricly tracking tasks to | 5309 | * nr_active count is stable. |
5310 | * their home CPUs. So we just add the counter to another CPU's counter, | 5310 | * |
5311 | * to keep the global sum constant after CPU-down: | 5311 | * Also see the comment "Global load-average calculations". |
5312 | */ | ||
5313 | static void migrate_nr_uninterruptible(struct rq *rq_src) | ||
5314 | { | ||
5315 | struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask)); | ||
5316 | |||
5317 | rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible; | ||
5318 | rq_src->nr_uninterruptible = 0; | ||
5319 | } | ||
5320 | |||
5321 | /* | ||
5322 | * remove the tasks which were accounted by rq from calc_load_tasks. | ||
5323 | */ | 5312 | */ |
5324 | static void calc_global_load_remove(struct rq *rq) | 5313 | static void calc_load_migrate(struct rq *rq) |
5325 | { | 5314 | { |
5326 | atomic_long_sub(rq->calc_load_active, &calc_load_tasks); | 5315 | long delta = calc_load_fold_active(rq); |
5327 | rq->calc_load_active = 0; | 5316 | if (delta) |
5317 | atomic_long_add(delta, &calc_load_tasks); | ||
5328 | } | 5318 | } |
5329 | 5319 | ||
5330 | /* | 5320 | /* |
@@ -5352,9 +5342,6 @@ static void migrate_tasks(unsigned int dead_cpu) | |||
5352 | */ | 5342 | */ |
5353 | rq->stop = NULL; | 5343 | rq->stop = NULL; |
5354 | 5344 | ||
5355 | /* Ensure any throttled groups are reachable by pick_next_task */ | ||
5356 | unthrottle_offline_cfs_rqs(rq); | ||
5357 | |||
5358 | for ( ; ; ) { | 5345 | for ( ; ; ) { |
5359 | /* | 5346 | /* |
5360 | * There's this thread running, bail when that's the only | 5347 | * There's this thread running, bail when that's the only |
@@ -5618,8 +5605,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
5618 | BUG_ON(rq->nr_running != 1); /* the migration thread */ | 5605 | BUG_ON(rq->nr_running != 1); /* the migration thread */ |
5619 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 5606 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
5620 | 5607 | ||
5621 | migrate_nr_uninterruptible(rq); | 5608 | calc_load_migrate(rq); |
5622 | calc_global_load_remove(rq); | ||
5623 | break; | 5609 | break; |
5624 | #endif | 5610 | #endif |
5625 | } | 5611 | } |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c219bf8d704c..42d9df6a5ca4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -2052,7 +2052,7 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) | |||
2052 | hrtimer_cancel(&cfs_b->slack_timer); | 2052 | hrtimer_cancel(&cfs_b->slack_timer); |
2053 | } | 2053 | } |
2054 | 2054 | ||
2055 | void unthrottle_offline_cfs_rqs(struct rq *rq) | 2055 | static void unthrottle_offline_cfs_rqs(struct rq *rq) |
2056 | { | 2056 | { |
2057 | struct cfs_rq *cfs_rq; | 2057 | struct cfs_rq *cfs_rq; |
2058 | 2058 | ||
@@ -2106,7 +2106,7 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) | |||
2106 | return NULL; | 2106 | return NULL; |
2107 | } | 2107 | } |
2108 | static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {} | 2108 | static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {} |
2109 | void unthrottle_offline_cfs_rqs(struct rq *rq) {} | 2109 | static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {} |
2110 | 2110 | ||
2111 | #endif /* CONFIG_CFS_BANDWIDTH */ | 2111 | #endif /* CONFIG_CFS_BANDWIDTH */ |
2112 | 2112 | ||
@@ -3658,7 +3658,6 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group) | |||
3658 | * @group: sched_group whose statistics are to be updated. | 3658 | * @group: sched_group whose statistics are to be updated. |
3659 | * @load_idx: Load index of sched_domain of this_cpu for load calc. | 3659 | * @load_idx: Load index of sched_domain of this_cpu for load calc. |
3660 | * @local_group: Does group contain this_cpu. | 3660 | * @local_group: Does group contain this_cpu. |
3661 | * @cpus: Set of cpus considered for load balancing. | ||
3662 | * @balance: Should we balance. | 3661 | * @balance: Should we balance. |
3663 | * @sgs: variable to hold the statistics for this group. | 3662 | * @sgs: variable to hold the statistics for this group. |
3664 | */ | 3663 | */ |
@@ -3805,7 +3804,6 @@ static bool update_sd_pick_busiest(struct lb_env *env, | |||
3805 | /** | 3804 | /** |
3806 | * update_sd_lb_stats - Update sched_domain's statistics for load balancing. | 3805 | * update_sd_lb_stats - Update sched_domain's statistics for load balancing. |
3807 | * @env: The load balancing environment. | 3806 | * @env: The load balancing environment. |
3808 | * @cpus: Set of cpus considered for load balancing. | ||
3809 | * @balance: Should we balance. | 3807 | * @balance: Should we balance. |
3810 | * @sds: variable to hold the statistics for this sched_domain. | 3808 | * @sds: variable to hold the statistics for this sched_domain. |
3811 | */ | 3809 | */ |
@@ -4956,6 +4954,9 @@ static void rq_online_fair(struct rq *rq) | |||
4956 | static void rq_offline_fair(struct rq *rq) | 4954 | static void rq_offline_fair(struct rq *rq) |
4957 | { | 4955 | { |
4958 | update_sysctl(); | 4956 | update_sysctl(); |
4957 | |||
4958 | /* Ensure any throttled groups are reachable by pick_next_task */ | ||
4959 | unthrottle_offline_cfs_rqs(rq); | ||
4959 | } | 4960 | } |
4960 | 4961 | ||
4961 | #endif /* CONFIG_SMP */ | 4962 | #endif /* CONFIG_SMP */ |
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 944cb68420e9..e0b7ba9c040f 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c | |||
@@ -691,6 +691,7 @@ balanced: | |||
691 | * runtime - in which case borrowing doesn't make sense. | 691 | * runtime - in which case borrowing doesn't make sense. |
692 | */ | 692 | */ |
693 | rt_rq->rt_runtime = RUNTIME_INF; | 693 | rt_rq->rt_runtime = RUNTIME_INF; |
694 | rt_rq->rt_throttled = 0; | ||
694 | raw_spin_unlock(&rt_rq->rt_runtime_lock); | 695 | raw_spin_unlock(&rt_rq->rt_runtime_lock); |
695 | raw_spin_unlock(&rt_b->rt_runtime_lock); | 696 | raw_spin_unlock(&rt_b->rt_runtime_lock); |
696 | } | 697 | } |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index f6714d009e77..0848fa36c383 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -1144,7 +1144,6 @@ extern void print_rt_stats(struct seq_file *m, int cpu); | |||
1144 | 1144 | ||
1145 | extern void init_cfs_rq(struct cfs_rq *cfs_rq); | 1145 | extern void init_cfs_rq(struct cfs_rq *cfs_rq); |
1146 | extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq); | 1146 | extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq); |
1147 | extern void unthrottle_offline_cfs_rqs(struct rq *rq); | ||
1148 | 1147 | ||
1149 | extern void account_cfs_bandwidth_used(int enabled, int was_enabled); | 1148 | extern void account_cfs_bandwidth_used(int enabled, int was_enabled); |
1150 | 1149 | ||
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 024540f97f74..3a9e5d5c1091 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -573,6 +573,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) | |||
573 | tick_do_update_jiffies64(now); | 573 | tick_do_update_jiffies64(now); |
574 | update_cpu_load_nohz(); | 574 | update_cpu_load_nohz(); |
575 | 575 | ||
576 | calc_load_exit_idle(); | ||
576 | touch_softlockup_watchdog(); | 577 | touch_softlockup_watchdog(); |
577 | /* | 578 | /* |
578 | * Cancel the scheduled timer and restore the tick | 579 | * Cancel the scheduled timer and restore the tick |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e16af197a2bc..34e5eac81424 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -115,6 +115,7 @@ static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts) | |||
115 | { | 115 | { |
116 | tk->xtime_sec += ts->tv_sec; | 116 | tk->xtime_sec += ts->tv_sec; |
117 | tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift; | 117 | tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift; |
118 | tk_normalize_xtime(tk); | ||
118 | } | 119 | } |
119 | 120 | ||
120 | static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm) | 121 | static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm) |
@@ -276,7 +277,7 @@ static void timekeeping_forward_now(struct timekeeper *tk) | |||
276 | tk->xtime_nsec += cycle_delta * tk->mult; | 277 | tk->xtime_nsec += cycle_delta * tk->mult; |
277 | 278 | ||
278 | /* If arch requires, add in gettimeoffset() */ | 279 | /* If arch requires, add in gettimeoffset() */ |
279 | tk->xtime_nsec += arch_gettimeoffset() << tk->shift; | 280 | tk->xtime_nsec += (u64)arch_gettimeoffset() << tk->shift; |
280 | 281 | ||
281 | tk_normalize_xtime(tk); | 282 | tk_normalize_xtime(tk); |
282 | 283 | ||
@@ -427,7 +428,7 @@ int do_settimeofday(const struct timespec *tv) | |||
427 | struct timespec ts_delta, xt; | 428 | struct timespec ts_delta, xt; |
428 | unsigned long flags; | 429 | unsigned long flags; |
429 | 430 | ||
430 | if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) | 431 | if (!timespec_valid_strict(tv)) |
431 | return -EINVAL; | 432 | return -EINVAL; |
432 | 433 | ||
433 | write_seqlock_irqsave(&tk->lock, flags); | 434 | write_seqlock_irqsave(&tk->lock, flags); |
@@ -463,6 +464,8 @@ int timekeeping_inject_offset(struct timespec *ts) | |||
463 | { | 464 | { |
464 | struct timekeeper *tk = &timekeeper; | 465 | struct timekeeper *tk = &timekeeper; |
465 | unsigned long flags; | 466 | unsigned long flags; |
467 | struct timespec tmp; | ||
468 | int ret = 0; | ||
466 | 469 | ||
467 | if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) | 470 | if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) |
468 | return -EINVAL; | 471 | return -EINVAL; |
@@ -471,10 +474,17 @@ int timekeeping_inject_offset(struct timespec *ts) | |||
471 | 474 | ||
472 | timekeeping_forward_now(tk); | 475 | timekeeping_forward_now(tk); |
473 | 476 | ||
477 | /* Make sure the proposed value is valid */ | ||
478 | tmp = timespec_add(tk_xtime(tk), *ts); | ||
479 | if (!timespec_valid_strict(&tmp)) { | ||
480 | ret = -EINVAL; | ||
481 | goto error; | ||
482 | } | ||
474 | 483 | ||
475 | tk_xtime_add(tk, ts); | 484 | tk_xtime_add(tk, ts); |
476 | tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts)); | 485 | tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts)); |
477 | 486 | ||
487 | error: /* even if we error out, we forwarded the time, so call update */ | ||
478 | timekeeping_update(tk, true); | 488 | timekeeping_update(tk, true); |
479 | 489 | ||
480 | write_sequnlock_irqrestore(&tk->lock, flags); | 490 | write_sequnlock_irqrestore(&tk->lock, flags); |
@@ -482,7 +492,7 @@ int timekeeping_inject_offset(struct timespec *ts) | |||
482 | /* signal hrtimers about time change */ | 492 | /* signal hrtimers about time change */ |
483 | clock_was_set(); | 493 | clock_was_set(); |
484 | 494 | ||
485 | return 0; | 495 | return ret; |
486 | } | 496 | } |
487 | EXPORT_SYMBOL(timekeeping_inject_offset); | 497 | EXPORT_SYMBOL(timekeeping_inject_offset); |
488 | 498 | ||
@@ -649,7 +659,20 @@ void __init timekeeping_init(void) | |||
649 | struct timespec now, boot, tmp; | 659 | struct timespec now, boot, tmp; |
650 | 660 | ||
651 | read_persistent_clock(&now); | 661 | read_persistent_clock(&now); |
662 | if (!timespec_valid_strict(&now)) { | ||
663 | pr_warn("WARNING: Persistent clock returned invalid value!\n" | ||
664 | " Check your CMOS/BIOS settings.\n"); | ||
665 | now.tv_sec = 0; | ||
666 | now.tv_nsec = 0; | ||
667 | } | ||
668 | |||
652 | read_boot_clock(&boot); | 669 | read_boot_clock(&boot); |
670 | if (!timespec_valid_strict(&boot)) { | ||
671 | pr_warn("WARNING: Boot clock returned invalid value!\n" | ||
672 | " Check your CMOS/BIOS settings.\n"); | ||
673 | boot.tv_sec = 0; | ||
674 | boot.tv_nsec = 0; | ||
675 | } | ||
653 | 676 | ||
654 | seqlock_init(&tk->lock); | 677 | seqlock_init(&tk->lock); |
655 | 678 | ||
@@ -690,7 +713,7 @@ static struct timespec timekeeping_suspend_time; | |||
690 | static void __timekeeping_inject_sleeptime(struct timekeeper *tk, | 713 | static void __timekeeping_inject_sleeptime(struct timekeeper *tk, |
691 | struct timespec *delta) | 714 | struct timespec *delta) |
692 | { | 715 | { |
693 | if (!timespec_valid(delta)) { | 716 | if (!timespec_valid_strict(delta)) { |
694 | printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " | 717 | printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " |
695 | "sleep delta value!\n"); | 718 | "sleep delta value!\n"); |
696 | return; | 719 | return; |
@@ -1129,6 +1152,10 @@ static void update_wall_time(void) | |||
1129 | offset = (clock->read(clock) - clock->cycle_last) & clock->mask; | 1152 | offset = (clock->read(clock) - clock->cycle_last) & clock->mask; |
1130 | #endif | 1153 | #endif |
1131 | 1154 | ||
1155 | /* Check if there's really nothing to do */ | ||
1156 | if (offset < tk->cycle_interval) | ||
1157 | goto out; | ||
1158 | |||
1132 | /* | 1159 | /* |
1133 | * With NO_HZ we may have to accumulate many cycle_intervals | 1160 | * With NO_HZ we may have to accumulate many cycle_intervals |
1134 | * (think "ticks") worth of time at once. To do this efficiently, | 1161 | * (think "ticks") worth of time at once. To do this efficiently, |
@@ -1161,9 +1188,9 @@ static void update_wall_time(void) | |||
1161 | * the vsyscall implementations are converted to use xtime_nsec | 1188 | * the vsyscall implementations are converted to use xtime_nsec |
1162 | * (shifted nanoseconds), this can be killed. | 1189 | * (shifted nanoseconds), this can be killed. |
1163 | */ | 1190 | */ |
1164 | remainder = tk->xtime_nsec & ((1 << tk->shift) - 1); | 1191 | remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1); |
1165 | tk->xtime_nsec -= remainder; | 1192 | tk->xtime_nsec -= remainder; |
1166 | tk->xtime_nsec += 1 << tk->shift; | 1193 | tk->xtime_nsec += 1ULL << tk->shift; |
1167 | tk->ntp_error += remainder << tk->ntp_error_shift; | 1194 | tk->ntp_error += remainder << tk->ntp_error_shift; |
1168 | 1195 | ||
1169 | /* | 1196 | /* |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 60e4d7875672..6b245f64c8dd 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -506,6 +506,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) | |||
506 | int size; | 506 | int size; |
507 | 507 | ||
508 | syscall_nr = syscall_get_nr(current, regs); | 508 | syscall_nr = syscall_get_nr(current, regs); |
509 | if (syscall_nr < 0) | ||
510 | return; | ||
509 | if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) | 511 | if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) |
510 | return; | 512 | return; |
511 | 513 | ||
@@ -580,6 +582,8 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) | |||
580 | int size; | 582 | int size; |
581 | 583 | ||
582 | syscall_nr = syscall_get_nr(current, regs); | 584 | syscall_nr = syscall_get_nr(current, regs); |
585 | if (syscall_nr < 0) | ||
586 | return; | ||
583 | if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) | 587 | if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) |
584 | return; | 588 | return; |
585 | 589 | ||
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 692d97628a10..1e1373bcb3e3 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -66,6 +66,7 @@ enum { | |||
66 | 66 | ||
67 | /* pool flags */ | 67 | /* pool flags */ |
68 | POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ | 68 | POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ |
69 | POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */ | ||
69 | 70 | ||
70 | /* worker flags */ | 71 | /* worker flags */ |
71 | WORKER_STARTED = 1 << 0, /* started */ | 72 | WORKER_STARTED = 1 << 0, /* started */ |
@@ -652,7 +653,7 @@ static bool need_to_manage_workers(struct worker_pool *pool) | |||
652 | /* Do we have too many workers and should some go away? */ | 653 | /* Do we have too many workers and should some go away? */ |
653 | static bool too_many_workers(struct worker_pool *pool) | 654 | static bool too_many_workers(struct worker_pool *pool) |
654 | { | 655 | { |
655 | bool managing = mutex_is_locked(&pool->manager_mutex); | 656 | bool managing = pool->flags & POOL_MANAGING_WORKERS; |
656 | int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ | 657 | int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ |
657 | int nr_busy = pool->nr_workers - nr_idle; | 658 | int nr_busy = pool->nr_workers - nr_idle; |
658 | 659 | ||
@@ -1326,6 +1327,15 @@ static void idle_worker_rebind(struct worker *worker) | |||
1326 | 1327 | ||
1327 | /* we did our part, wait for rebind_workers() to finish up */ | 1328 | /* we did our part, wait for rebind_workers() to finish up */ |
1328 | wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND)); | 1329 | wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND)); |
1330 | |||
1331 | /* | ||
1332 | * rebind_workers() shouldn't finish until all workers passed the | ||
1333 | * above WORKER_REBIND wait. Tell it when done. | ||
1334 | */ | ||
1335 | spin_lock_irq(&worker->pool->gcwq->lock); | ||
1336 | if (!--worker->idle_rebind->cnt) | ||
1337 | complete(&worker->idle_rebind->done); | ||
1338 | spin_unlock_irq(&worker->pool->gcwq->lock); | ||
1329 | } | 1339 | } |
1330 | 1340 | ||
1331 | /* | 1341 | /* |
@@ -1396,12 +1406,15 @@ retry: | |||
1396 | /* set REBIND and kick idle ones, we'll wait for these later */ | 1406 | /* set REBIND and kick idle ones, we'll wait for these later */ |
1397 | for_each_worker_pool(pool, gcwq) { | 1407 | for_each_worker_pool(pool, gcwq) { |
1398 | list_for_each_entry(worker, &pool->idle_list, entry) { | 1408 | list_for_each_entry(worker, &pool->idle_list, entry) { |
1409 | unsigned long worker_flags = worker->flags; | ||
1410 | |||
1399 | if (worker->flags & WORKER_REBIND) | 1411 | if (worker->flags & WORKER_REBIND) |
1400 | continue; | 1412 | continue; |
1401 | 1413 | ||
1402 | /* morph UNBOUND to REBIND */ | 1414 | /* morph UNBOUND to REBIND atomically */ |
1403 | worker->flags &= ~WORKER_UNBOUND; | 1415 | worker_flags &= ~WORKER_UNBOUND; |
1404 | worker->flags |= WORKER_REBIND; | 1416 | worker_flags |= WORKER_REBIND; |
1417 | ACCESS_ONCE(worker->flags) = worker_flags; | ||
1405 | 1418 | ||
1406 | idle_rebind.cnt++; | 1419 | idle_rebind.cnt++; |
1407 | worker->idle_rebind = &idle_rebind; | 1420 | worker->idle_rebind = &idle_rebind; |
@@ -1419,25 +1432,15 @@ retry: | |||
1419 | goto retry; | 1432 | goto retry; |
1420 | } | 1433 | } |
1421 | 1434 | ||
1422 | /* | 1435 | /* all idle workers are rebound, rebind busy workers */ |
1423 | * All idle workers are rebound and waiting for %WORKER_REBIND to | ||
1424 | * be cleared inside idle_worker_rebind(). Clear and release. | ||
1425 | * Clearing %WORKER_REBIND from this foreign context is safe | ||
1426 | * because these workers are still guaranteed to be idle. | ||
1427 | */ | ||
1428 | for_each_worker_pool(pool, gcwq) | ||
1429 | list_for_each_entry(worker, &pool->idle_list, entry) | ||
1430 | worker->flags &= ~WORKER_REBIND; | ||
1431 | |||
1432 | wake_up_all(&gcwq->rebind_hold); | ||
1433 | |||
1434 | /* rebind busy workers */ | ||
1435 | for_each_busy_worker(worker, i, pos, gcwq) { | 1436 | for_each_busy_worker(worker, i, pos, gcwq) { |
1436 | struct work_struct *rebind_work = &worker->rebind_work; | 1437 | struct work_struct *rebind_work = &worker->rebind_work; |
1438 | unsigned long worker_flags = worker->flags; | ||
1437 | 1439 | ||
1438 | /* morph UNBOUND to REBIND */ | 1440 | /* morph UNBOUND to REBIND atomically */ |
1439 | worker->flags &= ~WORKER_UNBOUND; | 1441 | worker_flags &= ~WORKER_UNBOUND; |
1440 | worker->flags |= WORKER_REBIND; | 1442 | worker_flags |= WORKER_REBIND; |
1443 | ACCESS_ONCE(worker->flags) = worker_flags; | ||
1441 | 1444 | ||
1442 | if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, | 1445 | if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, |
1443 | work_data_bits(rebind_work))) | 1446 | work_data_bits(rebind_work))) |
@@ -1449,6 +1452,34 @@ retry: | |||
1449 | worker->scheduled.next, | 1452 | worker->scheduled.next, |
1450 | work_color_to_flags(WORK_NO_COLOR)); | 1453 | work_color_to_flags(WORK_NO_COLOR)); |
1451 | } | 1454 | } |
1455 | |||
1456 | /* | ||
1457 | * All idle workers are rebound and waiting for %WORKER_REBIND to | ||
1458 | * be cleared inside idle_worker_rebind(). Clear and release. | ||
1459 | * Clearing %WORKER_REBIND from this foreign context is safe | ||
1460 | * because these workers are still guaranteed to be idle. | ||
1461 | * | ||
1462 | * We need to make sure all idle workers passed WORKER_REBIND wait | ||
1463 | * in idle_worker_rebind() before returning; otherwise, workers can | ||
1464 | * get stuck at the wait if hotplug cycle repeats. | ||
1465 | */ | ||
1466 | idle_rebind.cnt = 1; | ||
1467 | INIT_COMPLETION(idle_rebind.done); | ||
1468 | |||
1469 | for_each_worker_pool(pool, gcwq) { | ||
1470 | list_for_each_entry(worker, &pool->idle_list, entry) { | ||
1471 | worker->flags &= ~WORKER_REBIND; | ||
1472 | idle_rebind.cnt++; | ||
1473 | } | ||
1474 | } | ||
1475 | |||
1476 | wake_up_all(&gcwq->rebind_hold); | ||
1477 | |||
1478 | if (--idle_rebind.cnt) { | ||
1479 | spin_unlock_irq(&gcwq->lock); | ||
1480 | wait_for_completion(&idle_rebind.done); | ||
1481 | spin_lock_irq(&gcwq->lock); | ||
1482 | } | ||
1452 | } | 1483 | } |
1453 | 1484 | ||
1454 | static struct worker *alloc_worker(void) | 1485 | static struct worker *alloc_worker(void) |
@@ -1794,9 +1825,45 @@ static bool manage_workers(struct worker *worker) | |||
1794 | struct worker_pool *pool = worker->pool; | 1825 | struct worker_pool *pool = worker->pool; |
1795 | bool ret = false; | 1826 | bool ret = false; |
1796 | 1827 | ||
1797 | if (!mutex_trylock(&pool->manager_mutex)) | 1828 | if (pool->flags & POOL_MANAGING_WORKERS) |
1798 | return ret; | 1829 | return ret; |
1799 | 1830 | ||
1831 | pool->flags |= POOL_MANAGING_WORKERS; | ||
1832 | |||
1833 | /* | ||
1834 | * To simplify both worker management and CPU hotplug, hold off | ||
1835 | * management while hotplug is in progress. CPU hotplug path can't | ||
1836 | * grab %POOL_MANAGING_WORKERS to achieve this because that can | ||
1837 | * lead to idle worker depletion (all become busy thinking someone | ||
1838 | * else is managing) which in turn can result in deadlock under | ||
1839 | * extreme circumstances. Use @pool->manager_mutex to synchronize | ||
1840 | * manager against CPU hotplug. | ||
1841 | * | ||
1842 | * manager_mutex would always be free unless CPU hotplug is in | ||
1843 | * progress. trylock first without dropping @gcwq->lock. | ||
1844 | */ | ||
1845 | if (unlikely(!mutex_trylock(&pool->manager_mutex))) { | ||
1846 | spin_unlock_irq(&pool->gcwq->lock); | ||
1847 | mutex_lock(&pool->manager_mutex); | ||
1848 | /* | ||
1849 | * CPU hotplug could have happened while we were waiting | ||
1850 | * for manager_mutex. Hotplug itself can't handle us | ||
1851 | * because manager isn't either on idle or busy list, and | ||
1852 | * @gcwq's state and ours could have deviated. | ||
1853 | * | ||
1854 | * As hotplug is now excluded via manager_mutex, we can | ||
1855 | * simply try to bind. It will succeed or fail depending | ||
1856 | * on @gcwq's current state. Try it and adjust | ||
1857 | * %WORKER_UNBOUND accordingly. | ||
1858 | */ | ||
1859 | if (worker_maybe_bind_and_lock(worker)) | ||
1860 | worker->flags &= ~WORKER_UNBOUND; | ||
1861 | else | ||
1862 | worker->flags |= WORKER_UNBOUND; | ||
1863 | |||
1864 | ret = true; | ||
1865 | } | ||
1866 | |||
1800 | pool->flags &= ~POOL_MANAGE_WORKERS; | 1867 | pool->flags &= ~POOL_MANAGE_WORKERS; |
1801 | 1868 | ||
1802 | /* | 1869 | /* |
@@ -1806,6 +1873,7 @@ static bool manage_workers(struct worker *worker) | |||
1806 | ret |= maybe_destroy_workers(pool); | 1873 | ret |= maybe_destroy_workers(pool); |
1807 | ret |= maybe_create_worker(pool); | 1874 | ret |= maybe_create_worker(pool); |
1808 | 1875 | ||
1876 | pool->flags &= ~POOL_MANAGING_WORKERS; | ||
1809 | mutex_unlock(&pool->manager_mutex); | 1877 | mutex_unlock(&pool->manager_mutex); |
1810 | return ret; | 1878 | return ret; |
1811 | } | 1879 | } |