author     Rafael J. Wysocki <rafael.j.wysocki@intel.com>   2014-05-29 07:28:14 -0400
committer  Rafael J. Wysocki <rafael.j.wysocki@intel.com>   2014-05-29 07:28:14 -0400
commit     f70977fbd6359efb17bb93adac37b9f226d910a7 (patch)
tree       d36c3ace7ae638ec447b7cce39617d950d121265 /kernel
parent     2807bd18cc60ec471917b5158e98e4d7b7e030fb (diff)
parent     e6f8a4d60b905eae1a20cbb0c72c67b26b2f02fd (diff)

Merge back earlier ACPI thermal material.
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/events/core.c        | 174
-rw-r--r--  kernel/sched/core.c         |  15
-rw-r--r--  kernel/sched/cpudeadline.c  |   4
-rw-r--r--  kernel/sched/cpupri.c       |   3
-rw-r--r--  kernel/sched/cputime.c      |  32
-rw-r--r--  kernel/sched/deadline.c     |   5
-rw-r--r--  kernel/sched/fair.c         |  16
7 files changed, 134 insertions(+), 115 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f83a71a3e46d..440eefc67397 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1443,6 +1443,11 @@ group_sched_out(struct perf_event *group_event,
         cpuctx->exclusive = 0;
 }
 
+struct remove_event {
+        struct perf_event *event;
+        bool detach_group;
+};
+
 /*
  * Cross CPU call to remove a performance event
  *
@@ -1451,12 +1456,15 @@ group_sched_out(struct perf_event *group_event,
  */
 static int __perf_remove_from_context(void *info)
 {
-        struct perf_event *event = info;
+        struct remove_event *re = info;
+        struct perf_event *event = re->event;
         struct perf_event_context *ctx = event->ctx;
         struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 
         raw_spin_lock(&ctx->lock);
         event_sched_out(event, cpuctx, ctx);
+        if (re->detach_group)
+                perf_group_detach(event);
         list_del_event(event, ctx);
         if (!ctx->nr_events && cpuctx->task_ctx == ctx) {
                 ctx->is_active = 0;
@@ -1481,10 +1489,14 @@ static int __perf_remove_from_context(void *info)
  * When called from perf_event_exit_task, it's OK because the
  * context has been detached from its task.
  */
-static void perf_remove_from_context(struct perf_event *event)
+static void perf_remove_from_context(struct perf_event *event, bool detach_group)
 {
         struct perf_event_context *ctx = event->ctx;
         struct task_struct *task = ctx->task;
+        struct remove_event re = {
+                .event = event,
+                .detach_group = detach_group,
+        };
 
         lockdep_assert_held(&ctx->mutex);
 
@@ -1493,12 +1505,12 @@ static void perf_remove_from_context(struct perf_event *event)
                  * Per cpu events are removed via an smp call and
                  * the removal is always successful.
                  */
-                cpu_function_call(event->cpu, __perf_remove_from_context, event);
+                cpu_function_call(event->cpu, __perf_remove_from_context, &re);
                 return;
         }
 
 retry:
-        if (!task_function_call(task, __perf_remove_from_context, event))
+        if (!task_function_call(task, __perf_remove_from_context, &re))
                 return;
 
         raw_spin_lock_irq(&ctx->lock);
@@ -1515,6 +1527,8 @@ retry:
          * Since the task isn't running, its safe to remove the event, us
          * holding the ctx->lock ensures the task won't get scheduled in.
          */
+        if (detach_group)
+                perf_group_detach(event);
         list_del_event(event, ctx);
         raw_spin_unlock_irq(&ctx->lock);
 }
@@ -3178,7 +3192,8 @@ static void free_event_rcu(struct rcu_head *head)
 }
 
 static void ring_buffer_put(struct ring_buffer *rb);
-static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb);
+static void ring_buffer_attach(struct perf_event *event,
+                               struct ring_buffer *rb);
 
 static void unaccount_event_cpu(struct perf_event *event, int cpu)
 {
@@ -3238,8 +3253,6 @@ static void free_event(struct perf_event *event)
         unaccount_event(event);
 
         if (event->rb) {
-                struct ring_buffer *rb;
-
                 /*
                  * Can happen when we close an event with re-directed output.
                  *
@@ -3247,12 +3260,7 @@ static void free_event(struct perf_event *event)
                  * over us; possibly making our ring_buffer_put() the last.
                  */
                 mutex_lock(&event->mmap_mutex);
-                rb = event->rb;
-                if (rb) {
-                        rcu_assign_pointer(event->rb, NULL);
-                        ring_buffer_detach(event, rb);
-                        ring_buffer_put(rb); /* could be last */
-                }
+                ring_buffer_attach(event, NULL);
                 mutex_unlock(&event->mmap_mutex);
         }
 
@@ -3281,10 +3289,7 @@ int perf_event_release_kernel(struct perf_event *event)
          * to trigger the AB-BA case.
          */
         mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
-        raw_spin_lock_irq(&ctx->lock);
-        perf_group_detach(event);
-        raw_spin_unlock_irq(&ctx->lock);
-        perf_remove_from_context(event);
+        perf_remove_from_context(event, true);
         mutex_unlock(&ctx->mutex);
 
         free_event(event);
@@ -3839,28 +3844,47 @@ unlock:
 static void ring_buffer_attach(struct perf_event *event,
                                struct ring_buffer *rb)
 {
+        struct ring_buffer *old_rb = NULL;
         unsigned long flags;
 
-        if (!list_empty(&event->rb_entry))
-                return;
+        if (event->rb) {
+                /*
+                 * Should be impossible, we set this when removing
+                 * event->rb_entry and wait/clear when adding event->rb_entry.
+                 */
+                WARN_ON_ONCE(event->rcu_pending);
 
-        spin_lock_irqsave(&rb->event_lock, flags);
-        if (list_empty(&event->rb_entry))
-                list_add(&event->rb_entry, &rb->event_list);
-        spin_unlock_irqrestore(&rb->event_lock, flags);
-}
+                old_rb = event->rb;
+                event->rcu_batches = get_state_synchronize_rcu();
+                event->rcu_pending = 1;
 
-static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb)
-{
-        unsigned long flags;
+                spin_lock_irqsave(&old_rb->event_lock, flags);
+                list_del_rcu(&event->rb_entry);
+                spin_unlock_irqrestore(&old_rb->event_lock, flags);
+        }
 
-        if (list_empty(&event->rb_entry))
-                return;
+        if (event->rcu_pending && rb) {
+                cond_synchronize_rcu(event->rcu_batches);
+                event->rcu_pending = 0;
+        }
+
+        if (rb) {
+                spin_lock_irqsave(&rb->event_lock, flags);
+                list_add_rcu(&event->rb_entry, &rb->event_list);
+                spin_unlock_irqrestore(&rb->event_lock, flags);
+        }
+
+        rcu_assign_pointer(event->rb, rb);
 
-        spin_lock_irqsave(&rb->event_lock, flags);
-        list_del_init(&event->rb_entry);
-        wake_up_all(&event->waitq);
-        spin_unlock_irqrestore(&rb->event_lock, flags);
+        if (old_rb) {
+                ring_buffer_put(old_rb);
+                /*
+                 * Since we detached before setting the new rb, so that we
+                 * could attach the new rb, we could have missed a wakeup.
+                 * Provide it now.
+                 */
+                wake_up_all(&event->waitq);
+        }
 }
 
 static void ring_buffer_wakeup(struct perf_event *event)
@@ -3929,7 +3953,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 {
         struct perf_event *event = vma->vm_file->private_data;
 
-        struct ring_buffer *rb = event->rb;
+        struct ring_buffer *rb = ring_buffer_get(event);
         struct user_struct *mmap_user = rb->mmap_user;
         int mmap_locked = rb->mmap_locked;
         unsigned long size = perf_data_size(rb);
@@ -3937,18 +3961,14 @@ static void perf_mmap_close(struct vm_area_struct *vma)
         atomic_dec(&rb->mmap_count);
 
         if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
-                return;
+                goto out_put;
 
-        /* Detach current event from the buffer. */
-        rcu_assign_pointer(event->rb, NULL);
-        ring_buffer_detach(event, rb);
+        ring_buffer_attach(event, NULL);
         mutex_unlock(&event->mmap_mutex);
 
         /* If there's still other mmap()s of this buffer, we're done. */
-        if (atomic_read(&rb->mmap_count)) {
-                ring_buffer_put(rb); /* can't be last */
-                return;
-        }
+        if (atomic_read(&rb->mmap_count))
+                goto out_put;
 
         /*
          * No other mmap()s, detach from all other events that might redirect
@@ -3978,11 +3998,9 @@ again:
                  * still restart the iteration to make sure we're not now
                  * iterating the wrong list.
                  */
-                if (event->rb == rb) {
-                        rcu_assign_pointer(event->rb, NULL);
-                        ring_buffer_detach(event, rb);
-                        ring_buffer_put(rb); /* can't be last, we still have one */
-                }
+                if (event->rb == rb)
+                        ring_buffer_attach(event, NULL);
+
                 mutex_unlock(&event->mmap_mutex);
                 put_event(event);
 
@@ -4007,6 +4025,7 @@ again:
         vma->vm_mm->pinned_vm -= mmap_locked;
         free_uid(mmap_user);
 
+out_put:
         ring_buffer_put(rb); /* could be last */
 }
 
@@ -4124,7 +4143,6 @@ again:
         vma->vm_mm->pinned_vm += extra;
 
         ring_buffer_attach(event, rb);
-        rcu_assign_pointer(event->rb, rb);
 
         perf_event_init_userpage(event);
         perf_event_update_userpage(event);
@@ -5408,6 +5426,9 @@ struct swevent_htable {
 
         /* Recursion avoidance in each contexts */
         int recursion[PERF_NR_CONTEXTS];
+
+        /* Keeps track of cpu being initialized/exited */
+        bool online;
 };
 
 static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
@@ -5654,8 +5675,14 @@ static int perf_swevent_add(struct perf_event *event, int flags)
         hwc->state = !(flags & PERF_EF_START);
 
         head = find_swevent_head(swhash, event);
-        if (WARN_ON_ONCE(!head))
+        if (!head) {
+                /*
+                 * We can race with cpu hotplug code. Do not
+                 * WARN if the cpu just got unplugged.
+                 */
+                WARN_ON_ONCE(swhash->online);
                 return -EINVAL;
+        }
 
         hlist_add_head_rcu(&event->hlist_entry, head);
 
@@ -6914,7 +6941,7 @@ err_size:
 static int
 perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 {
-        struct ring_buffer *rb = NULL, *old_rb = NULL;
+        struct ring_buffer *rb = NULL;
         int ret = -EINVAL;
 
         if (!output_event)
@@ -6942,8 +6969,6 @@ set:
         if (atomic_read(&event->mmap_count))
                 goto unlock;
 
-        old_rb = event->rb;
-
         if (output_event) {
                 /* get the rb we want to redirect to */
                 rb = ring_buffer_get(output_event);
@@ -6951,23 +6976,7 @@ set:
                         goto unlock;
         }
 
-        if (old_rb)
-                ring_buffer_detach(event, old_rb);
-
-        if (rb)
-                ring_buffer_attach(event, rb);
-
-        rcu_assign_pointer(event->rb, rb);
-
-        if (old_rb) {
-                ring_buffer_put(old_rb);
-                /*
-                 * Since we detached before setting the new rb, so that we
-                 * could attach the new rb, we could have missed a wakeup.
-                 * Provide it now.
-                 */
-                wake_up_all(&event->waitq);
-        }
+        ring_buffer_attach(event, rb);
 
         ret = 0;
 unlock:
@@ -7018,6 +7027,9 @@ SYSCALL_DEFINE5(perf_event_open,
         if (attr.freq) {
                 if (attr.sample_freq > sysctl_perf_event_sample_rate)
                         return -EINVAL;
+        } else {
+                if (attr.sample_period & (1ULL << 63))
+                        return -EINVAL;
         }
 
         /*
@@ -7165,7 +7177,7 @@ SYSCALL_DEFINE5(perf_event_open,
                 struct perf_event_context *gctx = group_leader->ctx;
 
                 mutex_lock(&gctx->mutex);
-                perf_remove_from_context(group_leader);
+                perf_remove_from_context(group_leader, false);
 
                 /*
                  * Removing from the context ends up with disabled
@@ -7175,7 +7187,7 @@ SYSCALL_DEFINE5(perf_event_open,
                 perf_event__state_init(group_leader);
                 list_for_each_entry(sibling, &group_leader->sibling_list,
                                     group_entry) {
-                        perf_remove_from_context(sibling);
+                        perf_remove_from_context(sibling, false);
                         perf_event__state_init(sibling);
                         put_ctx(gctx);
                 }
@@ -7305,7 +7317,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
         mutex_lock(&src_ctx->mutex);
         list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
                                  event_entry) {
-                perf_remove_from_context(event);
+                perf_remove_from_context(event, false);
                 unaccount_event_cpu(event, src_cpu);
                 put_ctx(src_ctx);
                 list_add(&event->migrate_entry, &events);
@@ -7367,13 +7379,7 @@ __perf_event_exit_task(struct perf_event *child_event,
                        struct perf_event_context *child_ctx,
                        struct task_struct *child)
 {
-        if (child_event->parent) {
-                raw_spin_lock_irq(&child_ctx->lock);
-                perf_group_detach(child_event);
-                raw_spin_unlock_irq(&child_ctx->lock);
-        }
-
-        perf_remove_from_context(child_event);
+        perf_remove_from_context(child_event, !!child_event->parent);
 
         /*
          * It can happen that the parent exits first, and has events
@@ -7724,6 +7730,8 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
          * swapped under us.
          */
         parent_ctx = perf_pin_task_context(parent, ctxn);
+        if (!parent_ctx)
+                return 0;
 
         /*
          * No need to check if parent_ctx != NULL here; since we saw
@@ -7835,6 +7843,7 @@ static void perf_event_init_cpu(int cpu)
         struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
         mutex_lock(&swhash->hlist_mutex);
+        swhash->online = true;
         if (swhash->hlist_refcount > 0) {
                 struct swevent_hlist *hlist;
 
@@ -7857,14 +7866,14 @@ static void perf_pmu_rotate_stop(struct pmu *pmu)
 
 static void __perf_event_exit_context(void *__info)
 {
+        struct remove_event re = { .detach_group = false };
         struct perf_event_context *ctx = __info;
-        struct perf_event *event;
 
         perf_pmu_rotate_stop(ctx->pmu);
 
         rcu_read_lock();
-        list_for_each_entry_rcu(event, &ctx->event_list, event_entry)
-                __perf_remove_from_context(event);
+        list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry)
+                __perf_remove_from_context(&re);
         rcu_read_unlock();
 }
 
@@ -7892,6 +7901,7 @@ static void perf_event_exit_cpu(int cpu)
         perf_event_exit_cpu_context(cpu);
 
         mutex_lock(&swhash->hlist_mutex);
+        swhash->online = false;
         swevent_hlist_release(swhash);
         mutex_unlock(&swhash->hlist_mutex);
 }
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d9d8ece46a15..204d3d281809 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2592,8 +2592,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev)
         if (likely(prev->sched_class == class &&
                    rq->nr_running == rq->cfs.h_nr_running)) {
                 p = fair_sched_class.pick_next_task(rq, prev);
-                if (likely(p && p != RETRY_TASK))
-                        return p;
+                if (unlikely(p == RETRY_TASK))
+                        goto again;
+
+                /* assumes fair_sched_class->next == idle_sched_class */
+                if (unlikely(!p))
+                        p = idle_sched_class.pick_next_task(rq, prev);
+
+                return p;
         }
 
 again:
@@ -3124,6 +3130,7 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
         dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
         dl_se->dl_throttled = 0;
         dl_se->dl_new = 1;
+        dl_se->dl_yielded = 0;
 }
 
 static void __setscheduler_params(struct task_struct *p,
@@ -3639,6 +3646,7 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
  * sys_sched_setattr - same as above, but with extended sched_attr
  * @pid: the pid in question.
  * @uattr: structure containing the extended parameters.
+ * @flags: for future extension.
  */
 SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr,
                 unsigned int, flags)
@@ -3783,6 +3791,7 @@ err_size:
  * @pid: the pid in question.
  * @uattr: structure containing the extended parameters.
  * @size: sizeof(attr) for fwd/bwd comp.
+ * @flags: for future extension.
  */
 SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
                 unsigned int, size, unsigned int, flags)
@@ -6017,6 +6026,8 @@ sd_numa_init(struct sched_domain_topology_level *tl, int cpu)
                 ,
                 .last_balance = jiffies,
                 .balance_interval = sd_weight,
+                .max_newidle_lb_cost = 0,
+                .next_decay_max_lb_cost = jiffies,
         };
         SD_INIT_NAME(sd, NUMA);
         sd->private = &tl->data;
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 5b9bb42b2d47..ab001b5d5048 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -210,7 +210,5 @@ int cpudl_init(struct cpudl *cp)
  */
 void cpudl_cleanup(struct cpudl *cp)
 {
-        /*
-         * nothing to do for the moment
-         */
+        free_cpumask_var(cp->free_cpus);
 }
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 8b836b376d91..3031bac8aa3e 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -70,8 +70,7 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
         int idx = 0;
         int task_pri = convert_prio(p->prio);
 
-        if (task_pri >= MAX_RT_PRIO)
-                return 0;
+        BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES);
 
         for (idx = 0; idx < task_pri; idx++) {
                 struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index a95097cb4591..72fdf06ef865 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -332,50 +332,50 @@ out:
  * softirq as those do not count in task exec_runtime any more.
  */
 static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
-                                         struct rq *rq)
+                                         struct rq *rq, int ticks)
 {
-        cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
+        cputime_t scaled = cputime_to_scaled(cputime_one_jiffy);
+        u64 cputime = (__force u64) cputime_one_jiffy;
         u64 *cpustat = kcpustat_this_cpu->cpustat;
 
         if (steal_account_process_tick())
                 return;
 
+        cputime *= ticks;
+        scaled *= ticks;
+
         if (irqtime_account_hi_update()) {
-                cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy;
+                cpustat[CPUTIME_IRQ] += cputime;
         } else if (irqtime_account_si_update()) {
-                cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy;
+                cpustat[CPUTIME_SOFTIRQ] += cputime;
         } else if (this_cpu_ksoftirqd() == p) {
                 /*
                  * ksoftirqd time do not get accounted in cpu_softirq_time.
                  * So, we have to handle it separately here.
                  * Also, p->stime needs to be updated for ksoftirqd.
                  */
-                __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
-                                      CPUTIME_SOFTIRQ);
+                __account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ);
         } else if (user_tick) {
-                account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
+                account_user_time(p, cputime, scaled);
         } else if (p == rq->idle) {
-                account_idle_time(cputime_one_jiffy);
+                account_idle_time(cputime);
         } else if (p->flags & PF_VCPU) { /* System time or guest time */
-                account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
+                account_guest_time(p, cputime, scaled);
         } else {
-                __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
-                                      CPUTIME_SYSTEM);
+                __account_system_time(p, cputime, scaled, CPUTIME_SYSTEM);
         }
 }
 
 static void irqtime_account_idle_ticks(int ticks)
 {
-        int i;
         struct rq *rq = this_rq();
 
-        for (i = 0; i < ticks; i++)
-                irqtime_account_process_tick(current, 0, rq);
+        irqtime_account_process_tick(current, 0, rq, ticks);
 }
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
 static inline void irqtime_account_idle_ticks(int ticks) {}
 static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
-                                                struct rq *rq) {}
+                                                struct rq *rq, int nr_ticks) {}
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 
 /*
@@ -464,7 +464,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
                 return;
 
         if (sched_clock_irqtime) {
-                irqtime_account_process_tick(p, user_tick, rq);
+                irqtime_account_process_tick(p, user_tick, rq, 1);
                 return;
         }
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index b08095786cb8..800e99b99075 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -528,6 +528,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
         sched_clock_tick();
         update_rq_clock(rq);
         dl_se->dl_throttled = 0;
+        dl_se->dl_yielded = 0;
         if (p->on_rq) {
                 enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
                 if (task_has_dl_policy(rq->curr))
@@ -893,10 +894,10 @@ static void yield_task_dl(struct rq *rq)
          * We make the task go to sleep until its current deadline by
          * forcing its runtime to zero. This way, update_curr_dl() stops
          * it and the bandwidth timer will wake it up and will give it
-         * new scheduling parameters (thanks to dl_new=1).
+         * new scheduling parameters (thanks to dl_yielded=1).
          */
         if (p->dl.runtime > 0) {
-                rq->curr->dl.dl_new = 1;
+                rq->curr->dl.dl_yielded = 1;
                 p->dl.runtime = 0;
         }
         update_curr_dl(rq);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7570dd969c28..0fdb96de81a5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6653,6 +6653,7 @@ static int idle_balance(struct rq *this_rq)
         int this_cpu = this_rq->cpu;
 
         idle_enter_fair(this_rq);
+
         /*
          * We must set idle_stamp _before_ calling idle_balance(), such that we
          * measure the duration of idle_balance() as idle time.
@@ -6705,14 +6706,16 @@ static int idle_balance(struct rq *this_rq)
 
         raw_spin_lock(&this_rq->lock);
 
+        if (curr_cost > this_rq->max_idle_balance_cost)
+                this_rq->max_idle_balance_cost = curr_cost;
+
         /*
-         * While browsing the domains, we released the rq lock.
-         * A task could have be enqueued in the meantime
+         * While browsing the domains, we released the rq lock, a task could
+         * have been enqueued in the meantime. Since we're not going idle,
+         * pretend we pulled a task.
          */
-        if (this_rq->cfs.h_nr_running && !pulled_task) {
+        if (this_rq->cfs.h_nr_running && !pulled_task)
                 pulled_task = 1;
-                goto out;
-        }
 
         if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
                 /*
@@ -6722,9 +6725,6 @@ static int idle_balance(struct rq *this_rq)
                 this_rq->next_balance = next_balance;
         }
 
-        if (curr_cost > this_rq->max_idle_balance_cost)
-                this_rq->max_idle_balance_cost = curr_cost;
-
 out:
         /* Is there a task of a high priority class? */
         if (this_rq->nr_running != this_rq->cfs.h_nr_running &&