about | summary | refs | log | tree | commit | diff | stats
path: root/kernel
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2012-06-05 12:47:15 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-06-05 12:47:15 -0400
commit 0b3e9f3f21c42d064f5f4088df4088e3d55755eb (patch)
tree b153a9346bf21a64a9e5c5f3f440f00b591fd269 /kernel
parent 99becf1328d8d71dd6f4480e3591d7dcdb389e57 (diff)
parent 6a4c96eef42f835734a82c6b512abf9881b7c55d (diff)
Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar.

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched: Remove NULL assignment of dattr_cur
  sched: Remove the last NULL entry from sched_feat_names
  sched: Make sched_feat_names const
  sched/rt: Fix SCHED_RR across cgroups
  sched: Move nr_cpus_allowed out of 'struct sched_rt_entity'
  sched: Make sure to not re-read variables after validation
  sched: Fix SD_OVERLAP
  sched: Don't try allocating memory from offline nodes
  sched/nohz: Fix rq->cpu_load calculations some more
  sched/x86: Use cpu_llc_shared_mask(cpu) for coregroup_mask
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/sched/core.c 68
-rw-r--r-- kernel/sched/fair.c 42
-rw-r--r-- kernel/sched/rt.c 51
-rw-r--r-- kernel/time/tick-sched.c 1
4 files changed, 115 insertions, 47 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 39eb6011bc38..c46958e26121 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -142,9 +142,8 @@ const_debug unsigned int sysctl_sched_features =
142#define SCHED_FEAT(name, enabled) \ 142#define SCHED_FEAT(name, enabled) \
143 #name , 143 #name ,
144 144
145static __read_mostly char *sched_feat_names[] = { 145static const char * const sched_feat_names[] = {
146#include "features.h" 146#include "features.h"
147 NULL
148}; 147};
149 148
150#undef SCHED_FEAT 149#undef SCHED_FEAT
@@ -2517,25 +2516,32 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
2517 sched_avg_update(this_rq); 2516 sched_avg_update(this_rq);
2518} 2517}
2519 2518
2519#ifdef CONFIG_NO_HZ
2520/*
2521 * There is no sane way to deal with nohz on smp when using jiffies because the
2522 * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
2523 * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
2524 *
2525 * Therefore we cannot use the delta approach from the regular tick since that
2526 * would seriously skew the load calculation. However we'll make do for those
2527 * updates happening while idle (nohz_idle_balance) or coming out of idle
2528 * (tick_nohz_idle_exit).
2529 *
2530 * This means we might still be one tick off for nohz periods.
2531 */
2532
2520/* 2533/*
2521 * Called from nohz_idle_balance() to update the load ratings before doing the 2534 * Called from nohz_idle_balance() to update the load ratings before doing the
2522 * idle balance. 2535 * idle balance.
2523 */ 2536 */
2524void update_idle_cpu_load(struct rq *this_rq) 2537void update_idle_cpu_load(struct rq *this_rq)
2525{ 2538{
2526 unsigned long curr_jiffies = jiffies; 2539 unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
2527 unsigned long load = this_rq->load.weight; 2540 unsigned long load = this_rq->load.weight;
2528 unsigned long pending_updates; 2541 unsigned long pending_updates;
2529 2542
2530 /* 2543 /*
2531 * Bloody broken means of dealing with nohz, but better than nothing.. 2544 * bail if there's load or we're actually up-to-date.
2532 * jiffies is updated by one cpu, another cpu can drift wrt the jiffy
2533 * update and see 0 difference the one time and 2 the next, even though
2534 * we ticked at roughtly the same rate.
2535 *
2536 * Hence we only use this from nohz_idle_balance() and skip this
2537 * nonsense when called from the scheduler_tick() since that's
2538 * guaranteed a stable rate.
2539 */ 2545 */
2540 if (load || curr_jiffies == this_rq->last_load_update_tick) 2546 if (load || curr_jiffies == this_rq->last_load_update_tick)
2541 return; 2547 return;
@@ -2547,12 +2553,38 @@ void update_idle_cpu_load(struct rq *this_rq)
2547} 2553}
2548 2554
2549/* 2555/*
2556 * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
2557 */
2558void update_cpu_load_nohz(void)
2559{
2560 struct rq *this_rq = this_rq();
2561 unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
2562 unsigned long pending_updates;
2563
2564 if (curr_jiffies == this_rq->last_load_update_tick)
2565 return;
2566
2567 raw_spin_lock(&this_rq->lock);
2568 pending_updates = curr_jiffies - this_rq->last_load_update_tick;
2569 if (pending_updates) {
2570 this_rq->last_load_update_tick = curr_jiffies;
2571 /*
2572 * We were idle, this means load 0, the current load might be
2573 * !0 due to remote wakeups and the sort.
2574 */
2575 __update_cpu_load(this_rq, 0, pending_updates);
2576 }
2577 raw_spin_unlock(&this_rq->lock);
2578}
2579#endif /* CONFIG_NO_HZ */
2580
2581/*
2550 * Called from scheduler_tick() 2582 * Called from scheduler_tick()
2551 */ 2583 */
2552static void update_cpu_load_active(struct rq *this_rq) 2584static void update_cpu_load_active(struct rq *this_rq)
2553{ 2585{
2554 /* 2586 /*
2555 * See the mess in update_idle_cpu_load(). 2587 * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
2556 */ 2588 */
2557 this_rq->last_load_update_tick = jiffies; 2589 this_rq->last_load_update_tick = jiffies;
2558 __update_cpu_load(this_rq, this_rq->load.weight, 1); 2590 __update_cpu_load(this_rq, this_rq->load.weight, 1);
@@ -4982,7 +5014,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
4982 p->sched_class->set_cpus_allowed(p, new_mask); 5014 p->sched_class->set_cpus_allowed(p, new_mask);
4983 5015
4984 cpumask_copy(&p->cpus_allowed, new_mask); 5016 cpumask_copy(&p->cpus_allowed, new_mask);
4985 p->rt.nr_cpus_allowed = cpumask_weight(new_mask); 5017 p->nr_cpus_allowed = cpumask_weight(new_mask);
4986} 5018}
4987 5019
4988/* 5020/*
@@ -5997,11 +6029,14 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
5997 6029
5998 cpumask_or(covered, covered, sg_span); 6030 cpumask_or(covered, covered, sg_span);
5999 6031
6000 sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span)); 6032 sg->sgp = *per_cpu_ptr(sdd->sgp, i);
6001 atomic_inc(&sg->sgp->ref); 6033 atomic_inc(&sg->sgp->ref);
6002 6034
6003 if (cpumask_test_cpu(cpu, sg_span)) 6035 if ((!groups && cpumask_test_cpu(cpu, sg_span)) ||
6036 cpumask_first(sg_span) == cpu) {
6037 WARN_ON_ONCE(!cpumask_test_cpu(cpu, sg_span));
6004 groups = sg; 6038 groups = sg;
6039 }
6005 6040
6006 if (!first) 6041 if (!first)
6007 first = sg; 6042 first = sg;
@@ -6403,7 +6438,7 @@ static void sched_init_numa(void)
6403 return; 6438 return;
6404 6439
6405 for (j = 0; j < nr_node_ids; j++) { 6440 for (j = 0; j < nr_node_ids; j++) {
6406 struct cpumask *mask = kzalloc_node(cpumask_size(), GFP_KERNEL, j); 6441 struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL);
6407 if (!mask) 6442 if (!mask)
6408 return; 6443 return;
6409 6444
@@ -6691,7 +6726,6 @@ static int init_sched_domains(const struct cpumask *cpu_map)
6691 if (!doms_cur) 6726 if (!doms_cur)
6692 doms_cur = &fallback_doms; 6727 doms_cur = &fallback_doms;
6693 cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); 6728 cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
6694 dattr_cur = NULL;
6695 err = build_sched_domains(doms_cur[0], NULL); 6729 err = build_sched_domains(doms_cur[0], NULL);
6696 register_sched_domain_sysctl(); 6730 register_sched_domain_sysctl();
6697 6731
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 940e6d17cf96..b2a2d236f27b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2703,7 +2703,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
2703 int want_sd = 1; 2703 int want_sd = 1;
2704 int sync = wake_flags & WF_SYNC; 2704 int sync = wake_flags & WF_SYNC;
2705 2705
2706 if (p->rt.nr_cpus_allowed == 1) 2706 if (p->nr_cpus_allowed == 1)
2707 return prev_cpu; 2707 return prev_cpu;
2708 2708
2709 if (sd_flag & SD_BALANCE_WAKE) { 2709 if (sd_flag & SD_BALANCE_WAKE) {
@@ -3503,15 +3503,22 @@ unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
3503unsigned long scale_rt_power(int cpu) 3503unsigned long scale_rt_power(int cpu)
3504{ 3504{
3505 struct rq *rq = cpu_rq(cpu); 3505 struct rq *rq = cpu_rq(cpu);
3506 u64 total, available; 3506 u64 total, available, age_stamp, avg;
3507 3507
3508 total = sched_avg_period() + (rq->clock - rq->age_stamp); 3508 /*
3509 * Since we're reading these variables without serialization make sure
3510 * we read them once before doing sanity checks on them.
3511 */
3512 age_stamp = ACCESS_ONCE(rq->age_stamp);
3513 avg = ACCESS_ONCE(rq->rt_avg);
3514
3515 total = sched_avg_period() + (rq->clock - age_stamp);
3509 3516
3510 if (unlikely(total < rq->rt_avg)) { 3517 if (unlikely(total < avg)) {
3511 /* Ensures that power won't end up being negative */ 3518 /* Ensures that power won't end up being negative */
3512 available = 0; 3519 available = 0;
3513 } else { 3520 } else {
3514 available = total - rq->rt_avg; 3521 available = total - avg;
3515 } 3522 }
3516 3523
3517 if (unlikely((s64)total < SCHED_POWER_SCALE)) 3524 if (unlikely((s64)total < SCHED_POWER_SCALE))
@@ -3574,11 +3581,26 @@ void update_group_power(struct sched_domain *sd, int cpu)
3574 3581
3575 power = 0; 3582 power = 0;
3576 3583
3577 group = child->groups; 3584 if (child->flags & SD_OVERLAP) {
3578 do { 3585 /*
3579 power += group->sgp->power; 3586 * SD_OVERLAP domains cannot assume that child groups
3580 group = group->next; 3587 * span the current group.
3581 } while (group != child->groups); 3588 */
3589
3590 for_each_cpu(cpu, sched_group_cpus(sdg))
3591 power += power_of(cpu);
3592 } else {
3593 /*
3594 * !SD_OVERLAP domains can assume that child groups
3595 * span the current group.
3596 */
3597
3598 group = child->groups;
3599 do {
3600 power += group->sgp->power;
3601 group = group->next;
3602 } while (group != child->groups);
3603 }
3582 3604
3583 sdg->sgp->power = power; 3605 sdg->sgp->power = power;
3584} 3606}
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index c5565c3c515f..2a4e8dffbd6b 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -274,13 +274,16 @@ static void update_rt_migration(struct rt_rq *rt_rq)
274 274
275static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 275static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
276{ 276{
277 struct task_struct *p;
278
277 if (!rt_entity_is_task(rt_se)) 279 if (!rt_entity_is_task(rt_se))
278 return; 280 return;
279 281
282 p = rt_task_of(rt_se);
280 rt_rq = &rq_of_rt_rq(rt_rq)->rt; 283 rt_rq = &rq_of_rt_rq(rt_rq)->rt;
281 284
282 rt_rq->rt_nr_total++; 285 rt_rq->rt_nr_total++;
283 if (rt_se->nr_cpus_allowed > 1) 286 if (p->nr_cpus_allowed > 1)
284 rt_rq->rt_nr_migratory++; 287 rt_rq->rt_nr_migratory++;
285 288
286 update_rt_migration(rt_rq); 289 update_rt_migration(rt_rq);
@@ -288,13 +291,16 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
288 291
289static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 292static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
290{ 293{
294 struct task_struct *p;
295
291 if (!rt_entity_is_task(rt_se)) 296 if (!rt_entity_is_task(rt_se))
292 return; 297 return;
293 298
299 p = rt_task_of(rt_se);
294 rt_rq = &rq_of_rt_rq(rt_rq)->rt; 300 rt_rq = &rq_of_rt_rq(rt_rq)->rt;
295 301
296 rt_rq->rt_nr_total--; 302 rt_rq->rt_nr_total--;
297 if (rt_se->nr_cpus_allowed > 1) 303 if (p->nr_cpus_allowed > 1)
298 rt_rq->rt_nr_migratory--; 304 rt_rq->rt_nr_migratory--;
299 305
300 update_rt_migration(rt_rq); 306 update_rt_migration(rt_rq);
@@ -1161,7 +1167,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
1161 1167
1162 enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD); 1168 enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
1163 1169
1164 if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1) 1170 if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
1165 enqueue_pushable_task(rq, p); 1171 enqueue_pushable_task(rq, p);
1166 1172
1167 inc_nr_running(rq); 1173 inc_nr_running(rq);
@@ -1225,7 +1231,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
1225 1231
1226 cpu = task_cpu(p); 1232 cpu = task_cpu(p);
1227 1233
1228 if (p->rt.nr_cpus_allowed == 1) 1234 if (p->nr_cpus_allowed == 1)
1229 goto out; 1235 goto out;
1230 1236
1231 /* For anything but wake ups, just return the task_cpu */ 1237 /* For anything but wake ups, just return the task_cpu */
@@ -1260,9 +1266,9 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
1260 * will have to sort it out. 1266 * will have to sort it out.
1261 */ 1267 */
1262 if (curr && unlikely(rt_task(curr)) && 1268 if (curr && unlikely(rt_task(curr)) &&
1263 (curr->rt.nr_cpus_allowed < 2 || 1269 (curr->nr_cpus_allowed < 2 ||
1264 curr->prio <= p->prio) && 1270 curr->prio <= p->prio) &&
1265 (p->rt.nr_cpus_allowed > 1)) { 1271 (p->nr_cpus_allowed > 1)) {
1266 int target = find_lowest_rq(p); 1272 int target = find_lowest_rq(p);
1267 1273
1268 if (target != -1) 1274 if (target != -1)
@@ -1276,10 +1282,10 @@ out:
1276 1282
1277static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) 1283static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
1278{ 1284{
1279 if (rq->curr->rt.nr_cpus_allowed == 1) 1285 if (rq->curr->nr_cpus_allowed == 1)
1280 return; 1286 return;
1281 1287
1282 if (p->rt.nr_cpus_allowed != 1 1288 if (p->nr_cpus_allowed != 1
1283 && cpupri_find(&rq->rd->cpupri, p, NULL)) 1289 && cpupri_find(&rq->rd->cpupri, p, NULL))
1284 return; 1290 return;
1285 1291
@@ -1395,7 +1401,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
1395 * The previous task needs to be made eligible for pushing 1401 * The previous task needs to be made eligible for pushing
1396 * if it is still active 1402 * if it is still active
1397 */ 1403 */
1398 if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1) 1404 if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
1399 enqueue_pushable_task(rq, p); 1405 enqueue_pushable_task(rq, p);
1400} 1406}
1401 1407
@@ -1408,7 +1414,7 @@ static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
1408{ 1414{
1409 if (!task_running(rq, p) && 1415 if (!task_running(rq, p) &&
1410 (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) && 1416 (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
1411 (p->rt.nr_cpus_allowed > 1)) 1417 (p->nr_cpus_allowed > 1))
1412 return 1; 1418 return 1;
1413 return 0; 1419 return 0;
1414} 1420}
@@ -1464,7 +1470,7 @@ static int find_lowest_rq(struct task_struct *task)
1464 if (unlikely(!lowest_mask)) 1470 if (unlikely(!lowest_mask))
1465 return -1; 1471 return -1;
1466 1472
1467 if (task->rt.nr_cpus_allowed == 1) 1473 if (task->nr_cpus_allowed == 1)
1468 return -1; /* No other targets possible */ 1474 return -1; /* No other targets possible */
1469 1475
1470 if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) 1476 if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
@@ -1586,7 +1592,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq)
1586 1592
1587 BUG_ON(rq->cpu != task_cpu(p)); 1593 BUG_ON(rq->cpu != task_cpu(p));
1588 BUG_ON(task_current(rq, p)); 1594 BUG_ON(task_current(rq, p));
1589 BUG_ON(p->rt.nr_cpus_allowed <= 1); 1595 BUG_ON(p->nr_cpus_allowed <= 1);
1590 1596
1591 BUG_ON(!p->on_rq); 1597 BUG_ON(!p->on_rq);
1592 BUG_ON(!rt_task(p)); 1598 BUG_ON(!rt_task(p));
@@ -1793,9 +1799,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
1793 if (!task_running(rq, p) && 1799 if (!task_running(rq, p) &&
1794 !test_tsk_need_resched(rq->curr) && 1800 !test_tsk_need_resched(rq->curr) &&
1795 has_pushable_tasks(rq) && 1801 has_pushable_tasks(rq) &&
1796 p->rt.nr_cpus_allowed > 1 && 1802 p->nr_cpus_allowed > 1 &&
1797 rt_task(rq->curr) && 1803 rt_task(rq->curr) &&
1798 (rq->curr->rt.nr_cpus_allowed < 2 || 1804 (rq->curr->nr_cpus_allowed < 2 ||
1799 rq->curr->prio <= p->prio)) 1805 rq->curr->prio <= p->prio))
1800 push_rt_tasks(rq); 1806 push_rt_tasks(rq);
1801} 1807}
@@ -1817,7 +1823,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
1817 * Only update if the process changes its state from whether it 1823 * Only update if the process changes its state from whether it
1818 * can migrate or not. 1824 * can migrate or not.
1819 */ 1825 */
1820 if ((p->rt.nr_cpus_allowed > 1) == (weight > 1)) 1826 if ((p->nr_cpus_allowed > 1) == (weight > 1))
1821 return; 1827 return;
1822 1828
1823 rq = task_rq(p); 1829 rq = task_rq(p);
@@ -1979,6 +1985,8 @@ static void watchdog(struct rq *rq, struct task_struct *p)
1979 1985
1980static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) 1986static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
1981{ 1987{
1988 struct sched_rt_entity *rt_se = &p->rt;
1989
1982 update_curr_rt(rq); 1990 update_curr_rt(rq);
1983 1991
1984 watchdog(rq, p); 1992 watchdog(rq, p);
@@ -1996,12 +2004,15 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
1996 p->rt.time_slice = RR_TIMESLICE; 2004 p->rt.time_slice = RR_TIMESLICE;
1997 2005
1998 /* 2006 /*
1999 * Requeue to the end of queue if we are not the only element 2007 * Requeue to the end of queue if we (and all of our ancestors) are the
2000 * on the queue: 2008 * only element on the queue
2001 */ 2009 */
2002 if (p->rt.run_list.prev != p->rt.run_list.next) { 2010 for_each_sched_rt_entity(rt_se) {
2003 requeue_task_rt(rq, p, 0); 2011 if (rt_se->run_list.prev != rt_se->run_list.next) {
2004 set_tsk_need_resched(p); 2012 requeue_task_rt(rq, p, 0);
2013 set_tsk_need_resched(p);
2014 return;
2015 }
2005 } 2016 }
2006} 2017}
2007 2018
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index efd386667536..da70c6db496c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -576,6 +576,7 @@ void tick_nohz_idle_exit(void)
576 /* Update jiffies first */ 576 /* Update jiffies first */
577 select_nohz_load_balancer(0); 577 select_nohz_load_balancer(0);
578 tick_do_update_jiffies64(now); 578 tick_do_update_jiffies64(now);
579 update_cpu_load_nohz();
579 580
580#ifndef CONFIG_VIRT_CPU_ACCOUNTING 581#ifndef CONFIG_VIRT_CPU_ACCOUNTING
581 /* 582 /*