Diffstat (limited to 'kernel/sched_fair.c')
 kernel/sched_fair.c | 126 +++++++++++++++++++++++++++++++-------------
 1 file changed, 87 insertions(+), 39 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6fa833ab2cb8..37f22626225e 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -358,6 +358,10 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
 	}
 
 	cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
+#ifndef CONFIG_64BIT
+	smp_wmb();
+	cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
+#endif
 }
 
 /*
@@ -1340,6 +1344,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	hrtick_update(rq);
 }
 
+static void set_next_buddy(struct sched_entity *se);
+
 /*
  * The dequeue_task method is called before nr_running is
  * decreased. We remove the task from the rbtree and
@@ -1349,14 +1355,22 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
+	int task_sleep = flags & DEQUEUE_SLEEP;
 
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 		dequeue_entity(cfs_rq, se, flags);
 
 		/* Don't dequeue parent if it has other entities besides us */
-		if (cfs_rq->load.weight)
+		if (cfs_rq->load.weight) {
+			/*
+			 * Bias pick_next to pick a task from this cfs_rq, as
+			 * p is sleeping when it is within its sched_slice.
+			 */
+			if (task_sleep && parent_entity(se))
+				set_next_buddy(parent_entity(se));
 			break;
+		}
 		flags |= DEQUEUE_SLEEP;
 	}
 
@@ -1372,12 +1386,25 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 #ifdef CONFIG_SMP
 
-static void task_waking_fair(struct rq *rq, struct task_struct *p)
+static void task_waking_fair(struct task_struct *p)
 {
 	struct sched_entity *se = &p->se;
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+	u64 min_vruntime;
 
-	se->vruntime -= cfs_rq->min_vruntime;
+#ifndef CONFIG_64BIT
+	u64 min_vruntime_copy;
+
+	do {
+		min_vruntime_copy = cfs_rq->min_vruntime_copy;
+		smp_rmb();
+		min_vruntime = cfs_rq->min_vruntime;
+	} while (min_vruntime != min_vruntime_copy);
+#else
+	min_vruntime = cfs_rq->min_vruntime;
+#endif
+
+	se->vruntime -= min_vruntime;
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1622,6 +1649,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
 	/*
 	 * Otherwise, iterate the domains and find an elegible idle cpu.
 	 */
+	rcu_read_lock();
 	for_each_domain(target, sd) {
 		if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
 			break;
@@ -1641,6 +1669,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
 		    cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
 			break;
 	}
+	rcu_read_unlock();
 
 	return target;
 }
@@ -1657,7 +1686,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
  * preempt must be disabled.
  */
 static int
-select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_flags)
+select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 {
 	struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
 	int cpu = smp_processor_id();
@@ -1673,6 +1702,7 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
 		new_cpu = prev_cpu;
 	}
 
+	rcu_read_lock();
 	for_each_domain(cpu, tmp) {
 		if (!(tmp->flags & SD_LOAD_BALANCE))
 			continue;
@@ -1723,9 +1753,10 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
 
 	if (affine_sd) {
 		if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
-			return select_idle_sibling(p, cpu);
-		else
-			return select_idle_sibling(p, prev_cpu);
+			prev_cpu = cpu;
+
+		new_cpu = select_idle_sibling(p, prev_cpu);
+		goto unlock;
 	}
 
 	while (sd) {
@@ -1766,6 +1797,8 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
 		}
 		/* while loop will break here if sd == NULL */
 	}
+unlock:
+	rcu_read_unlock();
 
 	return new_cpu;
 }
@@ -1789,10 +1822,7 @@ wakeup_gran(struct sched_entity *curr, struct sched_entity *se)
 	 * This is especially important for buddies when the leftmost
 	 * task is higher priority than the buddy.
 	 */
-	if (unlikely(se->load.weight != NICE_0_LOAD))
-		gran = calc_delta_fair(gran, se);
-
-	return gran;
+	return calc_delta_fair(gran, se);
 }
 
 /*
@@ -1826,26 +1856,26 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
 
 static void set_last_buddy(struct sched_entity *se)
 {
-	if (likely(task_of(se)->policy != SCHED_IDLE)) {
-		for_each_sched_entity(se)
-			cfs_rq_of(se)->last = se;
-	}
+	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
+		return;
+
+	for_each_sched_entity(se)
+		cfs_rq_of(se)->last = se;
 }
 
 static void set_next_buddy(struct sched_entity *se)
 {
-	if (likely(task_of(se)->policy != SCHED_IDLE)) {
-		for_each_sched_entity(se)
-			cfs_rq_of(se)->next = se;
-	}
+	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
+		return;
+
+	for_each_sched_entity(se)
+		cfs_rq_of(se)->next = se;
 }
 
 static void set_skip_buddy(struct sched_entity *se)
 {
-	if (likely(task_of(se)->policy != SCHED_IDLE)) {
-		for_each_sched_entity(se)
-			cfs_rq_of(se)->skip = se;
-	}
+	for_each_sched_entity(se)
+		cfs_rq_of(se)->skip = se;
 }
 
 /*
@@ -1857,12 +1887,15 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	struct sched_entity *se = &curr->se, *pse = &p->se;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	int scale = cfs_rq->nr_running >= sched_nr_latency;
+	int next_buddy_marked = 0;
 
 	if (unlikely(se == pse))
 		return;
 
-	if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK))
+	if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) {
 		set_next_buddy(pse);
+		next_buddy_marked = 1;
+	}
 
 	/*
 	 * We can come here with TIF_NEED_RESCHED already set from new task
@@ -1890,8 +1923,15 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	update_curr(cfs_rq);
 	find_matching_se(&se, &pse);
 	BUG_ON(!pse);
-	if (wakeup_preempt_entity(se, pse) == 1)
+	if (wakeup_preempt_entity(se, pse) == 1) {
+		/*
+		 * Bias pick_next to pick the sched entity that is
+		 * triggering this preemption.
+		 */
+		if (!next_buddy_marked)
+			set_next_buddy(pse);
 		goto preempt;
+	}
 
 	return;
 
@@ -2102,7 +2142,7 @@ static unsigned long
 balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	      unsigned long max_load_move, struct sched_domain *sd,
 	      enum cpu_idle_type idle, int *all_pinned,
-	      int *this_best_prio, struct cfs_rq *busiest_cfs_rq)
+	      struct cfs_rq *busiest_cfs_rq)
 {
 	int loops = 0, pulled = 0;
 	long rem_load_move = max_load_move;
@@ -2140,9 +2180,6 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		 */
 		if (rem_load_move <= 0)
 			break;
-
-		if (p->prio < *this_best_prio)
-			*this_best_prio = p->prio;
 	}
 out:
 	/*
@@ -2202,7 +2239,7 @@ static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
 		  struct sched_domain *sd, enum cpu_idle_type idle,
-		  int *all_pinned, int *this_best_prio)
+		  int *all_pinned)
 {
 	long rem_load_move = max_load_move;
 	int busiest_cpu = cpu_of(busiest);
@@ -2227,7 +2264,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		rem_load = div_u64(rem_load, busiest_h_load + 1);
 
 		moved_load = balance_tasks(this_rq, this_cpu, busiest,
-				rem_load, sd, idle, all_pinned, this_best_prio,
+				rem_load, sd, idle, all_pinned,
 				busiest_cfs_rq);
 
 		if (!moved_load)
@@ -2253,11 +2290,11 @@ static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
 		  struct sched_domain *sd, enum cpu_idle_type idle,
-		  int *all_pinned, int *this_best_prio)
+		  int *all_pinned)
 {
 	return balance_tasks(this_rq, this_cpu, busiest,
 			max_load_move, sd, idle, all_pinned,
-			this_best_prio, &busiest->cfs);
+			&busiest->cfs);
 }
 #endif
 
@@ -2274,12 +2311,11 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		      int *all_pinned)
 {
 	unsigned long total_load_moved = 0, load_moved;
-	int this_best_prio = this_rq->curr->prio;
 
 	do {
 		load_moved = load_balance_fair(this_rq, this_cpu, busiest,
 				max_load_move - total_load_moved,
-				sd, idle, all_pinned, &this_best_prio);
+				sd, idle, all_pinned);
 
 		total_load_moved += load_moved;
 
@@ -2648,7 +2684,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
 	/*
 	 * Only siblings can have significantly less than SCHED_LOAD_SCALE
 	 */
-	if (sd->level != SD_LV_SIBLING)
+	if (!(sd->flags & SD_SHARE_CPUPOWER))
 		return 0;
 
 	/*
@@ -3465,6 +3501,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 	raw_spin_unlock(&this_rq->lock);
 
 	update_shares(this_cpu);
+	rcu_read_lock();
 	for_each_domain(this_cpu, sd) {
 		unsigned long interval;
 		int balance = 1;
@@ -3486,6 +3523,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 			break;
 		}
 	}
+	rcu_read_unlock();
 
 	raw_spin_lock(&this_rq->lock);
 
@@ -3534,6 +3572,7 @@ static int active_load_balance_cpu_stop(void *data)
 	double_lock_balance(busiest_rq, target_rq);
 
 	/* Search for an sd spanning us and the target CPU. */
+	rcu_read_lock();
 	for_each_domain(target_cpu, sd) {
 		if ((sd->flags & SD_LOAD_BALANCE) &&
 		    cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
@@ -3549,6 +3588,7 @@ static int active_load_balance_cpu_stop(void *data)
 		else
 			schedstat_inc(sd, alb_failed);
 	}
+	rcu_read_unlock();
 	double_unlock_balance(busiest_rq, target_rq);
 out_unlock:
 	busiest_rq->active_balance = 0;
@@ -3675,6 +3715,7 @@ static int find_new_ilb(int cpu)
 {
 	struct sched_domain *sd;
 	struct sched_group *ilb_group;
+	int ilb = nr_cpu_ids;
 
 	/*
 	 * Have idle load balancer selection from semi-idle packages only
@@ -3690,20 +3731,25 @@ static int find_new_ilb(int cpu)
 	if (cpumask_weight(nohz.idle_cpus_mask) < 2)
 		goto out_done;
 
+	rcu_read_lock();
 	for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
 		ilb_group = sd->groups;
 
 		do {
-			if (is_semi_idle_group(ilb_group))
-				return cpumask_first(nohz.grp_idle_mask);
+			if (is_semi_idle_group(ilb_group)) {
+				ilb = cpumask_first(nohz.grp_idle_mask);
+				goto unlock;
+			}
 
 			ilb_group = ilb_group->next;
 
 		} while (ilb_group != sd->groups);
 	}
+unlock:
+	rcu_read_unlock();
 
 out_done:
-	return nr_cpu_ids;
+	return ilb;
 }
 #else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
 static inline int find_new_ilb(int call_cpu)
@@ -3848,6 +3894,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 
 	update_shares(cpu);
 
+	rcu_read_lock();
 	for_each_domain(cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;
@@ -3893,6 +3940,7 @@ out:
 		if (!balance)
 			break;
 	}
+	rcu_read_unlock();
 
 	/*
 	 * next_balance will be updated only when there is a need.
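
A note on the min_vruntime hunks above (update_min_vruntime() and task_waking_fair()): on 32-bit kernels a 64-bit store is not single-copy atomic, so a lockless reader on another CPU can observe a torn min_vruntime. The patch guards the read with a hand-rolled variant of the seqcount idiom: the writer publishes a second copy of the value behind a write barrier, and the reader retries until both copies agree. Below is a minimal userspace sketch of that pattern, not the kernel code: the type and function names are hypothetical, and the fence macros are GCC/Clang stand-ins for the kernel's smp_wmb()/smp_rmb().

/*
 * Sketch only. Mirrors the ordering in the patch: the writer stores val,
 * then copy, separated by a write barrier; the reader loads copy, then
 * val, separated by a read barrier, and retries on mismatch.
 */
#include <stdint.h>

#define wmb_approx()	__atomic_thread_fence(__ATOMIC_RELEASE)	/* ~smp_wmb() */
#define rmb_approx()	__atomic_thread_fence(__ATOMIC_ACQUIRE)	/* ~smp_rmb() */

struct torn_u64 {
	uint64_t val;	/* may be written as two 32-bit halves */
	uint64_t copy;	/* published after val; equality marks a stable read */
};

/* writer side, as in update_min_vruntime() with CONFIG_64BIT unset */
static void torn_u64_set(struct torn_u64 *t, uint64_t v)
{
	t->val = v;
	wmb_approx();		/* order the val store before the copy store */
	t->copy = t->val;
}

/* reader side, as in task_waking_fair(): retry while a writer races us */
static uint64_t torn_u64_get(const struct torn_u64 *t)
{
	uint64_t v, c;

	do {
		c = t->copy;
		rmb_approx();	/* order the copy load before the val load */
		v = t->val;
	} while (v != c);

	return v;
}

On CONFIG_64BIT kernels the copy field and both barriers drop out, as the #else branch in task_waking_fair() shows: an aligned 64-bit store is already atomic with respect to other CPUs there.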