Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--  kernel/sched_fair.c | 317
1 file changed, 227 insertions(+), 90 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 4e777b47eeda..8fe7ee81c552 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -21,6 +21,7 @@
  */
 
 #include <linux/latencytop.h>
+#include <linux/sched.h>
 
 /*
  * Targeted preemption latency for CPU-bound tasks:
@@ -35,12 +36,26 @@
  * run vmstat and monitor the context-switches (cs) field)
  */
 unsigned int sysctl_sched_latency = 5000000ULL;
+unsigned int normalized_sysctl_sched_latency = 5000000ULL;
+
+/*
+ * The initial- and re-scaling of tunables is configurable
+ * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
+ *
+ * Options are:
+ * SCHED_TUNABLESCALING_NONE - unscaled, always *1
+ * SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
+ * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
+ */
+enum sched_tunable_scaling sysctl_sched_tunable_scaling
+	= SCHED_TUNABLESCALING_LOG;
 
 /*
  * Minimal preemption granularity for CPU-bound tasks:
  * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
  */
 unsigned int sysctl_sched_min_granularity = 1000000ULL;
+unsigned int normalized_sysctl_sched_min_granularity = 1000000ULL;
 
 /*
  * is kept at sysctl_sched_latency / sysctl_sched_min_granularity
@@ -70,6 +85,7 @@ unsigned int __read_mostly sysctl_sched_compat_yield;
  * have immediate wakeup/sleep latencies.
  */
 unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
+unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
 
 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
 
@@ -383,11 +399,12 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
  */
 
 #ifdef CONFIG_SCHED_DEBUG
-int sched_nr_latency_handler(struct ctl_table *table, int write,
+int sched_proc_update_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
 {
 	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	int factor = get_update_sysctl_factor();
 
 	if (ret || !write)
 		return ret;
@@ -395,6 +412,14 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
 	sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency,
 					sysctl_sched_min_granularity);
 
+#define WRT_SYSCTL(name) \
+	(normalized_sysctl_##name = sysctl_##name / (factor))
+	WRT_SYSCTL(sched_min_granularity);
+	WRT_SYSCTL(sched_latency);
+	WRT_SYSCTL(sched_wakeup_granularity);
+	WRT_SYSCTL(sched_shares_ratelimit);
+#undef WRT_SYSCTL
+
 	return 0;
 }
 #endif
@@ -485,6 +510,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	curr->sum_exec_runtime += delta_exec;
 	schedstat_add(cfs_rq, exec_clock, delta_exec);
 	delta_exec_weighted = calc_delta_fair(delta_exec, curr);
+
 	curr->vruntime += delta_exec_weighted;
 	update_min_vruntime(cfs_rq);
 }
@@ -740,16 +766,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	se->vruntime = vruntime;
 }
 
+#define ENQUEUE_WAKEUP 1
+#define ENQUEUE_MIGRATE 2
+
 static void
-enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
+enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
 	/*
+	 * Update the normalized vruntime before updating min_vruntime
+	 * through callig update_curr().
+	 */
+	if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE))
+		se->vruntime += cfs_rq->min_vruntime;
+
+	/*
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
 	account_entity_enqueue(cfs_rq, se);
 
-	if (wakeup) {
+	if (flags & ENQUEUE_WAKEUP) {
 		place_entity(cfs_rq, se, 0);
 		enqueue_sleeper(cfs_rq, se);
 	}
@@ -803,6 +839,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 	__dequeue_entity(cfs_rq, se);
 	account_entity_dequeue(cfs_rq, se);
 	update_min_vruntime(cfs_rq);
+
+	/*
+	 * Normalize the entity after updating the min_vruntime because the
+	 * update can refer to the ->curr item and we need to reflect this
+	 * movement in our normalized position.
+	 */
+	if (!sleep)
+		se->vruntime -= cfs_rq->min_vruntime;
 }
 
 /*
@@ -822,6 +866,26 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 		 * re-elected due to buddy favours.
 		 */
 		clear_buddies(cfs_rq, curr);
+		return;
+	}
+
+	/*
+	 * Ensure that a task that missed wakeup preemption by a
+	 * narrow margin doesn't have to wait for a full slice.
+	 * This also mitigates buddy induced latencies under load.
+	 */
+	if (!sched_feat(WAKEUP_PREEMPT))
+		return;
+
+	if (delta_exec < sysctl_sched_min_granularity)
+		return;
+
+	if (cfs_rq->nr_running > 1) {
+		struct sched_entity *se = __pick_next_entity(cfs_rq);
+		s64 delta = curr->vruntime - se->vruntime;
+
+		if (delta > ideal_runtime)
+			resched_task(rq_of(cfs_rq)->curr);
 	}
 }
 
@@ -861,12 +925,18 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
 static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *se = __pick_next_entity(cfs_rq);
+	struct sched_entity *left = se;
 
-	if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, se) < 1)
-		return cfs_rq->next;
+	if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
+		se = cfs_rq->next;
 
-	if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, se) < 1)
-		return cfs_rq->last;
+	/*
+	 * Prefer last buddy, try to return the CPU to a preempted task.
+	 */
+	if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1)
+		se = cfs_rq->last;
+
+	clear_buddies(cfs_rq, se);
 
 	return se;
 }
@@ -987,13 +1057,19 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 {
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
+	int flags = 0;
+
+	if (wakeup)
+		flags |= ENQUEUE_WAKEUP;
+	if (p->state == TASK_WAKING)
+		flags |= ENQUEUE_MIGRATE;
 
 	for_each_sched_entity(se) {
 		if (se->on_rq)
 			break;
 		cfs_rq = cfs_rq_of(se);
-		enqueue_entity(cfs_rq, se, wakeup);
-		wakeup = 1;
+		enqueue_entity(cfs_rq, se, flags);
+		flags = ENQUEUE_WAKEUP;
 	}
 
 	hrtick_update(rq);
@@ -1069,6 +1145,14 @@ static void yield_task_fair(struct rq *rq)
 
 #ifdef CONFIG_SMP
 
+static void task_waking_fair(struct rq *rq, struct task_struct *p)
+{
+	struct sched_entity *se = &p->se;
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+	se->vruntime -= cfs_rq->min_vruntime;
+}
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 /*
  * effective_load() calculates the load change as seen from the root_task_group
@@ -1319,6 +1403,37 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 }
 
 /*
+ * Try and locate an idle CPU in the sched_domain.
+ */
+static int
+select_idle_sibling(struct task_struct *p, struct sched_domain *sd, int target)
+{
+	int cpu = smp_processor_id();
+	int prev_cpu = task_cpu(p);
+	int i;
+
+	/*
+	 * If this domain spans both cpu and prev_cpu (see the SD_WAKE_AFFINE
+	 * test in select_task_rq_fair) and the prev_cpu is idle then that's
+	 * always a better target than the current cpu.
+	 */
+	if (target == cpu && !cpu_rq(prev_cpu)->cfs.nr_running)
+		return prev_cpu;
+
+	/*
+	 * Otherwise, iterate the domain and find an elegible idle cpu.
+	 */
+	for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) {
+		if (!cpu_rq(i)->cfs.nr_running) {
+			target = i;
+			break;
+		}
+	}
+
+	return target;
+}
+
+/*
  * sched_balance_self: balance the current task (running on cpu) in domains
  * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
  * SD_BALANCE_EXEC.
@@ -1346,8 +1461,10 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
 		new_cpu = prev_cpu;
 	}
 
-	rcu_read_lock();
 	for_each_domain(cpu, tmp) {
+		if (!(tmp->flags & SD_LOAD_BALANCE))
+			continue;
+
 		/*
 		 * If power savings logic is enabled for a domain, see if we
 		 * are not overloaded, if so, don't balance wider.
@@ -1372,11 +1489,35 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
 			want_sd = 0;
 		}
 
-		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
-		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
+		/*
+		 * While iterating the domains looking for a spanning
+		 * WAKE_AFFINE domain, adjust the affine target to any idle cpu
+		 * in cache sharing domains along the way.
+		 */
+		if (want_affine) {
+			int target = -1;
 
-			affine_sd = tmp;
-			want_affine = 0;
+			/*
+			 * If both cpu and prev_cpu are part of this domain,
+			 * cpu is a valid SD_WAKE_AFFINE target.
+			 */
+			if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp)))
+				target = cpu;
+
+			/*
+			 * If there's an idle sibling in this domain, make that
+			 * the wake_affine target instead of the current cpu.
+			 */
+			if (tmp->flags & SD_SHARE_PKG_RESOURCES)
+				target = select_idle_sibling(p, tmp, target);
+
+			if (target >= 0) {
+				if (tmp->flags & SD_WAKE_AFFINE) {
+					affine_sd = tmp;
+					want_affine = 0;
+				}
+				cpu = target;
+			}
 		}
 
 		if (!want_sd && !want_affine)
@@ -1403,10 +1544,8 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
 			update_shares(tmp);
 	}
 
-	if (affine_sd && wake_affine(affine_sd, p, sync)) {
-		new_cpu = cpu;
-		goto out;
-	}
+	if (affine_sd && wake_affine(affine_sd, p, sync))
+		return cpu;
 
 	while (sd) {
 		int load_idx = sd->forkexec_idx;
@@ -1447,8 +1586,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
 		/* while loop will break here if sd == NULL */
 	}
 
-out:
-	rcu_read_unlock();
 	return new_cpu;
 }
 #endif /* CONFIG_SMP */
@@ -1568,13 +1705,10 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	struct sched_entity *se = &curr->se, *pse = &p->se;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	int sync = wake_flags & WF_SYNC;
+	int scale = cfs_rq->nr_running >= sched_nr_latency;
 
-	update_curr(cfs_rq);
-
-	if (unlikely(rt_prio(p->prio))) {
-		resched_task(curr);
-		return;
-	}
+	if (unlikely(rt_prio(p->prio)))
+		goto preempt;
 
 	if (unlikely(p->sched_class != &fair_sched_class))
 		return;
@@ -1582,18 +1716,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	if (unlikely(se == pse))
 		return;
 
-	/*
-	 * Only set the backward buddy when the current task is still on the
-	 * rq. This can happen when a wakeup gets interleaved with schedule on
-	 * the ->pre_schedule() or idle_balance() point, either of which can
-	 * drop the rq lock.
-	 *
-	 * Also, during early boot the idle thread is in the fair class, for
-	 * obvious reasons its a bad idea to schedule back to the idle thread.
-	 */
-	if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle))
-		set_last_buddy(se);
-	if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK))
+	if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK))
 		set_next_buddy(pse);
 
 	/*
@@ -1611,36 +1734,44 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 		return;
 
 	/* Idle tasks are by definition preempted by everybody. */
-	if (unlikely(curr->policy == SCHED_IDLE)) {
-		resched_task(curr);
-		return;
-	}
+	if (unlikely(curr->policy == SCHED_IDLE))
+		goto preempt;
 
-	if ((sched_feat(WAKEUP_SYNC) && sync) ||
-	    (sched_feat(WAKEUP_OVERLAP) &&
-	     (se->avg_overlap < sysctl_sched_migration_cost &&
-	      pse->avg_overlap < sysctl_sched_migration_cost))) {
-		resched_task(curr);
-		return;
-	}
+	if (sched_feat(WAKEUP_SYNC) && sync)
+		goto preempt;
 
-	if (sched_feat(WAKEUP_RUNNING)) {
-		if (pse->avg_running < se->avg_running) {
-			set_next_buddy(pse);
-			resched_task(curr);
-			return;
-		}
-	}
+	if (sched_feat(WAKEUP_OVERLAP) &&
+			se->avg_overlap < sysctl_sched_migration_cost &&
+			pse->avg_overlap < sysctl_sched_migration_cost)
+		goto preempt;
 
 	if (!sched_feat(WAKEUP_PREEMPT))
 		return;
 
+	update_curr(cfs_rq);
 	find_matching_se(&se, &pse);
-
 	BUG_ON(!pse);
-
 	if (wakeup_preempt_entity(se, pse) == 1)
-		resched_task(curr);
+		goto preempt;
+
+	return;
+
+preempt:
+	resched_task(curr);
+	/*
+	 * Only set the backward buddy when the current task is still
+	 * on the rq. This can happen when a wakeup gets interleaved
+	 * with schedule on the ->pre_schedule() or idle_balance()
+	 * point, either of which can * drop the rq lock.
+	 *
+	 * Also, during early boot the idle thread is in the fair class,
+	 * for obvious reasons its a bad idea to schedule back to it.
+	 */
+	if (unlikely(!se->on_rq || curr == rq->idle))
+		return;
+
+	if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se))
+		set_last_buddy(se);
 }
 
 static struct task_struct *pick_next_task_fair(struct rq *rq)
@@ -1649,21 +1780,11 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
 	struct cfs_rq *cfs_rq = &rq->cfs;
 	struct sched_entity *se;
 
-	if (unlikely(!cfs_rq->nr_running))
+	if (!cfs_rq->nr_running)
 		return NULL;
 
 	do {
 		se = pick_next_entity(cfs_rq);
-		/*
-		 * If se was a buddy, clear it so that it will have to earn
-		 * the favour again.
-		 *
-		 * If se was not a buddy, clear the buddies because neither
-		 * was elegible to run, let them earn it again.
-		 *
-		 * IOW. unconditionally clear buddies.
-		 */
-		__clear_buddies(cfs_rq, NULL);
 		set_next_entity(cfs_rq, se);
 		cfs_rq = group_cfs_rq(se);
 	} while (cfs_rq);
@@ -1830,6 +1951,17 @@ move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 
 	return 0;
 }
+
+static void rq_online_fair(struct rq *rq)
+{
+	update_sysctl();
+}
+
+static void rq_offline_fair(struct rq *rq)
+{
+	update_sysctl();
+}
+
 #endif /* CONFIG_SMP */
 
 /*
@@ -1847,28 +1979,30 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 }
 
 /*
- * Share the fairness runtime between parent and child, thus the
- * total amount of pressure for CPU stays equal - new tasks
- * get a chance to run but frequent forkers are not allowed to
- * monopolize the CPU. Note: the parent runqueue is locked,
- * the child is not running yet.
+ * called on fork with the child task as argument from the parent's context
+ *  - child not yet on the tasklist
+ *  - preemption disabled
  */
-static void task_new_fair(struct rq *rq, struct task_struct *p)
+static void task_fork_fair(struct task_struct *p)
 {
-	struct cfs_rq *cfs_rq = task_cfs_rq(p);
+	struct cfs_rq *cfs_rq = task_cfs_rq(current);
 	struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
 	int this_cpu = smp_processor_id();
+	struct rq *rq = this_rq();
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&rq->lock, flags);
 
-	sched_info_queued(p);
+	if (unlikely(task_cpu(p) != this_cpu))
+		__set_task_cpu(p, this_cpu);
 
 	update_curr(cfs_rq);
+
 	if (curr)
 		se->vruntime = curr->vruntime;
 	place_entity(cfs_rq, se, 1);
 
-	/* 'curr' will be NULL if the child belongs to a different group */
-	if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
-			curr && entity_before(curr, se)) {
+	if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
 		/*
 		 * Upon rescheduling, sched_class::put_prev_task() will place
 		 * 'current' within the tree based on its new key value.
@@ -1877,7 +2011,9 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 		resched_task(rq->curr);
 	}
 
-	enqueue_task_fair(rq, p, 0);
+	se->vruntime -= cfs_rq->min_vruntime;
+
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
 /*
@@ -1930,30 +2066,27 @@ static void set_curr_task_fair(struct rq *rq)
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static void moved_group_fair(struct task_struct *p)
+static void moved_group_fair(struct task_struct *p, int on_rq)
 {
 	struct cfs_rq *cfs_rq = task_cfs_rq(p);
 
 	update_curr(cfs_rq);
-	place_entity(cfs_rq, &p->se, 1);
+	if (!on_rq)
+		place_entity(cfs_rq, &p->se, 1);
 }
 #endif
 
-unsigned int get_rr_interval_fair(struct task_struct *task)
+unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task)
 {
 	struct sched_entity *se = &task->se;
-	unsigned long flags;
-	struct rq *rq;
 	unsigned int rr_interval = 0;
 
 	/*
 	 * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise
 	 * idle runqueue:
 	 */
-	rq = task_rq_lock(task, &flags);
 	if (rq->cfs.load.weight)
 		rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
-	task_rq_unlock(rq, &flags);
 
 	return rr_interval;
 }
@@ -1977,11 +2110,15 @@ static const struct sched_class fair_sched_class = {
 
 	.load_balance		= load_balance_fair,
 	.move_one_task		= move_one_task_fair,
+	.rq_online		= rq_online_fair,
+	.rq_offline		= rq_offline_fair,
+
+	.task_waking		= task_waking_fair,
 #endif
 
 	.set_curr_task		= set_curr_task_fair,
 	.task_tick		= task_tick_fair,
-	.task_new		= task_new_fair,
+	.task_fork		= task_fork_fair,
 
 	.prio_changed		= prio_changed_fair,
 	.switched_to		= switched_to_fair,