Diffstat (limited to 'kernel/sched_fair.c')
 -rw-r--r--  kernel/sched_fair.c | 268
 1 file changed, 182 insertions, 86 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index ecc637a0d591..5bedf6e3ebf3 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -21,6 +21,7 @@
  */
 
 #include <linux/latencytop.h>
+#include <linux/sched.h>
 
 /*
  * Targeted preemption latency for CPU-bound tasks:
@@ -35,12 +36,26 @@
  * run vmstat and monitor the context-switches (cs) field)
  */
 unsigned int sysctl_sched_latency = 5000000ULL;
+unsigned int normalized_sysctl_sched_latency = 5000000ULL;
+
+/*
+ * The initial- and re-scaling of tunables is configurable
+ * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
+ *
+ * Options are:
+ * SCHED_TUNABLESCALING_NONE - unscaled, always *1
+ * SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
+ * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
+ */
+enum sched_tunable_scaling sysctl_sched_tunable_scaling
+	= SCHED_TUNABLESCALING_LOG;
 
 /*
  * Minimal preemption granularity for CPU-bound tasks:
  * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
  */
 unsigned int sysctl_sched_min_granularity = 1000000ULL;
+unsigned int normalized_sysctl_sched_min_granularity = 1000000ULL;
 
 /*
  * is kept at sysctl_sched_latency / sysctl_sched_min_granularity
@@ -70,6 +85,7 @@ unsigned int __read_mostly sysctl_sched_compat_yield;
  * have immediate wakeup/sleep latencies.
  */
 unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
+unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
 
 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
 
@@ -383,11 +399,12 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
  */
 
 #ifdef CONFIG_SCHED_DEBUG
-int sched_nr_latency_handler(struct ctl_table *table, int write,
-		struct file *filp, void __user *buffer, size_t *lenp,
+int sched_proc_update_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
 {
-	int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	int factor = get_update_sysctl_factor();
 
 	if (ret || !write)
 		return ret;
@@ -395,6 +412,14 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
 	sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency,
 					sysctl_sched_min_granularity);
 
+#define WRT_SYSCTL(name) \
+	(normalized_sysctl_##name = sysctl_##name / (factor))
+	WRT_SYSCTL(sched_min_granularity);
+	WRT_SYSCTL(sched_latency);
+	WRT_SYSCTL(sched_wakeup_granularity);
+	WRT_SYSCTL(sched_shares_ratelimit);
+#undef WRT_SYSCTL
+
 	return 0;
 }
 #endif
@@ -822,6 +847,26 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 		 * re-elected due to buddy favours.
 		 */
 		clear_buddies(cfs_rq, curr);
+		return;
+	}
+
+	/*
+	 * Ensure that a task that missed wakeup preemption by a
+	 * narrow margin doesn't have to wait for a full slice.
+	 * This also mitigates buddy induced latencies under load.
+	 */
+	if (!sched_feat(WAKEUP_PREEMPT))
+		return;
+
+	if (delta_exec < sysctl_sched_min_granularity)
+		return;
+
+	if (cfs_rq->nr_running > 1) {
+		struct sched_entity *se = __pick_next_entity(cfs_rq);
+		s64 delta = curr->vruntime - se->vruntime;
+
+		if (delta > ideal_runtime)
+			resched_task(rq_of(cfs_rq)->curr);
 	}
 }
 
@@ -861,12 +906,18 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
 static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *se = __pick_next_entity(cfs_rq);
+	struct sched_entity *left = se;
 
-	if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, se) < 1)
-		return cfs_rq->next;
+	if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
+		se = cfs_rq->next;
 
-	if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, se) < 1)
-		return cfs_rq->last;
+	/*
+	 * Prefer last buddy, try to return the CPU to a preempted task.
+	 */
+	if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1)
+		se = cfs_rq->last;
+
+	clear_buddies(cfs_rq, se);
 
 	return se;
 }
@@ -1319,6 +1370,37 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 }
 
 /*
+ * Try and locate an idle CPU in the sched_domain.
+ */
+static int
+select_idle_sibling(struct task_struct *p, struct sched_domain *sd, int target)
+{
+	int cpu = smp_processor_id();
+	int prev_cpu = task_cpu(p);
+	int i;
+
+	/*
+	 * If this domain spans both cpu and prev_cpu (see the SD_WAKE_AFFINE
+	 * test in select_task_rq_fair) and the prev_cpu is idle then that's
+	 * always a better target than the current cpu.
+	 */
+	if (target == cpu && !cpu_rq(prev_cpu)->cfs.nr_running)
+		return prev_cpu;
+
+	/*
+	 * Otherwise, iterate the domain and find an elegible idle cpu.
+	 */
+	for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) {
+		if (!cpu_rq(i)->cfs.nr_running) {
+			target = i;
+			break;
+		}
+	}
+
+	return target;
+}
+
+/*
  * sched_balance_self: balance the current task (running on cpu) in domains
  * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
  * SD_BALANCE_EXEC.
@@ -1346,7 +1428,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
 		new_cpu = prev_cpu;
 	}
 
-	rcu_read_lock();
 	for_each_domain(cpu, tmp) {
 		/*
 		 * If power savings logic is enabled for a domain, see if we
@@ -1372,11 +1453,35 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
 			want_sd = 0;
 		}
 
-		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
-		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
+		/*
+		 * While iterating the domains looking for a spanning
+		 * WAKE_AFFINE domain, adjust the affine target to any idle cpu
+		 * in cache sharing domains along the way.
+		 */
+		if (want_affine) {
+			int target = -1;
 
-			affine_sd = tmp;
-			want_affine = 0;
+			/*
+			 * If both cpu and prev_cpu are part of this domain,
+			 * cpu is a valid SD_WAKE_AFFINE target.
+			 */
+			if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp)))
+				target = cpu;
+
+			/*
+			 * If there's an idle sibling in this domain, make that
+			 * the wake_affine target instead of the current cpu.
+			 */
+			if (tmp->flags & SD_PREFER_SIBLING)
+				target = select_idle_sibling(p, tmp, target);
+
+			if (target >= 0) {
+				if (tmp->flags & SD_WAKE_AFFINE) {
+					affine_sd = tmp;
+					want_affine = 0;
+				}
+				cpu = target;
+			}
 		}
 
 		if (!want_sd && !want_affine)
@@ -1403,10 +1508,8 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
 		update_shares(tmp);
 	}
 
-	if (affine_sd && wake_affine(affine_sd, p, sync)) {
-		new_cpu = cpu;
-		goto out;
-	}
+	if (affine_sd && wake_affine(affine_sd, p, sync))
+		return cpu;
 
 	while (sd) {
 		int load_idx = sd->forkexec_idx;
@@ -1447,8 +1550,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
 		/* while loop will break here if sd == NULL */
 	}
 
-out:
-	rcu_read_unlock();
 	return new_cpu;
 }
 #endif /* CONFIG_SMP */
@@ -1568,13 +1669,10 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	struct sched_entity *se = &curr->se, *pse = &p->se;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	int sync = wake_flags & WF_SYNC;
+	int scale = cfs_rq->nr_running >= sched_nr_latency;
 
-	update_curr(cfs_rq);
-
-	if (unlikely(rt_prio(p->prio))) {
-		resched_task(curr);
-		return;
-	}
+	if (unlikely(rt_prio(p->prio)))
+		goto preempt;
 
 	if (unlikely(p->sched_class != &fair_sched_class))
 		return;
@@ -1582,18 +1680,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	if (unlikely(se == pse))
 		return;
 
-	/*
-	 * Only set the backward buddy when the current task is still on the
-	 * rq. This can happen when a wakeup gets interleaved with schedule on
-	 * the ->pre_schedule() or idle_balance() point, either of which can
-	 * drop the rq lock.
-	 *
-	 * Also, during early boot the idle thread is in the fair class, for
-	 * obvious reasons its a bad idea to schedule back to the idle thread.
-	 */
-	if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle))
-		set_last_buddy(se);
-	if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK))
+	if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK))
 		set_next_buddy(pse);
 
 	/*
@@ -1611,36 +1698,44 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 		return;
 
 	/* Idle tasks are by definition preempted by everybody. */
-	if (unlikely(curr->policy == SCHED_IDLE)) {
-		resched_task(curr);
-		return;
-	}
+	if (unlikely(curr->policy == SCHED_IDLE))
+		goto preempt;
 
-	if ((sched_feat(WAKEUP_SYNC) && sync) ||
-	    (sched_feat(WAKEUP_OVERLAP) &&
-	     (se->avg_overlap < sysctl_sched_migration_cost &&
-	      pse->avg_overlap < sysctl_sched_migration_cost))) {
-		resched_task(curr);
-		return;
-	}
+	if (sched_feat(WAKEUP_SYNC) && sync)
+		goto preempt;
 
-	if (sched_feat(WAKEUP_RUNNING)) {
-		if (pse->avg_running < se->avg_running) {
-			set_next_buddy(pse);
-			resched_task(curr);
-			return;
-		}
-	}
+	if (sched_feat(WAKEUP_OVERLAP) &&
+	    se->avg_overlap < sysctl_sched_migration_cost &&
+	    pse->avg_overlap < sysctl_sched_migration_cost)
+		goto preempt;
 
 	if (!sched_feat(WAKEUP_PREEMPT))
 		return;
 
+	update_curr(cfs_rq);
 	find_matching_se(&se, &pse);
-
 	BUG_ON(!pse);
-
 	if (wakeup_preempt_entity(se, pse) == 1)
-		resched_task(curr);
+		goto preempt;
+
+	return;
+
+preempt:
+	resched_task(curr);
+	/*
+	 * Only set the backward buddy when the current task is still
+	 * on the rq. This can happen when a wakeup gets interleaved
+	 * with schedule on the ->pre_schedule() or idle_balance()
+	 * point, either of which can * drop the rq lock.
+	 *
+	 * Also, during early boot the idle thread is in the fair class,
+	 * for obvious reasons its a bad idea to schedule back to it.
+	 */
+	if (unlikely(!se->on_rq || curr == rq->idle))
+		return;
+
+	if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se))
+		set_last_buddy(se);
 }
 
 static struct task_struct *pick_next_task_fair(struct rq *rq)
@@ -1649,21 +1744,11 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
 	struct cfs_rq *cfs_rq = &rq->cfs;
 	struct sched_entity *se;
 
-	if (unlikely(!cfs_rq->nr_running))
+	if (!cfs_rq->nr_running)
 		return NULL;
 
 	do {
 		se = pick_next_entity(cfs_rq);
-		/*
-		 * If se was a buddy, clear it so that it will have to earn
-		 * the favour again.
-		 *
-		 * If se was not a buddy, clear the buddies because neither
-		 * was elegible to run, let them earn it again.
-		 *
-		 * IOW. unconditionally clear buddies.
-		 */
-		__clear_buddies(cfs_rq, NULL);
 		set_next_entity(cfs_rq, se);
 		cfs_rq = group_cfs_rq(se);
 	} while (cfs_rq);
@@ -1830,6 +1915,17 @@ move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 
 	return 0;
 }
+
+static void rq_online_fair(struct rq *rq)
+{
+	update_sysctl();
+}
+
+static void rq_offline_fair(struct rq *rq)
+{
+	update_sysctl();
+}
+
 #endif /* CONFIG_SMP */
 
 /*
@@ -1847,28 +1943,30 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 }
 
 /*
- * Share the fairness runtime between parent and child, thus the
- * total amount of pressure for CPU stays equal - new tasks
- * get a chance to run but frequent forkers are not allowed to
- * monopolize the CPU. Note: the parent runqueue is locked,
- * the child is not running yet.
+ * called on fork with the child task as argument from the parent's context
+ *  - child not yet on the tasklist
+ *  - preemption disabled
  */
-static void task_new_fair(struct rq *rq, struct task_struct *p)
+static void task_fork_fair(struct task_struct *p)
 {
-	struct cfs_rq *cfs_rq = task_cfs_rq(p);
+	struct cfs_rq *cfs_rq = task_cfs_rq(current);
 	struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
 	int this_cpu = smp_processor_id();
+	struct rq *rq = this_rq();
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&rq->lock, flags);
 
-	sched_info_queued(p);
+	if (unlikely(task_cpu(p) != this_cpu))
+		__set_task_cpu(p, this_cpu);
 
 	update_curr(cfs_rq);
+
 	if (curr)
 		se->vruntime = curr->vruntime;
 	place_entity(cfs_rq, se, 1);
 
-	/* 'curr' will be NULL if the child belongs to a different group */
-	if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
-	    curr && entity_before(curr, se)) {
+	if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
 		/*
 		 * Upon rescheduling, sched_class::put_prev_task() will place
 		 * 'current' within the tree based on its new key value.
@@ -1877,7 +1975,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 		resched_task(rq->curr);
 	}
 
-	enqueue_task_fair(rq, p, 0);
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
 /*
@@ -1939,21 +2037,17 @@ static void moved_group_fair(struct task_struct *p)
 }
 #endif
 
-unsigned int get_rr_interval_fair(struct task_struct *task)
+unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task)
 {
 	struct sched_entity *se = &task->se;
-	unsigned long flags;
-	struct rq *rq;
 	unsigned int rr_interval = 0;
 
 	/*
 	 * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise
 	 * idle runqueue:
 	 */
-	rq = task_rq_lock(task, &flags);
 	if (rq->cfs.load.weight)
 		rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
-	task_rq_unlock(rq, &flags);
 
 	return rr_interval;
 }
@@ -1977,11 +2071,13 @@ static const struct sched_class fair_sched_class = {
 
 	.load_balance		= load_balance_fair,
 	.move_one_task		= move_one_task_fair,
+	.rq_online		= rq_online_fair,
+	.rq_offline		= rq_offline_fair,
 #endif
 
 	.set_curr_task		= set_curr_task_fair,
 	.task_tick		= task_tick_fair,
-	.task_new		= task_new_fair,
+	.task_fork		= task_fork_fair,
 
 	.prio_changed		= prio_changed_fair,
 	.switched_to		= switched_to_fair,
