Diffstat (limited to 'kernel/sched_fair.c')
 kernel/sched_fair.c | 155 ++++++++++++++++++++++++++++------------------
 1 file changed, 88 insertions(+), 67 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f61837ad336d..804a411838f1 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -21,6 +21,7 @@
  */
 
 #include <linux/latencytop.h>
+#include <linux/sched.h>
 
 /*
  * Targeted preemption latency for CPU-bound tasks:
@@ -35,12 +36,26 @@
  * run vmstat and monitor the context-switches (cs) field)
  */
 unsigned int sysctl_sched_latency = 5000000ULL;
+unsigned int normalized_sysctl_sched_latency = 5000000ULL;
+
+/*
+ * The initial- and re-scaling of tunables is configurable
+ * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
+ *
+ * Options are:
+ * SCHED_TUNABLESCALING_NONE - unscaled, always *1
+ * SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
+ * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
+ */
+enum sched_tunable_scaling sysctl_sched_tunable_scaling
+        = SCHED_TUNABLESCALING_LOG;
 
 /*
  * Minimal preemption granularity for CPU-bound tasks:
  * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
  */
 unsigned int sysctl_sched_min_granularity = 1000000ULL;
+unsigned int normalized_sysctl_sched_min_granularity = 1000000ULL;
 
 /*
  * is kept at sysctl_sched_latency / sysctl_sched_min_granularity
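The scaling factor implied by the comment above is produced outside this file, by get_update_sysctl_factor(), which the updated proc handler further down calls (in this series it sits in kernel/sched.c). A minimal sketch of how the three modes could map to a multiplier, assuming num_online_cpus() and ilog2() as the inputs; it is not the helper shipped with the patch series:

static unsigned int tunable_scaling_factor_sketch(void)
{
        /* sketch only: derive the tunable multiplier from the chosen mode */
        unsigned int cpus = num_online_cpus();

        switch (sysctl_sched_tunable_scaling) {
        case SCHED_TUNABLESCALING_NONE:
                return 1;                       /* unscaled, always *1 */
        case SCHED_TUNABLESCALING_LINEAR:
                return cpus;                    /* *ncpus */
        case SCHED_TUNABLESCALING_LOG:
        default:
                return 1 + ilog2(cpus);         /* *(1 + ilog(ncpus)) */
        }
}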
@@ -70,6 +85,7 @@ unsigned int __read_mostly sysctl_sched_compat_yield;
  * have immediate wakeup/sleep latencies.
  */
 unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
+unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
 
 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
 
@@ -383,11 +399,12 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
  */
 
 #ifdef CONFIG_SCHED_DEBUG
-int sched_nr_latency_handler(struct ctl_table *table, int write,
+int sched_proc_update_handler(struct ctl_table *table, int write,
                 void __user *buffer, size_t *lenp,
                 loff_t *ppos)
 {
         int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+        int factor = get_update_sysctl_factor();
 
         if (ret || !write)
                 return ret;
@@ -395,6 +412,14 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
         sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency,
                         sysctl_sched_min_granularity);
 
+#define WRT_SYSCTL(name) \
+        (normalized_sysctl_##name = sysctl_##name / (factor))
+        WRT_SYSCTL(sched_min_granularity);
+        WRT_SYSCTL(sched_latency);
+        WRT_SYSCTL(sched_wakeup_granularity);
+        WRT_SYSCTL(sched_shares_ratelimit);
+#undef WRT_SYSCTL
+
         return 0;
 }
 #endif
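For a single tunable the WRT_SYSCTL() macro above expands, via token pasting, to a plain division:

        /* WRT_SYSCTL(sched_latency) becomes: */
        normalized_sysctl_sched_latency = sysctl_sched_latency / (factor);

So whenever an admin writes a value through /proc, the handler also records a CPU-count-independent (normalized) copy, which can later be multiplied back up if the scaling factor changes.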
@@ -1403,7 +1428,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
                 new_cpu = prev_cpu;
         }
 
-        rcu_read_lock();
         for_each_domain(cpu, tmp) {
                 /*
                  * If power savings logic is enabled for a domain, see if we
@@ -1484,10 +1508,8 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
                         update_shares(tmp);
         }
 
-        if (affine_sd && wake_affine(affine_sd, p, sync)) {
-                new_cpu = cpu;
-                goto out;
-        }
+        if (affine_sd && wake_affine(affine_sd, p, sync))
+                return cpu;
 
         while (sd) {
                 int load_idx = sd->forkexec_idx;
@@ -1528,8 +1550,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
                 /* while loop will break here if sd == NULL */
         }
 
-out:
-        rcu_read_unlock();
         return new_cpu;
 }
 #endif /* CONFIG_SMP */
@@ -1651,12 +1671,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
         int sync = wake_flags & WF_SYNC;
         int scale = cfs_rq->nr_running >= sched_nr_latency;
 
-        update_curr(cfs_rq);
-
-        if (unlikely(rt_prio(p->prio))) {
-                resched_task(curr);
-                return;
-        }
+        if (unlikely(rt_prio(p->prio)))
+                goto preempt;
 
         if (unlikely(p->sched_class != &fair_sched_class))
                 return;
@@ -1682,50 +1698,44 @@
                 return;
 
         /* Idle tasks are by definition preempted by everybody. */
-        if (unlikely(curr->policy == SCHED_IDLE)) {
-                resched_task(curr);
-                return;
-        }
+        if (unlikely(curr->policy == SCHED_IDLE))
+                goto preempt;
 
-        if ((sched_feat(WAKEUP_SYNC) && sync) ||
-            (sched_feat(WAKEUP_OVERLAP) &&
-            (se->avg_overlap < sysctl_sched_migration_cost &&
-            pse->avg_overlap < sysctl_sched_migration_cost))) {
-                resched_task(curr);
-                return;
-        }
+        if (sched_feat(WAKEUP_SYNC) && sync)
+                goto preempt;
 
-        if (sched_feat(WAKEUP_RUNNING)) {
-                if (pse->avg_running < se->avg_running) {
-                        set_next_buddy(pse);
-                        resched_task(curr);
-                        return;
-                }
-        }
+        if (sched_feat(WAKEUP_OVERLAP) &&
+                        se->avg_overlap < sysctl_sched_migration_cost &&
+                        pse->avg_overlap < sysctl_sched_migration_cost)
+                goto preempt;
 
         if (!sched_feat(WAKEUP_PREEMPT))
                 return;
 
+        update_curr(cfs_rq);
         find_matching_se(&se, &pse);
-
         BUG_ON(!pse);
+        if (wakeup_preempt_entity(se, pse) == 1)
+                goto preempt;
 
-        if (wakeup_preempt_entity(se, pse) == 1) {
-                resched_task(curr);
-                /*
-                 * Only set the backward buddy when the current task is still
-                 * on the rq. This can happen when a wakeup gets interleaved
-                 * with schedule on the ->pre_schedule() or idle_balance()
-                 * point, either of which can * drop the rq lock.
-                 *
-                 * Also, during early boot the idle thread is in the fair class,
-                 * for obvious reasons its a bad idea to schedule back to it.
-                 */
-                if (unlikely(!se->on_rq || curr == rq->idle))
-                        return;
-                if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se))
-                        set_last_buddy(se);
-        }
+        return;
+
+preempt:
+        resched_task(curr);
+        /*
+         * Only set the backward buddy when the current task is still
+         * on the rq. This can happen when a wakeup gets interleaved
+         * with schedule on the ->pre_schedule() or idle_balance()
+         * point, either of which can * drop the rq lock.
+         *
+         * Also, during early boot the idle thread is in the fair class,
+         * for obvious reasons its a bad idea to schedule back to it.
+         */
+        if (unlikely(!se->on_rq || curr == rq->idle))
+                return;
+
+        if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se))
+                set_last_buddy(se);
 }
 
 static struct task_struct *pick_next_task_fair(struct rq *rq)
@@ -1905,6 +1915,17 @@ move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 
         return 0;
 }
+
+static void rq_online_fair(struct rq *rq)
+{
+        update_sysctl();
+}
+
+static void rq_offline_fair(struct rq *rq)
+{
+        update_sysctl();
+}
+
 #endif /* CONFIG_SMP */
 
 /*
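rq_online_fair()/rq_offline_fair() re-derive the effective tunables whenever a runqueue comes or goes. update_sysctl() itself is not part of this file (it lives in kernel/sched.c in this series); its job is presumably the inverse of the WRT_SYSCTL() normalization shown earlier, roughly along these lines:

static void update_sysctl_sketch(void)
{
        /* sketch: scale the normalized values back up by the current factor */
        unsigned int factor = get_update_sysctl_factor();

        sysctl_sched_min_granularity =
                factor * normalized_sysctl_sched_min_granularity;
        sysctl_sched_latency = factor * normalized_sysctl_sched_latency;
        sysctl_sched_wakeup_granularity =
                factor * normalized_sysctl_sched_wakeup_granularity;
}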
@@ -1922,28 +1943,30 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 }
 
 /*
- * Share the fairness runtime between parent and child, thus the
- * total amount of pressure for CPU stays equal - new tasks
- * get a chance to run but frequent forkers are not allowed to
- * monopolize the CPU. Note: the parent runqueue is locked,
- * the child is not running yet.
+ * called on fork with the child task as argument from the parent's context
+ * - child not yet on the tasklist
+ * - preemption disabled
  */
-static void task_new_fair(struct rq *rq, struct task_struct *p)
+static void task_fork_fair(struct task_struct *p)
 {
-        struct cfs_rq *cfs_rq = task_cfs_rq(p);
+        struct cfs_rq *cfs_rq = task_cfs_rq(current);
         struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
         int this_cpu = smp_processor_id();
+        struct rq *rq = this_rq();
+        unsigned long flags;
+
+        spin_lock_irqsave(&rq->lock, flags);
 
-        sched_info_queued(p);
+        if (unlikely(task_cpu(p) != this_cpu))
+                __set_task_cpu(p, this_cpu);
 
         update_curr(cfs_rq);
+
         if (curr)
                 se->vruntime = curr->vruntime;
         place_entity(cfs_rq, se, 1);
 
-        /* 'curr' will be NULL if the child belongs to a different group */
-        if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
-                        curr && entity_before(curr, se)) {
+        if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
                 /*
                  * Upon rescheduling, sched_class::put_prev_task() will place
                  * 'current' within the tree based on its new key value.
@@ -1952,7 +1975,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
                 resched_task(rq->curr);
         }
 
-        enqueue_task_fair(rq, p, 0);
+        spin_unlock_irqrestore(&rq->lock, flags);
 }
 
 /*
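The new ->task_fork method runs in the parent's context during fork, before the child is on the tasklist, which is why task_fork_fair() takes the runqueue lock itself rather than relying on a caller-locked rq the way task_new_fair() did. The core-scheduler call site is presumably of this shape (a sketch, not taken from this diff):

        /* sketch: in sched_fork(), with preemption disabled */
        if (p->sched_class->task_fork)
                p->sched_class->task_fork(p);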
@@ -2014,21 +2037,17 @@ static void moved_group_fair(struct task_struct *p)
 }
 #endif
 
-unsigned int get_rr_interval_fair(struct task_struct *task)
+unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task)
 {
         struct sched_entity *se = &task->se;
-        unsigned long flags;
-        struct rq *rq;
         unsigned int rr_interval = 0;
 
         /*
          * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise
          * idle runqueue:
          */
-        rq = task_rq_lock(task, &flags);
         if (rq->cfs.load.weight)
                 rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
-        task_rq_unlock(rq, &flags);
 
         return rr_interval;
 }
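The local rq/flags pair and the task_rq_lock()/task_rq_unlock() calls drop out because the runqueue is now handed in by the caller, which presumably already holds its lock. A caller would then look roughly like this (a sketch, not part of this diff):

        /* sketch: sys_sched_rr_get_interval()-style caller */
        struct rq *rq;
        unsigned long flags;
        unsigned int time_slice;

        rq = task_rq_lock(p, &flags);
        time_slice = p->sched_class->get_rr_interval(rq, p);
        task_rq_unlock(rq, &flags);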
@@ -2052,11 +2071,13 @@ static const struct sched_class fair_sched_class = {
 
         .load_balance = load_balance_fair,
         .move_one_task = move_one_task_fair,
+        .rq_online = rq_online_fair,
+        .rq_offline = rq_offline_fair,
 #endif
 
         .set_curr_task = set_curr_task_fair,
         .task_tick = task_tick_fair,
-        .task_new = task_new_fair,
+        .task_fork = task_fork_fair,
 
         .prio_changed = prio_changed_fair,
         .switched_to = switched_to_fair,