Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r-- | kernel/sched/core.c | 880
1 file changed, 756 insertions, 124 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a88f4a485c5e..f5c6635b806c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -296,8 +296,6 @@ __read_mostly int scheduler_running; | |||
296 | */ | 296 | */ |
297 | int sysctl_sched_rt_runtime = 950000; | 297 | int sysctl_sched_rt_runtime = 950000; |
298 | 298 | ||
299 | |||
300 | |||
301 | /* | 299 | /* |
302 | * __task_rq_lock - lock the rq @p resides on. | 300 | * __task_rq_lock - lock the rq @p resides on. |
303 | */ | 301 | */ |
@@ -899,7 +897,9 @@ static inline int normal_prio(struct task_struct *p) | |||
899 | { | 897 | { |
900 | int prio; | 898 | int prio; |
901 | 899 | ||
902 | if (task_has_rt_policy(p)) | 900 | if (task_has_dl_policy(p)) |
901 | prio = MAX_DL_PRIO-1; | ||
902 | else if (task_has_rt_policy(p)) | ||
903 | prio = MAX_RT_PRIO-1 - p->rt_priority; | 903 | prio = MAX_RT_PRIO-1 - p->rt_priority; |
904 | else | 904 | else |
905 | prio = __normal_prio(p); | 905 | prio = __normal_prio(p); |
@@ -945,7 +945,7 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
945 | if (prev_class->switched_from) | 945 | if (prev_class->switched_from) |
946 | prev_class->switched_from(rq, p); | 946 | prev_class->switched_from(rq, p); |
947 | p->sched_class->switched_to(rq, p); | 947 | p->sched_class->switched_to(rq, p); |
948 | } else if (oldprio != p->prio) | 948 | } else if (oldprio != p->prio || dl_task(p)) |
949 | p->sched_class->prio_changed(rq, p, oldprio); | 949 | p->sched_class->prio_changed(rq, p, oldprio); |
950 | } | 950 | } |
951 | 951 | ||
@@ -1108,6 +1108,7 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p) | |||
1108 | if (!cpumask_test_cpu(arg.src_cpu, tsk_cpus_allowed(arg.dst_task))) | 1108 | if (!cpumask_test_cpu(arg.src_cpu, tsk_cpus_allowed(arg.dst_task))) |
1109 | goto out; | 1109 | goto out; |
1110 | 1110 | ||
1111 | trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu); | ||
1111 | ret = stop_two_cpus(arg.dst_cpu, arg.src_cpu, migrate_swap_stop, &arg); | 1112 | ret = stop_two_cpus(arg.dst_cpu, arg.src_cpu, migrate_swap_stop, &arg); |
1112 | 1113 | ||
1113 | out: | 1114 | out: |
@@ -1499,8 +1500,7 @@ void scheduler_ipi(void) | |||
1499 | * TIF_NEED_RESCHED remotely (for the first time) will also send | 1500 | * TIF_NEED_RESCHED remotely (for the first time) will also send |
1500 | * this IPI. | 1501 | * this IPI. |
1501 | */ | 1502 | */ |
1502 | if (tif_need_resched()) | 1503 | preempt_fold_need_resched(); |
1503 | set_preempt_need_resched(); | ||
1504 | 1504 | ||
1505 | if (llist_empty(&this_rq()->wake_list) | 1505 | if (llist_empty(&this_rq()->wake_list) |
1506 | && !tick_nohz_full_cpu(smp_processor_id()) | 1506 | && !tick_nohz_full_cpu(smp_processor_id()) |
@@ -1717,6 +1717,13 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) | |||
1717 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); | 1717 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); |
1718 | #endif | 1718 | #endif |
1719 | 1719 | ||
1720 | RB_CLEAR_NODE(&p->dl.rb_node); | ||
1721 | hrtimer_init(&p->dl.dl_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
1722 | p->dl.dl_runtime = p->dl.runtime = 0; | ||
1723 | p->dl.dl_deadline = p->dl.deadline = 0; | ||
1724 | p->dl.dl_period = 0; | ||
1725 | p->dl.flags = 0; | ||
1726 | |||
1720 | INIT_LIST_HEAD(&p->rt.run_list); | 1727 | INIT_LIST_HEAD(&p->rt.run_list); |
1721 | 1728 | ||
1722 | #ifdef CONFIG_PREEMPT_NOTIFIERS | 1729 | #ifdef CONFIG_PREEMPT_NOTIFIERS |
@@ -1763,12 +1770,34 @@ void set_numabalancing_state(bool enabled) | |||
1763 | numabalancing_enabled = enabled; | 1770 | numabalancing_enabled = enabled; |
1764 | } | 1771 | } |
1765 | #endif /* CONFIG_SCHED_DEBUG */ | 1772 | #endif /* CONFIG_SCHED_DEBUG */ |
1766 | #endif /* CONFIG_NUMA_BALANCING */ | 1773 | |
1774 | #ifdef CONFIG_PROC_SYSCTL | ||
1775 | int sysctl_numa_balancing(struct ctl_table *table, int write, | ||
1776 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
1777 | { | ||
1778 | struct ctl_table t; | ||
1779 | int err; | ||
1780 | int state = numabalancing_enabled; | ||
1781 | |||
1782 | if (write && !capable(CAP_SYS_ADMIN)) | ||
1783 | return -EPERM; | ||
1784 | |||
1785 | t = *table; | ||
1786 | t.data = &state; | ||
1787 | err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); | ||
1788 | if (err < 0) | ||
1789 | return err; | ||
1790 | if (write) | ||
1791 | set_numabalancing_state(state); | ||
1792 | return err; | ||
1793 | } | ||
1794 | #endif | ||
1795 | #endif | ||
1767 | 1796 | ||
1768 | /* | 1797 | /* |
1769 | * fork()/clone()-time setup: | 1798 | * fork()/clone()-time setup: |
1770 | */ | 1799 | */ |
1771 | void sched_fork(unsigned long clone_flags, struct task_struct *p) | 1800 | int sched_fork(unsigned long clone_flags, struct task_struct *p) |
1772 | { | 1801 | { |
1773 | unsigned long flags; | 1802 | unsigned long flags; |
1774 | int cpu = get_cpu(); | 1803 | int cpu = get_cpu(); |
@@ -1790,7 +1819,7 @@ void sched_fork(unsigned long clone_flags, struct task_struct *p) | |||
1790 | * Revert to default priority/policy on fork if requested. | 1819 | * Revert to default priority/policy on fork if requested. |
1791 | */ | 1820 | */ |
1792 | if (unlikely(p->sched_reset_on_fork)) { | 1821 | if (unlikely(p->sched_reset_on_fork)) { |
1793 | if (task_has_rt_policy(p)) { | 1822 | if (task_has_dl_policy(p) || task_has_rt_policy(p)) { |
1794 | p->policy = SCHED_NORMAL; | 1823 | p->policy = SCHED_NORMAL; |
1795 | p->static_prio = NICE_TO_PRIO(0); | 1824 | p->static_prio = NICE_TO_PRIO(0); |
1796 | p->rt_priority = 0; | 1825 | p->rt_priority = 0; |
@@ -1807,8 +1836,14 @@ void sched_fork(unsigned long clone_flags, struct task_struct *p) | |||
1807 | p->sched_reset_on_fork = 0; | 1836 | p->sched_reset_on_fork = 0; |
1808 | } | 1837 | } |
1809 | 1838 | ||
1810 | if (!rt_prio(p->prio)) | 1839 | if (dl_prio(p->prio)) { |
1840 | put_cpu(); | ||
1841 | return -EAGAIN; | ||
1842 | } else if (rt_prio(p->prio)) { | ||
1843 | p->sched_class = &rt_sched_class; | ||
1844 | } else { | ||
1811 | p->sched_class = &fair_sched_class; | 1845 | p->sched_class = &fair_sched_class; |
1846 | } | ||
1812 | 1847 | ||
1813 | if (p->sched_class->task_fork) | 1848 | if (p->sched_class->task_fork) |
1814 | p->sched_class->task_fork(p); | 1849 | p->sched_class->task_fork(p); |
@@ -1834,11 +1869,124 @@ void sched_fork(unsigned long clone_flags, struct task_struct *p) | |||
1834 | init_task_preempt_count(p); | 1869 | init_task_preempt_count(p); |
1835 | #ifdef CONFIG_SMP | 1870 | #ifdef CONFIG_SMP |
1836 | plist_node_init(&p->pushable_tasks, MAX_PRIO); | 1871 | plist_node_init(&p->pushable_tasks, MAX_PRIO); |
1872 | RB_CLEAR_NODE(&p->pushable_dl_tasks); | ||
1837 | #endif | 1873 | #endif |
1838 | 1874 | ||
1839 | put_cpu(); | 1875 | put_cpu(); |
1876 | return 0; | ||
1877 | } | ||
1878 | |||
1879 | unsigned long to_ratio(u64 period, u64 runtime) | ||
1880 | { | ||
1881 | if (runtime == RUNTIME_INF) | ||
1882 | return 1ULL << 20; | ||
1883 | |||
1884 | /* | ||
1885 | * Doing this here saves a lot of checks in all | ||
1886 | * the calling paths, and returning zero seems | ||
1887 | * safe for them anyway. | ||
1888 | */ | ||
1889 | if (period == 0) | ||
1890 | return 0; | ||
1891 | |||
1892 | return div64_u64(runtime << 20, period); | ||
1840 | } | 1893 | } |
1841 | 1894 | ||
1895 | #ifdef CONFIG_SMP | ||
1896 | inline struct dl_bw *dl_bw_of(int i) | ||
1897 | { | ||
1898 | return &cpu_rq(i)->rd->dl_bw; | ||
1899 | } | ||
1900 | |||
1901 | static inline int dl_bw_cpus(int i) | ||
1902 | { | ||
1903 | struct root_domain *rd = cpu_rq(i)->rd; | ||
1904 | int cpus = 0; | ||
1905 | |||
1906 | for_each_cpu_and(i, rd->span, cpu_active_mask) | ||
1907 | cpus++; | ||
1908 | |||
1909 | return cpus; | ||
1910 | } | ||
1911 | #else | ||
1912 | inline struct dl_bw *dl_bw_of(int i) | ||
1913 | { | ||
1914 | return &cpu_rq(i)->dl.dl_bw; | ||
1915 | } | ||
1916 | |||
1917 | static inline int dl_bw_cpus(int i) | ||
1918 | { | ||
1919 | return 1; | ||
1920 | } | ||
1921 | #endif | ||
1922 | |||
1923 | static inline | ||
1924 | void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw) | ||
1925 | { | ||
1926 | dl_b->total_bw -= tsk_bw; | ||
1927 | } | ||
1928 | |||
1929 | static inline | ||
1930 | void __dl_add(struct dl_bw *dl_b, u64 tsk_bw) | ||
1931 | { | ||
1932 | dl_b->total_bw += tsk_bw; | ||
1933 | } | ||
1934 | |||
1935 | static inline | ||
1936 | bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw) | ||
1937 | { | ||
1938 | return dl_b->bw != -1 && | ||
1939 | dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw; | ||
1940 | } | ||
1941 | |||
1942 | /* | ||
1943 | * We must be sure that accepting a new task (or allowing changing the | ||
1944 | * parameters of an existing one) is consistent with the bandwidth | ||
1945 | * constraints. If so, this function also updates the currently | ||
1946 | * allocated bandwidth accordingly to reflect the new situation. | ||
1947 | * | ||
1948 | * This function is called while holding p's rq->lock. | ||
1949 | */ | ||
1950 | static int dl_overflow(struct task_struct *p, int policy, | ||
1951 | const struct sched_attr *attr) | ||
1952 | { | ||
1953 | |||
1954 | struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); | ||
1955 | u64 period = attr->sched_period ?: attr->sched_deadline; | ||
1956 | u64 runtime = attr->sched_runtime; | ||
1957 | u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0; | ||
1958 | int cpus, err = -1; | ||
1959 | |||
1960 | if (new_bw == p->dl.dl_bw) | ||
1961 | return 0; | ||
1962 | |||
1963 | /* | ||
1964 | * Whether a task enters, leaves, or stays -deadline but changes | ||
1965 | * its parameters, we may need to update the total allocated | ||
1966 | * bandwidth of its container accordingly. | ||
1967 | */ | ||
1968 | raw_spin_lock(&dl_b->lock); | ||
1969 | cpus = dl_bw_cpus(task_cpu(p)); | ||
1970 | if (dl_policy(policy) && !task_has_dl_policy(p) && | ||
1971 | !__dl_overflow(dl_b, cpus, 0, new_bw)) { | ||
1972 | __dl_add(dl_b, new_bw); | ||
1973 | err = 0; | ||
1974 | } else if (dl_policy(policy) && task_has_dl_policy(p) && | ||
1975 | !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) { | ||
1976 | __dl_clear(dl_b, p->dl.dl_bw); | ||
1977 | __dl_add(dl_b, new_bw); | ||
1978 | err = 0; | ||
1979 | } else if (!dl_policy(policy) && task_has_dl_policy(p)) { | ||
1980 | __dl_clear(dl_b, p->dl.dl_bw); | ||
1981 | err = 0; | ||
1982 | } | ||
1983 | raw_spin_unlock(&dl_b->lock); | ||
1984 | |||
1985 | return err; | ||
1986 | } | ||
1987 | |||
1988 | extern void init_dl_bw(struct dl_bw *dl_b); | ||
1989 | |||
1842 | /* | 1990 | /* |
1843 | * wake_up_new_task - wake up a newly created task for the first time. | 1991 | * wake_up_new_task - wake up a newly created task for the first time. |
1844 | * | 1992 | * |
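The hunk above introduces the Q20 fixed-point bandwidth accounting (to_ratio(), __dl_add()/__dl_clear(), __dl_overflow() and the dl_overflow() admission test). The following standalone sketch walks through the same arithmetic with made-up runtime/period/cap values; it is illustrative only and not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: the Q20 fixed-point math behind to_ratio() and
 * __dl_overflow(). All figures are made-up example values.
 */
static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
	if (period == 0)
		return 0;
	return (runtime << 20) / period;
}

int main(void)
{
	/* Candidate task: 10 ms of runtime every 100 ms, i.e. ~10% of a CPU. */
	uint64_t new_bw   = to_ratio(100000000ULL, 10000000ULL);
	uint64_t total_bw = 900000;	/* bandwidth already admitted */
	uint64_t cap      = 996147;	/* dl_b->bw, roughly 95% of 1 << 20 */
	int cpus = 1;

	/*
	 * Same condition as __dl_overflow(); old_bw is 0 because the task
	 * is entering -deadline rather than changing its parameters.
	 */
	if (cap * cpus < total_bw + new_bw)
		printf("rejected: root_domain bandwidth would overflow\n");
	else
		printf("admitted: %llu of %llu\n",
		       (unsigned long long)(total_bw + new_bw),
		       (unsigned long long)(cap * cpus));
	return 0;
}

With these numbers the request is rejected: 900000 + 104857 exceeds 996147, mirroring the -EBUSY path added to __sched_setscheduler() later in this patch.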
@@ -2003,6 +2151,9 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
2003 | if (unlikely(prev_state == TASK_DEAD)) { | 2151 | if (unlikely(prev_state == TASK_DEAD)) { |
2004 | task_numa_free(prev); | 2152 | task_numa_free(prev); |
2005 | 2153 | ||
2154 | if (prev->sched_class->task_dead) | ||
2155 | prev->sched_class->task_dead(prev); | ||
2156 | |||
2006 | /* | 2157 | /* |
2007 | * Remove function-return probe instances associated with this | 2158 | * Remove function-return probe instances associated with this |
2008 | * task and put them back on the free list. | 2159 | * task and put them back on the free list. |
@@ -2296,7 +2447,7 @@ void scheduler_tick(void) | |||
2296 | 2447 | ||
2297 | #ifdef CONFIG_SMP | 2448 | #ifdef CONFIG_SMP |
2298 | rq->idle_balance = idle_cpu(cpu); | 2449 | rq->idle_balance = idle_cpu(cpu); |
2299 | trigger_load_balance(rq, cpu); | 2450 | trigger_load_balance(rq); |
2300 | #endif | 2451 | #endif |
2301 | rq_last_tick_reset(rq); | 2452 | rq_last_tick_reset(rq); |
2302 | } | 2453 | } |
@@ -2325,7 +2476,7 @@ u64 scheduler_tick_max_deferment(void) | |||
2325 | if (time_before_eq(next, now)) | 2476 | if (time_before_eq(next, now)) |
2326 | return 0; | 2477 | return 0; |
2327 | 2478 | ||
2328 | return jiffies_to_usecs(next - now) * NSEC_PER_USEC; | 2479 | return jiffies_to_nsecs(next - now); |
2329 | } | 2480 | } |
2330 | #endif | 2481 | #endif |
2331 | 2482 | ||
@@ -2414,10 +2565,10 @@ static inline void schedule_debug(struct task_struct *prev) | |||
2414 | { | 2565 | { |
2415 | /* | 2566 | /* |
2416 | * Test if we are atomic. Since do_exit() needs to call into | 2567 | * Test if we are atomic. Since do_exit() needs to call into |
2417 | * schedule() atomically, we ignore that path for now. | 2568 | * schedule() atomically, we ignore that path. Otherwise whine |
2418 | * Otherwise, whine if we are scheduling when we should not be. | 2569 | * if we are scheduling when we should not. |
2419 | */ | 2570 | */ |
2420 | if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) | 2571 | if (unlikely(in_atomic_preempt_off() && prev->state != TASK_DEAD)) |
2421 | __schedule_bug(prev); | 2572 | __schedule_bug(prev); |
2422 | rcu_sleep_check(); | 2573 | rcu_sleep_check(); |
2423 | 2574 | ||
@@ -2761,11 +2912,11 @@ EXPORT_SYMBOL(sleep_on_timeout); | |||
2761 | */ | 2912 | */ |
2762 | void rt_mutex_setprio(struct task_struct *p, int prio) | 2913 | void rt_mutex_setprio(struct task_struct *p, int prio) |
2763 | { | 2914 | { |
2764 | int oldprio, on_rq, running; | 2915 | int oldprio, on_rq, running, enqueue_flag = 0; |
2765 | struct rq *rq; | 2916 | struct rq *rq; |
2766 | const struct sched_class *prev_class; | 2917 | const struct sched_class *prev_class; |
2767 | 2918 | ||
2768 | BUG_ON(prio < 0 || prio > MAX_PRIO); | 2919 | BUG_ON(prio > MAX_PRIO); |
2769 | 2920 | ||
2770 | rq = __task_rq_lock(p); | 2921 | rq = __task_rq_lock(p); |
2771 | 2922 | ||
@@ -2788,6 +2939,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
2788 | } | 2939 | } |
2789 | 2940 | ||
2790 | trace_sched_pi_setprio(p, prio); | 2941 | trace_sched_pi_setprio(p, prio); |
2942 | p->pi_top_task = rt_mutex_get_top_task(p); | ||
2791 | oldprio = p->prio; | 2943 | oldprio = p->prio; |
2792 | prev_class = p->sched_class; | 2944 | prev_class = p->sched_class; |
2793 | on_rq = p->on_rq; | 2945 | on_rq = p->on_rq; |
@@ -2797,23 +2949,49 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
2797 | if (running) | 2949 | if (running) |
2798 | p->sched_class->put_prev_task(rq, p); | 2950 | p->sched_class->put_prev_task(rq, p); |
2799 | 2951 | ||
2800 | if (rt_prio(prio)) | 2952 | /* |
2953 | * Boosting conditions are: | ||
2954 | * 1. -rt task is running and holds mutex A | ||
2955 | * --> -dl task blocks on mutex A | ||
2956 | * | ||
2957 | * 2. -dl task is running and holds mutex A | ||
2958 | * --> -dl task blocks on mutex A and could preempt the | ||
2959 | * running task | ||
2960 | */ | ||
2961 | if (dl_prio(prio)) { | ||
2962 | if (!dl_prio(p->normal_prio) || (p->pi_top_task && | ||
2963 | dl_entity_preempt(&p->pi_top_task->dl, &p->dl))) { | ||
2964 | p->dl.dl_boosted = 1; | ||
2965 | p->dl.dl_throttled = 0; | ||
2966 | enqueue_flag = ENQUEUE_REPLENISH; | ||
2967 | } else | ||
2968 | p->dl.dl_boosted = 0; | ||
2969 | p->sched_class = &dl_sched_class; | ||
2970 | } else if (rt_prio(prio)) { | ||
2971 | if (dl_prio(oldprio)) | ||
2972 | p->dl.dl_boosted = 0; | ||
2973 | if (oldprio < prio) | ||
2974 | enqueue_flag = ENQUEUE_HEAD; | ||
2801 | p->sched_class = &rt_sched_class; | 2975 | p->sched_class = &rt_sched_class; |
2802 | else | 2976 | } else { |
2977 | if (dl_prio(oldprio)) | ||
2978 | p->dl.dl_boosted = 0; | ||
2803 | p->sched_class = &fair_sched_class; | 2979 | p->sched_class = &fair_sched_class; |
2980 | } | ||
2804 | 2981 | ||
2805 | p->prio = prio; | 2982 | p->prio = prio; |
2806 | 2983 | ||
2807 | if (running) | 2984 | if (running) |
2808 | p->sched_class->set_curr_task(rq); | 2985 | p->sched_class->set_curr_task(rq); |
2809 | if (on_rq) | 2986 | if (on_rq) |
2810 | enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); | 2987 | enqueue_task(rq, p, enqueue_flag); |
2811 | 2988 | ||
2812 | check_class_changed(rq, p, prev_class, oldprio); | 2989 | check_class_changed(rq, p, prev_class, oldprio); |
2813 | out_unlock: | 2990 | out_unlock: |
2814 | __task_rq_unlock(rq); | 2991 | __task_rq_unlock(rq); |
2815 | } | 2992 | } |
2816 | #endif | 2993 | #endif |
2994 | |||
2817 | void set_user_nice(struct task_struct *p, long nice) | 2995 | void set_user_nice(struct task_struct *p, long nice) |
2818 | { | 2996 | { |
2819 | int old_prio, delta, on_rq; | 2997 | int old_prio, delta, on_rq; |
@@ -2831,9 +3009,9 @@ void set_user_nice(struct task_struct *p, long nice) | |||
2831 | * The RT priorities are set via sched_setscheduler(), but we still | 3009 | * The RT priorities are set via sched_setscheduler(), but we still |
2832 | * allow the 'normal' nice value to be set - but as expected | 3010 | * allow the 'normal' nice value to be set - but as expected |
2833 | * it wont have any effect on scheduling until the task is | 3011 | * it wont have any effect on scheduling until the task is |
2834 | * SCHED_FIFO/SCHED_RR: | 3012 | * SCHED_DEADLINE, SCHED_FIFO or SCHED_RR: |
2835 | */ | 3013 | */ |
2836 | if (task_has_rt_policy(p)) { | 3014 | if (task_has_dl_policy(p) || task_has_rt_policy(p)) { |
2837 | p->static_prio = NICE_TO_PRIO(nice); | 3015 | p->static_prio = NICE_TO_PRIO(nice); |
2838 | goto out_unlock; | 3016 | goto out_unlock; |
2839 | } | 3017 | } |
@@ -2988,22 +3166,95 @@ static struct task_struct *find_process_by_pid(pid_t pid) | |||
2988 | return pid ? find_task_by_vpid(pid) : current; | 3166 | return pid ? find_task_by_vpid(pid) : current; |
2989 | } | 3167 | } |
2990 | 3168 | ||
2991 | /* Actually do priority change: must hold rq lock. */ | 3169 | /* |
3170 | * This function initializes the sched_dl_entity of a newly becoming | ||
3171 | * SCHED_DEADLINE task. | ||
3172 | * | ||
3173 | * Only the static values are considered here; the actual runtime and the | ||
3174 | * absolute deadline will be properly calculated when the task is enqueued | ||
3175 | * for the first time with its new policy. | ||
3176 | */ | ||
2992 | static void | 3177 | static void |
2993 | __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) | 3178 | __setparam_dl(struct task_struct *p, const struct sched_attr *attr) |
2994 | { | 3179 | { |
3180 | struct sched_dl_entity *dl_se = &p->dl; | ||
3181 | |||
3182 | init_dl_task_timer(dl_se); | ||
3183 | dl_se->dl_runtime = attr->sched_runtime; | ||
3184 | dl_se->dl_deadline = attr->sched_deadline; | ||
3185 | dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline; | ||
3186 | dl_se->flags = attr->sched_flags; | ||
3187 | dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime); | ||
3188 | dl_se->dl_throttled = 0; | ||
3189 | dl_se->dl_new = 1; | ||
3190 | } | ||
3191 | |||
3192 | /* Actually do priority change: must hold pi & rq lock. */ | ||
3193 | static void __setscheduler(struct rq *rq, struct task_struct *p, | ||
3194 | const struct sched_attr *attr) | ||
3195 | { | ||
3196 | int policy = attr->sched_policy; | ||
3197 | |||
3198 | if (policy == -1) /* setparam */ | ||
3199 | policy = p->policy; | ||
3200 | |||
2995 | p->policy = policy; | 3201 | p->policy = policy; |
2996 | p->rt_priority = prio; | 3202 | |
3203 | if (dl_policy(policy)) | ||
3204 | __setparam_dl(p, attr); | ||
3205 | else if (fair_policy(policy)) | ||
3206 | p->static_prio = NICE_TO_PRIO(attr->sched_nice); | ||
3207 | |||
3208 | /* | ||
3209 | * __sched_setscheduler() ensures attr->sched_priority == 0 when | ||
3210 | * !rt_policy. Always setting this ensures that things like | ||
3211 | * getparam()/getattr() don't report silly values for !rt tasks. | ||
3212 | */ | ||
3213 | p->rt_priority = attr->sched_priority; | ||
3214 | |||
2997 | p->normal_prio = normal_prio(p); | 3215 | p->normal_prio = normal_prio(p); |
2998 | /* we are holding p->pi_lock already */ | ||
2999 | p->prio = rt_mutex_getprio(p); | 3216 | p->prio = rt_mutex_getprio(p); |
3000 | if (rt_prio(p->prio)) | 3217 | |
3218 | if (dl_prio(p->prio)) | ||
3219 | p->sched_class = &dl_sched_class; | ||
3220 | else if (rt_prio(p->prio)) | ||
3001 | p->sched_class = &rt_sched_class; | 3221 | p->sched_class = &rt_sched_class; |
3002 | else | 3222 | else |
3003 | p->sched_class = &fair_sched_class; | 3223 | p->sched_class = &fair_sched_class; |
3224 | |||
3004 | set_load_weight(p); | 3225 | set_load_weight(p); |
3005 | } | 3226 | } |
3006 | 3227 | ||
3228 | static void | ||
3229 | __getparam_dl(struct task_struct *p, struct sched_attr *attr) | ||
3230 | { | ||
3231 | struct sched_dl_entity *dl_se = &p->dl; | ||
3232 | |||
3233 | attr->sched_priority = p->rt_priority; | ||
3234 | attr->sched_runtime = dl_se->dl_runtime; | ||
3235 | attr->sched_deadline = dl_se->dl_deadline; | ||
3236 | attr->sched_period = dl_se->dl_period; | ||
3237 | attr->sched_flags = dl_se->flags; | ||
3238 | } | ||
3239 | |||
3240 | /* | ||
3241 | * This function validates the new parameters of a -deadline task. | ||
3242 | * We require the deadline to be non-zero and greater than or | ||
3243 | * equal to the runtime, and the period to be either zero or | ||
3244 | * greater than or equal to the deadline. Furthermore, we have to | ||
3245 | * be sure that user parameters are above the internal resolution | ||
3246 | * (1us); we check sched_runtime only since it is always the smaller one. | ||
3247 | */ | ||
3248 | static bool | ||
3249 | __checkparam_dl(const struct sched_attr *attr) | ||
3250 | { | ||
3251 | return attr && attr->sched_deadline != 0 && | ||
3252 | (attr->sched_period == 0 || | ||
3253 | (s64)(attr->sched_period - attr->sched_deadline) >= 0) && | ||
3254 | (s64)(attr->sched_deadline - attr->sched_runtime ) >= 0 && | ||
3255 | attr->sched_runtime >= (2 << (DL_SCALE - 1)); | ||
3256 | } | ||
3257 | |||
3007 | /* | 3258 | /* |
3008 | * check the target process has a UID that matches the current process's | 3259 | * check the target process has a UID that matches the current process's |
3009 | */ | 3260 | */ |
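For reference, here is a tiny userspace-style re-statement of the __checkparam_dl() constraints added above (non-zero deadline, runtime <= deadline <= period, and a roughly 1us floor on the runtime). The DL_SCALE value and the example numbers are assumptions for illustration, not taken from this file.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative only: mirrors __checkparam_dl(). DL_SCALE is assumed to be
 * 10 here (it is defined elsewhere in the series), giving a 1024 ns floor.
 */
static bool checkparam_dl(uint64_t runtime, uint64_t deadline, uint64_t period)
{
	return deadline != 0 &&
	       (period == 0 || period >= deadline) &&
	       deadline >= runtime &&
	       runtime >= (2ULL << (10 - 1));
}

int main(void)
{
	/* 10 ms every 100 ms with a 30 ms relative deadline: accepted. */
	printf("%d\n", checkparam_dl(10000000, 30000000, 100000000));
	/* Runtime larger than the deadline: rejected. */
	printf("%d\n", checkparam_dl(50000000, 30000000, 100000000));
	return 0;
}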
@@ -3020,10 +3271,12 @@ static bool check_same_owner(struct task_struct *p) | |||
3020 | return match; | 3271 | return match; |
3021 | } | 3272 | } |
3022 | 3273 | ||
3023 | static int __sched_setscheduler(struct task_struct *p, int policy, | 3274 | static int __sched_setscheduler(struct task_struct *p, |
3024 | const struct sched_param *param, bool user) | 3275 | const struct sched_attr *attr, |
3276 | bool user) | ||
3025 | { | 3277 | { |
3026 | int retval, oldprio, oldpolicy = -1, on_rq, running; | 3278 | int retval, oldprio, oldpolicy = -1, on_rq, running; |
3279 | int policy = attr->sched_policy; | ||
3027 | unsigned long flags; | 3280 | unsigned long flags; |
3028 | const struct sched_class *prev_class; | 3281 | const struct sched_class *prev_class; |
3029 | struct rq *rq; | 3282 | struct rq *rq; |
@@ -3037,31 +3290,40 @@ recheck: | |||
3037 | reset_on_fork = p->sched_reset_on_fork; | 3290 | reset_on_fork = p->sched_reset_on_fork; |
3038 | policy = oldpolicy = p->policy; | 3291 | policy = oldpolicy = p->policy; |
3039 | } else { | 3292 | } else { |
3040 | reset_on_fork = !!(policy & SCHED_RESET_ON_FORK); | 3293 | reset_on_fork = !!(attr->sched_flags & SCHED_FLAG_RESET_ON_FORK); |
3041 | policy &= ~SCHED_RESET_ON_FORK; | ||
3042 | 3294 | ||
3043 | if (policy != SCHED_FIFO && policy != SCHED_RR && | 3295 | if (policy != SCHED_DEADLINE && |
3296 | policy != SCHED_FIFO && policy != SCHED_RR && | ||
3044 | policy != SCHED_NORMAL && policy != SCHED_BATCH && | 3297 | policy != SCHED_NORMAL && policy != SCHED_BATCH && |
3045 | policy != SCHED_IDLE) | 3298 | policy != SCHED_IDLE) |
3046 | return -EINVAL; | 3299 | return -EINVAL; |
3047 | } | 3300 | } |
3048 | 3301 | ||
3302 | if (attr->sched_flags & ~(SCHED_FLAG_RESET_ON_FORK)) | ||
3303 | return -EINVAL; | ||
3304 | |||
3049 | /* | 3305 | /* |
3050 | * Valid priorities for SCHED_FIFO and SCHED_RR are | 3306 | * Valid priorities for SCHED_FIFO and SCHED_RR are |
3051 | * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL, | 3307 | * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL, |
3052 | * SCHED_BATCH and SCHED_IDLE is 0. | 3308 | * SCHED_BATCH and SCHED_IDLE is 0. |
3053 | */ | 3309 | */ |
3054 | if (param->sched_priority < 0 || | 3310 | if ((p->mm && attr->sched_priority > MAX_USER_RT_PRIO-1) || |
3055 | (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) || | 3311 | (!p->mm && attr->sched_priority > MAX_RT_PRIO-1)) |
3056 | (!p->mm && param->sched_priority > MAX_RT_PRIO-1)) | ||
3057 | return -EINVAL; | 3312 | return -EINVAL; |
3058 | if (rt_policy(policy) != (param->sched_priority != 0)) | 3313 | if ((dl_policy(policy) && !__checkparam_dl(attr)) || |
3314 | (rt_policy(policy) != (attr->sched_priority != 0))) | ||
3059 | return -EINVAL; | 3315 | return -EINVAL; |
3060 | 3316 | ||
3061 | /* | 3317 | /* |
3062 | * Allow unprivileged RT tasks to decrease priority: | 3318 | * Allow unprivileged RT tasks to decrease priority: |
3063 | */ | 3319 | */ |
3064 | if (user && !capable(CAP_SYS_NICE)) { | 3320 | if (user && !capable(CAP_SYS_NICE)) { |
3321 | if (fair_policy(policy)) { | ||
3322 | if (attr->sched_nice < TASK_NICE(p) && | ||
3323 | !can_nice(p, attr->sched_nice)) | ||
3324 | return -EPERM; | ||
3325 | } | ||
3326 | |||
3065 | if (rt_policy(policy)) { | 3327 | if (rt_policy(policy)) { |
3066 | unsigned long rlim_rtprio = | 3328 | unsigned long rlim_rtprio = |
3067 | task_rlimit(p, RLIMIT_RTPRIO); | 3329 | task_rlimit(p, RLIMIT_RTPRIO); |
@@ -3071,11 +3333,20 @@ recheck: | |||
3071 | return -EPERM; | 3333 | return -EPERM; |
3072 | 3334 | ||
3073 | /* can't increase priority */ | 3335 | /* can't increase priority */ |
3074 | if (param->sched_priority > p->rt_priority && | 3336 | if (attr->sched_priority > p->rt_priority && |
3075 | param->sched_priority > rlim_rtprio) | 3337 | attr->sched_priority > rlim_rtprio) |
3076 | return -EPERM; | 3338 | return -EPERM; |
3077 | } | 3339 | } |
3078 | 3340 | ||
3341 | /* | ||
3342 | * Can't set/change SCHED_DEADLINE policy at all for now | ||
3343 | * (safest behavior); in the future we would like to allow | ||
3344 | * unprivileged DL tasks to increase their relative deadline | ||
3345 | * or reduce their runtime (both ways reducing utilization) | ||
3346 | */ | ||
3347 | if (dl_policy(policy)) | ||
3348 | return -EPERM; | ||
3349 | |||
3079 | /* | 3350 | /* |
3080 | * Treat SCHED_IDLE as nice 20. Only allow a switch to | 3351 | * Treat SCHED_IDLE as nice 20. Only allow a switch to |
3081 | * SCHED_NORMAL if the RLIMIT_NICE would normally permit it. | 3352 | * SCHED_NORMAL if the RLIMIT_NICE would normally permit it. |
@@ -3120,14 +3391,21 @@ recheck: | |||
3120 | /* | 3391 | /* |
3121 | * If not changing anything there's no need to proceed further: | 3392 | * If not changing anything there's no need to proceed further: |
3122 | */ | 3393 | */ |
3123 | if (unlikely(policy == p->policy && (!rt_policy(policy) || | 3394 | if (unlikely(policy == p->policy)) { |
3124 | param->sched_priority == p->rt_priority))) { | 3395 | if (fair_policy(policy) && attr->sched_nice != TASK_NICE(p)) |
3396 | goto change; | ||
3397 | if (rt_policy(policy) && attr->sched_priority != p->rt_priority) | ||
3398 | goto change; | ||
3399 | if (dl_policy(policy)) | ||
3400 | goto change; | ||
3401 | |||
3125 | task_rq_unlock(rq, p, &flags); | 3402 | task_rq_unlock(rq, p, &flags); |
3126 | return 0; | 3403 | return 0; |
3127 | } | 3404 | } |
3405 | change: | ||
3128 | 3406 | ||
3129 | #ifdef CONFIG_RT_GROUP_SCHED | ||
3130 | if (user) { | 3407 | if (user) { |
3408 | #ifdef CONFIG_RT_GROUP_SCHED | ||
3131 | /* | 3409 | /* |
3132 | * Do not allow realtime tasks into groups that have no runtime | 3410 | * Do not allow realtime tasks into groups that have no runtime |
3133 | * assigned. | 3411 | * assigned. |
@@ -3138,8 +3416,24 @@ recheck: | |||
3138 | task_rq_unlock(rq, p, &flags); | 3416 | task_rq_unlock(rq, p, &flags); |
3139 | return -EPERM; | 3417 | return -EPERM; |
3140 | } | 3418 | } |
3141 | } | ||
3142 | #endif | 3419 | #endif |
3420 | #ifdef CONFIG_SMP | ||
3421 | if (dl_bandwidth_enabled() && dl_policy(policy)) { | ||
3422 | cpumask_t *span = rq->rd->span; | ||
3423 | |||
3424 | /* | ||
3425 | * Don't allow tasks with an affinity mask smaller than | ||
3426 | * the entire root_domain to become SCHED_DEADLINE. We | ||
3427 | * will also fail if there's no bandwidth available. | ||
3428 | */ | ||
3429 | if (!cpumask_subset(span, &p->cpus_allowed) || | ||
3430 | rq->rd->dl_bw.bw == 0) { | ||
3431 | task_rq_unlock(rq, p, &flags); | ||
3432 | return -EPERM; | ||
3433 | } | ||
3434 | } | ||
3435 | #endif | ||
3436 | } | ||
3143 | 3437 | ||
3144 | /* recheck policy now with rq lock held */ | 3438 | /* recheck policy now with rq lock held */ |
3145 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { | 3439 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { |
@@ -3147,6 +3441,17 @@ recheck: | |||
3147 | task_rq_unlock(rq, p, &flags); | 3441 | task_rq_unlock(rq, p, &flags); |
3148 | goto recheck; | 3442 | goto recheck; |
3149 | } | 3443 | } |
3444 | |||
3445 | /* | ||
3446 | * If setscheduling to SCHED_DEADLINE (or changing the parameters | ||
3447 | * of a SCHED_DEADLINE task) we need to check if enough bandwidth | ||
3448 | * is available. | ||
3449 | */ | ||
3450 | if ((dl_policy(policy) || dl_task(p)) && dl_overflow(p, policy, attr)) { | ||
3451 | task_rq_unlock(rq, p, &flags); | ||
3452 | return -EBUSY; | ||
3453 | } | ||
3454 | |||
3150 | on_rq = p->on_rq; | 3455 | on_rq = p->on_rq; |
3151 | running = task_current(rq, p); | 3456 | running = task_current(rq, p); |
3152 | if (on_rq) | 3457 | if (on_rq) |
@@ -3158,7 +3463,7 @@ recheck: | |||
3158 | 3463 | ||
3159 | oldprio = p->prio; | 3464 | oldprio = p->prio; |
3160 | prev_class = p->sched_class; | 3465 | prev_class = p->sched_class; |
3161 | __setscheduler(rq, p, policy, param->sched_priority); | 3466 | __setscheduler(rq, p, attr); |
3162 | 3467 | ||
3163 | if (running) | 3468 | if (running) |
3164 | p->sched_class->set_curr_task(rq); | 3469 | p->sched_class->set_curr_task(rq); |
@@ -3173,6 +3478,26 @@ recheck: | |||
3173 | return 0; | 3478 | return 0; |
3174 | } | 3479 | } |
3175 | 3480 | ||
3481 | static int _sched_setscheduler(struct task_struct *p, int policy, | ||
3482 | const struct sched_param *param, bool check) | ||
3483 | { | ||
3484 | struct sched_attr attr = { | ||
3485 | .sched_policy = policy, | ||
3486 | .sched_priority = param->sched_priority, | ||
3487 | .sched_nice = PRIO_TO_NICE(p->static_prio), | ||
3488 | }; | ||
3489 | |||
3490 | /* | ||
3491 | * Fixup the legacy SCHED_RESET_ON_FORK hack | ||
3492 | */ | ||
3493 | if (policy & SCHED_RESET_ON_FORK) { | ||
3494 | attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; | ||
3495 | policy &= ~SCHED_RESET_ON_FORK; | ||
3496 | attr.sched_policy = policy; | ||
3497 | } | ||
3498 | |||
3499 | return __sched_setscheduler(p, &attr, check); | ||
3500 | } | ||
3176 | /** | 3501 | /** |
3177 | * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. | 3502 | * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. |
3178 | * @p: the task in question. | 3503 | * @p: the task in question. |
@@ -3186,10 +3511,16 @@ recheck: | |||
3186 | int sched_setscheduler(struct task_struct *p, int policy, | 3511 | int sched_setscheduler(struct task_struct *p, int policy, |
3187 | const struct sched_param *param) | 3512 | const struct sched_param *param) |
3188 | { | 3513 | { |
3189 | return __sched_setscheduler(p, policy, param, true); | 3514 | return _sched_setscheduler(p, policy, param, true); |
3190 | } | 3515 | } |
3191 | EXPORT_SYMBOL_GPL(sched_setscheduler); | 3516 | EXPORT_SYMBOL_GPL(sched_setscheduler); |
3192 | 3517 | ||
3518 | int sched_setattr(struct task_struct *p, const struct sched_attr *attr) | ||
3519 | { | ||
3520 | return __sched_setscheduler(p, attr, true); | ||
3521 | } | ||
3522 | EXPORT_SYMBOL_GPL(sched_setattr); | ||
3523 | |||
3193 | /** | 3524 | /** |
3194 | * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. | 3525 | * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. |
3195 | * @p: the task in question. | 3526 | * @p: the task in question. |
@@ -3206,7 +3537,7 @@ EXPORT_SYMBOL_GPL(sched_setscheduler); | |||
3206 | int sched_setscheduler_nocheck(struct task_struct *p, int policy, | 3537 | int sched_setscheduler_nocheck(struct task_struct *p, int policy, |
3207 | const struct sched_param *param) | 3538 | const struct sched_param *param) |
3208 | { | 3539 | { |
3209 | return __sched_setscheduler(p, policy, param, false); | 3540 | return _sched_setscheduler(p, policy, param, false); |
3210 | } | 3541 | } |
3211 | 3542 | ||
3212 | static int | 3543 | static int |
@@ -3231,6 +3562,79 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) | |||
3231 | return retval; | 3562 | return retval; |
3232 | } | 3563 | } |
3233 | 3564 | ||
3565 | /* | ||
3566 | * Mimics kernel/events/core.c perf_copy_attr(). | ||
3567 | */ | ||
3568 | static int sched_copy_attr(struct sched_attr __user *uattr, | ||
3569 | struct sched_attr *attr) | ||
3570 | { | ||
3571 | u32 size; | ||
3572 | int ret; | ||
3573 | |||
3574 | if (!access_ok(VERIFY_WRITE, uattr, SCHED_ATTR_SIZE_VER0)) | ||
3575 | return -EFAULT; | ||
3576 | |||
3577 | /* | ||
3578 | * zero the full structure, so that a short copy will be nice. | ||
3579 | */ | ||
3580 | memset(attr, 0, sizeof(*attr)); | ||
3581 | |||
3582 | ret = get_user(size, &uattr->size); | ||
3583 | if (ret) | ||
3584 | return ret; | ||
3585 | |||
3586 | if (size > PAGE_SIZE) /* silly large */ | ||
3587 | goto err_size; | ||
3588 | |||
3589 | if (!size) /* abi compat */ | ||
3590 | size = SCHED_ATTR_SIZE_VER0; | ||
3591 | |||
3592 | if (size < SCHED_ATTR_SIZE_VER0) | ||
3593 | goto err_size; | ||
3594 | |||
3595 | /* | ||
3596 | * If we're handed a bigger struct than we know of, | ||
3597 | * ensure all the unknown bits are 0 - i.e. new | ||
3598 | * user-space does not rely on any kernel feature | ||
3599 | * extensions we dont know about yet. | ||
3600 | */ | ||
3601 | if (size > sizeof(*attr)) { | ||
3602 | unsigned char __user *addr; | ||
3603 | unsigned char __user *end; | ||
3604 | unsigned char val; | ||
3605 | |||
3606 | addr = (void __user *)uattr + sizeof(*attr); | ||
3607 | end = (void __user *)uattr + size; | ||
3608 | |||
3609 | for (; addr < end; addr++) { | ||
3610 | ret = get_user(val, addr); | ||
3611 | if (ret) | ||
3612 | return ret; | ||
3613 | if (val) | ||
3614 | goto err_size; | ||
3615 | } | ||
3616 | size = sizeof(*attr); | ||
3617 | } | ||
3618 | |||
3619 | ret = copy_from_user(attr, uattr, size); | ||
3620 | if (ret) | ||
3621 | return -EFAULT; | ||
3622 | |||
3623 | /* | ||
3624 | * XXX: do we want to be lenient like existing syscalls; or do we want | ||
3625 | * to be strict and return an error on out-of-bounds values? | ||
3626 | */ | ||
3627 | attr->sched_nice = clamp(attr->sched_nice, -20, 19); | ||
3628 | |||
3629 | out: | ||
3630 | return ret; | ||
3631 | |||
3632 | err_size: | ||
3633 | put_user(sizeof(*attr), &uattr->size); | ||
3634 | ret = -E2BIG; | ||
3635 | goto out; | ||
3636 | } | ||
3637 | |||
3234 | /** | 3638 | /** |
3235 | * sys_sched_setscheduler - set/change the scheduler policy and RT priority | 3639 | * sys_sched_setscheduler - set/change the scheduler policy and RT priority |
3236 | * @pid: the pid in question. | 3640 | * @pid: the pid in question. |
@@ -3262,6 +3666,34 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) | |||
3262 | } | 3666 | } |
3263 | 3667 | ||
3264 | /** | 3668 | /** |
3669 | * sys_sched_setattr - same as above, but with extended sched_attr | ||
3670 | * @pid: the pid in question. | ||
3671 | * @uattr: structure containing the extended parameters. | ||
3672 | */ | ||
3673 | SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, | ||
3674 | unsigned int, flags) | ||
3675 | { | ||
3676 | struct sched_attr attr; | ||
3677 | struct task_struct *p; | ||
3678 | int retval; | ||
3679 | |||
3680 | if (!uattr || pid < 0 || flags) | ||
3681 | return -EINVAL; | ||
3682 | |||
3683 | if (sched_copy_attr(uattr, &attr)) | ||
3684 | return -EFAULT; | ||
3685 | |||
3686 | rcu_read_lock(); | ||
3687 | retval = -ESRCH; | ||
3688 | p = find_process_by_pid(pid); | ||
3689 | if (p != NULL) | ||
3690 | retval = sched_setattr(p, &attr); | ||
3691 | rcu_read_unlock(); | ||
3692 | |||
3693 | return retval; | ||
3694 | } | ||
3695 | |||
3696 | /** | ||
3265 | * sys_sched_getscheduler - get the policy (scheduling class) of a thread | 3697 | * sys_sched_getscheduler - get the policy (scheduling class) of a thread |
3266 | * @pid: the pid in question. | 3698 | * @pid: the pid in question. |
3267 | * | 3699 | * |
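The new sys_sched_setattr() entry point has no glibc wrapper, so callers go through syscall(2). Below is a hedged userspace sketch: the struct layout follows the sched_attr introduced elsewhere in this series (declared locally because contemporary libc headers do not ship it), __NR_sched_setattr must be provided by the installed kernel headers, and the runtime/deadline/period values are arbitrary examples.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef SCHED_DEADLINE
#define SCHED_DEADLINE	6
#endif

struct sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;		/* SCHED_NORMAL, SCHED_BATCH */
	uint32_t sched_priority;	/* SCHED_FIFO, SCHED_RR */
	uint64_t sched_runtime;		/* SCHED_DEADLINE, all in ns */
	uint64_t sched_deadline;
	uint64_t sched_period;
};

int main(void)
{
	struct sched_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.sched_policy   = SCHED_DEADLINE;
	attr.sched_runtime  =  10 * 1000 * 1000;	/*  10 ms */
	attr.sched_deadline =  30 * 1000 * 1000;	/*  30 ms */
	attr.sched_period   = 100 * 1000 * 1000;	/* 100 ms */

	/* pid 0 means the calling thread; the flags argument must be 0. */
	if (syscall(__NR_sched_setattr, 0, &attr, 0) < 0) {
		perror("sched_setattr");	/* e.g. -EPERM without CAP_SYS_NICE */
		return 1;
	}
	printf("now running as SCHED_DEADLINE\n");
	return 0;
}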
@@ -3316,6 +3748,10 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) | |||
3316 | if (retval) | 3748 | if (retval) |
3317 | goto out_unlock; | 3749 | goto out_unlock; |
3318 | 3750 | ||
3751 | if (task_has_dl_policy(p)) { | ||
3752 | retval = -EINVAL; | ||
3753 | goto out_unlock; | ||
3754 | } | ||
3319 | lp.sched_priority = p->rt_priority; | 3755 | lp.sched_priority = p->rt_priority; |
3320 | rcu_read_unlock(); | 3756 | rcu_read_unlock(); |
3321 | 3757 | ||
@@ -3331,6 +3767,96 @@ out_unlock: | |||
3331 | return retval; | 3767 | return retval; |
3332 | } | 3768 | } |
3333 | 3769 | ||
3770 | static int sched_read_attr(struct sched_attr __user *uattr, | ||
3771 | struct sched_attr *attr, | ||
3772 | unsigned int usize) | ||
3773 | { | ||
3774 | int ret; | ||
3775 | |||
3776 | if (!access_ok(VERIFY_WRITE, uattr, usize)) | ||
3777 | return -EFAULT; | ||
3778 | |||
3779 | /* | ||
3780 | * If we're handed a smaller struct than we know of, | ||
3781 | * ensure all the unknown bits are 0 - i.e. old | ||
3782 | * user-space does not get incomplete information. | ||
3783 | */ | ||
3784 | if (usize < sizeof(*attr)) { | ||
3785 | unsigned char *addr; | ||
3786 | unsigned char *end; | ||
3787 | |||
3788 | addr = (void *)attr + usize; | ||
3789 | end = (void *)attr + sizeof(*attr); | ||
3790 | |||
3791 | for (; addr < end; addr++) { | ||
3792 | if (*addr) | ||
3793 | goto err_size; | ||
3794 | } | ||
3795 | |||
3796 | attr->size = usize; | ||
3797 | } | ||
3798 | |||
3799 | ret = copy_to_user(uattr, attr, attr->size); | ||
3800 | if (ret) | ||
3801 | return -EFAULT; | ||
3802 | |||
3803 | out: | ||
3804 | return ret; | ||
3805 | |||
3806 | err_size: | ||
3807 | ret = -E2BIG; | ||
3808 | goto out; | ||
3809 | } | ||
3810 | |||
3811 | /** | ||
3812 | * sys_sched_getattr - similar to sched_getparam, but with sched_attr | ||
3813 | * @pid: the pid in question. | ||
3814 | * @uattr: structure containing the extended parameters. | ||
3815 | * @size: sizeof(attr) for fwd/bwd comp. | ||
3816 | */ | ||
3817 | SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, | ||
3818 | unsigned int, size, unsigned int, flags) | ||
3819 | { | ||
3820 | struct sched_attr attr = { | ||
3821 | .size = sizeof(struct sched_attr), | ||
3822 | }; | ||
3823 | struct task_struct *p; | ||
3824 | int retval; | ||
3825 | |||
3826 | if (!uattr || pid < 0 || size > PAGE_SIZE || | ||
3827 | size < SCHED_ATTR_SIZE_VER0 || flags) | ||
3828 | return -EINVAL; | ||
3829 | |||
3830 | rcu_read_lock(); | ||
3831 | p = find_process_by_pid(pid); | ||
3832 | retval = -ESRCH; | ||
3833 | if (!p) | ||
3834 | goto out_unlock; | ||
3835 | |||
3836 | retval = security_task_getscheduler(p); | ||
3837 | if (retval) | ||
3838 | goto out_unlock; | ||
3839 | |||
3840 | attr.sched_policy = p->policy; | ||
3841 | if (p->sched_reset_on_fork) | ||
3842 | attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; | ||
3843 | if (task_has_dl_policy(p)) | ||
3844 | __getparam_dl(p, &attr); | ||
3845 | else if (task_has_rt_policy(p)) | ||
3846 | attr.sched_priority = p->rt_priority; | ||
3847 | else | ||
3848 | attr.sched_nice = TASK_NICE(p); | ||
3849 | |||
3850 | rcu_read_unlock(); | ||
3851 | |||
3852 | retval = sched_read_attr(uattr, &attr, size); | ||
3853 | return retval; | ||
3854 | |||
3855 | out_unlock: | ||
3856 | rcu_read_unlock(); | ||
3857 | return retval; | ||
3858 | } | ||
3859 | |||
3334 | long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | 3860 | long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) |
3335 | { | 3861 | { |
3336 | cpumask_var_t cpus_allowed, new_mask; | 3862 | cpumask_var_t cpus_allowed, new_mask; |
@@ -3375,8 +3901,26 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
3375 | if (retval) | 3901 | if (retval) |
3376 | goto out_unlock; | 3902 | goto out_unlock; |
3377 | 3903 | ||
3904 | |||
3378 | cpuset_cpus_allowed(p, cpus_allowed); | 3905 | cpuset_cpus_allowed(p, cpus_allowed); |
3379 | cpumask_and(new_mask, in_mask, cpus_allowed); | 3906 | cpumask_and(new_mask, in_mask, cpus_allowed); |
3907 | |||
3908 | /* | ||
3909 | * Since bandwidth control happens on a per-root_domain basis, | ||
3910 | * if the admission test is enabled, we only admit -deadline | ||
3911 | * tasks allowed to run on all the CPUs in the task's | ||
3912 | * root_domain. | ||
3913 | */ | ||
3914 | #ifdef CONFIG_SMP | ||
3915 | if (task_has_dl_policy(p)) { | ||
3916 | const struct cpumask *span = task_rq(p)->rd->span; | ||
3917 | |||
3918 | if (dl_bandwidth_enabled() && !cpumask_subset(span, new_mask)) { | ||
3919 | retval = -EBUSY; | ||
3920 | goto out_unlock; | ||
3921 | } | ||
3922 | } | ||
3923 | #endif | ||
3380 | again: | 3924 | again: |
3381 | retval = set_cpus_allowed_ptr(p, new_mask); | 3925 | retval = set_cpus_allowed_ptr(p, new_mask); |
3382 | 3926 | ||
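As the comment in the hunk above explains, once admission control is active a -deadline task must keep the whole root_domain in its affinity mask. A small illustrative sketch of the user-visible effect follows; it assumes the calling thread was already switched to SCHED_DEADLINE (for example as in the earlier sched_setattr sketch) and that the machine has more than one CPU.

#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t one;

	CPU_ZERO(&one);
	CPU_SET(0, &one);	/* a strict subset of the root_domain on SMP */

	/* The kernel-side check above turns this into -EBUSY for -dl tasks. */
	if (sched_setaffinity(0, sizeof(one), &one) < 0 && errno == EBUSY)
		printf("refused: -deadline tasks may not shrink their affinity\n");
	return 0;
}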
@@ -3653,7 +4197,7 @@ again: | |||
3653 | } | 4197 | } |
3654 | 4198 | ||
3655 | double_rq_lock(rq, p_rq); | 4199 | double_rq_lock(rq, p_rq); |
3656 | while (task_rq(p) != p_rq) { | 4200 | if (task_rq(p) != p_rq) { |
3657 | double_rq_unlock(rq, p_rq); | 4201 | double_rq_unlock(rq, p_rq); |
3658 | goto again; | 4202 | goto again; |
3659 | } | 4203 | } |
@@ -3742,6 +4286,7 @@ SYSCALL_DEFINE1(sched_get_priority_max, int, policy) | |||
3742 | case SCHED_RR: | 4286 | case SCHED_RR: |
3743 | ret = MAX_USER_RT_PRIO-1; | 4287 | ret = MAX_USER_RT_PRIO-1; |
3744 | break; | 4288 | break; |
4289 | case SCHED_DEADLINE: | ||
3745 | case SCHED_NORMAL: | 4290 | case SCHED_NORMAL: |
3746 | case SCHED_BATCH: | 4291 | case SCHED_BATCH: |
3747 | case SCHED_IDLE: | 4292 | case SCHED_IDLE: |
@@ -3768,6 +4313,7 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy) | |||
3768 | case SCHED_RR: | 4313 | case SCHED_RR: |
3769 | ret = 1; | 4314 | ret = 1; |
3770 | break; | 4315 | break; |
4316 | case SCHED_DEADLINE: | ||
3771 | case SCHED_NORMAL: | 4317 | case SCHED_NORMAL: |
3772 | case SCHED_BATCH: | 4318 | case SCHED_BATCH: |
3773 | case SCHED_IDLE: | 4319 | case SCHED_IDLE: |
@@ -3811,7 +4357,9 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
3811 | goto out_unlock; | 4357 | goto out_unlock; |
3812 | 4358 | ||
3813 | rq = task_rq_lock(p, &flags); | 4359 | rq = task_rq_lock(p, &flags); |
3814 | time_slice = p->sched_class->get_rr_interval(rq, p); | 4360 | time_slice = 0; |
4361 | if (p->sched_class->get_rr_interval) | ||
4362 | time_slice = p->sched_class->get_rr_interval(rq, p); | ||
3815 | task_rq_unlock(rq, p, &flags); | 4363 | task_rq_unlock(rq, p, &flags); |
3816 | 4364 | ||
3817 | rcu_read_unlock(); | 4365 | rcu_read_unlock(); |
@@ -4090,6 +4638,7 @@ int migrate_task_to(struct task_struct *p, int target_cpu) | |||
4090 | 4638 | ||
4091 | /* TODO: This is not properly updating schedstats */ | 4639 | /* TODO: This is not properly updating schedstats */ |
4092 | 4640 | ||
4641 | trace_sched_move_numa(p, curr_cpu, target_cpu); | ||
4093 | return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg); | 4642 | return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg); |
4094 | } | 4643 | } |
4095 | 4644 | ||
@@ -4514,13 +5063,31 @@ static int sched_cpu_active(struct notifier_block *nfb, | |||
4514 | static int sched_cpu_inactive(struct notifier_block *nfb, | 5063 | static int sched_cpu_inactive(struct notifier_block *nfb, |
4515 | unsigned long action, void *hcpu) | 5064 | unsigned long action, void *hcpu) |
4516 | { | 5065 | { |
5066 | unsigned long flags; | ||
5067 | long cpu = (long)hcpu; | ||
5068 | |||
4517 | switch (action & ~CPU_TASKS_FROZEN) { | 5069 | switch (action & ~CPU_TASKS_FROZEN) { |
4518 | case CPU_DOWN_PREPARE: | 5070 | case CPU_DOWN_PREPARE: |
4519 | set_cpu_active((long)hcpu, false); | 5071 | set_cpu_active(cpu, false); |
5072 | |||
5073 | /* explicitly allow suspend */ | ||
5074 | if (!(action & CPU_TASKS_FROZEN)) { | ||
5075 | struct dl_bw *dl_b = dl_bw_of(cpu); | ||
5076 | bool overflow; | ||
5077 | int cpus; | ||
5078 | |||
5079 | raw_spin_lock_irqsave(&dl_b->lock, flags); | ||
5080 | cpus = dl_bw_cpus(cpu); | ||
5081 | overflow = __dl_overflow(dl_b, cpus, 0, 0); | ||
5082 | raw_spin_unlock_irqrestore(&dl_b->lock, flags); | ||
5083 | |||
5084 | if (overflow) | ||
5085 | return notifier_from_errno(-EBUSY); | ||
5086 | } | ||
4520 | return NOTIFY_OK; | 5087 | return NOTIFY_OK; |
4521 | default: | ||
4522 | return NOTIFY_DONE; | ||
4523 | } | 5088 | } |
5089 | |||
5090 | return NOTIFY_DONE; | ||
4524 | } | 5091 | } |
4525 | 5092 | ||
4526 | static int __init migration_init(void) | 5093 | static int __init migration_init(void) |
@@ -4739,6 +5306,8 @@ static void free_rootdomain(struct rcu_head *rcu) | |||
4739 | struct root_domain *rd = container_of(rcu, struct root_domain, rcu); | 5306 | struct root_domain *rd = container_of(rcu, struct root_domain, rcu); |
4740 | 5307 | ||
4741 | cpupri_cleanup(&rd->cpupri); | 5308 | cpupri_cleanup(&rd->cpupri); |
5309 | cpudl_cleanup(&rd->cpudl); | ||
5310 | free_cpumask_var(rd->dlo_mask); | ||
4742 | free_cpumask_var(rd->rto_mask); | 5311 | free_cpumask_var(rd->rto_mask); |
4743 | free_cpumask_var(rd->online); | 5312 | free_cpumask_var(rd->online); |
4744 | free_cpumask_var(rd->span); | 5313 | free_cpumask_var(rd->span); |
@@ -4790,8 +5359,14 @@ static int init_rootdomain(struct root_domain *rd) | |||
4790 | goto out; | 5359 | goto out; |
4791 | if (!alloc_cpumask_var(&rd->online, GFP_KERNEL)) | 5360 | if (!alloc_cpumask_var(&rd->online, GFP_KERNEL)) |
4792 | goto free_span; | 5361 | goto free_span; |
4793 | if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) | 5362 | if (!alloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL)) |
4794 | goto free_online; | 5363 | goto free_online; |
5364 | if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) | ||
5365 | goto free_dlo_mask; | ||
5366 | |||
5367 | init_dl_bw(&rd->dl_bw); | ||
5368 | if (cpudl_init(&rd->cpudl) != 0) | ||
5369 | goto free_dlo_mask; | ||
4795 | 5370 | ||
4796 | if (cpupri_init(&rd->cpupri) != 0) | 5371 | if (cpupri_init(&rd->cpupri) != 0) |
4797 | goto free_rto_mask; | 5372 | goto free_rto_mask; |
@@ -4799,6 +5374,8 @@ static int init_rootdomain(struct root_domain *rd) | |||
4799 | 5374 | ||
4800 | free_rto_mask: | 5375 | free_rto_mask: |
4801 | free_cpumask_var(rd->rto_mask); | 5376 | free_cpumask_var(rd->rto_mask); |
5377 | free_dlo_mask: | ||
5378 | free_cpumask_var(rd->dlo_mask); | ||
4802 | free_online: | 5379 | free_online: |
4803 | free_cpumask_var(rd->online); | 5380 | free_cpumask_var(rd->online); |
4804 | free_span: | 5381 | free_span: |
@@ -6150,6 +6727,7 @@ void __init sched_init_smp(void) | |||
6150 | free_cpumask_var(non_isolated_cpus); | 6727 | free_cpumask_var(non_isolated_cpus); |
6151 | 6728 | ||
6152 | init_sched_rt_class(); | 6729 | init_sched_rt_class(); |
6730 | init_sched_dl_class(); | ||
6153 | } | 6731 | } |
6154 | #else | 6732 | #else |
6155 | void __init sched_init_smp(void) | 6733 | void __init sched_init_smp(void) |
@@ -6219,13 +6797,15 @@ void __init sched_init(void) | |||
6219 | #endif /* CONFIG_CPUMASK_OFFSTACK */ | 6797 | #endif /* CONFIG_CPUMASK_OFFSTACK */ |
6220 | } | 6798 | } |
6221 | 6799 | ||
6800 | init_rt_bandwidth(&def_rt_bandwidth, | ||
6801 | global_rt_period(), global_rt_runtime()); | ||
6802 | init_dl_bandwidth(&def_dl_bandwidth, | ||
6803 | global_rt_period(), global_rt_runtime()); | ||
6804 | |||
6222 | #ifdef CONFIG_SMP | 6805 | #ifdef CONFIG_SMP |
6223 | init_defrootdomain(); | 6806 | init_defrootdomain(); |
6224 | #endif | 6807 | #endif |
6225 | 6808 | ||
6226 | init_rt_bandwidth(&def_rt_bandwidth, | ||
6227 | global_rt_period(), global_rt_runtime()); | ||
6228 | |||
6229 | #ifdef CONFIG_RT_GROUP_SCHED | 6809 | #ifdef CONFIG_RT_GROUP_SCHED |
6230 | init_rt_bandwidth(&root_task_group.rt_bandwidth, | 6810 | init_rt_bandwidth(&root_task_group.rt_bandwidth, |
6231 | global_rt_period(), global_rt_runtime()); | 6811 | global_rt_period(), global_rt_runtime()); |
@@ -6249,6 +6829,7 @@ void __init sched_init(void) | |||
6249 | rq->calc_load_update = jiffies + LOAD_FREQ; | 6829 | rq->calc_load_update = jiffies + LOAD_FREQ; |
6250 | init_cfs_rq(&rq->cfs); | 6830 | init_cfs_rq(&rq->cfs); |
6251 | init_rt_rq(&rq->rt, rq); | 6831 | init_rt_rq(&rq->rt, rq); |
6832 | init_dl_rq(&rq->dl, rq); | ||
6252 | #ifdef CONFIG_FAIR_GROUP_SCHED | 6833 | #ifdef CONFIG_FAIR_GROUP_SCHED |
6253 | root_task_group.shares = ROOT_TASK_GROUP_LOAD; | 6834 | root_task_group.shares = ROOT_TASK_GROUP_LOAD; |
6254 | INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); | 6835 | INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); |
@@ -6320,10 +6901,6 @@ void __init sched_init(void) | |||
6320 | INIT_HLIST_HEAD(&init_task.preempt_notifiers); | 6901 | INIT_HLIST_HEAD(&init_task.preempt_notifiers); |
6321 | #endif | 6902 | #endif |
6322 | 6903 | ||
6323 | #ifdef CONFIG_RT_MUTEXES | ||
6324 | plist_head_init(&init_task.pi_waiters); | ||
6325 | #endif | ||
6326 | |||
6327 | /* | 6904 | /* |
6328 | * The boot idle thread does lazy MMU switching as well: | 6905 | * The boot idle thread does lazy MMU switching as well: |
6329 | */ | 6906 | */ |
@@ -6397,13 +6974,16 @@ EXPORT_SYMBOL(__might_sleep); | |||
6397 | static void normalize_task(struct rq *rq, struct task_struct *p) | 6974 | static void normalize_task(struct rq *rq, struct task_struct *p) |
6398 | { | 6975 | { |
6399 | const struct sched_class *prev_class = p->sched_class; | 6976 | const struct sched_class *prev_class = p->sched_class; |
6977 | struct sched_attr attr = { | ||
6978 | .sched_policy = SCHED_NORMAL, | ||
6979 | }; | ||
6400 | int old_prio = p->prio; | 6980 | int old_prio = p->prio; |
6401 | int on_rq; | 6981 | int on_rq; |
6402 | 6982 | ||
6403 | on_rq = p->on_rq; | 6983 | on_rq = p->on_rq; |
6404 | if (on_rq) | 6984 | if (on_rq) |
6405 | dequeue_task(rq, p, 0); | 6985 | dequeue_task(rq, p, 0); |
6406 | __setscheduler(rq, p, SCHED_NORMAL, 0); | 6986 | __setscheduler(rq, p, &attr); |
6407 | if (on_rq) { | 6987 | if (on_rq) { |
6408 | enqueue_task(rq, p, 0); | 6988 | enqueue_task(rq, p, 0); |
6409 | resched_task(rq->curr); | 6989 | resched_task(rq->curr); |
@@ -6433,7 +7013,7 @@ void normalize_rt_tasks(void) | |||
6433 | p->se.statistics.block_start = 0; | 7013 | p->se.statistics.block_start = 0; |
6434 | #endif | 7014 | #endif |
6435 | 7015 | ||
6436 | if (!rt_task(p)) { | 7016 | if (!dl_task(p) && !rt_task(p)) { |
6437 | /* | 7017 | /* |
6438 | * Renice negative nice level userspace | 7018 | * Renice negative nice level userspace |
6439 | * tasks back to 0: | 7019 | * tasks back to 0: |
@@ -6628,16 +7208,6 @@ void sched_move_task(struct task_struct *tsk) | |||
6628 | } | 7208 | } |
6629 | #endif /* CONFIG_CGROUP_SCHED */ | 7209 | #endif /* CONFIG_CGROUP_SCHED */ |
6630 | 7210 | ||
6631 | #if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_CFS_BANDWIDTH) | ||
6632 | static unsigned long to_ratio(u64 period, u64 runtime) | ||
6633 | { | ||
6634 | if (runtime == RUNTIME_INF) | ||
6635 | return 1ULL << 20; | ||
6636 | |||
6637 | return div64_u64(runtime << 20, period); | ||
6638 | } | ||
6639 | #endif | ||
6640 | |||
6641 | #ifdef CONFIG_RT_GROUP_SCHED | 7211 | #ifdef CONFIG_RT_GROUP_SCHED |
6642 | /* | 7212 | /* |
6643 | * Ensure that the real time constraints are schedulable. | 7213 | * Ensure that the real time constraints are schedulable. |
@@ -6811,24 +7381,13 @@ static long sched_group_rt_period(struct task_group *tg) | |||
6811 | do_div(rt_period_us, NSEC_PER_USEC); | 7381 | do_div(rt_period_us, NSEC_PER_USEC); |
6812 | return rt_period_us; | 7382 | return rt_period_us; |
6813 | } | 7383 | } |
7384 | #endif /* CONFIG_RT_GROUP_SCHED */ | ||
6814 | 7385 | ||
7386 | #ifdef CONFIG_RT_GROUP_SCHED | ||
6815 | static int sched_rt_global_constraints(void) | 7387 | static int sched_rt_global_constraints(void) |
6816 | { | 7388 | { |
6817 | u64 runtime, period; | ||
6818 | int ret = 0; | 7389 | int ret = 0; |
6819 | 7390 | ||
6820 | if (sysctl_sched_rt_period <= 0) | ||
6821 | return -EINVAL; | ||
6822 | |||
6823 | runtime = global_rt_runtime(); | ||
6824 | period = global_rt_period(); | ||
6825 | |||
6826 | /* | ||
6827 | * Sanity check on the sysctl variables. | ||
6828 | */ | ||
6829 | if (runtime > period && runtime != RUNTIME_INF) | ||
6830 | return -EINVAL; | ||
6831 | |||
6832 | mutex_lock(&rt_constraints_mutex); | 7391 | mutex_lock(&rt_constraints_mutex); |
6833 | read_lock(&tasklist_lock); | 7392 | read_lock(&tasklist_lock); |
6834 | ret = __rt_schedulable(NULL, 0, 0); | 7393 | ret = __rt_schedulable(NULL, 0, 0); |
@@ -6851,17 +7410,7 @@ static int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk) | |||
6851 | static int sched_rt_global_constraints(void) | 7410 | static int sched_rt_global_constraints(void) |
6852 | { | 7411 | { |
6853 | unsigned long flags; | 7412 | unsigned long flags; |
6854 | int i; | 7413 | int i, ret = 0; |
6855 | |||
6856 | if (sysctl_sched_rt_period <= 0) | ||
6857 | return -EINVAL; | ||
6858 | |||
6859 | /* | ||
6860 | * There's always some RT tasks in the root group | ||
6861 | * -- migration, kstopmachine etc.. | ||
6862 | */ | ||
6863 | if (sysctl_sched_rt_runtime == 0) | ||
6864 | return -EBUSY; | ||
6865 | 7414 | ||
6866 | raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); | 7415 | raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); |
6867 | for_each_possible_cpu(i) { | 7416 | for_each_possible_cpu(i) { |
@@ -6873,36 +7422,91 @@ static int sched_rt_global_constraints(void) | |||
6873 | } | 7422 | } |
6874 | raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); | 7423 | raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); |
6875 | 7424 | ||
6876 | return 0; | 7425 | return ret; |
6877 | } | 7426 | } |
6878 | #endif /* CONFIG_RT_GROUP_SCHED */ | 7427 | #endif /* CONFIG_RT_GROUP_SCHED */ |
6879 | 7428 | ||
6880 | int sched_rr_handler(struct ctl_table *table, int write, | 7429 | static int sched_dl_global_constraints(void) |
6881 | void __user *buffer, size_t *lenp, | ||
6882 | loff_t *ppos) | ||
6883 | { | 7430 | { |
6884 | int ret; | 7431 | u64 runtime = global_rt_runtime(); |
6885 | static DEFINE_MUTEX(mutex); | 7432 | u64 period = global_rt_period(); |
7433 | u64 new_bw = to_ratio(period, runtime); | ||
7434 | int cpu, ret = 0; | ||
7435 | unsigned long flags; | ||
6886 | 7436 | ||
6887 | mutex_lock(&mutex); | 7437 | /* |
6888 | ret = proc_dointvec(table, write, buffer, lenp, ppos); | 7438 | * Here we want to check the bandwidth not being set to some |
6889 | /* make sure that internally we keep jiffies */ | 7439 | * value smaller than the currently allocated bandwidth in |
6890 | /* also, writing zero resets timeslice to default */ | 7440 | * any of the root_domains. |
6891 | if (!ret && write) { | 7441 | * |
6892 | sched_rr_timeslice = sched_rr_timeslice <= 0 ? | 7442 | * FIXME: Cycling on all the CPUs is overdoing, but simpler than |
6893 | RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice); | 7443 | * cycling on root_domains... Discussion on different/better |
7444 | * solutions is welcome! | ||
7445 | */ | ||
7446 | for_each_possible_cpu(cpu) { | ||
7447 | struct dl_bw *dl_b = dl_bw_of(cpu); | ||
7448 | |||
7449 | raw_spin_lock_irqsave(&dl_b->lock, flags); | ||
7450 | if (new_bw < dl_b->total_bw) | ||
7451 | ret = -EBUSY; | ||
7452 | raw_spin_unlock_irqrestore(&dl_b->lock, flags); | ||
7453 | |||
7454 | if (ret) | ||
7455 | break; | ||
6894 | } | 7456 | } |
6895 | mutex_unlock(&mutex); | 7457 | |
6896 | return ret; | 7458 | return ret; |
6897 | } | 7459 | } |
6898 | 7460 | ||
7461 | static void sched_dl_do_global(void) | ||
7462 | { | ||
7463 | u64 new_bw = -1; | ||
7464 | int cpu; | ||
7465 | unsigned long flags; | ||
7466 | |||
7467 | def_dl_bandwidth.dl_period = global_rt_period(); | ||
7468 | def_dl_bandwidth.dl_runtime = global_rt_runtime(); | ||
7469 | |||
7470 | if (global_rt_runtime() != RUNTIME_INF) | ||
7471 | new_bw = to_ratio(global_rt_period(), global_rt_runtime()); | ||
7472 | |||
7473 | /* | ||
7474 | * FIXME: As above... | ||
7475 | */ | ||
7476 | for_each_possible_cpu(cpu) { | ||
7477 | struct dl_bw *dl_b = dl_bw_of(cpu); | ||
7478 | |||
7479 | raw_spin_lock_irqsave(&dl_b->lock, flags); | ||
7480 | dl_b->bw = new_bw; | ||
7481 | raw_spin_unlock_irqrestore(&dl_b->lock, flags); | ||
7482 | } | ||
7483 | } | ||
7484 | |||
7485 | static int sched_rt_global_validate(void) | ||
7486 | { | ||
7487 | if (sysctl_sched_rt_period <= 0) | ||
7488 | return -EINVAL; | ||
7489 | |||
7490 | if ((sysctl_sched_rt_runtime != RUNTIME_INF) && | ||
7491 | (sysctl_sched_rt_runtime > sysctl_sched_rt_period)) | ||
7492 | return -EINVAL; | ||
7493 | |||
7494 | return 0; | ||
7495 | } | ||
7496 | |||
7497 | static void sched_rt_do_global(void) | ||
7498 | { | ||
7499 | def_rt_bandwidth.rt_runtime = global_rt_runtime(); | ||
7500 | def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period()); | ||
7501 | } | ||
7502 | |||
6899 | int sched_rt_handler(struct ctl_table *table, int write, | 7503 | int sched_rt_handler(struct ctl_table *table, int write, |
6900 | void __user *buffer, size_t *lenp, | 7504 | void __user *buffer, size_t *lenp, |
6901 | loff_t *ppos) | 7505 | loff_t *ppos) |
6902 | { | 7506 | { |
6903 | int ret; | ||
6904 | int old_period, old_runtime; | 7507 | int old_period, old_runtime; |
6905 | static DEFINE_MUTEX(mutex); | 7508 | static DEFINE_MUTEX(mutex); |
7509 | int ret; | ||
6906 | 7510 | ||
6907 | mutex_lock(&mutex); | 7511 | mutex_lock(&mutex); |
6908 | old_period = sysctl_sched_rt_period; | 7512 | old_period = sysctl_sched_rt_period; |
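The same sysctls that bound RT bandwidth (sched_rt_period_us / sched_rt_runtime_us) now also set the per-root_domain -deadline cap through sched_dl_do_global(). A quick illustrative calculation with the default values is shown below; the defaults and the microsecond-to-nanosecond scaling (done by global_rt_period()/global_rt_runtime()) are stated here as assumptions for the example.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t period_us  = 1000000;	/* sysctl_sched_rt_period default */
	uint64_t runtime_us =  950000;	/* sysctl_sched_rt_runtime default */

	/* to_ratio(global_rt_period(), global_rt_runtime()) in Q20 form. */
	uint64_t new_bw = ((runtime_us * 1000) << 20) / (period_us * 1000);

	/* Each root_domain may then admit up to new_bw * nr_cpus of -dl load. */
	printf("dl_b->bw = %llu (%.1f%% of one CPU)\n",
	       (unsigned long long)new_bw, new_bw * 100.0 / (1 << 20));
	return 0;
}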
@@ -6911,21 +7515,50 @@ int sched_rt_handler(struct ctl_table *table, int write, | |||
6911 | ret = proc_dointvec(table, write, buffer, lenp, ppos); | 7515 | ret = proc_dointvec(table, write, buffer, lenp, ppos); |
6912 | 7516 | ||
6913 | if (!ret && write) { | 7517 | if (!ret && write) { |
7518 | ret = sched_rt_global_validate(); | ||
7519 | if (ret) | ||
7520 | goto undo; | ||
7521 | |||
6914 | ret = sched_rt_global_constraints(); | 7522 | ret = sched_rt_global_constraints(); |
6915 | if (ret) { | 7523 | if (ret) |
6916 | sysctl_sched_rt_period = old_period; | 7524 | goto undo; |
6917 | sysctl_sched_rt_runtime = old_runtime; | 7525 | |
6918 | } else { | 7526 | ret = sched_dl_global_constraints(); |
6919 | def_rt_bandwidth.rt_runtime = global_rt_runtime(); | 7527 | if (ret) |
6920 | def_rt_bandwidth.rt_period = | 7528 | goto undo; |
6921 | ns_to_ktime(global_rt_period()); | 7529 | |
6922 | } | 7530 | sched_rt_do_global(); |
7531 | sched_dl_do_global(); | ||
7532 | } | ||
7533 | if (0) { | ||
7534 | undo: | ||
7535 | sysctl_sched_rt_period = old_period; | ||
7536 | sysctl_sched_rt_runtime = old_runtime; | ||
6923 | } | 7537 | } |
6924 | mutex_unlock(&mutex); | 7538 | mutex_unlock(&mutex); |
6925 | 7539 | ||
6926 | return ret; | 7540 | return ret; |
6927 | } | 7541 | } |
6928 | 7542 | ||
7543 | int sched_rr_handler(struct ctl_table *table, int write, | ||
7544 | void __user *buffer, size_t *lenp, | ||
7545 | loff_t *ppos) | ||
7546 | { | ||
7547 | int ret; | ||
7548 | static DEFINE_MUTEX(mutex); | ||
7549 | |||
7550 | mutex_lock(&mutex); | ||
7551 | ret = proc_dointvec(table, write, buffer, lenp, ppos); | ||
7552 | /* make sure that internally we keep jiffies */ | ||
7553 | /* also, writing zero resets timeslice to default */ | ||
7554 | if (!ret && write) { | ||
7555 | sched_rr_timeslice = sched_rr_timeslice <= 0 ? | ||
7556 | RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice); | ||
7557 | } | ||
7558 | mutex_unlock(&mutex); | ||
7559 | return ret; | ||
7560 | } | ||
7561 | |||
6929 | #ifdef CONFIG_CGROUP_SCHED | 7562 | #ifdef CONFIG_CGROUP_SCHED |
6930 | 7563 | ||
6931 | static inline struct task_group *css_tg(struct cgroup_subsys_state *css) | 7564 | static inline struct task_group *css_tg(struct cgroup_subsys_state *css) |
@@ -7258,15 +7891,14 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota) | |||
7258 | return ret; | 7891 | return ret; |
7259 | } | 7892 | } |
7260 | 7893 | ||
7261 | static int cpu_stats_show(struct cgroup_subsys_state *css, struct cftype *cft, | 7894 | static int cpu_stats_show(struct seq_file *sf, void *v) |
7262 | struct cgroup_map_cb *cb) | ||
7263 | { | 7895 | { |
7264 | struct task_group *tg = css_tg(css); | 7896 | struct task_group *tg = css_tg(seq_css(sf)); |
7265 | struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; | 7897 | struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; |
7266 | 7898 | ||
7267 | cb->fill(cb, "nr_periods", cfs_b->nr_periods); | 7899 | seq_printf(sf, "nr_periods %d\n", cfs_b->nr_periods); |
7268 | cb->fill(cb, "nr_throttled", cfs_b->nr_throttled); | 7900 | seq_printf(sf, "nr_throttled %d\n", cfs_b->nr_throttled); |
7269 | cb->fill(cb, "throttled_time", cfs_b->throttled_time); | 7901 | seq_printf(sf, "throttled_time %llu\n", cfs_b->throttled_time); |
7270 | 7902 | ||
7271 | return 0; | 7903 | return 0; |
7272 | } | 7904 | } |
@@ -7320,7 +7952,7 @@ static struct cftype cpu_files[] = { | |||
7320 | }, | 7952 | }, |
7321 | { | 7953 | { |
7322 | .name = "stat", | 7954 | .name = "stat", |
7323 | .read_map = cpu_stats_show, | 7955 | .seq_show = cpu_stats_show, |
7324 | }, | 7956 | }, |
7325 | #endif | 7957 | #endif |
7326 | #ifdef CONFIG_RT_GROUP_SCHED | 7958 | #ifdef CONFIG_RT_GROUP_SCHED |