Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r-- | kernel/sched/core.c | 441 |
1 file changed, 414 insertions, 27 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 599ee3b11b44..c7c68e6b5c51 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -296,6 +296,15 @@ __read_mostly int scheduler_running; | |||
296 | */ | 296 | */ |
297 | int sysctl_sched_rt_runtime = 950000; | 297 | int sysctl_sched_rt_runtime = 950000; |
298 | 298 | ||
299 | /* | ||
300 | * Maximum bandwidth available for all -deadline tasks and groups | ||
301 | * (if group scheduling is configured) on each CPU. | ||
302 | * | ||
303 | * default: 5% | ||
304 | */ | ||
305 | unsigned int sysctl_sched_dl_period = 1000000; | ||
306 | int sysctl_sched_dl_runtime = 50000; | ||
307 | |||
299 | 308 | ||
300 | 309 | ||
301 | /* | 310 | /* |
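For reference, the defaults introduced above give the -deadline class at most 50000/1000000 = 5% of each CPU, matching the comment, while the default sysctl_sched_rt_runtime of 950000 just above it is 95% of the usual 1000000us rt period; together the two classes account for exactly 100% of the available bandwidth, which is what the rt/dl coupling checks added further down in this patch enforce.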
@@ -1856,6 +1865,111 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) | |||
1856 | return 0; | 1865 | return 0; |
1857 | } | 1866 | } |
1858 | 1867 | ||
1868 | unsigned long to_ratio(u64 period, u64 runtime) | ||
1869 | { | ||
1870 | if (runtime == RUNTIME_INF) | ||
1871 | return 1ULL << 20; | ||
1872 | |||
1873 | /* | ||
1874 | * Doing this here saves a lot of checks in all | ||
1875 | * the calling paths, and returning zero seems | ||
1876 | * safe for them anyway. | ||
1877 | */ | ||
1878 | if (period == 0) | ||
1879 | return 0; | ||
1880 | |||
1881 | return div64_u64(runtime << 20, period); | ||
1882 | } | ||
1883 | |||
1884 | #ifdef CONFIG_SMP | ||
1885 | inline struct dl_bw *dl_bw_of(int i) | ||
1886 | { | ||
1887 | return &cpu_rq(i)->rd->dl_bw; | ||
1888 | } | ||
1889 | |||
1890 | static inline int __dl_span_weight(struct rq *rq) | ||
1891 | { | ||
1892 | return cpumask_weight(rq->rd->span); | ||
1893 | } | ||
1894 | #else | ||
1895 | inline struct dl_bw *dl_bw_of(int i) | ||
1896 | { | ||
1897 | return &cpu_rq(i)->dl.dl_bw; | ||
1898 | } | ||
1899 | |||
1900 | static inline int __dl_span_weight(struct rq *rq) | ||
1901 | { | ||
1902 | return 1; | ||
1903 | } | ||
1904 | #endif | ||
1905 | |||
1906 | static inline | ||
1907 | void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw) | ||
1908 | { | ||
1909 | dl_b->total_bw -= tsk_bw; | ||
1910 | } | ||
1911 | |||
1912 | static inline | ||
1913 | void __dl_add(struct dl_bw *dl_b, u64 tsk_bw) | ||
1914 | { | ||
1915 | dl_b->total_bw += tsk_bw; | ||
1916 | } | ||
1917 | |||
1918 | static inline | ||
1919 | bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw) | ||
1920 | { | ||
1921 | return dl_b->bw != -1 && | ||
1922 | dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw; | ||
1923 | } | ||
1924 | |||
1925 | /* | ||
1926 | * We must be sure that accepting a new task (or changing the parameters | ||
1927 | * of an existing one) is consistent with the bandwidth constraints. | ||
1928 | * If so, this function also updates the currently allocated bandwidth | ||
1929 | * to reflect the new situation. | ||
1930 | * | ||
1931 | * This function is called while holding p's rq->lock. | ||
1932 | */ | ||
1933 | static int dl_overflow(struct task_struct *p, int policy, | ||
1934 | const struct sched_attr *attr) | ||
1935 | { | ||
1936 | |||
1937 | struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); | ||
1938 | u64 period = attr->sched_period; | ||
1939 | u64 runtime = attr->sched_runtime; | ||
1940 | u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0; | ||
1941 | int cpus = __dl_span_weight(task_rq(p)); | ||
1942 | int err = -1; | ||
1943 | |||
1944 | if (new_bw == p->dl.dl_bw) | ||
1945 | return 0; | ||
1946 | |||
1947 | /* | ||
1948 | * Whether a task enters, leaves, or stays -deadline but changes | ||
1949 | * its parameters, we may need to update the total allocated | ||
1950 | * bandwidth of the container accordingly. | ||
1951 | */ | ||
1952 | raw_spin_lock(&dl_b->lock); | ||
1953 | if (dl_policy(policy) && !task_has_dl_policy(p) && | ||
1954 | !__dl_overflow(dl_b, cpus, 0, new_bw)) { | ||
1955 | __dl_add(dl_b, new_bw); | ||
1956 | err = 0; | ||
1957 | } else if (dl_policy(policy) && task_has_dl_policy(p) && | ||
1958 | !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) { | ||
1959 | __dl_clear(dl_b, p->dl.dl_bw); | ||
1960 | __dl_add(dl_b, new_bw); | ||
1961 | err = 0; | ||
1962 | } else if (!dl_policy(policy) && task_has_dl_policy(p)) { | ||
1963 | __dl_clear(dl_b, p->dl.dl_bw); | ||
1964 | err = 0; | ||
1965 | } | ||
1966 | raw_spin_unlock(&dl_b->lock); | ||
1967 | |||
1968 | return err; | ||
1969 | } | ||
1970 | |||
1971 | extern void init_dl_bw(struct dl_bw *dl_b); | ||
1972 | |||
1859 | /* | 1973 | /* |
1860 | * wake_up_new_task - wake up a newly created task for the first time. | 1974 | * wake_up_new_task - wake up a newly created task for the first time. |
1861 | * | 1975 | * |
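The helpers above do all of the admission arithmetic in a 20-bit fixed-point format, where 1 << 20 represents 100% of a CPU. A minimal userspace sketch of the same arithmetic, handy for checking parameter choices by hand (the names below are illustrative, not kernel API):

#include <stdio.h>
#include <stdint.h>

/* Mirror of to_ratio(): utilization = runtime/period scaled by 2^20. */
static uint64_t ratio(uint64_t period, uint64_t runtime)
{
	if (period == 0)
		return 0;
	return (runtime << 20) / period;
}

int main(void)
{
	uint64_t cap  = ratio(1000000, 50000);          /* default per-CPU cap: 5%      */
	uint64_t task = ratio(100000000, 10000000);     /* candidate task: 10ms/100ms   */
	int cpus = 4;                                   /* CPUs in the root_domain      */
	uint64_t total = 0;                             /* bandwidth already allocated  */

	/* Same shape as __dl_overflow(): admit only if the new total
	 * utilization stays within cap * cpus. */
	int admit = (total + task) <= cap * cpus;
	printf("cap=%llu task=%llu admit=%d\n",
	       (unsigned long long)cap, (unsigned long long)task, admit);
	return 0;
}

With the defaults this prints cap=52428 task=104857 admit=1, i.e. a 10% task fits comfortably in a 4-CPU root_domain capped at 5% per CPU.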
@@ -3053,6 +3167,7 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr) | |||
3053 | dl_se->dl_deadline = attr->sched_deadline; | 3167 | dl_se->dl_deadline = attr->sched_deadline; |
3054 | dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline; | 3168 | dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline; |
3055 | dl_se->flags = attr->sched_flags; | 3169 | dl_se->flags = attr->sched_flags; |
3170 | dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime); | ||
3056 | dl_se->dl_throttled = 0; | 3171 | dl_se->dl_throttled = 0; |
3057 | dl_se->dl_new = 1; | 3172 | dl_se->dl_new = 1; |
3058 | } | 3173 | } |
@@ -3101,7 +3216,9 @@ __getparam_dl(struct task_struct *p, struct sched_attr *attr) | |||
3101 | * This function validates the new parameters of a -deadline task. | 3216 | * This function validates the new parameters of a -deadline task. |
3102 | * We ask for the deadline not being zero, and greater or equal | 3217 | * We ask for the deadline not being zero, and greater or equal |
3103 | * than the runtime, as well as the period of being zero or | 3218 | * than the runtime, as well as the period of being zero or |
3104 | * greater than deadline. | 3219 | * greater than deadline. Furthermore, we have to be sure that |
3220 | * user parameters are above the internal resolution (1us); we | ||
3221 | * check sched_runtime only since it is always the smaller one. | ||
3105 | */ | 3222 | */ |
3106 | static bool | 3223 | static bool |
3107 | __checkparam_dl(const struct sched_attr *attr) | 3224 | __checkparam_dl(const struct sched_attr *attr) |
@@ -3109,7 +3226,8 @@ __checkparam_dl(const struct sched_attr *attr) | |||
3109 | return attr && attr->sched_deadline != 0 && | 3226 | return attr && attr->sched_deadline != 0 && |
3110 | (attr->sched_period == 0 || | 3227 | (attr->sched_period == 0 || |
3111 | (s64)(attr->sched_period - attr->sched_deadline) >= 0) && | 3228 | (s64)(attr->sched_period - attr->sched_deadline) >= 0) && |
3112 | (s64)(attr->sched_deadline - attr->sched_runtime ) >= 0; | 3229 | (s64)(attr->sched_deadline - attr->sched_runtime ) >= 0 && |
3230 | attr->sched_runtime >= (2 << (DL_SCALE - 1)); | ||
3113 | } | 3231 | } |
3114 | 3232 | ||
3115 | /* | 3233 | /* |
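Restated outside the kernel, the constraint is 0 < runtime <= deadline <= period (with period == 0 meaning "use the deadline"), plus the roughly 1us floor on the runtime. A sketch, assuming DL_SCALE is 10 as in the mainline scheduler headers, so the floor works out to 2 << 9 = 1024ns:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DL_SCALE 10	/* assumed value; the real constant is defined in the scheduler headers */

/* Mirrors __checkparam_dl() for experimenting with parameter choices (all values in ns). */
static bool checkparam_dl(uint64_t runtime, uint64_t deadline, uint64_t period)
{
	return deadline != 0 &&
	       (period == 0 || (int64_t)(period - deadline) >= 0) &&
	       (int64_t)(deadline - runtime) >= 0 &&
	       runtime >= (2 << (DL_SCALE - 1));
}

int main(void)
{
	/* 10ms runtime, 30ms deadline, 100ms period: accepted. */
	printf("%d\n", checkparam_dl(10000000, 30000000, 100000000));
	/* 500ns runtime is below the 1us resolution: rejected. */
	printf("%d\n", checkparam_dl(500, 1000000, 0));
	return 0;
}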
@@ -3250,8 +3368,8 @@ recheck: | |||
3250 | } | 3368 | } |
3251 | change: | 3369 | change: |
3252 | 3370 | ||
3253 | #ifdef CONFIG_RT_GROUP_SCHED | ||
3254 | if (user) { | 3371 | if (user) { |
3372 | #ifdef CONFIG_RT_GROUP_SCHED | ||
3255 | /* | 3373 | /* |
3256 | * Do not allow realtime tasks into groups that have no runtime | 3374 | * Do not allow realtime tasks into groups that have no runtime |
3257 | * assigned. | 3375 | * assigned. |
@@ -3262,8 +3380,33 @@ change: | |||
3262 | task_rq_unlock(rq, p, &flags); | 3380 | task_rq_unlock(rq, p, &flags); |
3263 | return -EPERM; | 3381 | return -EPERM; |
3264 | } | 3382 | } |
3265 | } | ||
3266 | #endif | 3383 | #endif |
3384 | #ifdef CONFIG_SMP | ||
3385 | if (dl_bandwidth_enabled() && dl_policy(policy)) { | ||
3386 | cpumask_t *span = rq->rd->span; | ||
3387 | cpumask_t act_affinity; | ||
3388 | |||
3389 | /* | ||
3390 | * The cpus_allowed mask is statically initialized to | ||
3391 | * CPU_MASK_ALL, while span is dynamic. Here we | ||
3392 | * compute the "dynamic" affinity of a task. | ||
3393 | */ | ||
3394 | cpumask_and(&act_affinity, &p->cpus_allowed, | ||
3395 | cpu_active_mask); | ||
3396 | |||
3397 | /* | ||
3398 | * Don't allow tasks with an affinity mask smaller than | ||
3399 | * the entire root_domain to become SCHED_DEADLINE. We | ||
3400 | * will also fail if there's no bandwidth available. | ||
3401 | */ | ||
3402 | if (!cpumask_equal(&act_affinity, span) || | ||
3403 | rq->rd->dl_bw.bw == 0) { | ||
3404 | task_rq_unlock(rq, p, &flags); | ||
3405 | return -EPERM; | ||
3406 | } | ||
3407 | } | ||
3408 | #endif | ||
3409 | } | ||
3267 | 3410 | ||
3268 | /* recheck policy now with rq lock held */ | 3411 | /* recheck policy now with rq lock held */ |
3269 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { | 3412 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { |
@@ -3271,6 +3414,18 @@ change: | |||
3271 | task_rq_unlock(rq, p, &flags); | 3414 | task_rq_unlock(rq, p, &flags); |
3272 | goto recheck; | 3415 | goto recheck; |
3273 | } | 3416 | } |
3417 | |||
3418 | /* | ||
3419 | * If setscheduling to SCHED_DEADLINE (or changing the parameters | ||
3420 | * of a SCHED_DEADLINE task) we need to check if enough bandwidth | ||
3421 | * is available. | ||
3422 | */ | ||
3423 | if ((dl_policy(policy) || dl_task(p)) && | ||
3424 | dl_overflow(p, policy, attr)) { | ||
3425 | task_rq_unlock(rq, p, &flags); | ||
3426 | return -EBUSY; | ||
3427 | } | ||
3428 | |||
3274 | on_rq = p->on_rq; | 3429 | on_rq = p->on_rq; |
3275 | running = task_current(rq, p); | 3430 | running = task_current(rq, p); |
3276 | if (on_rq) | 3431 | if (on_rq) |
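From userspace, the new failure modes can be observed through the sched_setattr() syscall introduced alongside this series (a sketch; there is no glibc wrapper, and the struct layout below is the one assumed by this patch set): EPERM if the task's affinity is narrower than the whole root_domain or the caller lacks the needed privileges, EBUSY if dl_overflow() finds that the requested runtime/period does not fit in the remaining bandwidth.

#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef SCHED_DEADLINE
#define SCHED_DEADLINE	6
#endif

/* Layout assumed to match the kernel's struct sched_attr from this series. */
struct sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;		/* ns */
	uint64_t sched_deadline;	/* ns */
	uint64_t sched_period;		/* ns */
};

int main(void)
{
	struct sched_attr attr = {
		.size		= sizeof(attr),
		.sched_policy	= SCHED_DEADLINE,
		.sched_runtime	= 10 * 1000 * 1000,	/* 10ms of runtime...       */
		.sched_deadline	= 30 * 1000 * 1000,	/* ...within a 30ms deadline */
		.sched_period	= 100 * 1000 * 1000,	/* ...every 100ms            */
	};

	/* __NR_sched_setattr is assumed to be wired up by the same series. */
	if (syscall(__NR_sched_setattr, 0, &attr, 0) < 0)
		perror("sched_setattr");	/* EBUSY: no bandwidth; EPERM: affinity/privileges */
	return 0;
}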
@@ -3705,6 +3860,24 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
3705 | if (retval) | 3860 | if (retval) |
3706 | goto out_unlock; | 3861 | goto out_unlock; |
3707 | 3862 | ||
3863 | /* | ||
3864 | * Since bandwidth control happens on a per-root_domain basis, | ||
3865 | * if the admission test is enabled, we only admit -deadline | ||
3866 | * tasks that are allowed to run on all the CPUs in the task's | ||
3867 | * root_domain. | ||
3868 | */ | ||
3869 | #ifdef CONFIG_SMP | ||
3870 | if (task_has_dl_policy(p)) { | ||
3871 | const struct cpumask *span = task_rq(p)->rd->span; | ||
3872 | |||
3873 | if (dl_bandwidth_enabled() && | ||
3874 | !cpumask_equal(in_mask, span)) { | ||
3875 | retval = -EBUSY; | ||
3876 | goto out_unlock; | ||
3877 | } | ||
3878 | } | ||
3879 | #endif | ||
3880 | |||
3708 | cpuset_cpus_allowed(p, cpus_allowed); | 3881 | cpuset_cpus_allowed(p, cpus_allowed); |
3709 | cpumask_and(new_mask, in_mask, cpus_allowed); | 3882 | cpumask_and(new_mask, in_mask, cpus_allowed); |
3710 | again: | 3883 | again: |
@@ -4359,6 +4532,42 @@ out: | |||
4359 | EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); | 4532 | EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); |
4360 | 4533 | ||
4361 | /* | 4534 | /* |
4535 | * When dealing with a -deadline task, we have to check if moving it to | ||
4536 | * a new CPU is possible or not. In fact, the move is only possible if | ||
4537 | * there is enough bandwidth available on that CPU; otherwise we want | ||
4538 | * the whole migration procedure to fail. | ||
4539 | */ | ||
4540 | static inline | ||
4541 | bool set_task_cpu_dl(struct task_struct *p, unsigned int cpu) | ||
4542 | { | ||
4543 | struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); | ||
4544 | struct dl_bw *cpu_b = dl_bw_of(cpu); | ||
4545 | int ret = 1; | ||
4546 | u64 bw; | ||
4547 | |||
4548 | if (dl_b == cpu_b) | ||
4549 | return 1; | ||
4550 | |||
4551 | raw_spin_lock(&dl_b->lock); | ||
4552 | raw_spin_lock(&cpu_b->lock); | ||
4553 | |||
4554 | bw = cpu_b->bw * cpumask_weight(cpu_rq(cpu)->rd->span); | ||
4555 | if (dl_bandwidth_enabled() && | ||
4556 | bw < cpu_b->total_bw + p->dl.dl_bw) { | ||
4557 | ret = 0; | ||
4558 | goto unlock; | ||
4559 | } | ||
4560 | dl_b->total_bw -= p->dl.dl_bw; | ||
4561 | cpu_b->total_bw += p->dl.dl_bw; | ||
4562 | |||
4563 | unlock: | ||
4564 | raw_spin_unlock(&cpu_b->lock); | ||
4565 | raw_spin_unlock(&dl_b->lock); | ||
4566 | |||
4567 | return ret; | ||
4568 | } | ||
4569 | |||
4570 | /* | ||
4362 | * Move (not current) task off this cpu, onto dest cpu. We're doing | 4571 | * Move (not current) task off this cpu, onto dest cpu. We're doing |
4363 | * this because either it can't run here any more (set_cpus_allowed() | 4572 | * this because either it can't run here any more (set_cpus_allowed() |
4364 | * away from this CPU, or CPU going down), or because we're | 4573 | * away from this CPU, or CPU going down), or because we're |
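In concrete terms, with the 5% default cap a destination root_domain spanning 4 CPUs offers bw = 52428 * 4 = 209712 units of 2^-20 bandwidth; set_task_cpu_dl() lets the migration proceed only if the destination's total_bw plus the task's dl_bw still fits under that figure, and then moves the task's share from the source accounting to the destination's.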
@@ -4390,6 +4599,13 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
4390 | goto fail; | 4599 | goto fail; |
4391 | 4600 | ||
4392 | /* | 4601 | /* |
4602 | * If p is -deadline, proceed only if there is enough | ||
4603 | * bandwidth available on dest_cpu | ||
4604 | */ | ||
4605 | if (unlikely(dl_task(p)) && !set_task_cpu_dl(p, dest_cpu)) | ||
4606 | goto fail; | ||
4607 | |||
4608 | /* | ||
4393 | * If we're not on a rq, the next wake-up will ensure we're | 4609 | * If we're not on a rq, the next wake-up will ensure we're |
4394 | * placed properly. | 4610 | * placed properly. |
4395 | */ | 4611 | */ |
@@ -5128,6 +5344,8 @@ static int init_rootdomain(struct root_domain *rd) | |||
5128 | if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) | 5344 | if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) |
5129 | goto free_dlo_mask; | 5345 | goto free_dlo_mask; |
5130 | 5346 | ||
5347 | init_dl_bw(&rd->dl_bw); | ||
5348 | |||
5131 | if (cpupri_init(&rd->cpupri) != 0) | 5349 | if (cpupri_init(&rd->cpupri) != 0) |
5132 | goto free_rto_mask; | 5350 | goto free_rto_mask; |
5133 | return 0; | 5351 | return 0; |
@@ -6557,13 +6775,15 @@ void __init sched_init(void) | |||
6557 | #endif /* CONFIG_CPUMASK_OFFSTACK */ | 6775 | #endif /* CONFIG_CPUMASK_OFFSTACK */ |
6558 | } | 6776 | } |
6559 | 6777 | ||
6778 | init_rt_bandwidth(&def_rt_bandwidth, | ||
6779 | global_rt_period(), global_rt_runtime()); | ||
6780 | init_dl_bandwidth(&def_dl_bandwidth, | ||
6781 | global_dl_period(), global_dl_runtime()); | ||
6782 | |||
6560 | #ifdef CONFIG_SMP | 6783 | #ifdef CONFIG_SMP |
6561 | init_defrootdomain(); | 6784 | init_defrootdomain(); |
6562 | #endif | 6785 | #endif |
6563 | 6786 | ||
6564 | init_rt_bandwidth(&def_rt_bandwidth, | ||
6565 | global_rt_period(), global_rt_runtime()); | ||
6566 | |||
6567 | #ifdef CONFIG_RT_GROUP_SCHED | 6787 | #ifdef CONFIG_RT_GROUP_SCHED |
6568 | init_rt_bandwidth(&root_task_group.rt_bandwidth, | 6788 | init_rt_bandwidth(&root_task_group.rt_bandwidth, |
6569 | global_rt_period(), global_rt_runtime()); | 6789 | global_rt_period(), global_rt_runtime()); |
@@ -6966,16 +7186,6 @@ void sched_move_task(struct task_struct *tsk) | |||
6966 | } | 7186 | } |
6967 | #endif /* CONFIG_CGROUP_SCHED */ | 7187 | #endif /* CONFIG_CGROUP_SCHED */ |
6968 | 7188 | ||
6969 | #if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_CFS_BANDWIDTH) | ||
6970 | static unsigned long to_ratio(u64 period, u64 runtime) | ||
6971 | { | ||
6972 | if (runtime == RUNTIME_INF) | ||
6973 | return 1ULL << 20; | ||
6974 | |||
6975 | return div64_u64(runtime << 20, period); | ||
6976 | } | ||
6977 | #endif | ||
6978 | |||
6979 | #ifdef CONFIG_RT_GROUP_SCHED | 7189 | #ifdef CONFIG_RT_GROUP_SCHED |
6980 | /* | 7190 | /* |
6981 | * Ensure that the real time constraints are schedulable. | 7191 | * Ensure that the real time constraints are schedulable. |
@@ -7149,10 +7359,48 @@ static long sched_group_rt_period(struct task_group *tg) | |||
7149 | do_div(rt_period_us, NSEC_PER_USEC); | 7359 | do_div(rt_period_us, NSEC_PER_USEC); |
7150 | return rt_period_us; | 7360 | return rt_period_us; |
7151 | } | 7361 | } |
7362 | #endif /* CONFIG_RT_GROUP_SCHED */ | ||
7152 | 7363 | ||
7364 | /* | ||
7365 | * Coupling of -rt and -deadline bandwidth. | ||
7366 | * | ||
7367 | * Here we check if the new -rt bandwidth value is consistent | ||
7368 | * with the system settings for the bandwidth available | ||
7369 | * to -deadline tasks. | ||
7370 | * | ||
7371 | * IOW, we want to enforce that | ||
7372 | * | ||
7373 | * rt_bandwidth + dl_bandwidth <= 100% | ||
7374 | * | ||
7375 | * is always true. | ||
7376 | */ | ||
7377 | static bool __sched_rt_dl_global_constraints(u64 rt_bw) | ||
7378 | { | ||
7379 | unsigned long flags; | ||
7380 | u64 dl_bw; | ||
7381 | bool ret; | ||
7382 | |||
7383 | raw_spin_lock_irqsave(&def_dl_bandwidth.dl_runtime_lock, flags); | ||
7384 | if (global_rt_runtime() == RUNTIME_INF || | ||
7385 | global_dl_runtime() == RUNTIME_INF) { | ||
7386 | ret = true; | ||
7387 | goto unlock; | ||
7388 | } | ||
7389 | |||
7390 | dl_bw = to_ratio(def_dl_bandwidth.dl_period, | ||
7391 | def_dl_bandwidth.dl_runtime); | ||
7392 | |||
7393 | ret = rt_bw + dl_bw <= to_ratio(RUNTIME_INF, RUNTIME_INF); | ||
7394 | unlock: | ||
7395 | raw_spin_unlock_irqrestore(&def_dl_bandwidth.dl_runtime_lock, flags); | ||
7396 | |||
7397 | return ret; | ||
7398 | } | ||
7399 | |||
7400 | #ifdef CONFIG_RT_GROUP_SCHED | ||
7153 | static int sched_rt_global_constraints(void) | 7401 | static int sched_rt_global_constraints(void) |
7154 | { | 7402 | { |
7155 | u64 runtime, period; | 7403 | u64 runtime, period, bw; |
7156 | int ret = 0; | 7404 | int ret = 0; |
7157 | 7405 | ||
7158 | if (sysctl_sched_rt_period <= 0) | 7406 | if (sysctl_sched_rt_period <= 0) |
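With the defaults in this file, the check works out to to_ratio(1000000, 950000) = 996147 for -rt plus to_ratio(1000000, 50000) = 52428 for -deadline, a total of 1048575, which is just within to_ratio(RUNTIME_INF, RUNTIME_INF) = 1 << 20 = 1048576; raising either class beyond that combined budget is rejected here or by the symmetric check further down.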
@@ -7167,6 +7415,10 @@ static int sched_rt_global_constraints(void) | |||
7167 | if (runtime > period && runtime != RUNTIME_INF) | 7415 | if (runtime > period && runtime != RUNTIME_INF) |
7168 | return -EINVAL; | 7416 | return -EINVAL; |
7169 | 7417 | ||
7418 | bw = to_ratio(period, runtime); | ||
7419 | if (!__sched_rt_dl_global_constraints(bw)) | ||
7420 | return -EINVAL; | ||
7421 | |||
7170 | mutex_lock(&rt_constraints_mutex); | 7422 | mutex_lock(&rt_constraints_mutex); |
7171 | read_lock(&tasklist_lock); | 7423 | read_lock(&tasklist_lock); |
7172 | ret = __rt_schedulable(NULL, 0, 0); | 7424 | ret = __rt_schedulable(NULL, 0, 0); |
@@ -7189,19 +7441,19 @@ static int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk) | |||
7189 | static int sched_rt_global_constraints(void) | 7441 | static int sched_rt_global_constraints(void) |
7190 | { | 7442 | { |
7191 | unsigned long flags; | 7443 | unsigned long flags; |
7192 | int i; | 7444 | int i, ret = 0; |
7445 | u64 bw; | ||
7193 | 7446 | ||
7194 | if (sysctl_sched_rt_period <= 0) | 7447 | if (sysctl_sched_rt_period <= 0) |
7195 | return -EINVAL; | 7448 | return -EINVAL; |
7196 | 7449 | ||
7197 | /* | ||
7198 | * There's always some RT tasks in the root group | ||
7199 | * -- migration, kstopmachine etc.. | ||
7200 | */ | ||
7201 | if (sysctl_sched_rt_runtime == 0) | ||
7202 | return -EBUSY; | ||
7203 | |||
7204 | raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); | 7450 | raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); |
7451 | bw = to_ratio(global_rt_period(), global_rt_runtime()); | ||
7452 | if (!__sched_rt_dl_global_constraints(bw)) { | ||
7453 | ret = -EINVAL; | ||
7454 | goto unlock; | ||
7455 | } | ||
7456 | |||
7205 | for_each_possible_cpu(i) { | 7457 | for_each_possible_cpu(i) { |
7206 | struct rt_rq *rt_rq = &cpu_rq(i)->rt; | 7458 | struct rt_rq *rt_rq = &cpu_rq(i)->rt; |
7207 | 7459 | ||
@@ -7209,12 +7461,93 @@ static int sched_rt_global_constraints(void) | |||
7209 | rt_rq->rt_runtime = global_rt_runtime(); | 7461 | rt_rq->rt_runtime = global_rt_runtime(); |
7210 | raw_spin_unlock(&rt_rq->rt_runtime_lock); | 7462 | raw_spin_unlock(&rt_rq->rt_runtime_lock); |
7211 | } | 7463 | } |
7464 | unlock: | ||
7212 | raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); | 7465 | raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); |
7213 | 7466 | ||
7214 | return 0; | 7467 | return ret; |
7215 | } | 7468 | } |
7216 | #endif /* CONFIG_RT_GROUP_SCHED */ | 7469 | #endif /* CONFIG_RT_GROUP_SCHED */ |
7217 | 7470 | ||
7471 | /* | ||
7472 | * Coupling of -dl and -rt bandwidth. | ||
7473 | * | ||
7474 | * Here we check, while setting the system wide bandwidth available | ||
7475 | * for -dl tasks and groups, if the new values are consistent with | ||
7476 | * the system settings for the bandwidth available to -rt entities. | ||
7477 | * | ||
7478 | * IOW, we want to enforce that | ||
7479 | * | ||
7480 | * rt_bandwidth + dl_bandwidth <= 100% | ||
7481 | * | ||
7482 | * is always true. | ||
7483 | */ | ||
7484 | static bool __sched_dl_rt_global_constraints(u64 dl_bw) | ||
7485 | { | ||
7486 | u64 rt_bw; | ||
7487 | bool ret; | ||
7488 | |||
7489 | raw_spin_lock(&def_rt_bandwidth.rt_runtime_lock); | ||
7490 | if (global_dl_runtime() == RUNTIME_INF || | ||
7491 | global_rt_runtime() == RUNTIME_INF) { | ||
7492 | ret = true; | ||
7493 | goto unlock; | ||
7494 | } | ||
7495 | |||
7496 | rt_bw = to_ratio(ktime_to_ns(def_rt_bandwidth.rt_period), | ||
7497 | def_rt_bandwidth.rt_runtime); | ||
7498 | |||
7499 | ret = rt_bw + dl_bw <= to_ratio(RUNTIME_INF, RUNTIME_INF); | ||
7500 | unlock: | ||
7501 | raw_spin_unlock(&def_rt_bandwidth.rt_runtime_lock); | ||
7502 | |||
7503 | return ret; | ||
7504 | } | ||
7505 | |||
7506 | static int __sched_dl_global_constraints(u64 runtime, u64 period) | ||
7507 | { | ||
7508 | if (!period || (runtime != RUNTIME_INF && runtime > period)) | ||
7509 | return -EINVAL; | ||
7510 | |||
7511 | return 0; | ||
7512 | } | ||
7513 | |||
7514 | static int sched_dl_global_constraints(void) | ||
7515 | { | ||
7516 | u64 runtime = global_dl_runtime(); | ||
7517 | u64 period = global_dl_period(); | ||
7518 | u64 new_bw = to_ratio(period, runtime); | ||
7519 | int ret, i; | ||
7520 | |||
7521 | ret = __sched_dl_global_constraints(runtime, period); | ||
7522 | if (ret) | ||
7523 | return ret; | ||
7524 | |||
7525 | if (!__sched_dl_rt_global_constraints(new_bw)) | ||
7526 | return -EINVAL; | ||
7527 | |||
7528 | /* | ||
7529 | * Here we want to check that the bandwidth is not being set to a | ||
7530 | * value smaller than the currently allocated bandwidth in | ||
7531 | * any of the root_domains. | ||
7532 | * | ||
7533 | * FIXME: Cycling over all the CPUs is overkill, but simpler than | ||
7534 | * cycling over the root_domains... Discussion on different/better | ||
7535 | * solutions is welcome! | ||
7536 | */ | ||
7537 | for_each_possible_cpu(i) { | ||
7538 | struct dl_bw *dl_b = dl_bw_of(i); | ||
7539 | |||
7540 | raw_spin_lock(&dl_b->lock); | ||
7541 | if (new_bw < dl_b->total_bw) { | ||
7542 | raw_spin_unlock(&dl_b->lock); | ||
7543 | return -EBUSY; | ||
7544 | } | ||
7545 | raw_spin_unlock(&dl_b->lock); | ||
7546 | } | ||
7547 | |||
7548 | return 0; | ||
7549 | } | ||
7550 | |||
7218 | int sched_rr_handler(struct ctl_table *table, int write, | 7551 | int sched_rr_handler(struct ctl_table *table, int write, |
7219 | void __user *buffer, size_t *lenp, | 7552 | void __user *buffer, size_t *lenp, |
7220 | loff_t *ppos) | 7553 | loff_t *ppos) |
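As a worked example of this constraint, lowering sysctl_sched_dl_runtime from 50000 to 10000 (1% of the 1000000us period) drops new_bw from 52428 to 10485; if any root_domain's dl_b->total_bw already exceeds that, the write fails with -EBUSY and the handler below restores the previous sysctl values.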
@@ -7264,6 +7597,60 @@ int sched_rt_handler(struct ctl_table *table, int write, | |||
7264 | return ret; | 7597 | return ret; |
7265 | } | 7598 | } |
7266 | 7599 | ||
7600 | int sched_dl_handler(struct ctl_table *table, int write, | ||
7601 | void __user *buffer, size_t *lenp, | ||
7602 | loff_t *ppos) | ||
7603 | { | ||
7604 | int ret; | ||
7605 | int old_period, old_runtime; | ||
7606 | static DEFINE_MUTEX(mutex); | ||
7607 | unsigned long flags; | ||
7608 | |||
7609 | mutex_lock(&mutex); | ||
7610 | old_period = sysctl_sched_dl_period; | ||
7611 | old_runtime = sysctl_sched_dl_runtime; | ||
7612 | |||
7613 | ret = proc_dointvec(table, write, buffer, lenp, ppos); | ||
7614 | |||
7615 | if (!ret && write) { | ||
7616 | raw_spin_lock_irqsave(&def_dl_bandwidth.dl_runtime_lock, | ||
7617 | flags); | ||
7618 | |||
7619 | ret = sched_dl_global_constraints(); | ||
7620 | if (ret) { | ||
7621 | sysctl_sched_dl_period = old_period; | ||
7622 | sysctl_sched_dl_runtime = old_runtime; | ||
7623 | } else { | ||
7624 | u64 new_bw; | ||
7625 | int i; | ||
7626 | |||
7627 | def_dl_bandwidth.dl_period = global_dl_period(); | ||
7628 | def_dl_bandwidth.dl_runtime = global_dl_runtime(); | ||
7629 | if (global_dl_runtime() == RUNTIME_INF) | ||
7630 | new_bw = -1; | ||
7631 | else | ||
7632 | new_bw = to_ratio(global_dl_period(), | ||
7633 | global_dl_runtime()); | ||
7634 | /* | ||
7635 | * FIXME: As above... | ||
7636 | */ | ||
7637 | for_each_possible_cpu(i) { | ||
7638 | struct dl_bw *dl_b = dl_bw_of(i); | ||
7639 | |||
7640 | raw_spin_lock(&dl_b->lock); | ||
7641 | dl_b->bw = new_bw; | ||
7642 | raw_spin_unlock(&dl_b->lock); | ||
7643 | } | ||
7644 | } | ||
7645 | |||
7646 | raw_spin_unlock_irqrestore(&def_dl_bandwidth.dl_runtime_lock, | ||
7647 | flags); | ||
7648 | } | ||
7649 | mutex_unlock(&mutex); | ||
7650 | |||
7651 | return ret; | ||
7652 | } | ||
7653 | |||
7267 | #ifdef CONFIG_CGROUP_SCHED | 7654 | #ifdef CONFIG_CGROUP_SCHED |
7268 | 7655 | ||
7269 | static inline struct task_group *css_tg(struct cgroup_subsys_state *css) | 7656 | static inline struct task_group *css_tg(struct cgroup_subsys_state *css) |