path: root/kernel/sched/core.c
author    Dario Faggioli <raistlin@linux.it>  2013-11-07 08:43:36 -0500
committer Ingo Molnar <mingo@kernel.org>      2014-01-13 07:41:04 -0500
commit    d50dde5a10f305253cbc3855307f608f8a3c5f73 (patch)
tree      940022e0216611f198d9a00f1cb3bfc59b2014d8 /kernel/sched/core.c
parent    56b4811039174bba9cbd68318d0d8b1585b9eded (diff)
sched: Add new scheduler syscalls to support an extended scheduling parameters ABI
Add the syscalls needed for supporting scheduling algorithms with extended scheduling parameters (e.g., SCHED_DEADLINE).

In general, this makes it possible to specify a periodic/sporadic task that executes for a given amount of runtime at each instance and is scheduled according to the urgency of its own timing constraints, i.e.:

 - a (maximum/typical) instance execution time,
 - a minimum interval between consecutive instances,
 - a time constraint by which each instance must be completed.

Thus, both the data structure that holds the scheduling parameters of the tasks and the system calls dealing with it must be extended. Unfortunately, modifying the existing struct sched_param would break the ABI and result in potentially serious compatibility issues with legacy binaries.

For these reasons, this patch:

 - defines the new struct sched_attr, containing all the fields that are necessary for specifying a task in the computational model described above;
 - defines and implements the new scheduling-related syscalls that manipulate it, i.e., sched_setattr() and sched_getattr().

Syscalls are introduced for x86 (32 and 64 bits) and ARM only, as a proof of concept and for development and testing purposes. Making them available on other architectures is straightforward.

Since no "user" of these new parameters is introduced in this patch, the implementation of the new system calls is identical to that of their existing counterparts. Future patches that implement scheduling policies able to exploit the new data structure must also take care of adapting the sched_*attr() calls to their own purposes.

Signed-off-by: Dario Faggioli <raistlin@linux.it>
[ Rewrote to use sched_attr. ]
Signed-off-by: Juri Lelli <juri.lelli@gmail.com>
[ Removed sched_setscheduler2() for now. ]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1383831828-15501-3-git-send-email-juri.lelli@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
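As a quick orientation for user space, below is a minimal, illustrative sketch of invoking the new sched_setattr() syscall directly through syscall(2). This is a sketch under stated assumptions, not part of the patch: the local struct sched_attr mirror and the __NR_sched_setattr number (314 on x86-64 in this series) are assumptions taken from the series, and the wrapper name sched_setattr_syscall() is purely illustrative; the authoritative definitions are the UAPI headers shipped by a kernel carrying this commit.

/*
 * Illustrative only: switch the calling thread to SCHED_FIFO priority 10
 * via the sched_setattr() syscall introduced by this patch.
 */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_sched_setattr
#define __NR_sched_setattr 314          /* assumed x86-64 number from this series */
#endif

/* Assumed mirror of the kernel's struct sched_attr (VER0 layout). */
struct sched_attr {
        uint32_t size;                  /* size of this struct, for ABI extension */
        uint32_t sched_policy;
        uint64_t sched_flags;
        int32_t  sched_nice;            /* used by SCHED_NORMAL / SCHED_BATCH */
        uint32_t sched_priority;        /* used by SCHED_FIFO / SCHED_RR */
        uint64_t sched_runtime;         /* reserved for the later deadline patches */
        uint64_t sched_deadline;
        uint64_t sched_period;
};

static int sched_setattr_syscall(pid_t pid, const struct sched_attr *attr)
{
        /* the syscall added by this patch takes only (pid, uattr) */
        return syscall(__NR_sched_setattr, pid, attr);
}

int main(void)
{
        struct sched_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.sched_policy = 1;          /* SCHED_FIFO */
        attr.sched_priority = 10;

        /* pid 0 targets the calling task (find_process_by_pid() returns current) */
        if (sched_setattr_syscall(0, &attr)) {
                perror("sched_setattr");
                return 1;
        }
        return 0;
}

Passing pid 0 targets the calling task, and attr.size must be filled in so sched_copy_attr() can perform its forward/backward compatible copy. With only this patch applied the call behaves like sched_setscheduler(); the extended fields only gain meaning once the policies that use them land.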
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--  kernel/sched/core.c  263
1 file changed, 243 insertions(+), 20 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b21a63ed5d62..8174f889076c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2817,6 +2817,7 @@ out_unlock:
         __task_rq_unlock(rq);
 }
 #endif
+
 void set_user_nice(struct task_struct *p, long nice)
 {
         int old_prio, delta, on_rq;
@@ -2991,22 +2992,29 @@ static struct task_struct *find_process_by_pid(pid_t pid)
         return pid ? find_task_by_vpid(pid) : current;
 }
 
-/* Actually do priority change: must hold rq lock. */
-static void
-__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
+/* Actually do priority change: must hold pi & rq lock. */
+static void __setscheduler(struct rq *rq, struct task_struct *p,
+                           const struct sched_attr *attr)
 {
+        int policy = attr->sched_policy;
+
         p->policy = policy;
-        p->rt_priority = prio;
+
+        if (rt_policy(policy))
+                p->rt_priority = attr->sched_priority;
+        else
+                p->static_prio = NICE_TO_PRIO(attr->sched_nice);
+
         p->normal_prio = normal_prio(p);
-        /* we are holding p->pi_lock already */
         p->prio = rt_mutex_getprio(p);
+
         if (rt_prio(p->prio))
                 p->sched_class = &rt_sched_class;
         else
                 p->sched_class = &fair_sched_class;
+
         set_load_weight(p);
 }
-
 /*
  * check the target process has a UID that matches the current process's
  */
@@ -3023,10 +3031,12 @@ static bool check_same_owner(struct task_struct *p)
         return match;
 }
 
-static int __sched_setscheduler(struct task_struct *p, int policy,
-                                const struct sched_param *param, bool user)
+static int __sched_setscheduler(struct task_struct *p,
+                                const struct sched_attr *attr,
+                                bool user)
 {
         int retval, oldprio, oldpolicy = -1, on_rq, running;
+        int policy = attr->sched_policy;
         unsigned long flags;
         const struct sched_class *prev_class;
         struct rq *rq;
@@ -3054,17 +3064,22 @@ recheck:
          * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL,
          * SCHED_BATCH and SCHED_IDLE is 0.
          */
-        if (param->sched_priority < 0 ||
-            (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
-            (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
+        if (attr->sched_priority < 0 ||
+            (p->mm && attr->sched_priority > MAX_USER_RT_PRIO-1) ||
+            (!p->mm && attr->sched_priority > MAX_RT_PRIO-1))
                 return -EINVAL;
-        if (rt_policy(policy) != (param->sched_priority != 0))
+        if (rt_policy(policy) != (attr->sched_priority != 0))
                 return -EINVAL;
 
         /*
          * Allow unprivileged RT tasks to decrease priority:
          */
         if (user && !capable(CAP_SYS_NICE)) {
+                if (fair_policy(policy)) {
+                        if (!can_nice(p, attr->sched_nice))
+                                return -EPERM;
+                }
+
                 if (rt_policy(policy)) {
                         unsigned long rlim_rtprio =
                                         task_rlimit(p, RLIMIT_RTPRIO);
@@ -3074,8 +3089,8 @@ recheck:
                                 return -EPERM;
 
                         /* can't increase priority */
-                        if (param->sched_priority > p->rt_priority &&
-                            param->sched_priority > rlim_rtprio)
+                        if (attr->sched_priority > p->rt_priority &&
+                            attr->sched_priority > rlim_rtprio)
                                 return -EPERM;
                 }
 
@@ -3123,11 +3138,16 @@ recheck:
         /*
          * If not changing anything there's no need to proceed further:
          */
-        if (unlikely(policy == p->policy && (!rt_policy(policy) ||
-                        param->sched_priority == p->rt_priority))) {
+        if (unlikely(policy == p->policy)) {
+                if (fair_policy(policy) && attr->sched_nice != TASK_NICE(p))
+                        goto change;
+                if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
+                        goto change;
+
                 task_rq_unlock(rq, p, &flags);
                 return 0;
         }
+change:
 
 #ifdef CONFIG_RT_GROUP_SCHED
         if (user) {
@@ -3161,7 +3181,7 @@ recheck:
 
         oldprio = p->prio;
         prev_class = p->sched_class;
-        __setscheduler(rq, p, policy, param->sched_priority);
+        __setscheduler(rq, p, attr);
 
         if (running)
                 p->sched_class->set_curr_task(rq);
@@ -3189,10 +3209,20 @@ recheck:
 int sched_setscheduler(struct task_struct *p, int policy,
                        const struct sched_param *param)
 {
-        return __sched_setscheduler(p, policy, param, true);
+        struct sched_attr attr = {
+                .sched_policy   = policy,
+                .sched_priority = param->sched_priority
+        };
+        return __sched_setscheduler(p, &attr, true);
 }
 EXPORT_SYMBOL_GPL(sched_setscheduler);
 
+int sched_setattr(struct task_struct *p, const struct sched_attr *attr)
+{
+        return __sched_setscheduler(p, attr, true);
+}
+EXPORT_SYMBOL_GPL(sched_setattr);
+
 /**
  * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
  * @p: the task in question.
@@ -3209,7 +3239,11 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
 int sched_setscheduler_nocheck(struct task_struct *p, int policy,
                                const struct sched_param *param)
 {
-        return __sched_setscheduler(p, policy, param, false);
+        struct sched_attr attr = {
+                .sched_policy   = policy,
+                .sched_priority = param->sched_priority
+        };
+        return __sched_setscheduler(p, &attr, false);
 }
 
 static int
@@ -3234,6 +3268,79 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
         return retval;
 }
 
+/*
+ * Mimics kernel/events/core.c perf_copy_attr().
+ */
+static int sched_copy_attr(struct sched_attr __user *uattr,
+                           struct sched_attr *attr)
+{
+        u32 size;
+        int ret;
+
+        if (!access_ok(VERIFY_WRITE, uattr, SCHED_ATTR_SIZE_VER0))
+                return -EFAULT;
+
+        /*
+         * zero the full structure, so that a short copy will be nice.
+         */
+        memset(attr, 0, sizeof(*attr));
+
+        ret = get_user(size, &uattr->size);
+        if (ret)
+                return ret;
+
+        if (size > PAGE_SIZE)   /* silly large */
+                goto err_size;
+
+        if (!size)              /* abi compat */
+                size = SCHED_ATTR_SIZE_VER0;
+
+        if (size < SCHED_ATTR_SIZE_VER0)
+                goto err_size;
+
+        /*
+         * If we're handed a bigger struct than we know of,
+         * ensure all the unknown bits are 0 - i.e. new
+         * user-space does not rely on any kernel feature
+         * extensions we dont know about yet.
+         */
+        if (size > sizeof(*attr)) {
+                unsigned char __user *addr;
+                unsigned char __user *end;
+                unsigned char val;
+
+                addr = (void __user *)uattr + sizeof(*attr);
+                end  = (void __user *)uattr + size;
+
+                for (; addr < end; addr++) {
+                        ret = get_user(val, addr);
+                        if (ret)
+                                return ret;
+                        if (val)
+                                goto err_size;
+                }
+                size = sizeof(*attr);
+        }
+
+        ret = copy_from_user(attr, uattr, size);
+        if (ret)
+                return -EFAULT;
+
+        /*
+         * XXX: do we want to be lenient like existing syscalls; or do we want
+         * to be strict and return an error on out-of-bounds values?
+         */
+        attr->sched_nice = clamp(attr->sched_nice, -20, 19);
+
+out:
+        return ret;
+
+err_size:
+        put_user(sizeof(*attr), &uattr->size);
+        ret = -E2BIG;
+        goto out;
+}
+
 /**
  * sys_sched_setscheduler - set/change the scheduler policy and RT priority
  * @pid: the pid in question.
@@ -3265,6 +3372,33 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
 }
 
 /**
+ * sys_sched_setattr - same as above, but with extended sched_attr
+ * @pid: the pid in question.
+ * @attr: structure containing the extended parameters.
+ */
+SYSCALL_DEFINE2(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr)
+{
+        struct sched_attr attr;
+        struct task_struct *p;
+        int retval;
+
+        if (!uattr || pid < 0)
+                return -EINVAL;
+
+        if (sched_copy_attr(uattr, &attr))
+                return -EFAULT;
+
+        rcu_read_lock();
+        retval = -ESRCH;
+        p = find_process_by_pid(pid);
+        if (p != NULL)
+                retval = sched_setattr(p, &attr);
+        rcu_read_unlock();
+
+        return retval;
+}
+
+/**
  * sys_sched_getscheduler - get the policy (scheduling class) of a thread
  * @pid: the pid in question.
  *
@@ -3334,6 +3468,92 @@ out_unlock:
         return retval;
 }
 
+static int sched_read_attr(struct sched_attr __user *uattr,
+                           struct sched_attr *attr,
+                           unsigned int usize)
+{
+        int ret;
+
+        if (!access_ok(VERIFY_WRITE, uattr, usize))
+                return -EFAULT;
+
+        /*
+         * If we're handed a smaller struct than we know of,
+         * ensure all the unknown bits are 0 - i.e. old
+         * user-space does not get uncomplete information.
+         */
+        if (usize < sizeof(*attr)) {
+                unsigned char *addr;
+                unsigned char *end;
+
+                addr = (void *)attr + usize;
+                end  = (void *)attr + sizeof(*attr);
+
+                for (; addr < end; addr++) {
+                        if (*addr)
+                                goto err_size;
+                }
+
+                attr->size = usize;
+        }
+
+        ret = copy_to_user(uattr, attr, usize);
+        if (ret)
+                return -EFAULT;
+
+out:
+        return ret;
+
+err_size:
+        ret = -E2BIG;
+        goto out;
+}
+
+/**
+ * sys_sched_getattr - same as above, but with extended "sched_param"
+ * @pid: the pid in question.
+ * @attr: structure containing the extended parameters.
+ * @size: sizeof(attr) for fwd/bwd comp.
+ */
+SYSCALL_DEFINE3(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
+                unsigned int, size)
+{
+        struct sched_attr attr = {
+                .size = sizeof(struct sched_attr),
+        };
+        struct task_struct *p;
+        int retval;
+
+        if (!uattr || pid < 0 || size > PAGE_SIZE ||
+            size < SCHED_ATTR_SIZE_VER0)
+                return -EINVAL;
+
+        rcu_read_lock();
+        p = find_process_by_pid(pid);
+        retval = -ESRCH;
+        if (!p)
+                goto out_unlock;
+
+        retval = security_task_getscheduler(p);
+        if (retval)
+                goto out_unlock;
+
+        attr.sched_policy = p->policy;
+        if (task_has_rt_policy(p))
+                attr.sched_priority = p->rt_priority;
+        else
+                attr.sched_nice = TASK_NICE(p);
+
+        rcu_read_unlock();
+
+        retval = sched_read_attr(uattr, &attr, size);
+        return retval;
+
+out_unlock:
+        rcu_read_unlock();
+        return retval;
+}
+
 long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 {
         cpumask_var_t cpus_allowed, new_mask;
@@ -6400,13 +6620,16 @@ EXPORT_SYMBOL(__might_sleep);
 static void normalize_task(struct rq *rq, struct task_struct *p)
 {
         const struct sched_class *prev_class = p->sched_class;
+        struct sched_attr attr = {
+                .sched_policy = SCHED_NORMAL,
+        };
         int old_prio = p->prio;
         int on_rq;
 
         on_rq = p->on_rq;
         if (on_rq)
                 dequeue_task(rq, p, 0);
-        __setscheduler(rq, p, SCHED_NORMAL, 0);
+        __setscheduler(rq, p, &attr);
         if (on_rq) {
                 enqueue_task(rq, p, 0);
                 resched_task(rq->curr);
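To close the loop on the read side, here is a similarly hedged fragment for sched_getattr(), reusing the includes and the struct sched_attr mirror from the sketch near the top of the page; the __NR_sched_getattr number (315 on x86-64 in this series) and the wrapper name are again assumptions rather than something this patch guarantees. The size argument is what sched_read_attr() uses to decide whether the kernel structure can be truncated for a smaller user-space structure or whether the call must fail with -E2BIG.

#ifndef __NR_sched_getattr
#define __NR_sched_getattr 315          /* assumed x86-64 number from this series */
#endif

static int sched_getattr_syscall(pid_t pid, struct sched_attr *attr,
                                 unsigned int size)
{
        return syscall(__NR_sched_getattr, pid, attr, size);
}

int main(void)
{
        struct sched_attr attr;

        /* size below SCHED_ATTR_SIZE_VER0 or above PAGE_SIZE is -EINVAL */
        if (sched_getattr_syscall(0, &attr, sizeof(attr)) != 0) {
                perror("sched_getattr");
                return 1;
        }

        printf("policy=%u nice=%d rt_priority=%u\n",
               attr.sched_policy, attr.sched_nice, attr.sched_priority);
        return 0;
}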