diff options
Diffstat (limited to 'kernel/sched')
| -rw-r--r-- | kernel/sched/core.c | 263 | ||||
| -rw-r--r-- | kernel/sched/sched.h | 9 |
2 files changed, 249 insertions, 23 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b21a63ed5d62..8174f889076c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -2817,6 +2817,7 @@ out_unlock: | |||
| 2817 | __task_rq_unlock(rq); | 2817 | __task_rq_unlock(rq); |
| 2818 | } | 2818 | } |
| 2819 | #endif | 2819 | #endif |
| 2820 | |||
| 2820 | void set_user_nice(struct task_struct *p, long nice) | 2821 | void set_user_nice(struct task_struct *p, long nice) |
| 2821 | { | 2822 | { |
| 2822 | int old_prio, delta, on_rq; | 2823 | int old_prio, delta, on_rq; |
| @@ -2991,22 +2992,29 @@ static struct task_struct *find_process_by_pid(pid_t pid) | |||
| 2991 | return pid ? find_task_by_vpid(pid) : current; | 2992 | return pid ? find_task_by_vpid(pid) : current; |
| 2992 | } | 2993 | } |
| 2993 | 2994 | ||
| 2994 | /* Actually do priority change: must hold rq lock. */ | 2995 | /* Actually do priority change: must hold pi & rq lock. */ |
| 2995 | static void | 2996 | static void __setscheduler(struct rq *rq, struct task_struct *p, |
| 2996 | __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) | 2997 | const struct sched_attr *attr) |
| 2997 | { | 2998 | { |
| 2999 | int policy = attr->sched_policy; | ||
| 3000 | |||
| 2998 | p->policy = policy; | 3001 | p->policy = policy; |
| 2999 | p->rt_priority = prio; | 3002 | |
| 3003 | if (rt_policy(policy)) | ||
| 3004 | p->rt_priority = attr->sched_priority; | ||
| 3005 | else | ||
| 3006 | p->static_prio = NICE_TO_PRIO(attr->sched_nice); | ||
| 3007 | |||
| 3000 | p->normal_prio = normal_prio(p); | 3008 | p->normal_prio = normal_prio(p); |
| 3001 | /* we are holding p->pi_lock already */ | ||
| 3002 | p->prio = rt_mutex_getprio(p); | 3009 | p->prio = rt_mutex_getprio(p); |
| 3010 | |||
| 3003 | if (rt_prio(p->prio)) | 3011 | if (rt_prio(p->prio)) |
| 3004 | p->sched_class = &rt_sched_class; | 3012 | p->sched_class = &rt_sched_class; |
| 3005 | else | 3013 | else |
| 3006 | p->sched_class = &fair_sched_class; | 3014 | p->sched_class = &fair_sched_class; |
| 3015 | |||
| 3007 | set_load_weight(p); | 3016 | set_load_weight(p); |
| 3008 | } | 3017 | } |
| 3009 | |||
| 3010 | /* | 3018 | /* |
| 3011 | * check the target process has a UID that matches the current process's | 3019 | * check the target process has a UID that matches the current process's |
| 3012 | */ | 3020 | */ |
| @@ -3023,10 +3031,12 @@ static bool check_same_owner(struct task_struct *p) | |||
| 3023 | return match; | 3031 | return match; |
| 3024 | } | 3032 | } |
| 3025 | 3033 | ||
| 3026 | static int __sched_setscheduler(struct task_struct *p, int policy, | 3034 | static int __sched_setscheduler(struct task_struct *p, |
| 3027 | const struct sched_param *param, bool user) | 3035 | const struct sched_attr *attr, |
| 3036 | bool user) | ||
| 3028 | { | 3037 | { |
| 3029 | int retval, oldprio, oldpolicy = -1, on_rq, running; | 3038 | int retval, oldprio, oldpolicy = -1, on_rq, running; |
| 3039 | int policy = attr->sched_policy; | ||
| 3030 | unsigned long flags; | 3040 | unsigned long flags; |
| 3031 | const struct sched_class *prev_class; | 3041 | const struct sched_class *prev_class; |
| 3032 | struct rq *rq; | 3042 | struct rq *rq; |
| @@ -3054,17 +3064,22 @@ recheck: | |||
| 3054 | * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL, | 3064 | * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL, |
| 3055 | * SCHED_BATCH and SCHED_IDLE is 0. | 3065 | * SCHED_BATCH and SCHED_IDLE is 0. |
| 3056 | */ | 3066 | */ |
| 3057 | if (param->sched_priority < 0 || | 3067 | if (attr->sched_priority < 0 || |
| 3058 | (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) || | 3068 | (p->mm && attr->sched_priority > MAX_USER_RT_PRIO-1) || |
| 3059 | (!p->mm && param->sched_priority > MAX_RT_PRIO-1)) | 3069 | (!p->mm && attr->sched_priority > MAX_RT_PRIO-1)) |
| 3060 | return -EINVAL; | 3070 | return -EINVAL; |
| 3061 | if (rt_policy(policy) != (param->sched_priority != 0)) | 3071 | if (rt_policy(policy) != (attr->sched_priority != 0)) |
| 3062 | return -EINVAL; | 3072 | return -EINVAL; |
| 3063 | 3073 | ||
| 3064 | /* | 3074 | /* |
| 3065 | * Allow unprivileged RT tasks to decrease priority: | 3075 | * Allow unprivileged RT tasks to decrease priority: |
| 3066 | */ | 3076 | */ |
| 3067 | if (user && !capable(CAP_SYS_NICE)) { | 3077 | if (user && !capable(CAP_SYS_NICE)) { |
| 3078 | if (fair_policy(policy)) { | ||
| 3079 | if (!can_nice(p, attr->sched_nice)) | ||
| 3080 | return -EPERM; | ||
| 3081 | } | ||
| 3082 | |||
| 3068 | if (rt_policy(policy)) { | 3083 | if (rt_policy(policy)) { |
| 3069 | unsigned long rlim_rtprio = | 3084 | unsigned long rlim_rtprio = |
| 3070 | task_rlimit(p, RLIMIT_RTPRIO); | 3085 | task_rlimit(p, RLIMIT_RTPRIO); |
| @@ -3074,8 +3089,8 @@ recheck: | |||
| 3074 | return -EPERM; | 3089 | return -EPERM; |
| 3075 | 3090 | ||
| 3076 | /* can't increase priority */ | 3091 | /* can't increase priority */ |
| 3077 | if (param->sched_priority > p->rt_priority && | 3092 | if (attr->sched_priority > p->rt_priority && |
| 3078 | param->sched_priority > rlim_rtprio) | 3093 | attr->sched_priority > rlim_rtprio) |
| 3079 | return -EPERM; | 3094 | return -EPERM; |
| 3080 | } | 3095 | } |
| 3081 | 3096 | ||
| @@ -3123,11 +3138,16 @@ recheck: | |||
| 3123 | /* | 3138 | /* |
| 3124 | * If not changing anything there's no need to proceed further: | 3139 | * If not changing anything there's no need to proceed further: |
| 3125 | */ | 3140 | */ |
| 3126 | if (unlikely(policy == p->policy && (!rt_policy(policy) || | 3141 | if (unlikely(policy == p->policy)) { |
| 3127 | param->sched_priority == p->rt_priority))) { | 3142 | if (fair_policy(policy) && attr->sched_nice != TASK_NICE(p)) |
| 3143 | goto change; | ||
| 3144 | if (rt_policy(policy) && attr->sched_priority != p->rt_priority) | ||
| 3145 | goto change; | ||
| 3146 | |||
| 3128 | task_rq_unlock(rq, p, &flags); | 3147 | task_rq_unlock(rq, p, &flags); |
| 3129 | return 0; | 3148 | return 0; |
| 3130 | } | 3149 | } |
| 3150 | change: | ||
| 3131 | 3151 | ||
| 3132 | #ifdef CONFIG_RT_GROUP_SCHED | 3152 | #ifdef CONFIG_RT_GROUP_SCHED |
| 3133 | if (user) { | 3153 | if (user) { |
| @@ -3161,7 +3181,7 @@ recheck: | |||
| 3161 | 3181 | ||
| 3162 | oldprio = p->prio; | 3182 | oldprio = p->prio; |
| 3163 | prev_class = p->sched_class; | 3183 | prev_class = p->sched_class; |
| 3164 | __setscheduler(rq, p, policy, param->sched_priority); | 3184 | __setscheduler(rq, p, attr); |
| 3165 | 3185 | ||
| 3166 | if (running) | 3186 | if (running) |
| 3167 | p->sched_class->set_curr_task(rq); | 3187 | p->sched_class->set_curr_task(rq); |
| @@ -3189,10 +3209,20 @@ recheck: | |||
| 3189 | int sched_setscheduler(struct task_struct *p, int policy, | 3209 | int sched_setscheduler(struct task_struct *p, int policy, |
| 3190 | const struct sched_param *param) | 3210 | const struct sched_param *param) |
| 3191 | { | 3211 | { |
| 3192 | return __sched_setscheduler(p, policy, param, true); | 3212 | struct sched_attr attr = { |
| 3213 | .sched_policy = policy, | ||
| 3214 | .sched_priority = param->sched_priority | ||
| 3215 | }; | ||
| 3216 | return __sched_setscheduler(p, &attr, true); | ||
| 3193 | } | 3217 | } |
| 3194 | EXPORT_SYMBOL_GPL(sched_setscheduler); | 3218 | EXPORT_SYMBOL_GPL(sched_setscheduler); |
| 3195 | 3219 | ||
| 3220 | int sched_setattr(struct task_struct *p, const struct sched_attr *attr) | ||
| 3221 | { | ||
| 3222 | return __sched_setscheduler(p, attr, true); | ||
| 3223 | } | ||
| 3224 | EXPORT_SYMBOL_GPL(sched_setattr); | ||
| 3225 | |||
| 3196 | /** | 3226 | /** |
| 3197 | * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. | 3227 | * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. |
| 3198 | * @p: the task in question. | 3228 | * @p: the task in question. |
| @@ -3209,7 +3239,11 @@ EXPORT_SYMBOL_GPL(sched_setscheduler); | |||
| 3209 | int sched_setscheduler_nocheck(struct task_struct *p, int policy, | 3239 | int sched_setscheduler_nocheck(struct task_struct *p, int policy, |
| 3210 | const struct sched_param *param) | 3240 | const struct sched_param *param) |
| 3211 | { | 3241 | { |
| 3212 | return __sched_setscheduler(p, policy, param, false); | 3242 | struct sched_attr attr = { |
| 3243 | .sched_policy = policy, | ||
| 3244 | .sched_priority = param->sched_priority | ||
| 3245 | }; | ||
| 3246 | return __sched_setscheduler(p, &attr, false); | ||
| 3213 | } | 3247 | } |
| 3214 | 3248 | ||
| 3215 | static int | 3249 | static int |
| @@ -3234,6 +3268,79 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) | |||
| 3234 | return retval; | 3268 | return retval; |
| 3235 | } | 3269 | } |
| 3236 | 3270 | ||
| 3271 | /* | ||
| 3272 | * Mimics kernel/events/core.c perf_copy_attr(). | ||
| 3273 | */ | ||
| 3274 | static int sched_copy_attr(struct sched_attr __user *uattr, | ||
| 3275 | struct sched_attr *attr) | ||
| 3276 | { | ||
| 3277 | u32 size; | ||
| 3278 | int ret; | ||
| 3279 | |||
| 3280 | if (!access_ok(VERIFY_WRITE, uattr, SCHED_ATTR_SIZE_VER0)) | ||
| 3281 | return -EFAULT; | ||
| 3282 | |||
| 3283 | /* | ||
| 3284 | * zero the full structure, so that a short copy will be nice. | ||
| 3285 | */ | ||
| 3286 | memset(attr, 0, sizeof(*attr)); | ||
| 3287 | |||
| 3288 | ret = get_user(size, &uattr->size); | ||
| 3289 | if (ret) | ||
| 3290 | return ret; | ||
| 3291 | |||
| 3292 | if (size > PAGE_SIZE) /* silly large */ | ||
| 3293 | goto err_size; | ||
| 3294 | |||
| 3295 | if (!size) /* abi compat */ | ||
| 3296 | size = SCHED_ATTR_SIZE_VER0; | ||
| 3297 | |||
| 3298 | if (size < SCHED_ATTR_SIZE_VER0) | ||
| 3299 | goto err_size; | ||
| 3300 | |||
| 3301 | /* | ||
| 3302 | * If we're handed a bigger struct than we know of, | ||
| 3303 | * ensure all the unknown bits are 0 - i.e. new | ||
| 3304 | * user-space does not rely on any kernel feature | ||
| | | 3305 | * extensions we don't know about yet. |
| 3306 | */ | ||
| 3307 | if (size > sizeof(*attr)) { | ||
| 3308 | unsigned char __user *addr; | ||
| 3309 | unsigned char __user *end; | ||
| 3310 | unsigned char val; | ||
| 3311 | |||
| 3312 | addr = (void __user *)uattr + sizeof(*attr); | ||
| 3313 | end = (void __user *)uattr + size; | ||
| 3314 | |||
| 3315 | for (; addr < end; addr++) { | ||
| 3316 | ret = get_user(val, addr); | ||
| 3317 | if (ret) | ||
| 3318 | return ret; | ||
| 3319 | if (val) | ||
| 3320 | goto err_size; | ||
| 3321 | } | ||
| 3322 | size = sizeof(*attr); | ||
| 3323 | } | ||
| 3324 | |||
| 3325 | ret = copy_from_user(attr, uattr, size); | ||
| 3326 | if (ret) | ||
| 3327 | return -EFAULT; | ||
| 3328 | |||
| 3329 | /* | ||
| 3330 | * XXX: do we want to be lenient like existing syscalls; or do we want | ||
| 3331 | * to be strict and return an error on out-of-bounds values? | ||
| 3332 | */ | ||
| 3333 | attr->sched_nice = clamp(attr->sched_nice, -20, 19); | ||
| 3334 | |||
| 3335 | out: | ||
| 3336 | return ret; | ||
| 3337 | |||
| 3338 | err_size: | ||
| 3339 | put_user(sizeof(*attr), &uattr->size); | ||
| 3340 | ret = -E2BIG; | ||
| 3341 | goto out; | ||
| 3342 | } | ||
| 3343 | |||
| 3237 | /** | 3344 | /** |
| 3238 | * sys_sched_setscheduler - set/change the scheduler policy and RT priority | 3345 | * sys_sched_setscheduler - set/change the scheduler policy and RT priority |
| 3239 | * @pid: the pid in question. | 3346 | * @pid: the pid in question. |
| @@ -3265,6 +3372,33 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) | |||
| 3265 | } | 3372 | } |
| 3266 | 3373 | ||
| 3267 | /** | 3374 | /** |
| 3375 | * sys_sched_setattr - same as above, but with extended sched_attr | ||
| 3376 | * @pid: the pid in question. | ||
| | | 3377 | * @uattr: user-space structure containing the extended parameters. |
| 3378 | */ | ||
| 3379 | SYSCALL_DEFINE2(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr) | ||
| 3380 | { | ||
| 3381 | struct sched_attr attr; | ||
| 3382 | struct task_struct *p; | ||
| 3383 | int retval; | ||
| 3384 | |||
| 3385 | if (!uattr || pid < 0) | ||
| 3386 | return -EINVAL; | ||
| 3387 | |||
| 3388 | if (sched_copy_attr(uattr, &attr)) | ||
| 3389 | return -EFAULT; | ||
| 3390 | |||
| 3391 | rcu_read_lock(); | ||
| 3392 | retval = -ESRCH; | ||
| 3393 | p = find_process_by_pid(pid); | ||
| 3394 | if (p != NULL) | ||
| 3395 | retval = sched_setattr(p, &attr); | ||
| 3396 | rcu_read_unlock(); | ||
| 3397 | |||
| 3398 | return retval; | ||
| 3399 | } | ||
| 3400 | |||
| 3401 | /** | ||
| 3268 | * sys_sched_getscheduler - get the policy (scheduling class) of a thread | 3402 | * sys_sched_getscheduler - get the policy (scheduling class) of a thread |
| 3269 | * @pid: the pid in question. | 3403 | * @pid: the pid in question. |
| 3270 | * | 3404 | * |
| @@ -3334,6 +3468,92 @@ out_unlock: | |||
| 3334 | return retval; | 3468 | return retval; |
| 3335 | } | 3469 | } |
| 3336 | 3470 | ||
| 3471 | static int sched_read_attr(struct sched_attr __user *uattr, | ||
| 3472 | struct sched_attr *attr, | ||
| 3473 | unsigned int usize) | ||
| 3474 | { | ||
| 3475 | int ret; | ||
| 3476 | |||
| 3477 | if (!access_ok(VERIFY_WRITE, uattr, usize)) | ||
| 3478 | return -EFAULT; | ||
| 3479 | |||
| 3480 | /* | ||
| 3481 | * If we're handed a smaller struct than we know of, | ||
| 3482 | * ensure all the unknown bits are 0 - i.e. old | ||
| | | 3483 | * user-space does not get incomplete information. |
| 3484 | */ | ||
| 3485 | if (usize < sizeof(*attr)) { | ||
| 3486 | unsigned char *addr; | ||
| 3487 | unsigned char *end; | ||
| 3488 | |||
| 3489 | addr = (void *)attr + usize; | ||
| 3490 | end = (void *)attr + sizeof(*attr); | ||
| 3491 | |||
| 3492 | for (; addr < end; addr++) { | ||
| 3493 | if (*addr) | ||
| 3494 | goto err_size; | ||
| 3495 | } | ||
| 3496 | |||
| 3497 | attr->size = usize; | ||
| 3498 | } | ||
| 3499 | |||
| 3500 | ret = copy_to_user(uattr, attr, usize); | ||
| 3501 | if (ret) | ||
| 3502 | return -EFAULT; | ||
| 3503 | |||
| 3504 | out: | ||
| 3505 | return ret; | ||
| 3506 | |||
| 3507 | err_size: | ||
| 3508 | ret = -E2BIG; | ||
| 3509 | goto out; | ||
| 3510 | } | ||
| 3511 | |||
| 3512 | /** | ||
| 3513 | * sys_sched_getattr - same as above, but with extended "sched_param" | ||
| 3514 | * @pid: the pid in question. | ||
| | | 3515 | * @uattr: user-space structure that receives the extended parameters. |
| | | 3516 | * @size: sizeof(attr) for forward/backward compatibility. |
| 3517 | */ | ||
| 3518 | SYSCALL_DEFINE3(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, | ||
| 3519 | unsigned int, size) | ||
| 3520 | { | ||
| 3521 | struct sched_attr attr = { | ||
| 3522 | .size = sizeof(struct sched_attr), | ||
| 3523 | }; | ||
| 3524 | struct task_struct *p; | ||
| 3525 | int retval; | ||
| 3526 | |||
| 3527 | if (!uattr || pid < 0 || size > PAGE_SIZE || | ||
| 3528 | size < SCHED_ATTR_SIZE_VER0) | ||
| 3529 | return -EINVAL; | ||
| 3530 | |||
| 3531 | rcu_read_lock(); | ||
| 3532 | p = find_process_by_pid(pid); | ||
| 3533 | retval = -ESRCH; | ||
| 3534 | if (!p) | ||
| 3535 | goto out_unlock; | ||
| 3536 | |||
| 3537 | retval = security_task_getscheduler(p); | ||
| 3538 | if (retval) | ||
| 3539 | goto out_unlock; | ||
| 3540 | |||
| 3541 | attr.sched_policy = p->policy; | ||
| 3542 | if (task_has_rt_policy(p)) | ||
| 3543 | attr.sched_priority = p->rt_priority; | ||
| 3544 | else | ||
| 3545 | attr.sched_nice = TASK_NICE(p); | ||
| 3546 | |||
| 3547 | rcu_read_unlock(); | ||
| 3548 | |||
| 3549 | retval = sched_read_attr(uattr, &attr, size); | ||
| 3550 | return retval; | ||
| 3551 | |||
| 3552 | out_unlock: | ||
| 3553 | rcu_read_unlock(); | ||
| 3554 | return retval; | ||
| 3555 | } | ||
| 3556 | |||
| 3337 | long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | 3557 | long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) |
| 3338 | { | 3558 | { |
| 3339 | cpumask_var_t cpus_allowed, new_mask; | 3559 | cpumask_var_t cpus_allowed, new_mask; |
| @@ -6400,13 +6620,16 @@ EXPORT_SYMBOL(__might_sleep); | |||
| 6400 | static void normalize_task(struct rq *rq, struct task_struct *p) | 6620 | static void normalize_task(struct rq *rq, struct task_struct *p) |
| 6401 | { | 6621 | { |
| 6402 | const struct sched_class *prev_class = p->sched_class; | 6622 | const struct sched_class *prev_class = p->sched_class; |
| 6623 | struct sched_attr attr = { | ||
| 6624 | .sched_policy = SCHED_NORMAL, | ||
| 6625 | }; | ||
| 6403 | int old_prio = p->prio; | 6626 | int old_prio = p->prio; |
| 6404 | int on_rq; | 6627 | int on_rq; |
| 6405 | 6628 | ||
| 6406 | on_rq = p->on_rq; | 6629 | on_rq = p->on_rq; |
| 6407 | if (on_rq) | 6630 | if (on_rq) |
| 6408 | dequeue_task(rq, p, 0); | 6631 | dequeue_task(rq, p, 0); |
| 6409 | __setscheduler(rq, p, SCHED_NORMAL, 0); | 6632 | __setscheduler(rq, p, &attr); |
| 6410 | if (on_rq) { | 6633 | if (on_rq) { |
| 6411 | enqueue_task(rq, p, 0); | 6634 | enqueue_task(rq, p, 0); |
| 6412 | resched_task(rq->curr); | 6635 | resched_task(rq->curr); |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b3b4a4953efc..df023db7721c 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
| @@ -81,11 +81,14 @@ extern void update_cpu_load_active(struct rq *this_rq); | |||
| 81 | */ | 81 | */ |
| 82 | #define RUNTIME_INF ((u64)~0ULL) | 82 | #define RUNTIME_INF ((u64)~0ULL) |
| 83 | 83 | ||
| 84 | static inline int fair_policy(int policy) | ||
| 85 | { | ||
| 86 | return policy == SCHED_NORMAL || policy == SCHED_BATCH; | ||
| 87 | } | ||
| 88 | |||
| 84 | static inline int rt_policy(int policy) | 89 | static inline int rt_policy(int policy) |
| 85 | { | 90 | { |
| 86 | if (policy == SCHED_FIFO || policy == SCHED_RR) | 91 | return policy == SCHED_FIFO || policy == SCHED_RR; |
| 87 | return 1; | ||
| 88 | return 0; | ||
| 89 | } | 92 | } |
| 90 | 93 | ||
| 91 | static inline int task_has_rt_policy(struct task_struct *p) | 94 | static inline int task_has_rt_policy(struct task_struct *p) |
