about | summary | refs | log | tree | commit | diff | stats
path: root/kernel/sched.c
diff options
context:
space:
mode:
author: Lennart Poettering <lennart@poettering.net> 2009-06-15 11:17:47 -0400
committer: Ingo Molnar <mingo@elte.hu> 2009-06-15 11:31:59 -0400
commit: ca94c442535a44d508c99a77e54f21a59f4fc462 (patch)
tree: 5dda78242ed00f035e033ebd1f29200303b92b10 /kernel/sched.c
parent: 45e3e1935e2857c54783291107d33323b3ef33c8 (diff)
sched: Introduce SCHED_RESET_ON_FORK scheduling policy flag
This patch introduces a new flag SCHED_RESET_ON_FORK which can be passed
to the kernel via sched_setscheduler(), ORed in the policy parameter. If
set this will make sure that when the process forks a) the scheduling
priority is reset to DEFAULT_PRIO if it was higher and b) the scheduling
policy is reset to SCHED_NORMAL if it was either SCHED_FIFO or SCHED_RR.

Why have this?

Currently, if a process is real-time scheduled this will 'leak' to all
its child processes. For security reasons it is often (always?) a good
idea to make sure that if a process acquires RT scheduling this is
confined to this process and only this process. More specifically this
makes the per-process resource limit RLIMIT_RTTIME useful for security
purposes, because it makes it impossible to use a fork bomb to
circumvent the per-process RLIMIT_RTTIME accounting.

This feature is also useful for tools like 'renice' which can then
change the nice level of a process without having this spill to all its
child processes.

Why expose this via sched_setscheduler() and not other syscalls such as
prctl() or sched_setparam()?

prctl() does not take a pid parameter. Due to that it would be
impossible to modify this flag for other processes than the current one.

The struct passed to sched_setparam() can unfortunately not be extended
without breaking compatibility, since sched_setparam() lacks a size
parameter.

How to use this from userspace? In your RT program simply replace this:

    sched_setscheduler(pid, SCHED_FIFO, &param);

by this:

    sched_setscheduler(pid, SCHED_FIFO|SCHED_RESET_ON_FORK, &param);

Signed-off-by: Lennart Poettering <lennart@poettering.net>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090615152714.GA29092@tango.0pointer.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- kernel/sched.c | 49
1 files changed, 40 insertions, 9 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 8ec9d13140be..32e6ede85255 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2613,12 +2613,28 @@ void sched_fork(struct task_struct *p, int clone_flags)
2613 set_task_cpu(p, cpu); 2613 set_task_cpu(p, cpu);
2614 2614
2615 /* 2615 /*
2616 * Make sure we do not leak PI boosting priority to the child: 2616 * Revert to default priority/policy on fork if requested. Make sure we
2617 * do not leak PI boosting priority to the child.
2617 */ 2618 */
2618 p->prio = current->normal_prio; 2619 if (current->sched_reset_on_fork &&
2620 (p->policy == SCHED_FIFO || p->policy == SCHED_RR))
2621 p->policy = SCHED_NORMAL;
2622
2623 if (current->sched_reset_on_fork &&
2624 (current->normal_prio < DEFAULT_PRIO))
2625 p->prio = DEFAULT_PRIO;
2626 else
2627 p->prio = current->normal_prio;
2628
2619 if (!rt_prio(p->prio)) 2629 if (!rt_prio(p->prio))
2620 p->sched_class = &fair_sched_class; 2630 p->sched_class = &fair_sched_class;
2621 2631
2632 /*
2633 * We don't need the reset flag anymore after the fork. It has
2634 * fulfilled its duty:
2635 */
2636 p->sched_reset_on_fork = 0;
2637
2622#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 2638#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
2623 if (likely(sched_info_on())) 2639 if (likely(sched_info_on()))
2624 memset(&p->sched_info, 0, sizeof(p->sched_info)); 2640 memset(&p->sched_info, 0, sizeof(p->sched_info));
@@ -6094,17 +6110,25 @@ static int __sched_setscheduler(struct task_struct *p, int policy,
6094 unsigned long flags; 6110 unsigned long flags;
6095 const struct sched_class *prev_class = p->sched_class; 6111 const struct sched_class *prev_class = p->sched_class;
6096 struct rq *rq; 6112 struct rq *rq;
6113 int reset_on_fork;
6097 6114
6098 /* may grab non-irq protected spin_locks */ 6115 /* may grab non-irq protected spin_locks */
6099 BUG_ON(in_interrupt()); 6116 BUG_ON(in_interrupt());
6100recheck: 6117recheck:
6101 /* double check policy once rq lock held */ 6118 /* double check policy once rq lock held */
6102 if (policy < 0) 6119 if (policy < 0) {
6120 reset_on_fork = p->sched_reset_on_fork;
6103 policy = oldpolicy = p->policy; 6121 policy = oldpolicy = p->policy;
6104 else if (policy != SCHED_FIFO && policy != SCHED_RR && 6122 } else {
6105 policy != SCHED_NORMAL && policy != SCHED_BATCH && 6123 reset_on_fork = !!(policy & SCHED_RESET_ON_FORK);
6106 policy != SCHED_IDLE) 6124 policy &= ~SCHED_RESET_ON_FORK;
6107 return -EINVAL; 6125
6126 if (policy != SCHED_FIFO && policy != SCHED_RR &&
6127 policy != SCHED_NORMAL && policy != SCHED_BATCH &&
6128 policy != SCHED_IDLE)
6129 return -EINVAL;
6130 }
6131
6108 /* 6132 /*
6109 * Valid priorities for SCHED_FIFO and SCHED_RR are 6133 * Valid priorities for SCHED_FIFO and SCHED_RR are
6110 * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL, 6134 * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL,
@@ -6148,6 +6172,10 @@ recheck:
6148 /* can't change other user's priorities */ 6172 /* can't change other user's priorities */
6149 if (!check_same_owner(p)) 6173 if (!check_same_owner(p))
6150 return -EPERM; 6174 return -EPERM;
6175
6176 /* Normal users shall not reset the sched_reset_on_fork flag */
6177 if (p->sched_reset_on_fork && !reset_on_fork)
6178 return -EPERM;
6151 } 6179 }
6152 6180
6153 if (user) { 6181 if (user) {
@@ -6191,6 +6219,8 @@ recheck:
6191 if (running) 6219 if (running)
6192 p->sched_class->put_prev_task(rq, p); 6220 p->sched_class->put_prev_task(rq, p);
6193 6221
6222 p->sched_reset_on_fork = reset_on_fork;
6223
6194 oldprio = p->prio; 6224 oldprio = p->prio;
6195 __setscheduler(rq, p, policy, param->sched_priority); 6225 __setscheduler(rq, p, policy, param->sched_priority);
6196 6226
@@ -6307,14 +6337,15 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
6307 if (p) { 6337 if (p) {
6308 retval = security_task_getscheduler(p); 6338 retval = security_task_getscheduler(p);
6309 if (!retval) 6339 if (!retval)
6310 retval = p->policy; 6340 retval = p->policy
6341 | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
6311 } 6342 }
6312 read_unlock(&tasklist_lock); 6343 read_unlock(&tasklist_lock);
6313 return retval; 6344 return retval;
6314} 6345}
6315 6346
6316/** 6347/**
6317 * sys_sched_getscheduler - get the RT priority of a thread 6348 * sys_sched_getparam - get the RT priority of a thread
6318 * @pid: the pid in question. 6349 * @pid: the pid in question.
6319 * @param: structure containing the RT priority. 6350 * @param: structure containing the RT priority.
6320 */ 6351 */