author     Lennart Poettering <lennart@poettering.net>   2009-06-15 11:17:47 -0400
committer  Ingo Molnar <mingo@elte.hu>                    2009-06-15 11:31:59 -0400
commit     ca94c442535a44d508c99a77e54f21a59f4fc462
tree       5dda78242ed00f035e033ebd1f29200303b92b10
parent     45e3e1935e2857c54783291107d33323b3ef33c8
sched: Introduce SCHED_RESET_ON_FORK scheduling policy flag
This patch introduces a new flag, SCHED_RESET_ON_FORK, which can be passed
to the kernel via sched_setscheduler(), ORed into the policy parameter. If
set, it makes sure that when the process forks, a) the child's scheduling
priority is reset to DEFAULT_PRIO if it was higher, and b) the child's
scheduling policy is reset to SCHED_NORMAL if it was either SCHED_FIFO or SCHED_RR.
Why have this?
Currently, if a process is real-time scheduled, this 'leaks' to all of its
child processes. For security reasons it is often (always?) a good idea to
make sure that if a process acquires RT scheduling, it stays confined to
that process and that process only. More specifically, this makes the
per-process resource limit RLIMIT_RTTIME useful for security purposes,
because it becomes impossible to use a fork bomb to circumvent the
per-process RLIMIT_RTTIME accounting.
This feature is also useful for tools like 'renice', which can then change
the nice level of a process without having the change spill over to all of
its child processes.
Why expose this via sched_setscheduler() and not other syscalls such as
prctl() or sched_setparam()?
prctl() does not take a pid parameter, so it would be impossible to modify
this flag for processes other than the current one.
The struct passed to sched_setparam() unfortunately cannot be extended
without breaking compatibility, since sched_setparam() lacks a size
parameter.
How to use this from userspace? In your RT program simply replace this:
sched_setscheduler(pid, SCHED_FIFO, &param);
by this:
sched_setscheduler(pid, SCHED_FIFO|SCHED_RESET_ON_FORK, &param);
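For illustration only (not part of this patch): a minimal, self-contained sketch
that sets the flag, forks, and checks that the child has been reverted to
SCHED_NORMAL. It assumes a kernel carrying this patch, defines
SCHED_RESET_ON_FORK itself in case the installed headers do not have it yet,
and needs CAP_SYS_NICE (or a suitable RLIMIT_RTPRIO) to acquire SCHED_FIFO in
the first place.

  #include <sched.h>
  #include <stdio.h>
  #include <sys/types.h>
  #include <sys/wait.h>
  #include <unistd.h>

  #ifndef SCHED_RESET_ON_FORK
  #define SCHED_RESET_ON_FORK 0x40000000  /* value introduced by this patch */
  #endif

  int main(void)
  {
          struct sched_param param = { .sched_priority = 10 };

          /* Acquire RT scheduling, but confine it to this process. */
          if (sched_setscheduler(0, SCHED_FIFO | SCHED_RESET_ON_FORK, &param) < 0) {
                  perror("sched_setscheduler");
                  return 1;
          }

          if (fork() == 0) {
                  /* The child should be back to SCHED_NORMAL (0). */
                  printf("child policy: %d\n", sched_getscheduler(0));
                  return 0;
          }

          wait(NULL);
          /* The parent still reports SCHED_FIFO, with the flag ORed in. */
          printf("parent policy: %#x\n", sched_getscheduler(0));
          return 0;
  }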
Signed-off-by: Lennart Poettering <lennart@poettering.net>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090615152714.GA29092@tango.0pointer.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
 include/linux/sched.h |  6
 kernel/sched.c        | 49
 2 files changed, 46 insertions(+), 9 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4896fdfec913..d4a2c6662f7d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -38,6 +38,8 @@
 #define SCHED_BATCH             3
 /* SCHED_ISO: reserved but not implemented yet */
 #define SCHED_IDLE              5
+/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
+#define SCHED_RESET_ON_FORK     0x40000000
 
 #ifdef __KERNEL__
 
@@ -1209,6 +1211,10 @@ struct task_struct {
         unsigned did_exec:1;
         unsigned in_execve:1;   /* Tell the LSMs that the process is doing an
                                  * execve */
+
+       /* Revert to default priority/policy when forking */
+       unsigned sched_reset_on_fork:1;
+
         pid_t pid;
         pid_t tgid;
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 8ec9d13140be..32e6ede85255 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2613,12 +2613,28 @@ void sched_fork(struct task_struct *p, int clone_flags)
         set_task_cpu(p, cpu);
 
         /*
-        * Make sure we do not leak PI boosting priority to the child:
+        * Revert to default priority/policy on fork if requested. Make sure we
+        * do not leak PI boosting priority to the child.
         */
-       p->prio = current->normal_prio;
+       if (current->sched_reset_on_fork &&
+                       (p->policy == SCHED_FIFO || p->policy == SCHED_RR))
+               p->policy = SCHED_NORMAL;
+
+       if (current->sched_reset_on_fork &&
+                       (current->normal_prio < DEFAULT_PRIO))
+               p->prio = DEFAULT_PRIO;
+       else
+               p->prio = current->normal_prio;
+
         if (!rt_prio(p->prio))
                 p->sched_class = &fair_sched_class;
 
+       /*
+        * We don't need the reset flag anymore after the fork. It has
+        * fulfilled its duty:
+        */
+       p->sched_reset_on_fork = 0;
+
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
         if (likely(sched_info_on()))
                 memset(&p->sched_info, 0, sizeof(p->sched_info));
@@ -6094,17 +6110,25 @@ static int __sched_setscheduler(struct task_struct *p, int policy,
         unsigned long flags;
         const struct sched_class *prev_class = p->sched_class;
         struct rq *rq;
+       int reset_on_fork;
 
         /* may grab non-irq protected spin_locks */
         BUG_ON(in_interrupt());
 recheck:
         /* double check policy once rq lock held */
-       if (policy < 0)
+       if (policy < 0) {
+               reset_on_fork = p->sched_reset_on_fork;
                 policy = oldpolicy = p->policy;
-       else if (policy != SCHED_FIFO && policy != SCHED_RR &&
-                       policy != SCHED_NORMAL && policy != SCHED_BATCH &&
-                       policy != SCHED_IDLE)
-               return -EINVAL;
+       } else {
+               reset_on_fork = !!(policy & SCHED_RESET_ON_FORK);
+               policy &= ~SCHED_RESET_ON_FORK;
+
+               if (policy != SCHED_FIFO && policy != SCHED_RR &&
+                               policy != SCHED_NORMAL && policy != SCHED_BATCH &&
+                               policy != SCHED_IDLE)
+                       return -EINVAL;
+       }
+
         /*
          * Valid priorities for SCHED_FIFO and SCHED_RR are
          * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL,
@@ -6148,6 +6172,10 @@ recheck:
                 /* can't change other user's priorities */
                 if (!check_same_owner(p))
                         return -EPERM;
+
+               /* Normal users shall not reset the sched_reset_on_fork flag */
+               if (p->sched_reset_on_fork && !reset_on_fork)
+                       return -EPERM;
         }
 
         if (user) {
@@ -6191,6 +6219,8 @@ recheck:
         if (running)
                 p->sched_class->put_prev_task(rq, p);
 
+       p->sched_reset_on_fork = reset_on_fork;
+
         oldprio = p->prio;
         __setscheduler(rq, p, policy, param->sched_priority);
 
@@ -6307,14 +6337,15 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
         if (p) {
                 retval = security_task_getscheduler(p);
                 if (!retval)
-                       retval = p->policy;
+                       retval = p->policy
+                               | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
         }
         read_unlock(&tasklist_lock);
         return retval;
 }
 
 /**
- * sys_sched_getscheduler - get the RT priority of a thread
+ * sys_sched_getparam - get the RT priority of a thread
  * @pid: the pid in question.
  * @param: structure containing the RT priority.
  */
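
A note on the sched_getscheduler() hunk above: the flag is reported back to
userspace by ORing SCHED_RESET_ON_FORK into the returned policy, so callers
that compare the result against plain policy constants have to mask it off.
A minimal sketch of such a read-back (again not part of this patch, same
header caveat as in the example above):

  #include <sched.h>
  #include <stdio.h>

  #ifndef SCHED_RESET_ON_FORK
  #define SCHED_RESET_ON_FORK 0x40000000  /* value introduced by this patch */
  #endif

  int main(void)
  {
          int ret = sched_getscheduler(0);  /* 0 == the calling process */

          if (ret < 0) {
                  perror("sched_getscheduler");
                  return 1;
          }

          printf("policy: %d, reset-on-fork: %s\n",
                 ret & ~SCHED_RESET_ON_FORK,
                 (ret & SCHED_RESET_ON_FORK) ? "yes" : "no");
          return 0;
  }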