diff options
author | Ingo Molnar <mingo@elte.hu> | 2006-01-14 16:20:41 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-01-14 21:25:20 -0500 |
commit | b0a9499c3dd50d333e2aedb7e894873c58da3785 (patch) | |
tree | 1b9610020884091984ce8290c70bebdc3e7bb09b /kernel | |
parent | 2d0cfb527944c2cfee2cffab14f52d483e329fcf (diff) |
[PATCH] sched: add new SCHED_BATCH policy
Add a new SCHED_BATCH (3) scheduling policy: such tasks are presumed
CPU-intensive, and will acquire a constant +5 priority level penalty. Such a
policy is nice for workloads that are non-interactive, but which do not
want to give up their nice levels. The policy is also useful for workloads
that want a deterministic scheduling policy without interactivity causing
extra preemptions (between that workload's tasks).
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/exit.c | 4 | ||||
-rw-r--r-- | kernel/sched.c | 48 |
2 files changed, 36 insertions, 16 deletions
diff --git a/kernel/exit.c b/kernel/exit.c index f8e609ff1893..7fb541cb8d69 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -244,7 +244,9 @@ static inline void reparent_to_init(void) | |||
244 | /* Set the exit signal to SIGCHLD so we signal init on exit */ | 244 | /* Set the exit signal to SIGCHLD so we signal init on exit */ |
245 | current->exit_signal = SIGCHLD; | 245 | current->exit_signal = SIGCHLD; |
246 | 246 | ||
247 | if ((current->policy == SCHED_NORMAL) && (task_nice(current) < 0)) | 247 | if ((current->policy == SCHED_NORMAL || |
248 | current->policy == SCHED_BATCH) | ||
249 | && (task_nice(current) < 0)) | ||
248 | set_user_nice(current, 0); | 250 | set_user_nice(current, 0); |
249 | /* cpus_allowed? */ | 251 | /* cpus_allowed? */ |
250 | /* rt_priority? */ | 252 | /* rt_priority? */ |
diff --git a/kernel/sched.c b/kernel/sched.c index c9dec2aa1976..e1dc903d5a75 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -748,10 +748,14 @@ static int recalc_task_prio(task_t *p, unsigned long long now) | |||
748 | unsigned long long __sleep_time = now - p->timestamp; | 748 | unsigned long long __sleep_time = now - p->timestamp; |
749 | unsigned long sleep_time; | 749 | unsigned long sleep_time; |
750 | 750 | ||
751 | if (__sleep_time > NS_MAX_SLEEP_AVG) | 751 | if (unlikely(p->policy == SCHED_BATCH)) |
752 | sleep_time = NS_MAX_SLEEP_AVG; | 752 | sleep_time = 0; |
753 | else | 753 | else { |
754 | sleep_time = (unsigned long)__sleep_time; | 754 | if (__sleep_time > NS_MAX_SLEEP_AVG) |
755 | sleep_time = NS_MAX_SLEEP_AVG; | ||
756 | else | ||
757 | sleep_time = (unsigned long)__sleep_time; | ||
758 | } | ||
755 | 759 | ||
756 | if (likely(sleep_time > 0)) { | 760 | if (likely(sleep_time > 0)) { |
757 | /* | 761 | /* |
@@ -3560,7 +3564,7 @@ void set_user_nice(task_t *p, long nice) | |||
3560 | * The RT priorities are set via sched_setscheduler(), but we still | 3564 | * The RT priorities are set via sched_setscheduler(), but we still |
3561 | * allow the 'normal' nice value to be set - but as expected | 3565 | * allow the 'normal' nice value to be set - but as expected |
3562 | * it wont have any effect on scheduling until the task is | 3566 | * it wont have any effect on scheduling until the task is |
3563 | * not SCHED_NORMAL: | 3567 | * not SCHED_NORMAL/SCHED_BATCH: |
3564 | */ | 3568 | */ |
3565 | if (rt_task(p)) { | 3569 | if (rt_task(p)) { |
3566 | p->static_prio = NICE_TO_PRIO(nice); | 3570 | p->static_prio = NICE_TO_PRIO(nice); |
@@ -3706,10 +3710,16 @@ static void __setscheduler(struct task_struct *p, int policy, int prio) | |||
3706 | BUG_ON(p->array); | 3710 | BUG_ON(p->array); |
3707 | p->policy = policy; | 3711 | p->policy = policy; |
3708 | p->rt_priority = prio; | 3712 | p->rt_priority = prio; |
3709 | if (policy != SCHED_NORMAL) | 3713 | if (policy != SCHED_NORMAL && policy != SCHED_BATCH) { |
3710 | p->prio = MAX_RT_PRIO-1 - p->rt_priority; | 3714 | p->prio = MAX_RT_PRIO-1 - p->rt_priority; |
3711 | else | 3715 | } else { |
3712 | p->prio = p->static_prio; | 3716 | p->prio = p->static_prio; |
3717 | /* | ||
3718 | * SCHED_BATCH tasks are treated as perpetual CPU hogs: | ||
3719 | */ | ||
3720 | if (policy == SCHED_BATCH) | ||
3721 | p->sleep_avg = 0; | ||
3722 | } | ||
3713 | } | 3723 | } |
3714 | 3724 | ||
3715 | /** | 3725 | /** |
@@ -3733,29 +3743,35 @@ recheck: | |||
3733 | if (policy < 0) | 3743 | if (policy < 0) |
3734 | policy = oldpolicy = p->policy; | 3744 | policy = oldpolicy = p->policy; |
3735 | else if (policy != SCHED_FIFO && policy != SCHED_RR && | 3745 | else if (policy != SCHED_FIFO && policy != SCHED_RR && |
3736 | policy != SCHED_NORMAL) | 3746 | policy != SCHED_NORMAL && policy != SCHED_BATCH) |
3737 | return -EINVAL; | 3747 | return -EINVAL; |
3738 | /* | 3748 | /* |
3739 | * Valid priorities for SCHED_FIFO and SCHED_RR are | 3749 | * Valid priorities for SCHED_FIFO and SCHED_RR are |
3740 | * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0. | 3750 | * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and |
3751 | * SCHED_BATCH is 0. | ||
3741 | */ | 3752 | */ |
3742 | if (param->sched_priority < 0 || | 3753 | if (param->sched_priority < 0 || |
3743 | (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) || | 3754 | (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) || |
3744 | (!p->mm && param->sched_priority > MAX_RT_PRIO-1)) | 3755 | (!p->mm && param->sched_priority > MAX_RT_PRIO-1)) |
3745 | return -EINVAL; | 3756 | return -EINVAL; |
3746 | if ((policy == SCHED_NORMAL) != (param->sched_priority == 0)) | 3757 | if ((policy == SCHED_NORMAL || policy == SCHED_BATCH) |
3758 | != (param->sched_priority == 0)) | ||
3747 | return -EINVAL; | 3759 | return -EINVAL; |
3748 | 3760 | ||
3749 | /* | 3761 | /* |
3750 | * Allow unprivileged RT tasks to decrease priority: | 3762 | * Allow unprivileged RT tasks to decrease priority: |
3751 | */ | 3763 | */ |
3752 | if (!capable(CAP_SYS_NICE)) { | 3764 | if (!capable(CAP_SYS_NICE)) { |
3753 | /* can't change policy */ | 3765 | /* |
3754 | if (policy != p->policy && | 3766 | * can't change policy, except between SCHED_NORMAL |
3755 | !p->signal->rlim[RLIMIT_RTPRIO].rlim_cur) | 3767 | * and SCHED_BATCH: |
3768 | */ | ||
3769 | if (((policy != SCHED_NORMAL && p->policy != SCHED_BATCH) && | ||
3770 | (policy != SCHED_BATCH && p->policy != SCHED_NORMAL)) && | ||
3771 | !p->signal->rlim[RLIMIT_RTPRIO].rlim_cur) | ||
3756 | return -EPERM; | 3772 | return -EPERM; |
3757 | /* can't increase priority */ | 3773 | /* can't increase priority */ |
3758 | if (policy != SCHED_NORMAL && | 3774 | if ((policy != SCHED_NORMAL && policy != SCHED_BATCH) && |
3759 | param->sched_priority > p->rt_priority && | 3775 | param->sched_priority > p->rt_priority && |
3760 | param->sched_priority > | 3776 | param->sched_priority > |
3761 | p->signal->rlim[RLIMIT_RTPRIO].rlim_cur) | 3777 | p->signal->rlim[RLIMIT_RTPRIO].rlim_cur) |
@@ -4233,6 +4249,7 @@ asmlinkage long sys_sched_get_priority_max(int policy) | |||
4233 | ret = MAX_USER_RT_PRIO-1; | 4249 | ret = MAX_USER_RT_PRIO-1; |
4234 | break; | 4250 | break; |
4235 | case SCHED_NORMAL: | 4251 | case SCHED_NORMAL: |
4252 | case SCHED_BATCH: | ||
4236 | ret = 0; | 4253 | ret = 0; |
4237 | break; | 4254 | break; |
4238 | } | 4255 | } |
@@ -4256,6 +4273,7 @@ asmlinkage long sys_sched_get_priority_min(int policy) | |||
4256 | ret = 1; | 4273 | ret = 1; |
4257 | break; | 4274 | break; |
4258 | case SCHED_NORMAL: | 4275 | case SCHED_NORMAL: |
4276 | case SCHED_BATCH: | ||
4259 | ret = 0; | 4277 | ret = 0; |
4260 | } | 4278 | } |
4261 | return ret; | 4279 | return ret; |