aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2006-01-14 16:20:41 -0500
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-01-14 21:25:20 -0500
commit b0a9499c3dd50d333e2aedb7e894873c58da3785 (patch)
tree 1b9610020884091984ce8290c70bebdc3e7bb09b /kernel
parent 2d0cfb527944c2cfee2cffab14f52d483e329fcf (diff)
[PATCH] sched: add new SCHED_BATCH policy
Add a new SCHED_BATCH (3) scheduling policy: such tasks are presumed CPU-intensive, and will acquire a constant +5 priority level penalty. Such a policy is nice for workloads that are non-interactive, but which do not want to give up their nice levels. The policy is also useful for workloads that want a deterministic scheduling policy without interactivity causing extra preemptions (between that workload's tasks).

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/exit.c 4
-rw-r--r-- kernel/sched.c 48
2 files changed, 36 insertions, 16 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index f8e609ff1893..7fb541cb8d69 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -244,7 +244,9 @@ static inline void reparent_to_init(void)
244 /* Set the exit signal to SIGCHLD so we signal init on exit */ 244 /* Set the exit signal to SIGCHLD so we signal init on exit */
245 current->exit_signal = SIGCHLD; 245 current->exit_signal = SIGCHLD;
246 246
247 if ((current->policy == SCHED_NORMAL) && (task_nice(current) < 0)) 247 if ((current->policy == SCHED_NORMAL ||
248 current->policy == SCHED_BATCH)
249 && (task_nice(current) < 0))
248 set_user_nice(current, 0); 250 set_user_nice(current, 0);
249 /* cpus_allowed? */ 251 /* cpus_allowed? */
250 /* rt_priority? */ 252 /* rt_priority? */
diff --git a/kernel/sched.c b/kernel/sched.c
index c9dec2aa1976..e1dc903d5a75 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -748,10 +748,14 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
748 unsigned long long __sleep_time = now - p->timestamp; 748 unsigned long long __sleep_time = now - p->timestamp;
749 unsigned long sleep_time; 749 unsigned long sleep_time;
750 750
751 if (__sleep_time > NS_MAX_SLEEP_AVG) 751 if (unlikely(p->policy == SCHED_BATCH))
752 sleep_time = NS_MAX_SLEEP_AVG; 752 sleep_time = 0;
753 else 753 else {
754 sleep_time = (unsigned long)__sleep_time; 754 if (__sleep_time > NS_MAX_SLEEP_AVG)
755 sleep_time = NS_MAX_SLEEP_AVG;
756 else
757 sleep_time = (unsigned long)__sleep_time;
758 }
755 759
756 if (likely(sleep_time > 0)) { 760 if (likely(sleep_time > 0)) {
757 /* 761 /*
@@ -3560,7 +3564,7 @@ void set_user_nice(task_t *p, long nice)
3560 * The RT priorities are set via sched_setscheduler(), but we still 3564 * The RT priorities are set via sched_setscheduler(), but we still
3561 * allow the 'normal' nice value to be set - but as expected 3565 * allow the 'normal' nice value to be set - but as expected
3562 * it wont have any effect on scheduling until the task is 3566 * it wont have any effect on scheduling until the task is
3563 * not SCHED_NORMAL: 3567 * not SCHED_NORMAL/SCHED_BATCH:
3564 */ 3568 */
3565 if (rt_task(p)) { 3569 if (rt_task(p)) {
3566 p->static_prio = NICE_TO_PRIO(nice); 3570 p->static_prio = NICE_TO_PRIO(nice);
@@ -3706,10 +3710,16 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
3706 BUG_ON(p->array); 3710 BUG_ON(p->array);
3707 p->policy = policy; 3711 p->policy = policy;
3708 p->rt_priority = prio; 3712 p->rt_priority = prio;
3709 if (policy != SCHED_NORMAL) 3713 if (policy != SCHED_NORMAL && policy != SCHED_BATCH) {
3710 p->prio = MAX_RT_PRIO-1 - p->rt_priority; 3714 p->prio = MAX_RT_PRIO-1 - p->rt_priority;
3711 else 3715 } else {
3712 p->prio = p->static_prio; 3716 p->prio = p->static_prio;
3717 /*
3718 * SCHED_BATCH tasks are treated as perpetual CPU hogs:
3719 */
3720 if (policy == SCHED_BATCH)
3721 p->sleep_avg = 0;
3722 }
3713} 3723}
3714 3724
3715/** 3725/**
@@ -3733,29 +3743,35 @@ recheck:
3733 if (policy < 0) 3743 if (policy < 0)
3734 policy = oldpolicy = p->policy; 3744 policy = oldpolicy = p->policy;
3735 else if (policy != SCHED_FIFO && policy != SCHED_RR && 3745 else if (policy != SCHED_FIFO && policy != SCHED_RR &&
3736 policy != SCHED_NORMAL) 3746 policy != SCHED_NORMAL && policy != SCHED_BATCH)
3737 return -EINVAL; 3747 return -EINVAL;
3738 /* 3748 /*
3739 * Valid priorities for SCHED_FIFO and SCHED_RR are 3749 * Valid priorities for SCHED_FIFO and SCHED_RR are
3740 * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0. 3750 * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and
3751 * SCHED_BATCH is 0.
3741 */ 3752 */
3742 if (param->sched_priority < 0 || 3753 if (param->sched_priority < 0 ||
3743 (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) || 3754 (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
3744 (!p->mm && param->sched_priority > MAX_RT_PRIO-1)) 3755 (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
3745 return -EINVAL; 3756 return -EINVAL;
3746 if ((policy == SCHED_NORMAL) != (param->sched_priority == 0)) 3757 if ((policy == SCHED_NORMAL || policy == SCHED_BATCH)
3758 != (param->sched_priority == 0))
3747 return -EINVAL; 3759 return -EINVAL;
3748 3760
3749 /* 3761 /*
3750 * Allow unprivileged RT tasks to decrease priority: 3762 * Allow unprivileged RT tasks to decrease priority:
3751 */ 3763 */
3752 if (!capable(CAP_SYS_NICE)) { 3764 if (!capable(CAP_SYS_NICE)) {
3753 /* can't change policy */ 3765 /*
3754 if (policy != p->policy && 3766 * can't change policy, except between SCHED_NORMAL
3755 !p->signal->rlim[RLIMIT_RTPRIO].rlim_cur) 3767 * and SCHED_BATCH:
3768 */
3769 if (((policy != SCHED_NORMAL && p->policy != SCHED_BATCH) &&
3770 (policy != SCHED_BATCH && p->policy != SCHED_NORMAL)) &&
3771 !p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
3756 return -EPERM; 3772 return -EPERM;
3757 /* can't increase priority */ 3773 /* can't increase priority */
3758 if (policy != SCHED_NORMAL && 3774 if ((policy != SCHED_NORMAL && policy != SCHED_BATCH) &&
3759 param->sched_priority > p->rt_priority && 3775 param->sched_priority > p->rt_priority &&
3760 param->sched_priority > 3776 param->sched_priority >
3761 p->signal->rlim[RLIMIT_RTPRIO].rlim_cur) 3777 p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
@@ -4233,6 +4249,7 @@ asmlinkage long sys_sched_get_priority_max(int policy)
4233 ret = MAX_USER_RT_PRIO-1; 4249 ret = MAX_USER_RT_PRIO-1;
4234 break; 4250 break;
4235 case SCHED_NORMAL: 4251 case SCHED_NORMAL:
4252 case SCHED_BATCH:
4236 ret = 0; 4253 ret = 0;
4237 break; 4254 break;
4238 } 4255 }
@@ -4256,6 +4273,7 @@ asmlinkage long sys_sched_get_priority_min(int policy)
4256 ret = 1; 4273 ret = 1;
4257 break; 4274 break;
4258 case SCHED_NORMAL: 4275 case SCHED_NORMAL:
4276 case SCHED_BATCH:
4259 ret = 0; 4277 ret = 0;
4260 } 4278 }
4261 return ret; 4279 return ret;