aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@elte.hu> 2006-01-14 16:20:41 -0500
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-01-14 21:25:20 -0500
commit: b0a9499c3dd50d333e2aedb7e894873c58da3785 (patch)
tree: 1b9610020884091984ce8290c70bebdc3e7bb09b
parent: 2d0cfb527944c2cfee2cffab14f52d483e329fcf (diff)
[PATCH] sched: add new SCHED_BATCH policy
Add a new SCHED_BATCH (3) scheduling policy: such tasks are presumed
CPU-intensive, and will acquire a constant +5 priority level penalty. Such
policy is nice for workloads that are non-interactive, but which do not
want to give up their nice levels. The policy is also useful for workloads
that want a deterministic scheduling policy without interactivity causing
extra preemptions (between that workload's tasks).

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- include/linux/sched.h   7
-rw-r--r-- kernel/exit.c           4
-rw-r--r-- kernel/sched.c         48
3 files changed, 40 insertions, 19 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a72e17135421..2df1a1a2fee5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -160,6 +160,7 @@ extern unsigned long nr_iowait(void);
160#define SCHED_NORMAL 0 160#define SCHED_NORMAL 0
161#define SCHED_FIFO 1 161#define SCHED_FIFO 1
162#define SCHED_RR 2 162#define SCHED_RR 2
163#define SCHED_BATCH 3
163 164
164struct sched_param { 165struct sched_param {
165 int sched_priority; 166 int sched_priority;
@@ -470,9 +471,9 @@ struct signal_struct {
470 471
471/* 472/*
472 * Priority of a process goes from 0..MAX_PRIO-1, valid RT 473 * Priority of a process goes from 0..MAX_PRIO-1, valid RT
473 * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL tasks are 474 * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
474 * in the range MAX_RT_PRIO..MAX_PRIO-1. Priority values 475 * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
475 * are inverted: lower p->prio value means higher priority. 476 * values are inverted: lower p->prio value means higher priority.
476 * 477 *
477 * The MAX_USER_RT_PRIO value allows the actual maximum 478 * The MAX_USER_RT_PRIO value allows the actual maximum
478 * RT priority to be separate from the value exported to 479 * RT priority to be separate from the value exported to
diff --git a/kernel/exit.c b/kernel/exit.c
index f8e609ff1893..7fb541cb8d69 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -244,7 +244,9 @@ static inline void reparent_to_init(void)
244 /* Set the exit signal to SIGCHLD so we signal init on exit */ 244 /* Set the exit signal to SIGCHLD so we signal init on exit */
245 current->exit_signal = SIGCHLD; 245 current->exit_signal = SIGCHLD;
246 246
247 if ((current->policy == SCHED_NORMAL) && (task_nice(current) < 0)) 247 if ((current->policy == SCHED_NORMAL ||
248 current->policy == SCHED_BATCH)
249 && (task_nice(current) < 0))
248 set_user_nice(current, 0); 250 set_user_nice(current, 0);
249 /* cpus_allowed? */ 251 /* cpus_allowed? */
250 /* rt_priority? */ 252 /* rt_priority? */
diff --git a/kernel/sched.c b/kernel/sched.c
index c9dec2aa1976..e1dc903d5a75 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -748,10 +748,14 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
748 unsigned long long __sleep_time = now - p->timestamp; 748 unsigned long long __sleep_time = now - p->timestamp;
749 unsigned long sleep_time; 749 unsigned long sleep_time;
750 750
751 if (__sleep_time > NS_MAX_SLEEP_AVG) 751 if (unlikely(p->policy == SCHED_BATCH))
752 sleep_time = NS_MAX_SLEEP_AVG; 752 sleep_time = 0;
753 else 753 else {
754 sleep_time = (unsigned long)__sleep_time; 754 if (__sleep_time > NS_MAX_SLEEP_AVG)
755 sleep_time = NS_MAX_SLEEP_AVG;
756 else
757 sleep_time = (unsigned long)__sleep_time;
758 }
755 759
756 if (likely(sleep_time > 0)) { 760 if (likely(sleep_time > 0)) {
757 /* 761 /*
@@ -3560,7 +3564,7 @@ void set_user_nice(task_t *p, long nice)
3560 * The RT priorities are set via sched_setscheduler(), but we still 3564 * The RT priorities are set via sched_setscheduler(), but we still
3561 * allow the 'normal' nice value to be set - but as expected 3565 * allow the 'normal' nice value to be set - but as expected
3562 * it wont have any effect on scheduling until the task is 3566 * it wont have any effect on scheduling until the task is
3563 * not SCHED_NORMAL: 3567 * not SCHED_NORMAL/SCHED_BATCH:
3564 */ 3568 */
3565 if (rt_task(p)) { 3569 if (rt_task(p)) {
3566 p->static_prio = NICE_TO_PRIO(nice); 3570 p->static_prio = NICE_TO_PRIO(nice);
@@ -3706,10 +3710,16 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
3706 BUG_ON(p->array); 3710 BUG_ON(p->array);
3707 p->policy = policy; 3711 p->policy = policy;
3708 p->rt_priority = prio; 3712 p->rt_priority = prio;
3709 if (policy != SCHED_NORMAL) 3713 if (policy != SCHED_NORMAL && policy != SCHED_BATCH) {
3710 p->prio = MAX_RT_PRIO-1 - p->rt_priority; 3714 p->prio = MAX_RT_PRIO-1 - p->rt_priority;
3711 else 3715 } else {
3712 p->prio = p->static_prio; 3716 p->prio = p->static_prio;
3717 /*
3718 * SCHED_BATCH tasks are treated as perpetual CPU hogs:
3719 */
3720 if (policy == SCHED_BATCH)
3721 p->sleep_avg = 0;
3722 }
3713} 3723}
3714 3724
3715/** 3725/**
@@ -3733,29 +3743,35 @@ recheck:
3733 if (policy < 0) 3743 if (policy < 0)
3734 policy = oldpolicy = p->policy; 3744 policy = oldpolicy = p->policy;
3735 else if (policy != SCHED_FIFO && policy != SCHED_RR && 3745 else if (policy != SCHED_FIFO && policy != SCHED_RR &&
3736 policy != SCHED_NORMAL) 3746 policy != SCHED_NORMAL && policy != SCHED_BATCH)
3737 return -EINVAL; 3747 return -EINVAL;
3738 /* 3748 /*
3739 * Valid priorities for SCHED_FIFO and SCHED_RR are 3749 * Valid priorities for SCHED_FIFO and SCHED_RR are
3740 * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0. 3750 * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and
3751 * SCHED_BATCH is 0.
3741 */ 3752 */
3742 if (param->sched_priority < 0 || 3753 if (param->sched_priority < 0 ||
3743 (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) || 3754 (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
3744 (!p->mm && param->sched_priority > MAX_RT_PRIO-1)) 3755 (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
3745 return -EINVAL; 3756 return -EINVAL;
3746 if ((policy == SCHED_NORMAL) != (param->sched_priority == 0)) 3757 if ((policy == SCHED_NORMAL || policy == SCHED_BATCH)
3758 != (param->sched_priority == 0))
3747 return -EINVAL; 3759 return -EINVAL;
3748 3760
3749 /* 3761 /*
3750 * Allow unprivileged RT tasks to decrease priority: 3762 * Allow unprivileged RT tasks to decrease priority:
3751 */ 3763 */
3752 if (!capable(CAP_SYS_NICE)) { 3764 if (!capable(CAP_SYS_NICE)) {
3753 /* can't change policy */ 3765 /*
3754 if (policy != p->policy && 3766 * can't change policy, except between SCHED_NORMAL
3755 !p->signal->rlim[RLIMIT_RTPRIO].rlim_cur) 3767 * and SCHED_BATCH:
3768 */
3769 if (((policy != SCHED_NORMAL && p->policy != SCHED_BATCH) &&
3770 (policy != SCHED_BATCH && p->policy != SCHED_NORMAL)) &&
3771 !p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
3756 return -EPERM; 3772 return -EPERM;
3757 /* can't increase priority */ 3773 /* can't increase priority */
3758 if (policy != SCHED_NORMAL && 3774 if ((policy != SCHED_NORMAL && policy != SCHED_BATCH) &&
3759 param->sched_priority > p->rt_priority && 3775 param->sched_priority > p->rt_priority &&
3760 param->sched_priority > 3776 param->sched_priority >
3761 p->signal->rlim[RLIMIT_RTPRIO].rlim_cur) 3777 p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
@@ -4233,6 +4249,7 @@ asmlinkage long sys_sched_get_priority_max(int policy)
4233 ret = MAX_USER_RT_PRIO-1; 4249 ret = MAX_USER_RT_PRIO-1;
4234 break; 4250 break;
4235 case SCHED_NORMAL: 4251 case SCHED_NORMAL:
4252 case SCHED_BATCH:
4236 ret = 0; 4253 ret = 0;
4237 break; 4254 break;
4238 } 4255 }
@@ -4256,6 +4273,7 @@ asmlinkage long sys_sched_get_priority_min(int policy)
4256 ret = 1; 4273 ret = 1;
4257 break; 4274 break;
4258 case SCHED_NORMAL: 4275 case SCHED_NORMAL:
4276 case SCHED_BATCH:
4259 ret = 0; 4277 ret = 0;
4260 } 4278 }
4261 return ret; 4279 return ret;