aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: Peter Zijlstra <a.p.zijlstra@chello.nl> 2008-01-25 15:08:29 -0500
committer: Ingo Molnar <mingo@elte.hu> 2008-01-25 15:08:29 -0500
commit: fa85ae2418e6843953107cd6a06f645752829bc0 (patch)
tree: 004130ac471247a29d3f6adfbfe61c474e725779 /kernel
parent: 8f4d37ec073c17e2d4aa8851df5837d798606d6f (diff)
sched: rt time limit
Very simple time limit on the realtime scheduling classes. Allow the rq's realtime class to consume sched_rt_ratio of every sched_rt_period slice. If the class exceeds this quota the fair class will preempt the realtime class.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/sched.c 70
-rw-r--r-- kernel/sched_rt.c 53
-rw-r--r-- kernel/sysctl.c 18
3 files changed, 120 insertions, 21 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 17f93d3eda91..e9a7beee9b79 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -342,13 +342,14 @@ struct cfs_rq {
342/* Real-Time classes' related field in a runqueue: */ 342/* Real-Time classes' related field in a runqueue: */
343struct rt_rq { 343struct rt_rq {
344 struct rt_prio_array active; 344 struct rt_prio_array active;
345 int rt_load_balance_idx;
346 struct list_head *rt_load_balance_head, *rt_load_balance_curr;
347 unsigned long rt_nr_running; 345 unsigned long rt_nr_running;
346#ifdef CONFIG_SMP
348 unsigned long rt_nr_migratory; 347 unsigned long rt_nr_migratory;
349 /* highest queued rt task prio */ 348 int highest_prio; /* highest queued rt task prio */
350 int highest_prio;
351 int overloaded; 349 int overloaded;
350#endif
351 u64 rt_time;
352 u64 rt_throttled;
352}; 353};
353 354
354#ifdef CONFIG_SMP 355#ifdef CONFIG_SMP
@@ -415,6 +416,7 @@ struct rq {
415 struct list_head leaf_cfs_rq_list; 416 struct list_head leaf_cfs_rq_list;
416#endif 417#endif
417 struct rt_rq rt; 418 struct rt_rq rt;
419 u64 rt_period_expire;
418 420
419 /* 421 /*
420 * This is part of a global counter where only the total sum 422 * This is part of a global counter where only the total sum
@@ -601,6 +603,21 @@ const_debug unsigned int sysctl_sched_features =
601const_debug unsigned int sysctl_sched_nr_migrate = 32; 603const_debug unsigned int sysctl_sched_nr_migrate = 32;
602 604
603/* 605/*
606 * period over which we measure -rt task cpu usage in ms.
607 * default: 1s
608 */
609const_debug unsigned int sysctl_sched_rt_period = 1000;
610
611#define SCHED_RT_FRAC_SHIFT 16
612#define SCHED_RT_FRAC (1UL << SCHED_RT_FRAC_SHIFT)
613
614/*
615 * ratio of time -rt tasks may consume.
616 * default: 100%
617 */
618const_debug unsigned int sysctl_sched_rt_ratio = SCHED_RT_FRAC;
619
620/*
604 * For kernel-internal use: high-speed (but slightly incorrect) per-cpu 621 * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
605 * clock constructed from sched_clock(): 622 * clock constructed from sched_clock():
606 */ 623 */
@@ -3674,8 +3691,8 @@ void scheduler_tick(void)
3674 rq->clock = next_tick; 3691 rq->clock = next_tick;
3675 rq->tick_timestamp = rq->clock; 3692 rq->tick_timestamp = rq->clock;
3676 update_cpu_load(rq); 3693 update_cpu_load(rq);
3677 if (curr != rq->idle) /* FIXME: needed? */ 3694 curr->sched_class->task_tick(rq, curr, 0);
3678 curr->sched_class->task_tick(rq, curr, 0); 3695 update_sched_rt_period(rq);
3679 spin_unlock(&rq->lock); 3696 spin_unlock(&rq->lock);
3680 3697
3681#ifdef CONFIG_SMP 3698#ifdef CONFIG_SMP
@@ -7041,6 +7058,29 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
7041 cfs_rq->min_vruntime = (u64)(-(1LL << 20)); 7058 cfs_rq->min_vruntime = (u64)(-(1LL << 20));
7042} 7059}
7043 7060
7061static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
7062{
7063 struct rt_prio_array *array;
7064 int i;
7065
7066 array = &rt_rq->active;
7067 for (i = 0; i < MAX_RT_PRIO; i++) {
7068 INIT_LIST_HEAD(array->queue + i);
7069 __clear_bit(i, array->bitmap);
7070 }
7071 /* delimiter for bitsearch: */
7072 __set_bit(MAX_RT_PRIO, array->bitmap);
7073
7074#ifdef CONFIG_SMP
7075 rt_rq->rt_nr_migratory = 0;
7076 rt_rq->highest_prio = MAX_RT_PRIO;
7077 rt_rq->overloaded = 0;
7078#endif
7079
7080 rt_rq->rt_time = 0;
7081 rt_rq->rt_throttled = 0;
7082}
7083
7044void __init sched_init(void) 7084void __init sched_init(void)
7045{ 7085{
7046 int highest_cpu = 0; 7086 int highest_cpu = 0;
@@ -7051,7 +7091,6 @@ void __init sched_init(void)
7051#endif 7091#endif
7052 7092
7053 for_each_possible_cpu(i) { 7093 for_each_possible_cpu(i) {
7054 struct rt_prio_array *array;
7055 struct rq *rq; 7094 struct rq *rq;
7056 7095
7057 rq = cpu_rq(i); 7096 rq = cpu_rq(i);
@@ -7083,6 +7122,8 @@ void __init sched_init(void)
7083 } 7122 }
7084 init_task_group.shares = init_task_group_load; 7123 init_task_group.shares = init_task_group_load;
7085#endif 7124#endif
7125 init_rt_rq(&rq->rt, rq);
7126 rq->rt_period_expire = 0;
7086 7127
7087 for (j = 0; j < CPU_LOAD_IDX_MAX; j++) 7128 for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
7088 rq->cpu_load[j] = 0; 7129 rq->cpu_load[j] = 0;
@@ -7095,22 +7136,11 @@ void __init sched_init(void)
7095 rq->cpu = i; 7136 rq->cpu = i;
7096 rq->migration_thread = NULL; 7137 rq->migration_thread = NULL;
7097 INIT_LIST_HEAD(&rq->migration_queue); 7138 INIT_LIST_HEAD(&rq->migration_queue);
7098 rq->rt.highest_prio = MAX_RT_PRIO;
7099 rq->rt.overloaded = 0;
7100 rq_attach_root(rq, &def_root_domain); 7139 rq_attach_root(rq, &def_root_domain);
7101#endif 7140#endif
7102 init_rq_hrtick(rq); 7141 init_rq_hrtick(rq);
7103
7104 atomic_set(&rq->nr_iowait, 0); 7142 atomic_set(&rq->nr_iowait, 0);
7105
7106 array = &rq->rt.active;
7107 for (j = 0; j < MAX_RT_PRIO; j++) {
7108 INIT_LIST_HEAD(array->queue + j);
7109 __clear_bit(j, array->bitmap);
7110 }
7111 highest_cpu = i; 7143 highest_cpu = i;
7112 /* delimiter for bitsearch: */
7113 __set_bit(MAX_RT_PRIO, array->bitmap);
7114 } 7144 }
7115 7145
7116 set_load_weight(&init_task); 7146 set_load_weight(&init_task);
@@ -7282,7 +7312,7 @@ void set_curr_task(int cpu, struct task_struct *p)
7282#ifdef CONFIG_SMP 7312#ifdef CONFIG_SMP
7283/* 7313/*
7284 * distribute shares of all task groups among their schedulable entities, 7314 * distribute shares of all task groups among their schedulable entities,
7285 * to reflect load distrbution across cpus. 7315 * to reflect load distribution across cpus.
7286 */ 7316 */
7287static int rebalance_shares(struct sched_domain *sd, int this_cpu) 7317static int rebalance_shares(struct sched_domain *sd, int this_cpu)
7288{ 7318{
@@ -7349,7 +7379,7 @@ static int rebalance_shares(struct sched_domain *sd, int this_cpu)
7349 * sysctl_sched_max_bal_int_shares represents the maximum interval between 7379 * sysctl_sched_max_bal_int_shares represents the maximum interval between
7350 * consecutive calls to rebalance_shares() in the same sched domain. 7380 * consecutive calls to rebalance_shares() in the same sched domain.
7351 * 7381 *
7352 * These settings allows for the appropriate tradeoff between accuracy of 7382 * These settings allows for the appropriate trade-off between accuracy of
7353 * fairness and the associated overhead. 7383 * fairness and the associated overhead.
7354 * 7384 *
7355 */ 7385 */
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 83fbbcb8019e..fd10d965aa06 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -45,6 +45,50 @@ static void update_rt_migration(struct rq *rq)
45} 45}
46#endif /* CONFIG_SMP */ 46#endif /* CONFIG_SMP */
47 47
48static int sched_rt_ratio_exceeded(struct rq *rq, struct rt_rq *rt_rq)
49{
50 u64 period, ratio;
51
52 if (sysctl_sched_rt_ratio == SCHED_RT_FRAC)
53 return 0;
54
55 if (rt_rq->rt_throttled)
56 return 1;
57
58 period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
59 ratio = (period * sysctl_sched_rt_ratio) >> SCHED_RT_FRAC_SHIFT;
60
61 if (rt_rq->rt_time > ratio) {
62 rt_rq->rt_throttled = rq->clock + period - rt_rq->rt_time;
63 return 1;
64 }
65
66 return 0;
67}
68
69static void update_sched_rt_period(struct rq *rq)
70{
71 while (rq->clock > rq->rt_period_expire) {
72 u64 period, ratio;
73
74 period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
75 ratio = (period * sysctl_sched_rt_ratio) >> SCHED_RT_FRAC_SHIFT;
76
77 rq->rt.rt_time -= min(rq->rt.rt_time, ratio);
78 rq->rt_period_expire += period;
79 }
80
81 /*
82 * When the rt throttle is expired, let them rip.
83 * (XXX: use hrtick when available)
84 */
85 if (rq->rt.rt_throttled && rq->clock > rq->rt.rt_throttled) {
86 rq->rt.rt_throttled = 0;
87 if (!sched_rt_ratio_exceeded(rq, &rq->rt))
88 resched_task(rq->curr);
89 }
90}
91
48/* 92/*
49 * Update the current task's runtime statistics. Skip current tasks that 93 * Update the current task's runtime statistics. Skip current tasks that
50 * are not in our scheduling class. 94 * are not in our scheduling class.
@@ -66,6 +110,11 @@ static void update_curr_rt(struct rq *rq)
66 curr->se.sum_exec_runtime += delta_exec; 110 curr->se.sum_exec_runtime += delta_exec;
67 curr->se.exec_start = rq->clock; 111 curr->se.exec_start = rq->clock;
68 cpuacct_charge(curr, delta_exec); 112 cpuacct_charge(curr, delta_exec);
113
114 rq->rt.rt_time += delta_exec;
115 update_sched_rt_period(rq);
116 if (sched_rt_ratio_exceeded(rq, &rq->rt))
117 resched_task(curr);
69} 118}
70 119
71static inline void inc_rt_tasks(struct task_struct *p, struct rq *rq) 120static inline void inc_rt_tasks(struct task_struct *p, struct rq *rq)
@@ -208,8 +257,12 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
208 struct rt_prio_array *array = &rq->rt.active; 257 struct rt_prio_array *array = &rq->rt.active;
209 struct task_struct *next; 258 struct task_struct *next;
210 struct list_head *queue; 259 struct list_head *queue;
260 struct rt_rq *rt_rq = &rq->rt;
211 int idx; 261 int idx;
212 262
263 if (sched_rt_ratio_exceeded(rq, rt_rq))
264 return NULL;
265
213 idx = sched_find_first_bit(array->bitmap); 266 idx = sched_find_first_bit(array->bitmap);
214 if (idx >= MAX_RT_PRIO) 267 if (idx >= MAX_RT_PRIO)
215 return NULL; 268 return NULL;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 96f31c1bc4f0..3afbd25f43eb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -306,7 +306,23 @@ static struct ctl_table kern_table[] = {
306 .procname = "sched_nr_migrate", 306 .procname = "sched_nr_migrate",
307 .data = &sysctl_sched_nr_migrate, 307 .data = &sysctl_sched_nr_migrate,
308 .maxlen = sizeof(unsigned int), 308 .maxlen = sizeof(unsigned int),
309 .mode = 644, 309 .mode = 0644,
310 .proc_handler = &proc_dointvec,
311 },
312 {
313 .ctl_name = CTL_UNNUMBERED,
314 .procname = "sched_rt_period_ms",
315 .data = &sysctl_sched_rt_period,
316 .maxlen = sizeof(unsigned int),
317 .mode = 0644,
318 .proc_handler = &proc_dointvec,
319 },
320 {
321 .ctl_name = CTL_UNNUMBERED,
322 .procname = "sched_rt_ratio",
323 .data = &sysctl_sched_rt_ratio,
324 .maxlen = sizeof(unsigned int),
325 .mode = 0644,
310 .proc_handler = &proc_dointvec, 326 .proc_handler = &proc_dointvec,
311 }, 327 },
312#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) 328#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)