author		Peter Zijlstra <a.p.zijlstra@chello.nl>	2008-01-25 15:08:29 -0500
committer	Ingo Molnar <mingo@elte.hu>		2008-01-25 15:08:29 -0500
commit		fa85ae2418e6843953107cd6a06f645752829bc0
tree		004130ac471247a29d3f6adfbfe61c474e725779
parent		8f4d37ec073c17e2d4aa8851df5837d798606d6f
sched: rt time limit
Very simple time limit on the realtime scheduling classes.
Allow the rq's realtime class to consume sched_rt_ratio of every
sched_rt_period slice. If the class exceeds this quota, the fair class
will preempt the realtime class.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--	include/linux/sched.h	|  2
-rw-r--r--	kernel/sched.c		| 70
-rw-r--r--	kernel/sched_rt.c	| 53
-rw-r--r--	kernel/sysctl.c		| 18
4 files changed, 122 insertions, 21 deletions
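A note on the arithmetic before the patch itself: sched_rt_ratio is a fixed-point fraction with a 16-bit shift, so the per-period rt quota that sched_rt_ratio_exceeded() computes below is period * ratio >> SCHED_RT_FRAC_SHIFT. A minimal userspace sketch of that computation; the 50% ratio is an example value, not the patch's default of 100% (SCHED_RT_FRAC):

/* Illustrative sketch only -- mirrors the quota arithmetic the patch
 * adds in sched_rt_ratio_exceeded(); not kernel code.
 */
#include <stdio.h>
#include <stdint.h>

#define SCHED_RT_FRAC_SHIFT	16
#define SCHED_RT_FRAC		(1ULL << SCHED_RT_FRAC_SHIFT)
#define NSEC_PER_MSEC		1000000ULL

int main(void)
{
	uint64_t rt_period_ms = 1000;		/* sysctl_sched_rt_period default */
	uint64_t rt_ratio = SCHED_RT_FRAC / 2;	/* 50% -- example; default is SCHED_RT_FRAC */

	uint64_t period = rt_period_ms * NSEC_PER_MSEC;
	uint64_t quota  = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;

	/* prints: rt tasks may run 500000000 ns of every 1000000000 ns */
	printf("rt tasks may run %llu ns of every %llu ns\n",
	       (unsigned long long)quota, (unsigned long long)period);
	return 0;
}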
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 43e0339d65fc..d5ea144df836 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1490,6 +1490,8 @@ extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_sched_rt_period;
+extern unsigned int sysctl_sched_rt_ratio;
 #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
 extern unsigned int sysctl_sched_min_bal_int_shares;
 extern unsigned int sysctl_sched_max_bal_int_shares;
diff --git a/kernel/sched.c b/kernel/sched.c
index 17f93d3eda91..e9a7beee9b79 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -342,13 +342,14 @@ struct cfs_rq {
 /* Real-Time classes' related field in a runqueue: */
 struct rt_rq {
 	struct rt_prio_array active;
-	int rt_load_balance_idx;
-	struct list_head *rt_load_balance_head, *rt_load_balance_curr;
 	unsigned long rt_nr_running;
+#ifdef CONFIG_SMP
 	unsigned long rt_nr_migratory;
-	/* highest queued rt task prio */
-	int highest_prio;
+	int highest_prio; /* highest queued rt task prio */
 	int overloaded;
+#endif
+	u64 rt_time;
+	u64 rt_throttled;
 };
 
 #ifdef CONFIG_SMP
@@ -415,6 +416,7 @@ struct rq {
 	struct list_head leaf_cfs_rq_list;
 #endif
 	struct rt_rq rt;
+	u64 rt_period_expire;
 
 	/*
 	 * This is part of a global counter where only the total sum
@@ -601,6 +603,21 @@ const_debug unsigned int sysctl_sched_features =
 const_debug unsigned int sysctl_sched_nr_migrate = 32;
 
 /*
+ * period over which we measure -rt task cpu usage in ms.
+ * default: 1s
+ */
+const_debug unsigned int sysctl_sched_rt_period = 1000;
+
+#define SCHED_RT_FRAC_SHIFT	16
+#define SCHED_RT_FRAC		(1UL << SCHED_RT_FRAC_SHIFT)
+
+/*
+ * ratio of time -rt tasks may consume.
+ * default: 100%
+ */
+const_debug unsigned int sysctl_sched_rt_ratio = SCHED_RT_FRAC;
+
+/*
  * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
  * clock constructed from sched_clock():
  */
@@ -3674,8 +3691,8 @@ void scheduler_tick(void)
 	rq->clock = next_tick;
 	rq->tick_timestamp = rq->clock;
 	update_cpu_load(rq);
-	if (curr != rq->idle) /* FIXME: needed? */
-		curr->sched_class->task_tick(rq, curr, 0);
+	curr->sched_class->task_tick(rq, curr, 0);
+	update_sched_rt_period(rq);
 	spin_unlock(&rq->lock);
 
 #ifdef CONFIG_SMP
@@ -7041,6 +7058,29 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
 	cfs_rq->min_vruntime = (u64)(-(1LL << 20));
 }
 
+static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
+{
+	struct rt_prio_array *array;
+	int i;
+
+	array = &rt_rq->active;
+	for (i = 0; i < MAX_RT_PRIO; i++) {
+		INIT_LIST_HEAD(array->queue + i);
+		__clear_bit(i, array->bitmap);
+	}
+	/* delimiter for bitsearch: */
+	__set_bit(MAX_RT_PRIO, array->bitmap);
+
+#ifdef CONFIG_SMP
+	rt_rq->rt_nr_migratory = 0;
+	rt_rq->highest_prio = MAX_RT_PRIO;
+	rt_rq->overloaded = 0;
+#endif
+
+	rt_rq->rt_time = 0;
+	rt_rq->rt_throttled = 0;
+}
+
 void __init sched_init(void)
 {
 	int highest_cpu = 0;
@@ -7051,7 +7091,6 @@ void __init sched_init(void)
 #endif
 
 	for_each_possible_cpu(i) {
-		struct rt_prio_array *array;
 		struct rq *rq;
 
 		rq = cpu_rq(i);
@@ -7083,6 +7122,8 @@ void __init sched_init(void)
 		}
 		init_task_group.shares = init_task_group_load;
 #endif
+		init_rt_rq(&rq->rt, rq);
+		rq->rt_period_expire = 0;
 
 		for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
 			rq->cpu_load[j] = 0;
@@ -7095,22 +7136,11 @@ void __init sched_init(void)
 		rq->cpu = i;
 		rq->migration_thread = NULL;
 		INIT_LIST_HEAD(&rq->migration_queue);
-		rq->rt.highest_prio = MAX_RT_PRIO;
-		rq->rt.overloaded = 0;
 		rq_attach_root(rq, &def_root_domain);
 #endif
 		init_rq_hrtick(rq);
-
 		atomic_set(&rq->nr_iowait, 0);
-
-		array = &rq->rt.active;
-		for (j = 0; j < MAX_RT_PRIO; j++) {
-			INIT_LIST_HEAD(array->queue + j);
-			__clear_bit(j, array->bitmap);
-		}
 		highest_cpu = i;
-		/* delimiter for bitsearch: */
-		__set_bit(MAX_RT_PRIO, array->bitmap);
 	}
 
 	set_load_weight(&init_task);
@@ -7282,7 +7312,7 @@ void set_curr_task(int cpu, struct task_struct *p)
 #ifdef CONFIG_SMP
 /*
  * distribute shares of all task groups among their schedulable entities,
- * to reflect load distrbution across cpus.
+ * to reflect load distribution across cpus.
  */
 static int rebalance_shares(struct sched_domain *sd, int this_cpu)
 {
@@ -7349,7 +7379,7 @@ static int rebalance_shares(struct sched_domain *sd, int this_cpu)
 * sysctl_sched_max_bal_int_shares represents the maximum interval between
 * consecutive calls to rebalance_shares() in the same sched domain.
 *
- * These settings allows for the appropriate tradeoff between accuracy of
+ * These settings allows for the appropriate trade-off between accuracy of
 * fairness and the associated overhead.
 *
 */
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 83fbbcb8019e..fd10d965aa06 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -45,6 +45,50 @@ static void update_rt_migration(struct rq *rq)
 }
 #endif /* CONFIG_SMP */
 
+static int sched_rt_ratio_exceeded(struct rq *rq, struct rt_rq *rt_rq)
+{
+	u64 period, ratio;
+
+	if (sysctl_sched_rt_ratio == SCHED_RT_FRAC)
+		return 0;
+
+	if (rt_rq->rt_throttled)
+		return 1;
+
+	period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
+	ratio = (period * sysctl_sched_rt_ratio) >> SCHED_RT_FRAC_SHIFT;
+
+	if (rt_rq->rt_time > ratio) {
+		rt_rq->rt_throttled = rq->clock + period - rt_rq->rt_time;
+		return 1;
+	}
+
+	return 0;
+}
+
+static void update_sched_rt_period(struct rq *rq)
+{
+	while (rq->clock > rq->rt_period_expire) {
+		u64 period, ratio;
+
+		period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
+		ratio = (period * sysctl_sched_rt_ratio) >> SCHED_RT_FRAC_SHIFT;
+
+		rq->rt.rt_time -= min(rq->rt.rt_time, ratio);
+		rq->rt_period_expire += period;
+	}
+
+	/*
+	 * When the rt throttle is expired, let them rip.
+	 * (XXX: use hrtick when available)
+	 */
+	if (rq->rt.rt_throttled && rq->clock > rq->rt.rt_throttled) {
+		rq->rt.rt_throttled = 0;
+		if (!sched_rt_ratio_exceeded(rq, &rq->rt))
+			resched_task(rq->curr);
+	}
+}
+
 /*
  * Update the current task's runtime statistics. Skip current tasks that
  * are not in our scheduling class.
@@ -66,6 +110,11 @@ static void update_curr_rt(struct rq *rq)
 	curr->se.sum_exec_runtime += delta_exec;
 	curr->se.exec_start = rq->clock;
 	cpuacct_charge(curr, delta_exec);
+
+	rq->rt.rt_time += delta_exec;
+	update_sched_rt_period(rq);
+	if (sched_rt_ratio_exceeded(rq, &rq->rt))
+		resched_task(curr);
 }
 
 static inline void inc_rt_tasks(struct task_struct *p, struct rq *rq)
@@ -208,8 +257,12 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
 	struct rt_prio_array *array = &rq->rt.active;
 	struct task_struct *next;
 	struct list_head *queue;
+	struct rt_rq *rt_rq = &rq->rt;
 	int idx;
 
+	if (sched_rt_ratio_exceeded(rq, rt_rq))
+		return NULL;
+
 	idx = sched_find_first_bit(array->bitmap);
 	if (idx >= MAX_RT_PRIO)
 		return NULL;
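To see how the two helpers above interact over time, here is a compressed userspace model of the throttle lifecycle: rt_time accumulates in update_curr_rt(), the class throttles once the quota is exceeded, rt_time decays at period boundaries, and the throttle is lifted ("let them rip"). This is a simplified single-runqueue sketch with assumed 1 ms ticks and example period/ratio values, not the kernel code itself:

#include <stdio.h>
#include <stdint.h>

#define FRAC_SHIFT	16
#define NSEC_PER_MSEC	1000000ULL

static uint64_t clock_ns, rt_time, rt_throttled, rt_period_expire;
static const uint64_t period = 100 * NSEC_PER_MSEC;	/* 100 ms -- example */
static const uint64_t ratio = (1ULL << FRAC_SHIFT) / 4;	/* 25% -- example */

static uint64_t quota(void)
{
	return (period * ratio) >> FRAC_SHIFT;
}

/* mirrors sched_rt_ratio_exceeded(): once over quota, record when the
 * throttle lifts (clock + period - rt_time) in rt_throttled */
static int ratio_exceeded(void)
{
	if (rt_throttled)
		return 1;
	if (rt_time > quota()) {
		rt_throttled = clock_ns + period - rt_time;
		return 1;
	}
	return 0;
}

/* mirrors update_sched_rt_period(): decay rt_time once per period and
 * clear the throttle after its expiry time has passed */
static void update_period(void)
{
	while (clock_ns > rt_period_expire) {
		uint64_t q = quota();

		rt_time -= rt_time < q ? rt_time : q;
		rt_period_expire += period;
	}
	if (rt_throttled && clock_ns > rt_throttled)
		rt_throttled = 0;	/* let them rip */
}

int main(void)
{
	int ran = 0;

	for (int tick = 1; tick <= 300; tick++) {	/* 1 ms ticks */
		clock_ns = tick * NSEC_PER_MSEC;
		if (!ratio_exceeded()) {
			rt_time += NSEC_PER_MSEC;	/* rt class ran this tick */
			ran++;
		}
		update_period();
	}
	/* with a 25% ratio, roughly a quarter of the 300 ticks are rt time */
	printf("rt ran %d of 300 ticks\n", ran);
	return 0;
}

Note that rt_throttled doubles as both the throttle flag and the expiry timestamp, which is why the struct field is a u64 rather than an int.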
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 96f31c1bc4f0..3afbd25f43eb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -306,7 +306,23 @@ static struct ctl_table kern_table[] = {
 		.procname	= "sched_nr_migrate",
 		.data		= &sysctl_sched_nr_migrate,
 		.maxlen		= sizeof(unsigned int),
-		.mode		= 644,
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_rt_period_ms",
+		.data		= &sysctl_sched_rt_period,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_rt_ratio",
+		.data		= &sysctl_sched_rt_ratio,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
 #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
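Since both knobs are plain proc_dointvec entries in kern_table, they should surface as /proc/sys/kernel/sched_rt_period_ms and /proc/sys/kernel/sched_rt_ratio. A hedged usage sketch; the paths are assumed from the procnames above, and the values are examples:

#include <stdio.h>

static int write_sysctl(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	/* allow rt tasks 50% of each 500 ms period: 0.5 * (1 << 16) = 32768 */
	write_sysctl("/proc/sys/kernel/sched_rt_period_ms", "500");
	write_sysctl("/proc/sys/kernel/sched_rt_ratio", "32768");
	return 0;
}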