diff options
| author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2008-01-25 15:08:29 -0500 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2008-01-25 15:08:29 -0500 |
| commit | fa85ae2418e6843953107cd6a06f645752829bc0 (patch) | |
| tree | 004130ac471247a29d3f6adfbfe61c474e725779 /kernel | |
| parent | 8f4d37ec073c17e2d4aa8851df5837d798606d6f (diff) | |
sched: rt time limit
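Add a very simple time limit for the realtime scheduling classes.
Allow the rq's realtime class to consume a sched_rt_ratio fraction of every
sched_rt_period slice. If the class exceeds this quota, it is throttled and the
fair class will preempt the realtime class until the throttle expires.
sched_rt_period is given in milliseconds (sysctl sched_rt_period_ms, default 1000);
sched_rt_ratio is a fixed-point fraction scaled by 1 << 16 (default 65536 = 100%, which disables the limit).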
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/sched.c | 70 | ||||
| -rw-r--r-- | kernel/sched_rt.c | 53 | ||||
| -rw-r--r-- | kernel/sysctl.c | 18 |
3 files changed, 120 insertions, 21 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 17f93d3eda91..e9a7beee9b79 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -342,13 +342,14 @@ struct cfs_rq { | |||
| 342 | /* Real-Time classes' related field in a runqueue: */ | 342 | /* Real-Time classes' related field in a runqueue: */ |
| 343 | struct rt_rq { | 343 | struct rt_rq { |
| 344 | struct rt_prio_array active; | 344 | struct rt_prio_array active; |
| 345 | int rt_load_balance_idx; | ||
| 346 | struct list_head *rt_load_balance_head, *rt_load_balance_curr; | ||
| 347 | unsigned long rt_nr_running; | 345 | unsigned long rt_nr_running; |
| 346 | #ifdef CONFIG_SMP | ||
| 348 | unsigned long rt_nr_migratory; | 347 | unsigned long rt_nr_migratory; |
| 349 | /* highest queued rt task prio */ | 348 | int highest_prio; /* highest queued rt task prio */ |
| 350 | int highest_prio; | ||
| 351 | int overloaded; | 349 | int overloaded; |
| 350 | #endif | ||
| 351 | u64 rt_time; | ||
| 352 | u64 rt_throttled; | ||
| 352 | }; | 353 | }; |
| 353 | 354 | ||
| 354 | #ifdef CONFIG_SMP | 355 | #ifdef CONFIG_SMP |
| @@ -415,6 +416,7 @@ struct rq { | |||
| 415 | struct list_head leaf_cfs_rq_list; | 416 | struct list_head leaf_cfs_rq_list; |
| 416 | #endif | 417 | #endif |
| 417 | struct rt_rq rt; | 418 | struct rt_rq rt; |
| 419 | u64 rt_period_expire; | ||
| 418 | 420 | ||
| 419 | /* | 421 | /* |
| 420 | * This is part of a global counter where only the total sum | 422 | * This is part of a global counter where only the total sum |
| @@ -601,6 +603,21 @@ const_debug unsigned int sysctl_sched_features = | |||
| 601 | const_debug unsigned int sysctl_sched_nr_migrate = 32; | 603 | const_debug unsigned int sysctl_sched_nr_migrate = 32; |
| 602 | 604 | ||
| 603 | /* | 605 | /* |
| 606 | * period over which we measure -rt task cpu usage in ms. | ||
| 607 | * default: 1s | ||
| 608 | */ | ||
| 609 | const_debug unsigned int sysctl_sched_rt_period = 1000; | ||
| 610 | |||
| 611 | #define SCHED_RT_FRAC_SHIFT 16 | ||
| 612 | #define SCHED_RT_FRAC (1UL << SCHED_RT_FRAC_SHIFT) | ||
| 613 | |||
| 614 | /* | ||
| 615 | * ratio of time -rt tasks may consume. | ||
| 616 | * default: 100% | ||
| 617 | */ | ||
| 618 | const_debug unsigned int sysctl_sched_rt_ratio = SCHED_RT_FRAC; | ||
| 619 | |||
| 620 | /* | ||
| 604 | * For kernel-internal use: high-speed (but slightly incorrect) per-cpu | 621 | * For kernel-internal use: high-speed (but slightly incorrect) per-cpu |
| 605 | * clock constructed from sched_clock(): | 622 | * clock constructed from sched_clock(): |
| 606 | */ | 623 | */ |
| @@ -3674,8 +3691,8 @@ void scheduler_tick(void) | |||
| 3674 | rq->clock = next_tick; | 3691 | rq->clock = next_tick; |
| 3675 | rq->tick_timestamp = rq->clock; | 3692 | rq->tick_timestamp = rq->clock; |
| 3676 | update_cpu_load(rq); | 3693 | update_cpu_load(rq); |
| 3677 | if (curr != rq->idle) /* FIXME: needed? */ | 3694 | curr->sched_class->task_tick(rq, curr, 0); |
| 3678 | curr->sched_class->task_tick(rq, curr, 0); | 3695 | update_sched_rt_period(rq); |
| 3679 | spin_unlock(&rq->lock); | 3696 | spin_unlock(&rq->lock); |
| 3680 | 3697 | ||
| 3681 | #ifdef CONFIG_SMP | 3698 | #ifdef CONFIG_SMP |
| @@ -7041,6 +7058,29 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq) | |||
| 7041 | cfs_rq->min_vruntime = (u64)(-(1LL << 20)); | 7058 | cfs_rq->min_vruntime = (u64)(-(1LL << 20)); |
| 7042 | } | 7059 | } |
| 7043 | 7060 | ||
| 7061 | static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) | ||
| 7062 | { | ||
| 7063 | struct rt_prio_array *array; | ||
| 7064 | int i; | ||
| 7065 | |||
| 7066 | array = &rt_rq->active; | ||
| 7067 | for (i = 0; i < MAX_RT_PRIO; i++) { | ||
| 7068 | INIT_LIST_HEAD(array->queue + i); | ||
| 7069 | __clear_bit(i, array->bitmap); | ||
| 7070 | } | ||
| 7071 | /* delimiter for bitsearch: */ | ||
| 7072 | __set_bit(MAX_RT_PRIO, array->bitmap); | ||
| 7073 | |||
| 7074 | #ifdef CONFIG_SMP | ||
| 7075 | rt_rq->rt_nr_migratory = 0; | ||
| 7076 | rt_rq->highest_prio = MAX_RT_PRIO; | ||
| 7077 | rt_rq->overloaded = 0; | ||
| 7078 | #endif | ||
| 7079 | |||
| 7080 | rt_rq->rt_time = 0; | ||
| 7081 | rt_rq->rt_throttled = 0; | ||
| 7082 | } | ||
| 7083 | |||
| 7044 | void __init sched_init(void) | 7084 | void __init sched_init(void) |
| 7045 | { | 7085 | { |
| 7046 | int highest_cpu = 0; | 7086 | int highest_cpu = 0; |
| @@ -7051,7 +7091,6 @@ void __init sched_init(void) | |||
| 7051 | #endif | 7091 | #endif |
| 7052 | 7092 | ||
| 7053 | for_each_possible_cpu(i) { | 7093 | for_each_possible_cpu(i) { |
| 7054 | struct rt_prio_array *array; | ||
| 7055 | struct rq *rq; | 7094 | struct rq *rq; |
| 7056 | 7095 | ||
| 7057 | rq = cpu_rq(i); | 7096 | rq = cpu_rq(i); |
| @@ -7083,6 +7122,8 @@ void __init sched_init(void) | |||
| 7083 | } | 7122 | } |
| 7084 | init_task_group.shares = init_task_group_load; | 7123 | init_task_group.shares = init_task_group_load; |
| 7085 | #endif | 7124 | #endif |
| 7125 | init_rt_rq(&rq->rt, rq); | ||
| 7126 | rq->rt_period_expire = 0; | ||
| 7086 | 7127 | ||
| 7087 | for (j = 0; j < CPU_LOAD_IDX_MAX; j++) | 7128 | for (j = 0; j < CPU_LOAD_IDX_MAX; j++) |
| 7088 | rq->cpu_load[j] = 0; | 7129 | rq->cpu_load[j] = 0; |
| @@ -7095,22 +7136,11 @@ void __init sched_init(void) | |||
| 7095 | rq->cpu = i; | 7136 | rq->cpu = i; |
| 7096 | rq->migration_thread = NULL; | 7137 | rq->migration_thread = NULL; |
| 7097 | INIT_LIST_HEAD(&rq->migration_queue); | 7138 | INIT_LIST_HEAD(&rq->migration_queue); |
| 7098 | rq->rt.highest_prio = MAX_RT_PRIO; | ||
| 7099 | rq->rt.overloaded = 0; | ||
| 7100 | rq_attach_root(rq, &def_root_domain); | 7139 | rq_attach_root(rq, &def_root_domain); |
| 7101 | #endif | 7140 | #endif |
| 7102 | init_rq_hrtick(rq); | 7141 | init_rq_hrtick(rq); |
| 7103 | |||
| 7104 | atomic_set(&rq->nr_iowait, 0); | 7142 | atomic_set(&rq->nr_iowait, 0); |
| 7105 | |||
| 7106 | array = &rq->rt.active; | ||
| 7107 | for (j = 0; j < MAX_RT_PRIO; j++) { | ||
| 7108 | INIT_LIST_HEAD(array->queue + j); | ||
| 7109 | __clear_bit(j, array->bitmap); | ||
| 7110 | } | ||
| 7111 | highest_cpu = i; | 7143 | highest_cpu = i; |
| 7112 | /* delimiter for bitsearch: */ | ||
| 7113 | __set_bit(MAX_RT_PRIO, array->bitmap); | ||
| 7114 | } | 7144 | } |
| 7115 | 7145 | ||
| 7116 | set_load_weight(&init_task); | 7146 | set_load_weight(&init_task); |
| @@ -7282,7 +7312,7 @@ void set_curr_task(int cpu, struct task_struct *p) | |||
| 7282 | #ifdef CONFIG_SMP | 7312 | #ifdef CONFIG_SMP |
| 7283 | /* | 7313 | /* |
| 7284 | * distribute shares of all task groups among their schedulable entities, | 7314 | * distribute shares of all task groups among their schedulable entities, |
| 7285 | * to reflect load distrbution across cpus. | 7315 | * to reflect load distribution across cpus. |
| 7286 | */ | 7316 | */ |
| 7287 | static int rebalance_shares(struct sched_domain *sd, int this_cpu) | 7317 | static int rebalance_shares(struct sched_domain *sd, int this_cpu) |
| 7288 | { | 7318 | { |
| @@ -7349,7 +7379,7 @@ static int rebalance_shares(struct sched_domain *sd, int this_cpu) | |||
| 7349 | * sysctl_sched_max_bal_int_shares represents the maximum interval between | 7379 | * sysctl_sched_max_bal_int_shares represents the maximum interval between |
| 7350 | * consecutive calls to rebalance_shares() in the same sched domain. | 7380 | * consecutive calls to rebalance_shares() in the same sched domain. |
| 7351 | * | 7381 | * |
| 7352 | * These settings allows for the appropriate tradeoff between accuracy of | 7382 | * These settings allows for the appropriate trade-off between accuracy of |
| 7353 | * fairness and the associated overhead. | 7383 | * fairness and the associated overhead. |
| 7354 | * | 7384 | * |
| 7355 | */ | 7385 | */ |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 83fbbcb8019e..fd10d965aa06 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
| @@ -45,6 +45,50 @@ static void update_rt_migration(struct rq *rq) | |||
| 45 | } | 45 | } |
| 46 | #endif /* CONFIG_SMP */ | 46 | #endif /* CONFIG_SMP */ |
| 47 | 47 | ||
| 48 | static int sched_rt_ratio_exceeded(struct rq *rq, struct rt_rq *rt_rq) | ||
| 49 | { | ||
| 50 | u64 period, ratio; | ||
| 51 | |||
| 52 | if (sysctl_sched_rt_ratio == SCHED_RT_FRAC) | ||
| 53 | return 0; | ||
| 54 | |||
| 55 | if (rt_rq->rt_throttled) | ||
| 56 | return 1; | ||
| 57 | |||
| 58 | period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC; | ||
| 59 | ratio = (period * sysctl_sched_rt_ratio) >> SCHED_RT_FRAC_SHIFT; | ||
| 60 | |||
| 61 | if (rt_rq->rt_time > ratio) { | ||
| 62 | rt_rq->rt_throttled = rq->clock + period - rt_rq->rt_time; | ||
| 63 | return 1; | ||
| 64 | } | ||
| 65 | |||
| 66 | return 0; | ||
| 67 | } | ||
| 68 | |||
| 69 | static void update_sched_rt_period(struct rq *rq) | ||
| 70 | { | ||
| 71 | while (rq->clock > rq->rt_period_expire) { | ||
| 72 | u64 period, ratio; | ||
| 73 | |||
| 74 | period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC; | ||
| 75 | ratio = (period * sysctl_sched_rt_ratio) >> SCHED_RT_FRAC_SHIFT; | ||
| 76 | |||
| 77 | rq->rt.rt_time -= min(rq->rt.rt_time, ratio); | ||
| 78 | rq->rt_period_expire += period; | ||
| 79 | } | ||
| 80 | |||
| 81 | /* | ||
| 82 | * When the rt throttle is expired, let them rip. | ||
| 83 | * (XXX: use hrtick when available) | ||
| 84 | */ | ||
| 85 | if (rq->rt.rt_throttled && rq->clock > rq->rt.rt_throttled) { | ||
| 86 | rq->rt.rt_throttled = 0; | ||
| 87 | if (!sched_rt_ratio_exceeded(rq, &rq->rt)) | ||
| 88 | resched_task(rq->curr); | ||
| 89 | } | ||
| 90 | } | ||
| 91 | |||
| 48 | /* | 92 | /* |
| 49 | * Update the current task's runtime statistics. Skip current tasks that | 93 | * Update the current task's runtime statistics. Skip current tasks that |
| 50 | * are not in our scheduling class. | 94 | * are not in our scheduling class. |
| @@ -66,6 +110,11 @@ static void update_curr_rt(struct rq *rq) | |||
| 66 | curr->se.sum_exec_runtime += delta_exec; | 110 | curr->se.sum_exec_runtime += delta_exec; |
| 67 | curr->se.exec_start = rq->clock; | 111 | curr->se.exec_start = rq->clock; |
| 68 | cpuacct_charge(curr, delta_exec); | 112 | cpuacct_charge(curr, delta_exec); |
| 113 | |||
| 114 | rq->rt.rt_time += delta_exec; | ||
| 115 | update_sched_rt_period(rq); | ||
| 116 | if (sched_rt_ratio_exceeded(rq, &rq->rt)) | ||
| 117 | resched_task(curr); | ||
| 69 | } | 118 | } |
| 70 | 119 | ||
| 71 | static inline void inc_rt_tasks(struct task_struct *p, struct rq *rq) | 120 | static inline void inc_rt_tasks(struct task_struct *p, struct rq *rq) |
| @@ -208,8 +257,12 @@ static struct task_struct *pick_next_task_rt(struct rq *rq) | |||
| 208 | struct rt_prio_array *array = &rq->rt.active; | 257 | struct rt_prio_array *array = &rq->rt.active; |
| 209 | struct task_struct *next; | 258 | struct task_struct *next; |
| 210 | struct list_head *queue; | 259 | struct list_head *queue; |
| 260 | struct rt_rq *rt_rq = &rq->rt; | ||
| 211 | int idx; | 261 | int idx; |
| 212 | 262 | ||
| 263 | if (sched_rt_ratio_exceeded(rq, rt_rq)) | ||
| 264 | return NULL; | ||
| 265 | |||
| 213 | idx = sched_find_first_bit(array->bitmap); | 266 | idx = sched_find_first_bit(array->bitmap); |
| 214 | if (idx >= MAX_RT_PRIO) | 267 | if (idx >= MAX_RT_PRIO) |
| 215 | return NULL; | 268 | return NULL; |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 96f31c1bc4f0..3afbd25f43eb 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -306,7 +306,23 @@ static struct ctl_table kern_table[] = { | |||
| 306 | .procname = "sched_nr_migrate", | 306 | .procname = "sched_nr_migrate", |
| 307 | .data = &sysctl_sched_nr_migrate, | 307 | .data = &sysctl_sched_nr_migrate, |
| 308 | .maxlen = sizeof(unsigned int), | 308 | .maxlen = sizeof(unsigned int), |
| 309 | .mode = 644, | 309 | .mode = 0644, |
| 310 | .proc_handler = &proc_dointvec, | ||
| 311 | }, | ||
| 312 | { | ||
| 313 | .ctl_name = CTL_UNNUMBERED, | ||
| 314 | .procname = "sched_rt_period_ms", | ||
| 315 | .data = &sysctl_sched_rt_period, | ||
| 316 | .maxlen = sizeof(unsigned int), | ||
| 317 | .mode = 0644, | ||
| 318 | .proc_handler = &proc_dointvec, | ||
| 319 | }, | ||
| 320 | { | ||
| 321 | .ctl_name = CTL_UNNUMBERED, | ||
| 322 | .procname = "sched_rt_ratio", | ||
| 323 | .data = &sysctl_sched_rt_ratio, | ||
| 324 | .maxlen = sizeof(unsigned int), | ||
| 325 | .mode = 0644, | ||
| 310 | .proc_handler = &proc_dointvec, | 326 | .proc_handler = &proc_dointvec, |
| 311 | }, | 327 | }, |
| 312 | #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) | 328 | #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) |
