diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2008-04-19 13:44:57 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-04-19 13:44:57 -0400 |
commit | d0b27fa77854b149ad4af08b0fe47fe712a47ade (patch) | |
tree | 2f4487c108a5132e8d52456567b4a67e78fbb4a6 /kernel | |
parent | 57d3da2911787a101a384532f4519f9640bae883 (diff) |
sched: rt-group: synchronised bandwidth period
Various SMP balancing algorithms require that the bandwidth period
run in sync.
Possible improvements are moving the rt_bandwidth thing into root_domain
and keeping a span per rt_bandwidth which marks throttled cpus.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched.c | 260 | ||||
-rw-r--r-- | kernel/sched_rt.c | 104 | ||||
-rw-r--r-- | kernel/sysctl.c | 4 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 5 | ||||
-rw-r--r-- | kernel/user.c | 28 |
5 files changed, 313 insertions, 88 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index e813e845d9cf..bb20323f7d09 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -115,6 +115,11 @@ unsigned long long __attribute__((weak)) sched_clock(void) | |||
115 | */ | 115 | */ |
116 | #define DEF_TIMESLICE (100 * HZ / 1000) | 116 | #define DEF_TIMESLICE (100 * HZ / 1000) |
117 | 117 | ||
118 | /* | ||
119 | * single value that denotes runtime == period, ie unlimited time. | ||
120 | */ | ||
121 | #define RUNTIME_INF ((u64)~0ULL) | ||
122 | |||
118 | #ifdef CONFIG_SMP | 123 | #ifdef CONFIG_SMP |
119 | /* | 124 | /* |
120 | * Divide a load by a sched group cpu_power : (load / sg->__cpu_power) | 125 | * Divide a load by a sched group cpu_power : (load / sg->__cpu_power) |
@@ -156,6 +161,80 @@ struct rt_prio_array { | |||
156 | struct list_head queue[MAX_RT_PRIO]; | 161 | struct list_head queue[MAX_RT_PRIO]; |
157 | }; | 162 | }; |
158 | 163 | ||
164 | struct rt_bandwidth { | ||
165 | ktime_t rt_period; | ||
166 | u64 rt_runtime; | ||
167 | struct hrtimer rt_period_timer; | ||
168 | }; | ||
169 | |||
170 | static struct rt_bandwidth def_rt_bandwidth; | ||
171 | |||
172 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); | ||
173 | |||
174 | static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer) | ||
175 | { | ||
176 | struct rt_bandwidth *rt_b = | ||
177 | container_of(timer, struct rt_bandwidth, rt_period_timer); | ||
178 | ktime_t now; | ||
179 | int overrun; | ||
180 | int idle = 0; | ||
181 | |||
182 | for (;;) { | ||
183 | now = hrtimer_cb_get_time(timer); | ||
184 | overrun = hrtimer_forward(timer, now, rt_b->rt_period); | ||
185 | |||
186 | if (!overrun) | ||
187 | break; | ||
188 | |||
189 | idle = do_sched_rt_period_timer(rt_b, overrun); | ||
190 | } | ||
191 | |||
192 | return idle ? HRTIMER_NORESTART : HRTIMER_RESTART; | ||
193 | } | ||
194 | |||
195 | static | ||
196 | void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) | ||
197 | { | ||
198 | rt_b->rt_period = ns_to_ktime(period); | ||
199 | rt_b->rt_runtime = runtime; | ||
200 | |||
201 | hrtimer_init(&rt_b->rt_period_timer, | ||
202 | CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
203 | rt_b->rt_period_timer.function = sched_rt_period_timer; | ||
204 | rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; | ||
205 | } | ||
206 | |||
207 | static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | ||
208 | { | ||
209 | ktime_t now; | ||
210 | |||
211 | if (rt_b->rt_runtime == RUNTIME_INF) | ||
212 | return; | ||
213 | |||
214 | if (hrtimer_active(&rt_b->rt_period_timer)) | ||
215 | return; | ||
216 | |||
217 | spin_lock(&rt_b->rt_runtime_lock); | ||
218 | for (;;) { | ||
219 | if (hrtimer_active(&rt_b->rt_period_timer)) | ||
220 | break; | ||
221 | |||
222 | now = hrtimer_cb_get_time(&rt_b->rt_period_timer); | ||
223 | hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period); | ||
224 | hrtimer_start(&rt_b->rt_period_timer, | ||
225 | rt_b->rt_period_timer.expires, | ||
226 | HRTIMER_MODE_ABS); | ||
227 | } | ||
228 | spin_unlock(&rt_b->rt_runtime_lock); | ||
229 | } | ||
230 | |||
231 | #ifdef CONFIG_RT_GROUP_SCHED | ||
232 | static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b) | ||
233 | { | ||
234 | hrtimer_cancel(&rt_b->rt_period_timer); | ||
235 | } | ||
236 | #endif | ||
237 | |||
159 | #ifdef CONFIG_GROUP_SCHED | 238 | #ifdef CONFIG_GROUP_SCHED |
160 | 239 | ||
161 | #include <linux/cgroup.h> | 240 | #include <linux/cgroup.h> |
@@ -182,7 +261,7 @@ struct task_group { | |||
182 | struct sched_rt_entity **rt_se; | 261 | struct sched_rt_entity **rt_se; |
183 | struct rt_rq **rt_rq; | 262 | struct rt_rq **rt_rq; |
184 | 263 | ||
185 | u64 rt_runtime; | 264 | struct rt_bandwidth rt_bandwidth; |
186 | #endif | 265 | #endif |
187 | 266 | ||
188 | struct rcu_head rcu; | 267 | struct rcu_head rcu; |
@@ -407,8 +486,6 @@ struct rq { | |||
407 | 486 | ||
408 | struct cfs_rq cfs; | 487 | struct cfs_rq cfs; |
409 | struct rt_rq rt; | 488 | struct rt_rq rt; |
410 | u64 rt_period_expire; | ||
411 | int rt_throttled; | ||
412 | 489 | ||
413 | #ifdef CONFIG_FAIR_GROUP_SCHED | 490 | #ifdef CONFIG_FAIR_GROUP_SCHED |
414 | /* list of leaf cfs_rq on this cpu: */ | 491 | /* list of leaf cfs_rq on this cpu: */ |
@@ -592,23 +669,6 @@ static void update_rq_clock(struct rq *rq) | |||
592 | #define task_rq(p) cpu_rq(task_cpu(p)) | 669 | #define task_rq(p) cpu_rq(task_cpu(p)) |
593 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) | 670 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) |
594 | 671 | ||
595 | unsigned long rt_needs_cpu(int cpu) | ||
596 | { | ||
597 | struct rq *rq = cpu_rq(cpu); | ||
598 | u64 delta; | ||
599 | |||
600 | if (!rq->rt_throttled) | ||
601 | return 0; | ||
602 | |||
603 | if (rq->clock > rq->rt_period_expire) | ||
604 | return 1; | ||
605 | |||
606 | delta = rq->rt_period_expire - rq->clock; | ||
607 | do_div(delta, NSEC_PER_SEC / HZ); | ||
608 | |||
609 | return (unsigned long)delta; | ||
610 | } | ||
611 | |||
612 | /* | 672 | /* |
613 | * Tunables that become constants when CONFIG_SCHED_DEBUG is off: | 673 | * Tunables that become constants when CONFIG_SCHED_DEBUG is off: |
614 | */ | 674 | */ |
@@ -664,10 +724,18 @@ static __read_mostly int scheduler_running; | |||
664 | */ | 724 | */ |
665 | int sysctl_sched_rt_runtime = 950000; | 725 | int sysctl_sched_rt_runtime = 950000; |
666 | 726 | ||
667 | /* | 727 | static inline u64 global_rt_period(void) |
668 | * single value that denotes runtime == period, ie unlimited time. | 728 | { |
669 | */ | 729 | return (u64)sysctl_sched_rt_period * NSEC_PER_USEC; |
670 | #define RUNTIME_INF ((u64)~0ULL) | 730 | } |
731 | |||
732 | static inline u64 global_rt_runtime(void) | ||
733 | { | ||
734 | if (sysctl_sched_rt_period < 0) | ||
735 | return RUNTIME_INF; | ||
736 | |||
737 | return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; | ||
738 | } | ||
671 | 739 | ||
672 | static const unsigned long long time_sync_thresh = 100000; | 740 | static const unsigned long long time_sync_thresh = 100000; |
673 | 741 | ||
@@ -3854,7 +3922,6 @@ void scheduler_tick(void) | |||
3854 | update_last_tick_seen(rq); | 3922 | update_last_tick_seen(rq); |
3855 | update_cpu_load(rq); | 3923 | update_cpu_load(rq); |
3856 | curr->sched_class->task_tick(rq, curr, 0); | 3924 | curr->sched_class->task_tick(rq, curr, 0); |
3857 | update_sched_rt_period(rq); | ||
3858 | spin_unlock(&rq->lock); | 3925 | spin_unlock(&rq->lock); |
3859 | 3926 | ||
3860 | #ifdef CONFIG_SMP | 3927 | #ifdef CONFIG_SMP |
@@ -4689,7 +4756,7 @@ recheck: | |||
4689 | * Do not allow realtime tasks into groups that have no runtime | 4756 | * Do not allow realtime tasks into groups that have no runtime |
4690 | * assigned. | 4757 | * assigned. |
4691 | */ | 4758 | */ |
4692 | if (rt_policy(policy) && task_group(p)->rt_runtime == 0) | 4759 | if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0) |
4693 | return -EPERM; | 4760 | return -EPERM; |
4694 | #endif | 4761 | #endif |
4695 | 4762 | ||
@@ -7288,6 +7355,14 @@ void __init sched_init(void) | |||
7288 | init_defrootdomain(); | 7355 | init_defrootdomain(); |
7289 | #endif | 7356 | #endif |
7290 | 7357 | ||
7358 | init_rt_bandwidth(&def_rt_bandwidth, | ||
7359 | global_rt_period(), global_rt_runtime()); | ||
7360 | |||
7361 | #ifdef CONFIG_RT_GROUP_SCHED | ||
7362 | init_rt_bandwidth(&init_task_group.rt_bandwidth, | ||
7363 | global_rt_period(), global_rt_runtime()); | ||
7364 | #endif | ||
7365 | |||
7291 | #ifdef CONFIG_GROUP_SCHED | 7366 | #ifdef CONFIG_GROUP_SCHED |
7292 | list_add(&init_task_group.list, &task_groups); | 7367 | list_add(&init_task_group.list, &task_groups); |
7293 | #endif | 7368 | #endif |
@@ -7312,15 +7387,11 @@ void __init sched_init(void) | |||
7312 | 7387 | ||
7313 | #endif | 7388 | #endif |
7314 | #ifdef CONFIG_RT_GROUP_SCHED | 7389 | #ifdef CONFIG_RT_GROUP_SCHED |
7315 | init_task_group.rt_runtime = | ||
7316 | sysctl_sched_rt_runtime * NSEC_PER_USEC; | ||
7317 | INIT_LIST_HEAD(&rq->leaf_rt_rq_list); | 7390 | INIT_LIST_HEAD(&rq->leaf_rt_rq_list); |
7318 | init_tg_rt_entry(rq, &init_task_group, | 7391 | init_tg_rt_entry(rq, &init_task_group, |
7319 | &per_cpu(init_rt_rq, i), | 7392 | &per_cpu(init_rt_rq, i), |
7320 | &per_cpu(init_sched_rt_entity, i), i, 1); | 7393 | &per_cpu(init_sched_rt_entity, i), i, 1); |
7321 | #endif | 7394 | #endif |
7322 | rq->rt_period_expire = 0; | ||
7323 | rq->rt_throttled = 0; | ||
7324 | 7395 | ||
7325 | for (j = 0; j < CPU_LOAD_IDX_MAX; j++) | 7396 | for (j = 0; j < CPU_LOAD_IDX_MAX; j++) |
7326 | rq->cpu_load[j] = 0; | 7397 | rq->cpu_load[j] = 0; |
@@ -7506,8 +7577,6 @@ void set_curr_task(int cpu, struct task_struct *p) | |||
7506 | 7577 | ||
7507 | #endif | 7578 | #endif |
7508 | 7579 | ||
7509 | #ifdef CONFIG_GROUP_SCHED | ||
7510 | |||
7511 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7580 | #ifdef CONFIG_FAIR_GROUP_SCHED |
7512 | static void free_fair_sched_group(struct task_group *tg) | 7581 | static void free_fair_sched_group(struct task_group *tg) |
7513 | { | 7582 | { |
@@ -7596,6 +7665,8 @@ static void free_rt_sched_group(struct task_group *tg) | |||
7596 | { | 7665 | { |
7597 | int i; | 7666 | int i; |
7598 | 7667 | ||
7668 | destroy_rt_bandwidth(&tg->rt_bandwidth); | ||
7669 | |||
7599 | for_each_possible_cpu(i) { | 7670 | for_each_possible_cpu(i) { |
7600 | if (tg->rt_rq) | 7671 | if (tg->rt_rq) |
7601 | kfree(tg->rt_rq[i]); | 7672 | kfree(tg->rt_rq[i]); |
@@ -7621,7 +7692,8 @@ static int alloc_rt_sched_group(struct task_group *tg) | |||
7621 | if (!tg->rt_se) | 7692 | if (!tg->rt_se) |
7622 | goto err; | 7693 | goto err; |
7623 | 7694 | ||
7624 | tg->rt_runtime = 0; | 7695 | init_rt_bandwidth(&tg->rt_bandwidth, |
7696 | ktime_to_ns(def_rt_bandwidth.rt_period), 0); | ||
7625 | 7697 | ||
7626 | for_each_possible_cpu(i) { | 7698 | for_each_possible_cpu(i) { |
7627 | rq = cpu_rq(i); | 7699 | rq = cpu_rq(i); |
@@ -7674,6 +7746,7 @@ static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) | |||
7674 | } | 7746 | } |
7675 | #endif | 7747 | #endif |
7676 | 7748 | ||
7749 | #ifdef CONFIG_GROUP_SCHED | ||
7677 | static void free_sched_group(struct task_group *tg) | 7750 | static void free_sched_group(struct task_group *tg) |
7678 | { | 7751 | { |
7679 | free_fair_sched_group(tg); | 7752 | free_fair_sched_group(tg); |
@@ -7775,6 +7848,7 @@ void sched_move_task(struct task_struct *tsk) | |||
7775 | 7848 | ||
7776 | task_rq_unlock(rq, &flags); | 7849 | task_rq_unlock(rq, &flags); |
7777 | } | 7850 | } |
7851 | #endif | ||
7778 | 7852 | ||
7779 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7853 | #ifdef CONFIG_FAIR_GROUP_SCHED |
7780 | static void set_se_shares(struct sched_entity *se, unsigned long shares) | 7854 | static void set_se_shares(struct sched_entity *se, unsigned long shares) |
@@ -7871,16 +7945,15 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) | |||
7871 | struct task_group *tgi; | 7945 | struct task_group *tgi; |
7872 | unsigned long total = 0; | 7946 | unsigned long total = 0; |
7873 | unsigned long global_ratio = | 7947 | unsigned long global_ratio = |
7874 | to_ratio(sysctl_sched_rt_period, | 7948 | to_ratio(global_rt_period(), global_rt_runtime()); |
7875 | sysctl_sched_rt_runtime < 0 ? | ||
7876 | RUNTIME_INF : sysctl_sched_rt_runtime); | ||
7877 | 7949 | ||
7878 | rcu_read_lock(); | 7950 | rcu_read_lock(); |
7879 | list_for_each_entry_rcu(tgi, &task_groups, list) { | 7951 | list_for_each_entry_rcu(tgi, &task_groups, list) { |
7880 | if (tgi == tg) | 7952 | if (tgi == tg) |
7881 | continue; | 7953 | continue; |
7882 | 7954 | ||
7883 | total += to_ratio(period, tgi->rt_runtime); | 7955 | total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period), |
7956 | tgi->rt_bandwidth.rt_runtime); | ||
7884 | } | 7957 | } |
7885 | rcu_read_unlock(); | 7958 | rcu_read_unlock(); |
7886 | 7959 | ||
@@ -7898,16 +7971,11 @@ static inline int tg_has_rt_tasks(struct task_group *tg) | |||
7898 | return 0; | 7971 | return 0; |
7899 | } | 7972 | } |
7900 | 7973 | ||
7901 | int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) | 7974 | static int tg_set_bandwidth(struct task_group *tg, |
7975 | u64 rt_period, u64 rt_runtime) | ||
7902 | { | 7976 | { |
7903 | u64 rt_runtime, rt_period; | ||
7904 | int err = 0; | 7977 | int err = 0; |
7905 | 7978 | ||
7906 | rt_period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC; | ||
7907 | rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC; | ||
7908 | if (rt_runtime_us == -1) | ||
7909 | rt_runtime = RUNTIME_INF; | ||
7910 | |||
7911 | mutex_lock(&rt_constraints_mutex); | 7979 | mutex_lock(&rt_constraints_mutex); |
7912 | read_lock(&tasklist_lock); | 7980 | read_lock(&tasklist_lock); |
7913 | if (rt_runtime_us == 0 && tg_has_rt_tasks(tg)) { | 7981 | if (rt_runtime_us == 0 && tg_has_rt_tasks(tg)) { |
@@ -7918,7 +7986,8 @@ int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) | |||
7918 | err = -EINVAL; | 7986 | err = -EINVAL; |
7919 | goto unlock; | 7987 | goto unlock; |
7920 | } | 7988 | } |
7921 | tg->rt_runtime = rt_runtime; | 7989 | tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period); |
7990 | tg->rt_bandwidth.rt_runtime = rt_runtime; | ||
7922 | unlock: | 7991 | unlock: |
7923 | read_unlock(&tasklist_lock); | 7992 | read_unlock(&tasklist_lock); |
7924 | mutex_unlock(&rt_constraints_mutex); | 7993 | mutex_unlock(&rt_constraints_mutex); |
@@ -7926,19 +7995,96 @@ int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) | |||
7926 | return err; | 7995 | return err; |
7927 | } | 7996 | } |
7928 | 7997 | ||
7998 | int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) | ||
7999 | { | ||
8000 | u64 rt_runtime, rt_period; | ||
8001 | |||
8002 | rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period); | ||
8003 | rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC; | ||
8004 | if (rt_runtime_us < 0) | ||
8005 | rt_runtime = RUNTIME_INF; | ||
8006 | |||
8007 | return tg_set_bandwidth(tg, rt_period, rt_runtime); | ||
8008 | } | ||
8009 | |||
7929 | long sched_group_rt_runtime(struct task_group *tg) | 8010 | long sched_group_rt_runtime(struct task_group *tg) |
7930 | { | 8011 | { |
7931 | u64 rt_runtime_us; | 8012 | u64 rt_runtime_us; |
7932 | 8013 | ||
7933 | if (tg->rt_runtime == RUNTIME_INF) | 8014 | if (tg->rt_bandwidth.rt_runtime == RUNTIME_INF) |
7934 | return -1; | 8015 | return -1; |
7935 | 8016 | ||
7936 | rt_runtime_us = tg->rt_runtime; | 8017 | rt_runtime_us = tg->rt_bandwidth.rt_runtime; |
7937 | do_div(rt_runtime_us, NSEC_PER_USEC); | 8018 | do_div(rt_runtime_us, NSEC_PER_USEC); |
7938 | return rt_runtime_us; | 8019 | return rt_runtime_us; |
7939 | } | 8020 | } |
8021 | |||
8022 | int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) | ||
8023 | { | ||
8024 | u64 rt_runtime, rt_period; | ||
8025 | |||
8026 | rt_period = (u64)rt_period_us * NSEC_PER_USEC; | ||
8027 | rt_runtime = tg->rt_bandwidth.rt_runtime; | ||
8028 | |||
8029 | return tg_set_bandwidth(tg, rt_period, rt_runtime); | ||
8030 | } | ||
8031 | |||
8032 | long sched_group_rt_period(struct task_group *tg) | ||
8033 | { | ||
8034 | u64 rt_period_us; | ||
8035 | |||
8036 | rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period); | ||
8037 | do_div(rt_period_us, NSEC_PER_USEC); | ||
8038 | return rt_period_us; | ||
8039 | } | ||
8040 | |||
8041 | static int sched_rt_global_constraints(void) | ||
8042 | { | ||
8043 | int ret = 0; | ||
8044 | |||
8045 | mutex_lock(&rt_constraints_mutex); | ||
8046 | if (!__rt_schedulable(NULL, 1, 0)) | ||
8047 | ret = -EINVAL; | ||
8048 | mutex_unlock(&rt_constraints_mutex); | ||
8049 | |||
8050 | return ret; | ||
8051 | } | ||
8052 | #else | ||
8053 | static int sched_rt_global_constraints(void) | ||
8054 | { | ||
8055 | return 0; | ||
8056 | } | ||
7940 | #endif | 8057 | #endif |
7941 | #endif /* CONFIG_GROUP_SCHED */ | 8058 | |
8059 | int sched_rt_handler(struct ctl_table *table, int write, | ||
8060 | struct file *filp, void __user *buffer, size_t *lenp, | ||
8061 | loff_t *ppos) | ||
8062 | { | ||
8063 | int ret; | ||
8064 | int old_period, old_runtime; | ||
8065 | static DEFINE_MUTEX(mutex); | ||
8066 | |||
8067 | mutex_lock(&mutex); | ||
8068 | old_period = sysctl_sched_rt_period; | ||
8069 | old_runtime = sysctl_sched_rt_runtime; | ||
8070 | |||
8071 | ret = proc_dointvec(table, write, filp, buffer, lenp, ppos); | ||
8072 | |||
8073 | if (!ret && write) { | ||
8074 | ret = sched_rt_global_constraints(); | ||
8075 | if (ret) { | ||
8076 | sysctl_sched_rt_period = old_period; | ||
8077 | sysctl_sched_rt_runtime = old_runtime; | ||
8078 | } else { | ||
8079 | def_rt_bandwidth.rt_runtime = global_rt_runtime(); | ||
8080 | def_rt_bandwidth.rt_period = | ||
8081 | ns_to_ktime(global_rt_period()); | ||
8082 | } | ||
8083 | } | ||
8084 | mutex_unlock(&mutex); | ||
8085 | |||
8086 | return ret; | ||
8087 | } | ||
7942 | 8088 | ||
7943 | #ifdef CONFIG_CGROUP_SCHED | 8089 | #ifdef CONFIG_CGROUP_SCHED |
7944 | 8090 | ||
@@ -7988,7 +8134,7 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
7988 | { | 8134 | { |
7989 | #ifdef CONFIG_RT_GROUP_SCHED | 8135 | #ifdef CONFIG_RT_GROUP_SCHED |
7990 | /* Don't accept realtime tasks when there is no way for them to run */ | 8136 | /* Don't accept realtime tasks when there is no way for them to run */ |
7991 | if (rt_task(tsk) && cgroup_tg(cgrp)->rt_runtime == 0) | 8137 | if (rt_task(tsk) && cgroup_tg(cgrp)->rt_bandwidth.rt_runtime == 0) |
7992 | return -EINVAL; | 8138 | return -EINVAL; |
7993 | #else | 8139 | #else |
7994 | /* We don't support RT-tasks being in separate groups */ | 8140 | /* We don't support RT-tasks being in separate groups */ |
@@ -8066,6 +8212,17 @@ static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft, | |||
8066 | 8212 | ||
8067 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); | 8213 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); |
8068 | } | 8214 | } |
8215 | |||
8216 | static int cpu_rt_period_write_uint(struct cgroup *cgrp, struct cftype *cftype, | ||
8217 | u64 rt_period_us) | ||
8218 | { | ||
8219 | return sched_group_set_rt_period(cgroup_tg(cgrp), rt_period_us); | ||
8220 | } | ||
8221 | |||
8222 | static u64 cpu_rt_period_read_uint(struct cgroup *cgrp, struct cftype *cft) | ||
8223 | { | ||
8224 | return sched_group_rt_period(cgroup_tg(cgrp)); | ||
8225 | } | ||
8069 | #endif | 8226 | #endif |
8070 | 8227 | ||
8071 | static struct cftype cpu_files[] = { | 8228 | static struct cftype cpu_files[] = { |
@@ -8082,6 +8239,11 @@ static struct cftype cpu_files[] = { | |||
8082 | .read = cpu_rt_runtime_read, | 8239 | .read = cpu_rt_runtime_read, |
8083 | .write = cpu_rt_runtime_write, | 8240 | .write = cpu_rt_runtime_write, |
8084 | }, | 8241 | }, |
8242 | { | ||
8243 | .name = "rt_period_us", | ||
8244 | .read_uint = cpu_rt_period_read_uint, | ||
8245 | .write_uint = cpu_rt_period_write_uint, | ||
8246 | }, | ||
8085 | #endif | 8247 | #endif |
8086 | }; | 8248 | }; |
8087 | 8249 | ||
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 0a6d2e516420..8bc176136666 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -62,7 +62,7 @@ static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) | |||
62 | if (!rt_rq->tg) | 62 | if (!rt_rq->tg) |
63 | return RUNTIME_INF; | 63 | return RUNTIME_INF; |
64 | 64 | ||
65 | return rt_rq->tg->rt_runtime; | 65 | return rt_rq->tg->rt_bandwidth.rt_runtime; |
66 | } | 66 | } |
67 | 67 | ||
68 | #define for_each_leaf_rt_rq(rt_rq, rq) \ | 68 | #define for_each_leaf_rt_rq(rt_rq, rq) \ |
@@ -127,14 +127,29 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se) | |||
127 | return p->prio != p->normal_prio; | 127 | return p->prio != p->normal_prio; |
128 | } | 128 | } |
129 | 129 | ||
130 | #ifdef CONFIG_SMP | ||
131 | static inline cpumask_t sched_rt_period_mask(void) | ||
132 | { | ||
133 | return cpu_rq(smp_processor_id())->rd->span; | ||
134 | } | ||
130 | #else | 135 | #else |
136 | static inline cpumask_t sched_rt_period_mask(void) | ||
137 | { | ||
138 | return cpu_online_map; | ||
139 | } | ||
140 | #endif | ||
131 | 141 | ||
132 | static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) | 142 | static inline |
143 | struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu) | ||
133 | { | 144 | { |
134 | if (sysctl_sched_rt_runtime == -1) | 145 | return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu]; |
135 | return RUNTIME_INF; | 146 | } |
136 | 147 | ||
137 | return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; | 148 | #else |
149 | |||
150 | static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) | ||
151 | { | ||
152 | return def_rt_bandwidth.rt_runtime; | ||
138 | } | 153 | } |
139 | 154 | ||
140 | #define for_each_leaf_rt_rq(rt_rq, rq) \ | 155 | #define for_each_leaf_rt_rq(rt_rq, rq) \ |
@@ -173,8 +188,55 @@ static inline int rt_rq_throttled(struct rt_rq *rt_rq) | |||
173 | { | 188 | { |
174 | return rt_rq->rt_throttled; | 189 | return rt_rq->rt_throttled; |
175 | } | 190 | } |
191 | |||
192 | static inline cpumask_t sched_rt_period_mask(void) | ||
193 | { | ||
194 | return cpu_online_map; | ||
195 | } | ||
196 | |||
197 | static inline | ||
198 | struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu) | ||
199 | { | ||
200 | return &cpu_rq(cpu)->rt; | ||
201 | } | ||
202 | |||
176 | #endif | 203 | #endif |
177 | 204 | ||
205 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | ||
206 | { | ||
207 | int i, idle = 1; | ||
208 | cpumask_t span; | ||
209 | |||
210 | if (rt_b->rt_runtime == RUNTIME_INF) | ||
211 | return 1; | ||
212 | |||
213 | span = sched_rt_period_mask(); | ||
214 | for_each_cpu_mask(i, span) { | ||
215 | int enqueue = 0; | ||
216 | struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); | ||
217 | struct rq *rq = rq_of_rt_rq(rt_rq); | ||
218 | |||
219 | spin_lock(&rq->lock); | ||
220 | if (rt_rq->rt_time) { | ||
221 | u64 runtime = rt_b->rt_runtime; | ||
222 | |||
223 | rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime); | ||
224 | if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { | ||
225 | rt_rq->rt_throttled = 0; | ||
226 | enqueue = 1; | ||
227 | } | ||
228 | if (rt_rq->rt_time || rt_rq->rt_nr_running) | ||
229 | idle = 0; | ||
230 | } | ||
231 | |||
232 | if (enqueue) | ||
233 | sched_rt_rq_enqueue(rt_rq); | ||
234 | spin_unlock(&rq->lock); | ||
235 | } | ||
236 | |||
237 | return idle; | ||
238 | } | ||
239 | |||
178 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) | 240 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) |
179 | { | 241 | { |
180 | #ifdef CONFIG_RT_GROUP_SCHED | 242 | #ifdef CONFIG_RT_GROUP_SCHED |
@@ -198,11 +260,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) | |||
198 | return rt_rq_throttled(rt_rq); | 260 | return rt_rq_throttled(rt_rq); |
199 | 261 | ||
200 | if (rt_rq->rt_time > runtime) { | 262 | if (rt_rq->rt_time > runtime) { |
201 | struct rq *rq = rq_of_rt_rq(rt_rq); | ||
202 | |||
203 | rq->rt_throttled = 1; | ||
204 | rt_rq->rt_throttled = 1; | 263 | rt_rq->rt_throttled = 1; |
205 | |||
206 | if (rt_rq_throttled(rt_rq)) { | 264 | if (rt_rq_throttled(rt_rq)) { |
207 | sched_rt_rq_dequeue(rt_rq); | 265 | sched_rt_rq_dequeue(rt_rq); |
208 | return 1; | 266 | return 1; |
@@ -212,29 +270,6 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) | |||
212 | return 0; | 270 | return 0; |
213 | } | 271 | } |
214 | 272 | ||
215 | static void update_sched_rt_period(struct rq *rq) | ||
216 | { | ||
217 | struct rt_rq *rt_rq; | ||
218 | u64 period; | ||
219 | |||
220 | while (rq->clock > rq->rt_period_expire) { | ||
221 | period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC; | ||
222 | rq->rt_period_expire += period; | ||
223 | |||
224 | for_each_leaf_rt_rq(rt_rq, rq) { | ||
225 | u64 runtime = sched_rt_runtime(rt_rq); | ||
226 | |||
227 | rt_rq->rt_time -= min(rt_rq->rt_time, runtime); | ||
228 | if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { | ||
229 | rt_rq->rt_throttled = 0; | ||
230 | sched_rt_rq_enqueue(rt_rq); | ||
231 | } | ||
232 | } | ||
233 | |||
234 | rq->rt_throttled = 0; | ||
235 | } | ||
236 | } | ||
237 | |||
238 | /* | 273 | /* |
239 | * Update the current task's runtime statistics. Skip current tasks that | 274 | * Update the current task's runtime statistics. Skip current tasks that |
240 | * are not in our scheduling class. | 275 | * are not in our scheduling class. |
@@ -284,6 +319,11 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
284 | #ifdef CONFIG_RT_GROUP_SCHED | 319 | #ifdef CONFIG_RT_GROUP_SCHED |
285 | if (rt_se_boosted(rt_se)) | 320 | if (rt_se_boosted(rt_se)) |
286 | rt_rq->rt_nr_boosted++; | 321 | rt_rq->rt_nr_boosted++; |
322 | |||
323 | if (rt_rq->tg) | ||
324 | start_rt_bandwidth(&rt_rq->tg->rt_bandwidth); | ||
325 | #else | ||
326 | start_rt_bandwidth(&def_rt_bandwidth); | ||
287 | #endif | 327 | #endif |
288 | } | 328 | } |
289 | 329 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index be332e1a0c29..fd3364827ccf 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -307,7 +307,7 @@ static struct ctl_table kern_table[] = { | |||
307 | .data = &sysctl_sched_rt_period, | 307 | .data = &sysctl_sched_rt_period, |
308 | .maxlen = sizeof(unsigned int), | 308 | .maxlen = sizeof(unsigned int), |
309 | .mode = 0644, | 309 | .mode = 0644, |
310 | .proc_handler = &proc_dointvec, | 310 | .proc_handler = &sched_rt_handler, |
311 | }, | 311 | }, |
312 | { | 312 | { |
313 | .ctl_name = CTL_UNNUMBERED, | 313 | .ctl_name = CTL_UNNUMBERED, |
@@ -315,7 +315,7 @@ static struct ctl_table kern_table[] = { | |||
315 | .data = &sysctl_sched_rt_runtime, | 315 | .data = &sysctl_sched_rt_runtime, |
316 | .maxlen = sizeof(int), | 316 | .maxlen = sizeof(int), |
317 | .mode = 0644, | 317 | .mode = 0644, |
318 | .proc_handler = &proc_dointvec, | 318 | .proc_handler = &sched_rt_handler, |
319 | }, | 319 | }, |
320 | { | 320 | { |
321 | .ctl_name = CTL_UNNUMBERED, | 321 | .ctl_name = CTL_UNNUMBERED, |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 69dba0c71727..d358d4e3a958 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -191,7 +191,6 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) | |||
191 | void tick_nohz_stop_sched_tick(void) | 191 | void tick_nohz_stop_sched_tick(void) |
192 | { | 192 | { |
193 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; | 193 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; |
194 | unsigned long rt_jiffies; | ||
195 | struct tick_sched *ts; | 194 | struct tick_sched *ts; |
196 | ktime_t last_update, expires, now; | 195 | ktime_t last_update, expires, now; |
197 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | 196 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; |
@@ -243,10 +242,6 @@ void tick_nohz_stop_sched_tick(void) | |||
243 | next_jiffies = get_next_timer_interrupt(last_jiffies); | 242 | next_jiffies = get_next_timer_interrupt(last_jiffies); |
244 | delta_jiffies = next_jiffies - last_jiffies; | 243 | delta_jiffies = next_jiffies - last_jiffies; |
245 | 244 | ||
246 | rt_jiffies = rt_needs_cpu(cpu); | ||
247 | if (rt_jiffies && rt_jiffies < delta_jiffies) | ||
248 | delta_jiffies = rt_jiffies; | ||
249 | |||
250 | if (rcu_needs_cpu(cpu)) | 245 | if (rcu_needs_cpu(cpu)) |
251 | delta_jiffies = 1; | 246 | delta_jiffies = 1; |
252 | /* | 247 | /* |
diff --git a/kernel/user.c b/kernel/user.c index 7132022a040c..5925c6887c10 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -193,6 +193,33 @@ static ssize_t cpu_rt_runtime_store(struct kobject *kobj, | |||
193 | 193 | ||
194 | static struct kobj_attribute cpu_rt_runtime_attr = | 194 | static struct kobj_attribute cpu_rt_runtime_attr = |
195 | __ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store); | 195 | __ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store); |
196 | |||
197 | static ssize_t cpu_rt_period_show(struct kobject *kobj, | ||
198 | struct kobj_attribute *attr, | ||
199 | char *buf) | ||
200 | { | ||
201 | struct user_struct *up = container_of(kobj, struct user_struct, kobj); | ||
202 | |||
203 | return sprintf(buf, "%lu\n", sched_group_rt_period(up->tg)); | ||
204 | } | ||
205 | |||
206 | static ssize_t cpu_rt_period_store(struct kobject *kobj, | ||
207 | struct kobj_attribute *attr, | ||
208 | const char *buf, size_t size) | ||
209 | { | ||
210 | struct user_struct *up = container_of(kobj, struct user_struct, kobj); | ||
211 | unsigned long rt_period; | ||
212 | int rc; | ||
213 | |||
214 | sscanf(buf, "%lu", &rt_period); | ||
215 | |||
216 | rc = sched_group_set_rt_period(up->tg, rt_period); | ||
217 | |||
218 | return (rc ? rc : size); | ||
219 | } | ||
220 | |||
221 | static struct kobj_attribute cpu_rt_period_attr = | ||
222 | __ATTR(cpu_rt_period, 0644, cpu_rt_period_show, cpu_rt_period_store); | ||
196 | #endif | 223 | #endif |
197 | 224 | ||
198 | /* default attributes per uid directory */ | 225 | /* default attributes per uid directory */ |
@@ -202,6 +229,7 @@ static struct attribute *uids_attributes[] = { | |||
202 | #endif | 229 | #endif |
203 | #ifdef CONFIG_RT_GROUP_SCHED | 230 | #ifdef CONFIG_RT_GROUP_SCHED |
204 | &cpu_rt_runtime_attr.attr, | 231 | &cpu_rt_runtime_attr.attr, |
232 | &cpu_rt_period_attr.attr, | ||
205 | #endif | 233 | #endif |
206 | NULL | 234 | NULL |
207 | }; | 235 | }; |