aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorPeter Zijlstra <a.p.zijlstra@chello.nl>2008-04-19 13:44:57 -0400
committerIngo Molnar <mingo@elte.hu>2008-04-19 13:44:57 -0400
commitd0b27fa77854b149ad4af08b0fe47fe712a47ade (patch)
tree2f4487c108a5132e8d52456567b4a67e78fbb4a6 /kernel
parent57d3da2911787a101a384532f4519f9640bae883 (diff)
sched: rt-group: synchronised bandwidth period
Various SMP balancing algorithms require that the bandwidth period run in sync. Possible improvements are moving the rt_bandwidth thing into root_domain and keeping a span per rt_bandwidth which marks throttled cpus. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/sched.c260
-rw-r--r--kernel/sched_rt.c104
-rw-r--r--kernel/sysctl.c4
-rw-r--r--kernel/time/tick-sched.c5
-rw-r--r--kernel/user.c28
5 files changed, 313 insertions, 88 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index e813e845d9cf..bb20323f7d09 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -115,6 +115,11 @@ unsigned long long __attribute__((weak)) sched_clock(void)
115 */ 115 */
116#define DEF_TIMESLICE (100 * HZ / 1000) 116#define DEF_TIMESLICE (100 * HZ / 1000)
117 117
118/*
119 * single value that denotes runtime == period, ie unlimited time.
120 */
121#define RUNTIME_INF ((u64)~0ULL)
122
118#ifdef CONFIG_SMP 123#ifdef CONFIG_SMP
119/* 124/*
120 * Divide a load by a sched group cpu_power : (load / sg->__cpu_power) 125 * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
@@ -156,6 +161,80 @@ struct rt_prio_array {
156 struct list_head queue[MAX_RT_PRIO]; 161 struct list_head queue[MAX_RT_PRIO];
157}; 162};
158 163
164struct rt_bandwidth {
165 ktime_t rt_period;
166 u64 rt_runtime;
167 struct hrtimer rt_period_timer;
168};
169
170static struct rt_bandwidth def_rt_bandwidth;
171
172static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
173
174static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
175{
176 struct rt_bandwidth *rt_b =
177 container_of(timer, struct rt_bandwidth, rt_period_timer);
178 ktime_t now;
179 int overrun;
180 int idle = 0;
181
182 for (;;) {
183 now = hrtimer_cb_get_time(timer);
184 overrun = hrtimer_forward(timer, now, rt_b->rt_period);
185
186 if (!overrun)
187 break;
188
189 idle = do_sched_rt_period_timer(rt_b, overrun);
190 }
191
192 return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
193}
194
195static
196void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
197{
198 rt_b->rt_period = ns_to_ktime(period);
199 rt_b->rt_runtime = runtime;
200
201 hrtimer_init(&rt_b->rt_period_timer,
202 CLOCK_MONOTONIC, HRTIMER_MODE_REL);
203 rt_b->rt_period_timer.function = sched_rt_period_timer;
204 rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
205}
206
207static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
208{
209 ktime_t now;
210
211 if (rt_b->rt_runtime == RUNTIME_INF)
212 return;
213
214 if (hrtimer_active(&rt_b->rt_period_timer))
215 return;
216
217 spin_lock(&rt_b->rt_runtime_lock);
218 for (;;) {
219 if (hrtimer_active(&rt_b->rt_period_timer))
220 break;
221
222 now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
223 hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
224 hrtimer_start(&rt_b->rt_period_timer,
225 rt_b->rt_period_timer.expires,
226 HRTIMER_MODE_ABS);
227 }
228 spin_unlock(&rt_b->rt_runtime_lock);
229}
230
231#ifdef CONFIG_RT_GROUP_SCHED
232static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
233{
234 hrtimer_cancel(&rt_b->rt_period_timer);
235}
236#endif
237
159#ifdef CONFIG_GROUP_SCHED 238#ifdef CONFIG_GROUP_SCHED
160 239
161#include <linux/cgroup.h> 240#include <linux/cgroup.h>
@@ -182,7 +261,7 @@ struct task_group {
182 struct sched_rt_entity **rt_se; 261 struct sched_rt_entity **rt_se;
183 struct rt_rq **rt_rq; 262 struct rt_rq **rt_rq;
184 263
185 u64 rt_runtime; 264 struct rt_bandwidth rt_bandwidth;
186#endif 265#endif
187 266
188 struct rcu_head rcu; 267 struct rcu_head rcu;
@@ -407,8 +486,6 @@ struct rq {
407 486
408 struct cfs_rq cfs; 487 struct cfs_rq cfs;
409 struct rt_rq rt; 488 struct rt_rq rt;
410 u64 rt_period_expire;
411 int rt_throttled;
412 489
413#ifdef CONFIG_FAIR_GROUP_SCHED 490#ifdef CONFIG_FAIR_GROUP_SCHED
414 /* list of leaf cfs_rq on this cpu: */ 491 /* list of leaf cfs_rq on this cpu: */
@@ -592,23 +669,6 @@ static void update_rq_clock(struct rq *rq)
592#define task_rq(p) cpu_rq(task_cpu(p)) 669#define task_rq(p) cpu_rq(task_cpu(p))
593#define cpu_curr(cpu) (cpu_rq(cpu)->curr) 670#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
594 671
595unsigned long rt_needs_cpu(int cpu)
596{
597 struct rq *rq = cpu_rq(cpu);
598 u64 delta;
599
600 if (!rq->rt_throttled)
601 return 0;
602
603 if (rq->clock > rq->rt_period_expire)
604 return 1;
605
606 delta = rq->rt_period_expire - rq->clock;
607 do_div(delta, NSEC_PER_SEC / HZ);
608
609 return (unsigned long)delta;
610}
611
612/* 672/*
613 * Tunables that become constants when CONFIG_SCHED_DEBUG is off: 673 * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
614 */ 674 */
@@ -664,10 +724,18 @@ static __read_mostly int scheduler_running;
664 */ 724 */
665int sysctl_sched_rt_runtime = 950000; 725int sysctl_sched_rt_runtime = 950000;
666 726
667/* 727static inline u64 global_rt_period(void)
668 * single value that denotes runtime == period, ie unlimited time. 728{
669 */ 729 return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
670#define RUNTIME_INF ((u64)~0ULL) 730}
731
732static inline u64 global_rt_runtime(void)
733{
734 if (sysctl_sched_rt_period < 0)
735 return RUNTIME_INF;
736
737 return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
738}
671 739
672static const unsigned long long time_sync_thresh = 100000; 740static const unsigned long long time_sync_thresh = 100000;
673 741
@@ -3854,7 +3922,6 @@ void scheduler_tick(void)
3854 update_last_tick_seen(rq); 3922 update_last_tick_seen(rq);
3855 update_cpu_load(rq); 3923 update_cpu_load(rq);
3856 curr->sched_class->task_tick(rq, curr, 0); 3924 curr->sched_class->task_tick(rq, curr, 0);
3857 update_sched_rt_period(rq);
3858 spin_unlock(&rq->lock); 3925 spin_unlock(&rq->lock);
3859 3926
3860#ifdef CONFIG_SMP 3927#ifdef CONFIG_SMP
@@ -4689,7 +4756,7 @@ recheck:
4689 * Do not allow realtime tasks into groups that have no runtime 4756 * Do not allow realtime tasks into groups that have no runtime
4690 * assigned. 4757 * assigned.
4691 */ 4758 */
4692 if (rt_policy(policy) && task_group(p)->rt_runtime == 0) 4759 if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
4693 return -EPERM; 4760 return -EPERM;
4694#endif 4761#endif
4695 4762
@@ -7288,6 +7355,14 @@ void __init sched_init(void)
7288 init_defrootdomain(); 7355 init_defrootdomain();
7289#endif 7356#endif
7290 7357
7358 init_rt_bandwidth(&def_rt_bandwidth,
7359 global_rt_period(), global_rt_runtime());
7360
7361#ifdef CONFIG_RT_GROUP_SCHED
7362 init_rt_bandwidth(&init_task_group.rt_bandwidth,
7363 global_rt_period(), global_rt_runtime());
7364#endif
7365
7291#ifdef CONFIG_GROUP_SCHED 7366#ifdef CONFIG_GROUP_SCHED
7292 list_add(&init_task_group.list, &task_groups); 7367 list_add(&init_task_group.list, &task_groups);
7293#endif 7368#endif
@@ -7312,15 +7387,11 @@ void __init sched_init(void)
7312 7387
7313#endif 7388#endif
7314#ifdef CONFIG_RT_GROUP_SCHED 7389#ifdef CONFIG_RT_GROUP_SCHED
7315 init_task_group.rt_runtime =
7316 sysctl_sched_rt_runtime * NSEC_PER_USEC;
7317 INIT_LIST_HEAD(&rq->leaf_rt_rq_list); 7390 INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
7318 init_tg_rt_entry(rq, &init_task_group, 7391 init_tg_rt_entry(rq, &init_task_group,
7319 &per_cpu(init_rt_rq, i), 7392 &per_cpu(init_rt_rq, i),
7320 &per_cpu(init_sched_rt_entity, i), i, 1); 7393 &per_cpu(init_sched_rt_entity, i), i, 1);
7321#endif 7394#endif
7322 rq->rt_period_expire = 0;
7323 rq->rt_throttled = 0;
7324 7395
7325 for (j = 0; j < CPU_LOAD_IDX_MAX; j++) 7396 for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
7326 rq->cpu_load[j] = 0; 7397 rq->cpu_load[j] = 0;
@@ -7506,8 +7577,6 @@ void set_curr_task(int cpu, struct task_struct *p)
7506 7577
7507#endif 7578#endif
7508 7579
7509#ifdef CONFIG_GROUP_SCHED
7510
7511#ifdef CONFIG_FAIR_GROUP_SCHED 7580#ifdef CONFIG_FAIR_GROUP_SCHED
7512static void free_fair_sched_group(struct task_group *tg) 7581static void free_fair_sched_group(struct task_group *tg)
7513{ 7582{
@@ -7596,6 +7665,8 @@ static void free_rt_sched_group(struct task_group *tg)
7596{ 7665{
7597 int i; 7666 int i;
7598 7667
7668 destroy_rt_bandwidth(&tg->rt_bandwidth);
7669
7599 for_each_possible_cpu(i) { 7670 for_each_possible_cpu(i) {
7600 if (tg->rt_rq) 7671 if (tg->rt_rq)
7601 kfree(tg->rt_rq[i]); 7672 kfree(tg->rt_rq[i]);
@@ -7621,7 +7692,8 @@ static int alloc_rt_sched_group(struct task_group *tg)
7621 if (!tg->rt_se) 7692 if (!tg->rt_se)
7622 goto err; 7693 goto err;
7623 7694
7624 tg->rt_runtime = 0; 7695 init_rt_bandwidth(&tg->rt_bandwidth,
7696 ktime_to_ns(def_rt_bandwidth.rt_period), 0);
7625 7697
7626 for_each_possible_cpu(i) { 7698 for_each_possible_cpu(i) {
7627 rq = cpu_rq(i); 7699 rq = cpu_rq(i);
@@ -7674,6 +7746,7 @@ static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
7674} 7746}
7675#endif 7747#endif
7676 7748
7749#ifdef CONFIG_GROUP_SCHED
7677static void free_sched_group(struct task_group *tg) 7750static void free_sched_group(struct task_group *tg)
7678{ 7751{
7679 free_fair_sched_group(tg); 7752 free_fair_sched_group(tg);
@@ -7775,6 +7848,7 @@ void sched_move_task(struct task_struct *tsk)
7775 7848
7776 task_rq_unlock(rq, &flags); 7849 task_rq_unlock(rq, &flags);
7777} 7850}
7851#endif
7778 7852
7779#ifdef CONFIG_FAIR_GROUP_SCHED 7853#ifdef CONFIG_FAIR_GROUP_SCHED
7780static void set_se_shares(struct sched_entity *se, unsigned long shares) 7854static void set_se_shares(struct sched_entity *se, unsigned long shares)
@@ -7871,16 +7945,15 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
7871 struct task_group *tgi; 7945 struct task_group *tgi;
7872 unsigned long total = 0; 7946 unsigned long total = 0;
7873 unsigned long global_ratio = 7947 unsigned long global_ratio =
7874 to_ratio(sysctl_sched_rt_period, 7948 to_ratio(global_rt_period(), global_rt_runtime());
7875 sysctl_sched_rt_runtime < 0 ?
7876 RUNTIME_INF : sysctl_sched_rt_runtime);
7877 7949
7878 rcu_read_lock(); 7950 rcu_read_lock();
7879 list_for_each_entry_rcu(tgi, &task_groups, list) { 7951 list_for_each_entry_rcu(tgi, &task_groups, list) {
7880 if (tgi == tg) 7952 if (tgi == tg)
7881 continue; 7953 continue;
7882 7954
7883 total += to_ratio(period, tgi->rt_runtime); 7955 total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
7956 tgi->rt_bandwidth.rt_runtime);
7884 } 7957 }
7885 rcu_read_unlock(); 7958 rcu_read_unlock();
7886 7959
@@ -7898,16 +7971,11 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
7898 return 0; 7971 return 0;
7899} 7972}
7900 7973
7901int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) 7974static int tg_set_bandwidth(struct task_group *tg,
7975 u64 rt_period, u64 rt_runtime)
7902{ 7976{
7903 u64 rt_runtime, rt_period;
7904 int err = 0; 7977 int err = 0;
7905 7978
7906 rt_period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
7907 rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
7908 if (rt_runtime_us == -1)
7909 rt_runtime = RUNTIME_INF;
7910
7911 mutex_lock(&rt_constraints_mutex); 7979 mutex_lock(&rt_constraints_mutex);
7912 read_lock(&tasklist_lock); 7980 read_lock(&tasklist_lock);
7913 if (rt_runtime_us == 0 && tg_has_rt_tasks(tg)) { 7981 if (rt_runtime_us == 0 && tg_has_rt_tasks(tg)) {
@@ -7918,7 +7986,8 @@ int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
7918 err = -EINVAL; 7986 err = -EINVAL;
7919 goto unlock; 7987 goto unlock;
7920 } 7988 }
7921 tg->rt_runtime = rt_runtime; 7989 tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
7990 tg->rt_bandwidth.rt_runtime = rt_runtime;
7922 unlock: 7991 unlock:
7923 read_unlock(&tasklist_lock); 7992 read_unlock(&tasklist_lock);
7924 mutex_unlock(&rt_constraints_mutex); 7993 mutex_unlock(&rt_constraints_mutex);
@@ -7926,19 +7995,96 @@ int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
7926 return err; 7995 return err;
7927} 7996}
7928 7997
7998int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
7999{
8000 u64 rt_runtime, rt_period;
8001
8002 rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
8003 rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
8004 if (rt_runtime_us < 0)
8005 rt_runtime = RUNTIME_INF;
8006
8007 return tg_set_bandwidth(tg, rt_period, rt_runtime);
8008}
8009
7929long sched_group_rt_runtime(struct task_group *tg) 8010long sched_group_rt_runtime(struct task_group *tg)
7930{ 8011{
7931 u64 rt_runtime_us; 8012 u64 rt_runtime_us;
7932 8013
7933 if (tg->rt_runtime == RUNTIME_INF) 8014 if (tg->rt_bandwidth.rt_runtime == RUNTIME_INF)
7934 return -1; 8015 return -1;
7935 8016
7936 rt_runtime_us = tg->rt_runtime; 8017 rt_runtime_us = tg->rt_bandwidth.rt_runtime;
7937 do_div(rt_runtime_us, NSEC_PER_USEC); 8018 do_div(rt_runtime_us, NSEC_PER_USEC);
7938 return rt_runtime_us; 8019 return rt_runtime_us;
7939} 8020}
8021
8022int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
8023{
8024 u64 rt_runtime, rt_period;
8025
8026 rt_period = (u64)rt_period_us * NSEC_PER_USEC;
8027 rt_runtime = tg->rt_bandwidth.rt_runtime;
8028
8029 return tg_set_bandwidth(tg, rt_period, rt_runtime);
8030}
8031
8032long sched_group_rt_period(struct task_group *tg)
8033{
8034 u64 rt_period_us;
8035
8036 rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period);
8037 do_div(rt_period_us, NSEC_PER_USEC);
8038 return rt_period_us;
8039}
8040
8041static int sched_rt_global_constraints(void)
8042{
8043 int ret = 0;
8044
8045 mutex_lock(&rt_constraints_mutex);
8046 if (!__rt_schedulable(NULL, 1, 0))
8047 ret = -EINVAL;
8048 mutex_unlock(&rt_constraints_mutex);
8049
8050 return ret;
8051}
8052#else
8053static int sched_rt_global_constraints(void)
8054{
8055 return 0;
8056}
7940#endif 8057#endif
7941#endif /* CONFIG_GROUP_SCHED */ 8058
8059int sched_rt_handler(struct ctl_table *table, int write,
8060 struct file *filp, void __user *buffer, size_t *lenp,
8061 loff_t *ppos)
8062{
8063 int ret;
8064 int old_period, old_runtime;
8065 static DEFINE_MUTEX(mutex);
8066
8067 mutex_lock(&mutex);
8068 old_period = sysctl_sched_rt_period;
8069 old_runtime = sysctl_sched_rt_runtime;
8070
8071 ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
8072
8073 if (!ret && write) {
8074 ret = sched_rt_global_constraints();
8075 if (ret) {
8076 sysctl_sched_rt_period = old_period;
8077 sysctl_sched_rt_runtime = old_runtime;
8078 } else {
8079 def_rt_bandwidth.rt_runtime = global_rt_runtime();
8080 def_rt_bandwidth.rt_period =
8081 ns_to_ktime(global_rt_period());
8082 }
8083 }
8084 mutex_unlock(&mutex);
8085
8086 return ret;
8087}
7942 8088
7943#ifdef CONFIG_CGROUP_SCHED 8089#ifdef CONFIG_CGROUP_SCHED
7944 8090
@@ -7988,7 +8134,7 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
7988{ 8134{
7989#ifdef CONFIG_RT_GROUP_SCHED 8135#ifdef CONFIG_RT_GROUP_SCHED
7990 /* Don't accept realtime tasks when there is no way for them to run */ 8136 /* Don't accept realtime tasks when there is no way for them to run */
7991 if (rt_task(tsk) && cgroup_tg(cgrp)->rt_runtime == 0) 8137 if (rt_task(tsk) && cgroup_tg(cgrp)->rt_bandwidth.rt_runtime == 0)
7992 return -EINVAL; 8138 return -EINVAL;
7993#else 8139#else
7994 /* We don't support RT-tasks being in separate groups */ 8140 /* We don't support RT-tasks being in separate groups */
@@ -8066,6 +8212,17 @@ static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft,
8066 8212
8067 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); 8213 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
8068} 8214}
8215
8216static int cpu_rt_period_write_uint(struct cgroup *cgrp, struct cftype *cftype,
8217 u64 rt_period_us)
8218{
8219 return sched_group_set_rt_period(cgroup_tg(cgrp), rt_period_us);
8220}
8221
8222static u64 cpu_rt_period_read_uint(struct cgroup *cgrp, struct cftype *cft)
8223{
8224 return sched_group_rt_period(cgroup_tg(cgrp));
8225}
8069#endif 8226#endif
8070 8227
8071static struct cftype cpu_files[] = { 8228static struct cftype cpu_files[] = {
@@ -8082,6 +8239,11 @@ static struct cftype cpu_files[] = {
8082 .read = cpu_rt_runtime_read, 8239 .read = cpu_rt_runtime_read,
8083 .write = cpu_rt_runtime_write, 8240 .write = cpu_rt_runtime_write,
8084 }, 8241 },
8242 {
8243 .name = "rt_period_us",
8244 .read_uint = cpu_rt_period_read_uint,
8245 .write_uint = cpu_rt_period_write_uint,
8246 },
8085#endif 8247#endif
8086}; 8248};
8087 8249
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 0a6d2e516420..8bc176136666 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -62,7 +62,7 @@ static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
62 if (!rt_rq->tg) 62 if (!rt_rq->tg)
63 return RUNTIME_INF; 63 return RUNTIME_INF;
64 64
65 return rt_rq->tg->rt_runtime; 65 return rt_rq->tg->rt_bandwidth.rt_runtime;
66} 66}
67 67
68#define for_each_leaf_rt_rq(rt_rq, rq) \ 68#define for_each_leaf_rt_rq(rt_rq, rq) \
@@ -127,14 +127,29 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se)
127 return p->prio != p->normal_prio; 127 return p->prio != p->normal_prio;
128} 128}
129 129
130#ifdef CONFIG_SMP
131static inline cpumask_t sched_rt_period_mask(void)
132{
133 return cpu_rq(smp_processor_id())->rd->span;
134}
130#else 135#else
136static inline cpumask_t sched_rt_period_mask(void)
137{
138 return cpu_online_map;
139}
140#endif
131 141
132static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) 142static inline
143struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
133{ 144{
134 if (sysctl_sched_rt_runtime == -1) 145 return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
135 return RUNTIME_INF; 146}
136 147
137 return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; 148#else
149
150static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
151{
152 return def_rt_bandwidth.rt_runtime;
138} 153}
139 154
140#define for_each_leaf_rt_rq(rt_rq, rq) \ 155#define for_each_leaf_rt_rq(rt_rq, rq) \
@@ -173,8 +188,55 @@ static inline int rt_rq_throttled(struct rt_rq *rt_rq)
173{ 188{
174 return rt_rq->rt_throttled; 189 return rt_rq->rt_throttled;
175} 190}
191
192static inline cpumask_t sched_rt_period_mask(void)
193{
194 return cpu_online_map;
195}
196
197static inline
198struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
199{
200 return &cpu_rq(cpu)->rt;
201}
202
176#endif 203#endif
177 204
205static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
206{
207 int i, idle = 1;
208 cpumask_t span;
209
210 if (rt_b->rt_runtime == RUNTIME_INF)
211 return 1;
212
213 span = sched_rt_period_mask();
214 for_each_cpu_mask(i, span) {
215 int enqueue = 0;
216 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
217 struct rq *rq = rq_of_rt_rq(rt_rq);
218
219 spin_lock(&rq->lock);
220 if (rt_rq->rt_time) {
221 u64 runtime = rt_b->rt_runtime;
222
223 rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
224 if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
225 rt_rq->rt_throttled = 0;
226 enqueue = 1;
227 }
228 if (rt_rq->rt_time || rt_rq->rt_nr_running)
229 idle = 0;
230 }
231
232 if (enqueue)
233 sched_rt_rq_enqueue(rt_rq);
234 spin_unlock(&rq->lock);
235 }
236
237 return idle;
238}
239
178static inline int rt_se_prio(struct sched_rt_entity *rt_se) 240static inline int rt_se_prio(struct sched_rt_entity *rt_se)
179{ 241{
180#ifdef CONFIG_RT_GROUP_SCHED 242#ifdef CONFIG_RT_GROUP_SCHED
@@ -198,11 +260,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
198 return rt_rq_throttled(rt_rq); 260 return rt_rq_throttled(rt_rq);
199 261
200 if (rt_rq->rt_time > runtime) { 262 if (rt_rq->rt_time > runtime) {
201 struct rq *rq = rq_of_rt_rq(rt_rq);
202
203 rq->rt_throttled = 1;
204 rt_rq->rt_throttled = 1; 263 rt_rq->rt_throttled = 1;
205
206 if (rt_rq_throttled(rt_rq)) { 264 if (rt_rq_throttled(rt_rq)) {
207 sched_rt_rq_dequeue(rt_rq); 265 sched_rt_rq_dequeue(rt_rq);
208 return 1; 266 return 1;
@@ -212,29 +270,6 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
212 return 0; 270 return 0;
213} 271}
214 272
215static void update_sched_rt_period(struct rq *rq)
216{
217 struct rt_rq *rt_rq;
218 u64 period;
219
220 while (rq->clock > rq->rt_period_expire) {
221 period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
222 rq->rt_period_expire += period;
223
224 for_each_leaf_rt_rq(rt_rq, rq) {
225 u64 runtime = sched_rt_runtime(rt_rq);
226
227 rt_rq->rt_time -= min(rt_rq->rt_time, runtime);
228 if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
229 rt_rq->rt_throttled = 0;
230 sched_rt_rq_enqueue(rt_rq);
231 }
232 }
233
234 rq->rt_throttled = 0;
235 }
236}
237
238/* 273/*
239 * Update the current task's runtime statistics. Skip current tasks that 274 * Update the current task's runtime statistics. Skip current tasks that
240 * are not in our scheduling class. 275 * are not in our scheduling class.
@@ -284,6 +319,11 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
284#ifdef CONFIG_RT_GROUP_SCHED 319#ifdef CONFIG_RT_GROUP_SCHED
285 if (rt_se_boosted(rt_se)) 320 if (rt_se_boosted(rt_se))
286 rt_rq->rt_nr_boosted++; 321 rt_rq->rt_nr_boosted++;
322
323 if (rt_rq->tg)
324 start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
325#else
326 start_rt_bandwidth(&def_rt_bandwidth);
287#endif 327#endif
288} 328}
289 329
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index be332e1a0c29..fd3364827ccf 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -307,7 +307,7 @@ static struct ctl_table kern_table[] = {
307 .data = &sysctl_sched_rt_period, 307 .data = &sysctl_sched_rt_period,
308 .maxlen = sizeof(unsigned int), 308 .maxlen = sizeof(unsigned int),
309 .mode = 0644, 309 .mode = 0644,
310 .proc_handler = &proc_dointvec, 310 .proc_handler = &sched_rt_handler,
311 }, 311 },
312 { 312 {
313 .ctl_name = CTL_UNNUMBERED, 313 .ctl_name = CTL_UNNUMBERED,
@@ -315,7 +315,7 @@ static struct ctl_table kern_table[] = {
315 .data = &sysctl_sched_rt_runtime, 315 .data = &sysctl_sched_rt_runtime,
316 .maxlen = sizeof(int), 316 .maxlen = sizeof(int),
317 .mode = 0644, 317 .mode = 0644,
318 .proc_handler = &proc_dointvec, 318 .proc_handler = &sched_rt_handler,
319 }, 319 },
320 { 320 {
321 .ctl_name = CTL_UNNUMBERED, 321 .ctl_name = CTL_UNNUMBERED,
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 69dba0c71727..d358d4e3a958 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -191,7 +191,6 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
191void tick_nohz_stop_sched_tick(void) 191void tick_nohz_stop_sched_tick(void)
192{ 192{
193 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; 193 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
194 unsigned long rt_jiffies;
195 struct tick_sched *ts; 194 struct tick_sched *ts;
196 ktime_t last_update, expires, now; 195 ktime_t last_update, expires, now;
197 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; 196 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
@@ -243,10 +242,6 @@ void tick_nohz_stop_sched_tick(void)
243 next_jiffies = get_next_timer_interrupt(last_jiffies); 242 next_jiffies = get_next_timer_interrupt(last_jiffies);
244 delta_jiffies = next_jiffies - last_jiffies; 243 delta_jiffies = next_jiffies - last_jiffies;
245 244
246 rt_jiffies = rt_needs_cpu(cpu);
247 if (rt_jiffies && rt_jiffies < delta_jiffies)
248 delta_jiffies = rt_jiffies;
249
250 if (rcu_needs_cpu(cpu)) 245 if (rcu_needs_cpu(cpu))
251 delta_jiffies = 1; 246 delta_jiffies = 1;
252 /* 247 /*
diff --git a/kernel/user.c b/kernel/user.c
index 7132022a040c..5925c6887c10 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -193,6 +193,33 @@ static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
193 193
194static struct kobj_attribute cpu_rt_runtime_attr = 194static struct kobj_attribute cpu_rt_runtime_attr =
195 __ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store); 195 __ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store);
196
197static ssize_t cpu_rt_period_show(struct kobject *kobj,
198 struct kobj_attribute *attr,
199 char *buf)
200{
201 struct user_struct *up = container_of(kobj, struct user_struct, kobj);
202
203 return sprintf(buf, "%lu\n", sched_group_rt_period(up->tg));
204}
205
206static ssize_t cpu_rt_period_store(struct kobject *kobj,
207 struct kobj_attribute *attr,
208 const char *buf, size_t size)
209{
210 struct user_struct *up = container_of(kobj, struct user_struct, kobj);
211 unsigned long rt_period;
212 int rc;
213
214 sscanf(buf, "%lu", &rt_period);
215
216 rc = sched_group_set_rt_period(up->tg, rt_period);
217
218 return (rc ? rc : size);
219}
220
221static struct kobj_attribute cpu_rt_period_attr =
222 __ATTR(cpu_rt_period, 0644, cpu_rt_period_show, cpu_rt_period_store);
196#endif 223#endif
197 224
198/* default attributes per uid directory */ 225/* default attributes per uid directory */
@@ -202,6 +229,7 @@ static struct attribute *uids_attributes[] = {
202#endif 229#endif
203#ifdef CONFIG_RT_GROUP_SCHED 230#ifdef CONFIG_RT_GROUP_SCHED
204 &cpu_rt_runtime_attr.attr, 231 &cpu_rt_runtime_attr.attr,
232 &cpu_rt_period_attr.attr,
205#endif 233#endif
206 NULL 234 NULL
207}; 235};