author		Peter Zijlstra <a.p.zijlstra@chello.nl>	2008-02-13 09:45:39 -0500
committer	Ingo Molnar <mingo@elte.hu>	2008-02-13 09:45:39 -0500
commit		9f0c1e560c43327b70998e6c702b2f01321130d9 (patch)
tree		b2cc7ef5bb0dc9d7d2912de339bff3e0db3530c9 /kernel
parent		23b0fdfc9299b137bd126e9dc22f62a59dae546d (diff)
sched: rt-group: interface
Change the rt_ratio interface to rt_runtime_us, to match rt_period_us.
This avoids picking a granularity for the ratio.

Extend the /sys/kernel/uids/<uid>/ interface to allow setting
the group's rt_runtime.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
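For context, a minimal user-space sketch of how the resulting knobs could be driven. This is illustrative only: the /cgroup mount point, the group name and the example uid are assumptions; only /proc/sys/kernel/sched_rt_{period,runtime}_us, the cpu.rt_runtime_us cgroup file and /sys/kernel/uids/<uid>/cpu_rt_runtime come from the patch itself.

/*
 * Illustrative sketch of the new interface (not part of this patch).
 * Assumes a cgroup hierarchy mounted at /cgroup with a group "rtgroup",
 * and uid 1000 present under /sys/kernel/uids/.
 */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) != (ssize_t)strlen(val)) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* global budget: 950ms of -rt runtime per 1s period (the defaults) */
	write_str("/proc/sys/kernel/sched_rt_period_us", "1000000");
	write_str("/proc/sys/kernel/sched_rt_runtime_us", "950000");

	/* per-uid budget via the extended /sys/kernel/uids/<uid>/ interface */
	write_str("/sys/kernel/uids/1000/cpu_rt_runtime", "300000");

	/* per-cgroup budget; writing -1 would mean runtime == period */
	write_str("/cgroup/rtgroup/cpu.rt_runtime_us", "300000");

	return 0;
}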
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/sched.c	141
-rw-r--r--	kernel/sched_rt.c	53
-rw-r--r--	kernel/sysctl.c	32
-rw-r--r--	kernel/user.c	28
4 files changed, 178 insertions(+), 76 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index cecaea67ae9b..85a5fbff2b00 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -176,7 +176,7 @@ struct task_group {
 	struct sched_rt_entity **rt_se;
 	struct rt_rq **rt_rq;
 
-	unsigned int rt_ratio;
+	u64 rt_runtime;
 
 	/*
 	 * shares assigned to a task group governs how much of cpu bandwidth
@@ -642,19 +642,21 @@ const_debug unsigned int sysctl_sched_features =
 const_debug unsigned int sysctl_sched_nr_migrate = 32;
 
 /*
- * period over which we measure -rt task cpu usage in ms.
+ * period over which we measure -rt task cpu usage in us.
  * default: 1s
  */
-const_debug unsigned int sysctl_sched_rt_period = 1000;
+unsigned int sysctl_sched_rt_period = 1000000;
 
-#define SCHED_RT_FRAC_SHIFT	16
-#define SCHED_RT_FRAC		(1UL << SCHED_RT_FRAC_SHIFT)
+/*
+ * part of the period that we allow rt tasks to run in us.
+ * default: 0.95s
+ */
+int sysctl_sched_rt_runtime = 950000;
 
 /*
- * ratio of time -rt tasks may consume.
- * default: 95%
+ * single value that denotes runtime == period, ie unlimited time.
  */
-const_debug unsigned int sysctl_sched_rt_ratio = 62259;
+#define RUNTIME_INF	((u64)~0ULL)
 
 /*
  * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
@@ -7187,7 +7189,8 @@ void __init sched_init(void)
 				&per_cpu(init_cfs_rq, i),
 				&per_cpu(init_sched_entity, i), i, 1);
 
-		init_task_group.rt_ratio = sysctl_sched_rt_ratio; /* XXX */
+		init_task_group.rt_runtime =
+			sysctl_sched_rt_runtime * NSEC_PER_USEC;
 		INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
 		init_tg_rt_entry(rq, &init_task_group,
 				&per_cpu(init_rt_rq, i),
@@ -7583,7 +7586,7 @@ struct task_group *sched_create_group(void)
 		goto err;
 
 	tg->shares = NICE_0_LOAD;
-	tg->rt_ratio = 0; /* XXX */
+	tg->rt_runtime = 0;
 
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
@@ -7785,30 +7788,76 @@ unsigned long sched_group_shares(struct task_group *tg)
 }
 
 /*
- * Ensure the total rt_ratio <= sysctl_sched_rt_ratio
+ * Ensure that the real time constraints are schedulable.
  */
-int sched_group_set_rt_ratio(struct task_group *tg, unsigned long rt_ratio)
+static DEFINE_MUTEX(rt_constraints_mutex);
+
+static unsigned long to_ratio(u64 period, u64 runtime)
+{
+	if (runtime == RUNTIME_INF)
+		return 1ULL << 16;
+
+	runtime *= (1ULL << 16);
+	div64_64(runtime, period);
+	return runtime;
+}
+
+static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 {
 	struct task_group *tgi;
 	unsigned long total = 0;
+	unsigned long global_ratio =
+		to_ratio(sysctl_sched_rt_period,
+			 sysctl_sched_rt_runtime < 0 ?
+				RUNTIME_INF : sysctl_sched_rt_runtime);
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(tgi, &task_groups, list)
-		total += tgi->rt_ratio;
-	rcu_read_unlock();
+	list_for_each_entry_rcu(tgi, &task_groups, list) {
+		if (tgi == tg)
+			continue;
 
-	if (total + rt_ratio - tg->rt_ratio > sysctl_sched_rt_ratio)
-		return -EINVAL;
+		total += to_ratio(period, tgi->rt_runtime);
+	}
+	rcu_read_unlock();
 
-	tg->rt_ratio = rt_ratio;
-	return 0;
+	return total + to_ratio(period, runtime) < global_ratio;
 }
 
-unsigned long sched_group_rt_ratio(struct task_group *tg)
+int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
 {
-	return tg->rt_ratio;
+	u64 rt_runtime, rt_period;
+	int err = 0;
+
+	rt_period = sysctl_sched_rt_period * NSEC_PER_USEC;
+	rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
+	if (rt_runtime_us == -1)
+		rt_runtime = rt_period;
+
+	mutex_lock(&rt_constraints_mutex);
+	if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
+		err = -EINVAL;
+		goto unlock;
+	}
+	if (rt_runtime_us == -1)
+		rt_runtime = RUNTIME_INF;
+	tg->rt_runtime = rt_runtime;
+ unlock:
+	mutex_unlock(&rt_constraints_mutex);
+
+	return err;
 }
 
+long sched_group_rt_runtime(struct task_group *tg)
+{
+	u64 rt_runtime_us;
+
+	if (tg->rt_runtime == RUNTIME_INF)
+		return -1;
+
+	rt_runtime_us = tg->rt_runtime;
+	do_div(rt_runtime_us, NSEC_PER_USEC);
+	return rt_runtime_us;
+}
 #endif	/* CONFIG_FAIR_GROUP_SCHED */
 
 #ifdef CONFIG_FAIR_CGROUP_SCHED
@@ -7884,17 +7933,49 @@ static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft)
 	return (u64) tg->shares;
 }
 
-static int cpu_rt_ratio_write_uint(struct cgroup *cgrp, struct cftype *cftype,
-		u64 rt_ratio_val)
+static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
+				struct file *file,
+				const char __user *userbuf,
+				size_t nbytes, loff_t *unused_ppos)
 {
-	return sched_group_set_rt_ratio(cgroup_tg(cgrp), rt_ratio_val);
+	char buffer[64];
+	int retval = 0;
+	s64 val;
+	char *end;
+
+	if (!nbytes)
+		return -EINVAL;
+	if (nbytes >= sizeof(buffer))
+		return -E2BIG;
+	if (copy_from_user(buffer, userbuf, nbytes))
+		return -EFAULT;
+
+	buffer[nbytes] = 0;	/* nul-terminate */
+
+	/* strip newline if necessary */
+	if (nbytes && (buffer[nbytes-1] == '\n'))
+		buffer[nbytes-1] = 0;
+	val = simple_strtoll(buffer, &end, 0);
+	if (*end)
+		return -EINVAL;
+
+	/* Pass to subsystem */
+	retval = sched_group_set_rt_runtime(cgroup_tg(cgrp), val);
+	if (!retval)
+		retval = nbytes;
+	return retval;
 }
 
-static u64 cpu_rt_ratio_read_uint(struct cgroup *cgrp, struct cftype *cft)
+static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft,
+				   struct file *file,
+				   char __user *buf, size_t nbytes,
+				   loff_t *ppos)
 {
-	struct task_group *tg = cgroup_tg(cgrp);
+	char tmp[64];
+	long val = sched_group_rt_runtime(cgroup_tg(cgrp));
+	int len = sprintf(tmp, "%ld\n", val);
 
-	return (u64) tg->rt_ratio;
+	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
 }
 
 static struct cftype cpu_files[] = {
@@ -7904,9 +7985,9 @@ static struct cftype cpu_files[] = {
 		.write_uint = cpu_shares_write_uint,
 	},
 	{
-		.name = "rt_ratio",
-		.read_uint = cpu_rt_ratio_read_uint,
-		.write_uint = cpu_rt_ratio_write_uint,
+		.name = "rt_runtime_us",
+		.read = cpu_rt_runtime_read,
+		.write = cpu_rt_runtime_write,
 	},
 };
 
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 8d4269381239..35825b28e429 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -57,12 +57,12 @@ static inline int on_rt_rq(struct sched_rt_entity *rt_se)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
+static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 {
 	if (!rt_rq->tg)
-		return SCHED_RT_FRAC;
+		return RUNTIME_INF;
 
-	return rt_rq->tg->rt_ratio;
+	return rt_rq->tg->rt_runtime;
 }
 
 #define for_each_leaf_rt_rq(rt_rq, rq) \
68#define for_each_leaf_rt_rq(rt_rq, rq) \ 68#define for_each_leaf_rt_rq(rt_rq, rq) \
@@ -89,7 +89,7 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
 static void enqueue_rt_entity(struct sched_rt_entity *rt_se);
 static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
 
-static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
+static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
 	struct sched_rt_entity *rt_se = rt_rq->rt_se;
 
@@ -102,7 +102,7 @@ static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
 	}
 }
 
-static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
+static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 {
 	struct sched_rt_entity *rt_se = rt_rq->rt_se;
 
@@ -129,9 +129,12 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se)
 
 #else
 
-static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
+static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 {
-	return sysctl_sched_rt_ratio;
+	if (sysctl_sched_rt_runtime == -1)
+		return RUNTIME_INF;
+
+	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
 }
 
 #define for_each_leaf_rt_rq(rt_rq, rq) \
136 139
137#define for_each_leaf_rt_rq(rt_rq, rq) \ 140#define for_each_leaf_rt_rq(rt_rq, rq) \
@@ -158,11 +161,11 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
 	return NULL;
 }
 
-static inline void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
+static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
 }
 
-static inline void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
+static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 {
 }
 
@@ -184,28 +187,24 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se)
 	return rt_task_of(rt_se)->prio;
 }
 
-static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq)
+static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 {
-	unsigned int rt_ratio = sched_rt_ratio(rt_rq);
-	u64 period, ratio;
+	u64 runtime = sched_rt_runtime(rt_rq);
 
-	if (rt_ratio == SCHED_RT_FRAC)
+	if (runtime == RUNTIME_INF)
 		return 0;
 
 	if (rt_rq->rt_throttled)
 		return rt_rq_throttled(rt_rq);
 
-	period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
-	ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
-
-	if (rt_rq->rt_time > ratio) {
+	if (rt_rq->rt_time > runtime) {
 		struct rq *rq = rq_of_rt_rq(rt_rq);
 
 		rq->rt_throttled = 1;
 		rt_rq->rt_throttled = 1;
 
 		if (rt_rq_throttled(rt_rq)) {
-			sched_rt_ratio_dequeue(rt_rq);
+			sched_rt_rq_dequeue(rt_rq);
 			return 1;
 		}
 	}
@@ -219,17 +218,16 @@ static void update_sched_rt_period(struct rq *rq)
 	u64 period;
 
 	while (rq->clock > rq->rt_period_expire) {
-		period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
+		period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
 		rq->rt_period_expire += period;
 
 		for_each_leaf_rt_rq(rt_rq, rq) {
-			unsigned long rt_ratio = sched_rt_ratio(rt_rq);
-			u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
+			u64 runtime = sched_rt_runtime(rt_rq);
 
-			rt_rq->rt_time -= min(rt_rq->rt_time, ratio);
-			if (rt_rq->rt_throttled) {
+			rt_rq->rt_time -= min(rt_rq->rt_time, runtime);
+			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
 				rt_rq->rt_throttled = 0;
-				sched_rt_ratio_enqueue(rt_rq);
+				sched_rt_rq_enqueue(rt_rq);
 			}
 		}
 
@@ -262,12 +260,7 @@ static void update_curr_rt(struct rq *rq)
 		cpuacct_charge(curr, delta_exec);
 
 	rt_rq->rt_time += delta_exec;
-	/*
-	 * might make it a tad more accurate:
-	 *
-	 * update_sched_rt_period(rq);
-	 */
-	if (sched_rt_ratio_exceeded(rt_rq))
+	if (sched_rt_runtime_exceeded(rt_rq))
 		resched_task(curr);
 }
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d41ef6b4cf72..924c674b76ea 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -311,22 +311,6 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "sched_rt_period_ms",
-		.data		= &sysctl_sched_rt_period,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "sched_rt_ratio",
-		.data		= &sysctl_sched_rt_ratio,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
 #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
 	{
 		.ctl_name	= CTL_UNNUMBERED,
@@ -348,6 +332,22 @@ static struct ctl_table kern_table[] = {
 #endif
 	{
 		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_rt_period_us",
+		.data		= &sysctl_sched_rt_period,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_rt_runtime_us",
+		.data		= &sysctl_sched_rt_runtime,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "sched_compat_yield",
 		.data		= &sysctl_sched_compat_yield,
 		.maxlen		= sizeof(unsigned int),
diff --git a/kernel/user.c b/kernel/user.c
index 7d7900c5a1fd..9f6d471bfd03 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -164,9 +164,37 @@ static ssize_t cpu_shares_store(struct kobject *kobj,
 static struct kobj_attribute cpu_share_attr =
 	__ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store);
 
+static ssize_t cpu_rt_runtime_show(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   char *buf)
+{
+	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
+
+	return sprintf(buf, "%lu\n", sched_group_rt_runtime(up->tg));
+}
+
+static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
+				    struct kobj_attribute *attr,
+				    const char *buf, size_t size)
+{
+	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
+	unsigned long rt_runtime;
+	int rc;
+
+	sscanf(buf, "%lu", &rt_runtime);
+
+	rc = sched_group_set_rt_runtime(up->tg, rt_runtime);
+
+	return (rc ? rc : size);
+}
+
+static struct kobj_attribute cpu_rt_runtime_attr =
+	__ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store);
+
 /* default attributes per uid directory */
 static struct attribute *uids_attributes[] = {
 	&cpu_share_attr.attr,
+	&cpu_rt_runtime_attr.attr,
 	NULL
 };
 