author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-13 11:22:41 -0500
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-13 11:22:41 -0500
commit | 3174ffaa939d8f771019f83761c668b1d5c1973b (patch)
tree | bcc73b265f225c33983d8935250f61b8ccadd51e
parent | d7ab95f8c54aed896ad739f261f79ed945472aca (diff)
parent | b68aa2300cabeb96801369a4bb37a4f19f59ed84 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched
* git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched:
  sched: rt-group: refuse unrunnable tasks
sched: rt-group: clean up the ifdeffery
sched: rt-group: make rt groups scheduling configurable
sched: rt-group: interface
sched: rt-group: deal with PI
sched: fix incorrect irq lock usage in normalize_rt_tasks()
sched: fair-group: separate tg->shares from task_group_lock
hrtimer: more hrtimer_init_sleeper() fallout.
-rw-r--r-- | Documentation/sched-rt-group.txt |  59
-rw-r--r-- | include/linux/cgroup_subsys.h    |   2
-rw-r--r-- | include/linux/sched.h            |  18
-rw-r--r-- | init/Kconfig                     |  23
-rw-r--r-- | kernel/rtmutex.c                 |   5
-rw-r--r-- | kernel/sched.c                   | 494
-rw-r--r-- | kernel/sched_rt.c                | 102
-rw-r--r-- | kernel/sysctl.c                  |  32
-rw-r--r-- | kernel/user.c                    |  50
9 files changed, 576 insertions, 209 deletions
diff --git a/Documentation/sched-rt-group.txt b/Documentation/sched-rt-group.txt
new file mode 100644
index 000000000000..1c6332f4543c
--- /dev/null
+++ b/Documentation/sched-rt-group.txt
@@ -0,0 +1,59 @@ | |||
1 | |||
2 | |||
3 | Real-Time group scheduling. | ||
4 | |||
5 | The problem space: | ||
6 | |||
7 | In order to schedule multiple groups of realtime tasks each group must | ||
8 | be assigned a fixed portion of the CPU time available. Without a minimum | ||
9 | guarantee a realtime group can obviously fall short. A fuzzy upper limit | ||
10 | is of no use since it cannot be relied upon. Which leaves us with just | ||
11 | the single fixed portion. | ||
12 | |||
13 | CPU time is divided by means of specifying how much time can be spent | ||
14 | running in a given period. Say a frame fixed realtime renderer must | ||
15 | deliver 25 frames a second, which yields a period of 0.04s. Now say | ||
16 | it will also have to play some music and respond to input, leaving it | ||
17 | with around 80% for the graphics. We can then give this group a runtime | ||
18 | of 0.8 * 0.04s = 0.032s. | ||
19 | |||
20 | This way the graphics group will have a 0.04s period with a 0.032s runtime | ||
21 | limit. | ||
22 | |||
23 | Now if the audio thread needs to refill the DMA buffer every 0.005s, but | ||
24 | needs only about 3% CPU time to do so, it can do with a runtime of 0.03 * 0.005s | ||
25 | = 0.00015s. | ||
26 | |||
27 | |||
28 | The Interface: | ||
29 | |||
30 | system wide: | ||
31 | |||
32 | /proc/sys/kernel/sched_rt_period_us | ||
33 | /proc/sys/kernel/sched_rt_runtime_us | ||
34 | |||
35 | CONFIG_FAIR_USER_SCHED | ||
36 | |||
37 | /sys/kernel/uids/<uid>/cpu_rt_runtime_us | ||
38 | |||
39 | or | ||
40 | |||
41 | CONFIG_FAIR_CGROUP_SCHED | ||
42 | |||
43 | /cgroup/<cgroup>/cpu.rt_runtime_us | ||
44 | |||
45 | [ time is specified in us because the interface is s32; this gives an | ||
46 | operating range of ~35m to 1us ] | ||
47 | |||
48 | The period takes values in [ 1, INT_MAX ], runtime in [ -1, INT_MAX - 1 ]. | ||
49 | |||
50 | A runtime of -1 specifies runtime == period, ie. no limit. | ||
51 | |||
52 | New groups get the period from /proc/sys/kernel/sched_rt_period_us and | ||
53 | a runtime of 0. | ||
54 | |||
55 | Settings are constrained to: | ||
56 | |||
57 | \Sum_{i} runtime_{i} / global_period <= global_runtime / global_period | ||
58 | |||
59 | in order to keep the configuration schedulable. | ||
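The arithmetic in the new document is easy to check with a few lines of plain C. The sketch below is illustrative only (the helper name is made up; the periods and utilization figures come from the worked example above, not from the patch itself): it derives the two runtimes and confirms that the combined utilization stays under the 0.95 that the global runtime/period defaults allow.

    /* Worked example from Documentation/sched-rt-group.txt, as plain C. */
    #include <stdio.h>

    /* runtime = utilization * period, both in microseconds (illustrative) */
    static long runtime_us(long period_us, double utilization)
    {
        return (long)(utilization * period_us);
    }

    int main(void)
    {
        long gfx_runtime   = runtime_us(40000, 0.80); /* 25 fps frame, 80% CPU */
        long audio_runtime = runtime_us(5000,  0.03); /* 5 ms DMA refill, 3% CPU */
        double total = 0.80 + 0.03;                   /* summed utilization */

        printf("graphics: %ld us / 40000 us, audio: %ld us / 5000 us\n",
               gfx_runtime, audio_runtime);
        printf("total utilization %.2f <= 0.95 (global runtime/period)\n", total);
        return 0;
    }

Running it prints 32000 us and 150 us, matching the 0.032s and 0.00015s figures in the text.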
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 228235c5ae53..ac6aad98b607 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -25,7 +25,7 @@ SUBSYS(ns) | |||
25 | 25 | ||
26 | /* */ | 26 | /* */ |
27 | 27 | ||
28 | #ifdef CONFIG_FAIR_CGROUP_SCHED | 28 | #ifdef CONFIG_CGROUP_SCHED |
29 | SUBSYS(cpu_cgroup) | 29 | SUBSYS(cpu_cgroup) |
30 | #endif | 30 | #endif |
31 | 31 | ||
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 00e144117326..b9bb313fe1ae 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -590,7 +590,7 @@ struct user_struct { | |||
590 | struct hlist_node uidhash_node; | 590 | struct hlist_node uidhash_node; |
591 | uid_t uid; | 591 | uid_t uid; |
592 | 592 | ||
593 | #ifdef CONFIG_FAIR_USER_SCHED | 593 | #ifdef CONFIG_USER_SCHED |
594 | struct task_group *tg; | 594 | struct task_group *tg; |
595 | #ifdef CONFIG_SYSFS | 595 | #ifdef CONFIG_SYSFS |
596 | struct kobject kobj; | 596 | struct kobject kobj; |
@@ -973,7 +973,7 @@ struct sched_rt_entity { | |||
973 | unsigned long timeout; | 973 | unsigned long timeout; |
974 | int nr_cpus_allowed; | 974 | int nr_cpus_allowed; |
975 | 975 | ||
976 | #ifdef CONFIG_FAIR_GROUP_SCHED | 976 | #ifdef CONFIG_RT_GROUP_SCHED |
977 | struct sched_rt_entity *parent; | 977 | struct sched_rt_entity *parent; |
978 | /* rq on which this entity is (to be) queued: */ | 978 | /* rq on which this entity is (to be) queued: */ |
979 | struct rt_rq *rt_rq; | 979 | struct rt_rq *rt_rq; |
@@ -1541,8 +1541,6 @@ extern unsigned int sysctl_sched_child_runs_first; | |||
1541 | extern unsigned int sysctl_sched_features; | 1541 | extern unsigned int sysctl_sched_features; |
1542 | extern unsigned int sysctl_sched_migration_cost; | 1542 | extern unsigned int sysctl_sched_migration_cost; |
1543 | extern unsigned int sysctl_sched_nr_migrate; | 1543 | extern unsigned int sysctl_sched_nr_migrate; |
1544 | extern unsigned int sysctl_sched_rt_period; | ||
1545 | extern unsigned int sysctl_sched_rt_ratio; | ||
1546 | #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) | 1544 | #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) |
1547 | extern unsigned int sysctl_sched_min_bal_int_shares; | 1545 | extern unsigned int sysctl_sched_min_bal_int_shares; |
1548 | extern unsigned int sysctl_sched_max_bal_int_shares; | 1546 | extern unsigned int sysctl_sched_max_bal_int_shares; |
@@ -1552,6 +1550,8 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, | |||
1552 | struct file *file, void __user *buffer, size_t *length, | 1550 | struct file *file, void __user *buffer, size_t *length, |
1553 | loff_t *ppos); | 1551 | loff_t *ppos); |
1554 | #endif | 1552 | #endif |
1553 | extern unsigned int sysctl_sched_rt_period; | ||
1554 | extern int sysctl_sched_rt_runtime; | ||
1555 | 1555 | ||
1556 | extern unsigned int sysctl_sched_compat_yield; | 1556 | extern unsigned int sysctl_sched_compat_yield; |
1557 | 1557 | ||
@@ -2027,16 +2027,22 @@ extern int sched_mc_power_savings, sched_smt_power_savings; | |||
2027 | 2027 | ||
2028 | extern void normalize_rt_tasks(void); | 2028 | extern void normalize_rt_tasks(void); |
2029 | 2029 | ||
2030 | #ifdef CONFIG_FAIR_GROUP_SCHED | 2030 | #ifdef CONFIG_GROUP_SCHED |
2031 | 2031 | ||
2032 | extern struct task_group init_task_group; | 2032 | extern struct task_group init_task_group; |
2033 | 2033 | ||
2034 | extern struct task_group *sched_create_group(void); | 2034 | extern struct task_group *sched_create_group(void); |
2035 | extern void sched_destroy_group(struct task_group *tg); | 2035 | extern void sched_destroy_group(struct task_group *tg); |
2036 | extern void sched_move_task(struct task_struct *tsk); | 2036 | extern void sched_move_task(struct task_struct *tsk); |
2037 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
2037 | extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); | 2038 | extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); |
2038 | extern unsigned long sched_group_shares(struct task_group *tg); | 2039 | extern unsigned long sched_group_shares(struct task_group *tg); |
2039 | 2040 | #endif | |
2041 | #ifdef CONFIG_RT_GROUP_SCHED | ||
2042 | extern int sched_group_set_rt_runtime(struct task_group *tg, | ||
2043 | long rt_runtime_us); | ||
2044 | extern long sched_group_rt_runtime(struct task_group *tg); | ||
2045 | #endif | ||
2040 | #endif | 2046 | #endif |
2041 | 2047 | ||
2042 | #ifdef CONFIG_TASK_XACCT | 2048 | #ifdef CONFIG_TASK_XACCT |
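The header now exposes a per-class control surface: shares under CONFIG_FAIR_GROUP_SCHED, a runtime budget under CONFIG_RT_GROUP_SCHED, plus the two global sysctls. A hedged in-kernel sketch of how a caller might use the new declarations (the wrapper function is hypothetical; the symbols are the ones declared above):

    #include <linux/kernel.h>
    #include <linux/sched.h>

    #ifdef CONFIG_RT_GROUP_SCHED
    /* Hypothetical caller: hand a group half of the global rt budget. */
    static int give_group_half_rt_budget(struct task_group *tg)
    {
        long half_us = sysctl_sched_rt_runtime / 2;   /* budget is in us */
        int ret;

        if (sysctl_sched_rt_runtime < 0)
            half_us = -1;             /* -1 means runtime == period, no limit */

        ret = sched_group_set_rt_runtime(tg, half_us);
        if (!ret)
            pr_debug("group rt runtime now %ld us\n",
                     sched_group_rt_runtime(tg));
        return ret;   /* -EINVAL if the sum over all groups would overcommit */
    }
    #endif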
diff --git a/init/Kconfig b/init/Kconfig
index 824d48cb67bf..dcef8b55011a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -311,25 +311,36 @@ config CPUSETS | |||
311 | 311 | ||
312 | Say N if unsure. | 312 | Say N if unsure. |
313 | 313 | ||
314 | config FAIR_GROUP_SCHED | 314 | config GROUP_SCHED |
315 | bool "Fair group CPU scheduler" | 315 | bool "Group CPU scheduler" |
316 | default y | 316 | default y |
317 | help | 317 | help |
318 | This feature lets CPU scheduler recognize task groups and control CPU | 318 | This feature lets CPU scheduler recognize task groups and control CPU |
319 | bandwidth allocation to such task groups. | 319 | bandwidth allocation to such task groups. |
320 | 320 | ||
321 | config FAIR_GROUP_SCHED | ||
322 | bool "Group scheduling for SCHED_OTHER" | ||
323 | depends on GROUP_SCHED | ||
324 | default y | ||
325 | |||
326 | config RT_GROUP_SCHED | ||
327 | bool "Group scheduling for SCHED_RR/FIFO" | ||
328 | depends on EXPERIMENTAL | ||
329 | depends on GROUP_SCHED | ||
330 | default n | ||
331 | |||
321 | choice | 332 | choice |
322 | depends on FAIR_GROUP_SCHED | 333 | depends on GROUP_SCHED |
323 | prompt "Basis for grouping tasks" | 334 | prompt "Basis for grouping tasks" |
324 | default FAIR_USER_SCHED | 335 | default USER_SCHED |
325 | 336 | ||
326 | config FAIR_USER_SCHED | 337 | config USER_SCHED |
327 | bool "user id" | 338 | bool "user id" |
328 | help | 339 | help |
329 | This option will choose userid as the basis for grouping | 340 | This option will choose userid as the basis for grouping |
330 | tasks, thus providing equal CPU bandwidth to each user. | 341 | tasks, thus providing equal CPU bandwidth to each user. |
331 | 342 | ||
332 | config FAIR_CGROUP_SCHED | 343 | config CGROUP_SCHED |
333 | bool "Control groups" | 344 | bool "Control groups" |
334 | depends on CGROUPS | 345 | depends on CGROUPS |
335 | help | 346 | help |
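The Kconfig split is easier to follow next to the code that consumes it. The skeleton below is illustrative only; it maps which kind of code the rest of the patch places under which of the new symbols, expressed as the preprocessor conditionals the C files use:

    /* Illustrative map of the new config hierarchy (comments only). */
    #ifdef CONFIG_GROUP_SCHED
    /* shared task_group infrastructure: task_groups list, create/destroy/move */
    #endif

    #ifdef CONFIG_FAIR_GROUP_SCHED      /* depends on GROUP_SCHED */
    /* per-group CFS runqueues and the cpu.shares knob */
    #endif

    #ifdef CONFIG_RT_GROUP_SCHED        /* depends on GROUP_SCHED, EXPERIMENTAL */
    /* per-group rt runqueues and the cpu.rt_runtime_us knob */
    #endif

    /* grouping basis: exactly one of CONFIG_USER_SCHED / CONFIG_CGROUP_SCHED */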
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 0deef71ff8d2..6522ae5b14a2 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -630,9 +630,12 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
630 | set_current_state(state); | 630 | set_current_state(state); |
631 | 631 | ||
632 | /* Setup the timer, when timeout != NULL */ | 632 | /* Setup the timer, when timeout != NULL */ |
633 | if (unlikely(timeout)) | 633 | if (unlikely(timeout)) { |
634 | hrtimer_start(&timeout->timer, timeout->timer.expires, | 634 | hrtimer_start(&timeout->timer, timeout->timer.expires, |
635 | HRTIMER_MODE_ABS); | 635 | HRTIMER_MODE_ABS); |
636 | if (!hrtimer_active(&timeout->timer)) | ||
637 | timeout->task = NULL; | ||
638 | } | ||
636 | 639 | ||
637 | for (;;) { | 640 | for (;;) { |
638 | /* Try to acquire the lock: */ | 641 | /* Try to acquire the lock: */ |
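The rtmutex hunk is small, but the reasoning deserves spelling out: with an absolute expiry that already lies in the past, hrtimer_start() can complete with the timer no longer active, so the sleeper would never be woken by it later. A minimal sketch of the pattern (same calls as the hunk above, wrapped in a hypothetical helper):

    #include <linux/hrtimer.h>

    /* Hypothetical helper: arm a sleeper and cope with an already-past expiry. */
    static void arm_abs_sleeper(struct hrtimer_sleeper *to)
    {
        hrtimer_start(&to->timer, to->timer.expires, HRTIMER_MODE_ABS);
        if (!hrtimer_active(&to->timer))
            to->task = NULL;   /* expired immediately: report it as a timeout */
    }

The wait loop then sees a NULL to->task and treats the attempt as timed out instead of sleeping indefinitely.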
diff --git a/kernel/sched.c b/kernel/sched.c
index 3eedd5260907..f28f19e65b59 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -155,7 +155,7 @@ struct rt_prio_array { | |||
155 | struct list_head queue[MAX_RT_PRIO]; | 155 | struct list_head queue[MAX_RT_PRIO]; |
156 | }; | 156 | }; |
157 | 157 | ||
158 | #ifdef CONFIG_FAIR_GROUP_SCHED | 158 | #ifdef CONFIG_GROUP_SCHED |
159 | 159 | ||
160 | #include <linux/cgroup.h> | 160 | #include <linux/cgroup.h> |
161 | 161 | ||
@@ -165,19 +165,16 @@ static LIST_HEAD(task_groups); | |||
165 | 165 | ||
166 | /* task group related information */ | 166 | /* task group related information */ |
167 | struct task_group { | 167 | struct task_group { |
168 | #ifdef CONFIG_FAIR_CGROUP_SCHED | 168 | #ifdef CONFIG_CGROUP_SCHED |
169 | struct cgroup_subsys_state css; | 169 | struct cgroup_subsys_state css; |
170 | #endif | 170 | #endif |
171 | |||
172 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
171 | /* schedulable entities of this group on each cpu */ | 173 | /* schedulable entities of this group on each cpu */ |
172 | struct sched_entity **se; | 174 | struct sched_entity **se; |
173 | /* runqueue "owned" by this group on each cpu */ | 175 | /* runqueue "owned" by this group on each cpu */ |
174 | struct cfs_rq **cfs_rq; | 176 | struct cfs_rq **cfs_rq; |
175 | 177 | ||
176 | struct sched_rt_entity **rt_se; | ||
177 | struct rt_rq **rt_rq; | ||
178 | |||
179 | unsigned int rt_ratio; | ||
180 | |||
181 | /* | 178 | /* |
182 | * shares assigned to a task group governs how much of cpu bandwidth | 179 | * shares assigned to a task group governs how much of cpu bandwidth |
183 | * is allocated to the group. The more shares a group has, the more is | 180 | * is allocated to the group. The more shares a group has, the more is |
@@ -213,33 +210,46 @@ struct task_group { | |||
213 | * | 210 | * |
214 | */ | 211 | */ |
215 | unsigned long shares; | 212 | unsigned long shares; |
213 | #endif | ||
214 | |||
215 | #ifdef CONFIG_RT_GROUP_SCHED | ||
216 | struct sched_rt_entity **rt_se; | ||
217 | struct rt_rq **rt_rq; | ||
218 | |||
219 | u64 rt_runtime; | ||
220 | #endif | ||
216 | 221 | ||
217 | struct rcu_head rcu; | 222 | struct rcu_head rcu; |
218 | struct list_head list; | 223 | struct list_head list; |
219 | }; | 224 | }; |
220 | 225 | ||
226 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
221 | /* Default task group's sched entity on each cpu */ | 227 | /* Default task group's sched entity on each cpu */ |
222 | static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); | 228 | static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); |
223 | /* Default task group's cfs_rq on each cpu */ | 229 | /* Default task group's cfs_rq on each cpu */ |
224 | static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; | 230 | static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; |
225 | 231 | ||
226 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); | ||
227 | static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; | ||
228 | |||
229 | static struct sched_entity *init_sched_entity_p[NR_CPUS]; | 232 | static struct sched_entity *init_sched_entity_p[NR_CPUS]; |
230 | static struct cfs_rq *init_cfs_rq_p[NR_CPUS]; | 233 | static struct cfs_rq *init_cfs_rq_p[NR_CPUS]; |
234 | #endif | ||
235 | |||
236 | #ifdef CONFIG_RT_GROUP_SCHED | ||
237 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); | ||
238 | static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; | ||
231 | 239 | ||
232 | static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS]; | 240 | static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS]; |
233 | static struct rt_rq *init_rt_rq_p[NR_CPUS]; | 241 | static struct rt_rq *init_rt_rq_p[NR_CPUS]; |
242 | #endif | ||
234 | 243 | ||
235 | /* task_group_mutex serializes add/remove of task groups and also changes to | 244 | /* task_group_lock serializes add/remove of task groups and also changes to |
236 | * a task group's cpu shares. | 245 | * a task group's cpu shares. |
237 | */ | 246 | */ |
238 | static DEFINE_MUTEX(task_group_mutex); | 247 | static DEFINE_SPINLOCK(task_group_lock); |
239 | 248 | ||
240 | /* doms_cur_mutex serializes access to doms_cur[] array */ | 249 | /* doms_cur_mutex serializes access to doms_cur[] array */ |
241 | static DEFINE_MUTEX(doms_cur_mutex); | 250 | static DEFINE_MUTEX(doms_cur_mutex); |
242 | 251 | ||
252 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
243 | #ifdef CONFIG_SMP | 253 | #ifdef CONFIG_SMP |
244 | /* kernel thread that runs rebalance_shares() periodically */ | 254 | /* kernel thread that runs rebalance_shares() periodically */ |
245 | static struct task_struct *lb_monitor_task; | 255 | static struct task_struct *lb_monitor_task; |
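Pieced together, the hunks above leave struct task_group with the following shape (abridged sketch; fields, types and #ifdef labels are taken directly from the diff):

    struct task_group {
    #ifdef CONFIG_CGROUP_SCHED
        struct cgroup_subsys_state css;
    #endif

    #ifdef CONFIG_FAIR_GROUP_SCHED
        struct sched_entity **se;       /* per-cpu schedulable entities */
        struct cfs_rq **cfs_rq;         /* per-cpu CFS runqueues */
        unsigned long shares;           /* CFS bandwidth weight */
    #endif

    #ifdef CONFIG_RT_GROUP_SCHED
        struct sched_rt_entity **rt_se; /* per-cpu rt entities */
        struct rt_rq **rt_rq;           /* per-cpu rt runqueues */
        u64 rt_runtime;                 /* rt budget per period, in ns */
    #endif

        struct rcu_head rcu;
        struct list_head list;
    };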
@@ -248,35 +258,40 @@ static int load_balance_monitor(void *unused); | |||
248 | 258 | ||
249 | static void set_se_shares(struct sched_entity *se, unsigned long shares); | 259 | static void set_se_shares(struct sched_entity *se, unsigned long shares); |
250 | 260 | ||
261 | #ifdef CONFIG_USER_SCHED | ||
262 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) | ||
263 | #else | ||
264 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD | ||
265 | #endif | ||
266 | |||
267 | #define MIN_GROUP_SHARES 2 | ||
268 | |||
269 | static int init_task_group_load = INIT_TASK_GROUP_LOAD; | ||
270 | #endif | ||
271 | |||
251 | /* Default task group. | 272 | /* Default task group. |
252 | * Every task in system belong to this group at bootup. | 273 | * Every task in system belong to this group at bootup. |
253 | */ | 274 | */ |
254 | struct task_group init_task_group = { | 275 | struct task_group init_task_group = { |
276 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
255 | .se = init_sched_entity_p, | 277 | .se = init_sched_entity_p, |
256 | .cfs_rq = init_cfs_rq_p, | 278 | .cfs_rq = init_cfs_rq_p, |
279 | #endif | ||
257 | 280 | ||
281 | #ifdef CONFIG_RT_GROUP_SCHED | ||
258 | .rt_se = init_sched_rt_entity_p, | 282 | .rt_se = init_sched_rt_entity_p, |
259 | .rt_rq = init_rt_rq_p, | 283 | .rt_rq = init_rt_rq_p, |
260 | }; | ||
261 | |||
262 | #ifdef CONFIG_FAIR_USER_SCHED | ||
263 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) | ||
264 | #else | ||
265 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD | ||
266 | #endif | 284 | #endif |
267 | 285 | }; | |
268 | #define MIN_GROUP_SHARES 2 | ||
269 | |||
270 | static int init_task_group_load = INIT_TASK_GROUP_LOAD; | ||
271 | 286 | ||
272 | /* return group to which a task belongs */ | 287 | /* return group to which a task belongs */ |
273 | static inline struct task_group *task_group(struct task_struct *p) | 288 | static inline struct task_group *task_group(struct task_struct *p) |
274 | { | 289 | { |
275 | struct task_group *tg; | 290 | struct task_group *tg; |
276 | 291 | ||
277 | #ifdef CONFIG_FAIR_USER_SCHED | 292 | #ifdef CONFIG_USER_SCHED |
278 | tg = p->user->tg; | 293 | tg = p->user->tg; |
279 | #elif defined(CONFIG_FAIR_CGROUP_SCHED) | 294 | #elif defined(CONFIG_CGROUP_SCHED) |
280 | tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), | 295 | tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), |
281 | struct task_group, css); | 296 | struct task_group, css); |
282 | #else | 297 | #else |
@@ -288,21 +303,15 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
288 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ | 303 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ |
289 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) | 304 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) |
290 | { | 305 | { |
306 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
291 | p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; | 307 | p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; |
292 | p->se.parent = task_group(p)->se[cpu]; | 308 | p->se.parent = task_group(p)->se[cpu]; |
309 | #endif | ||
293 | 310 | ||
311 | #ifdef CONFIG_RT_GROUP_SCHED | ||
294 | p->rt.rt_rq = task_group(p)->rt_rq[cpu]; | 312 | p->rt.rt_rq = task_group(p)->rt_rq[cpu]; |
295 | p->rt.parent = task_group(p)->rt_se[cpu]; | 313 | p->rt.parent = task_group(p)->rt_se[cpu]; |
296 | } | 314 | #endif |
297 | |||
298 | static inline void lock_task_group_list(void) | ||
299 | { | ||
300 | mutex_lock(&task_group_mutex); | ||
301 | } | ||
302 | |||
303 | static inline void unlock_task_group_list(void) | ||
304 | { | ||
305 | mutex_unlock(&task_group_mutex); | ||
306 | } | 315 | } |
307 | 316 | ||
308 | static inline void lock_doms_cur(void) | 317 | static inline void lock_doms_cur(void) |
@@ -318,12 +327,10 @@ static inline void unlock_doms_cur(void) | |||
318 | #else | 327 | #else |
319 | 328 | ||
320 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } | 329 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } |
321 | static inline void lock_task_group_list(void) { } | ||
322 | static inline void unlock_task_group_list(void) { } | ||
323 | static inline void lock_doms_cur(void) { } | 330 | static inline void lock_doms_cur(void) { } |
324 | static inline void unlock_doms_cur(void) { } | 331 | static inline void unlock_doms_cur(void) { } |
325 | 332 | ||
326 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 333 | #endif /* CONFIG_GROUP_SCHED */ |
327 | 334 | ||
328 | /* CFS-related fields in a runqueue */ | 335 | /* CFS-related fields in a runqueue */ |
329 | struct cfs_rq { | 336 | struct cfs_rq { |
@@ -363,7 +370,7 @@ struct cfs_rq { | |||
363 | struct rt_rq { | 370 | struct rt_rq { |
364 | struct rt_prio_array active; | 371 | struct rt_prio_array active; |
365 | unsigned long rt_nr_running; | 372 | unsigned long rt_nr_running; |
366 | #if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED | 373 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED |
367 | int highest_prio; /* highest queued rt task prio */ | 374 | int highest_prio; /* highest queued rt task prio */ |
368 | #endif | 375 | #endif |
369 | #ifdef CONFIG_SMP | 376 | #ifdef CONFIG_SMP |
@@ -373,7 +380,9 @@ struct rt_rq { | |||
373 | int rt_throttled; | 380 | int rt_throttled; |
374 | u64 rt_time; | 381 | u64 rt_time; |
375 | 382 | ||
376 | #ifdef CONFIG_FAIR_GROUP_SCHED | 383 | #ifdef CONFIG_RT_GROUP_SCHED |
384 | unsigned long rt_nr_boosted; | ||
385 | |||
377 | struct rq *rq; | 386 | struct rq *rq; |
378 | struct list_head leaf_rt_rq_list; | 387 | struct list_head leaf_rt_rq_list; |
379 | struct task_group *tg; | 388 | struct task_group *tg; |
@@ -447,6 +456,8 @@ struct rq { | |||
447 | #ifdef CONFIG_FAIR_GROUP_SCHED | 456 | #ifdef CONFIG_FAIR_GROUP_SCHED |
448 | /* list of leaf cfs_rq on this cpu: */ | 457 | /* list of leaf cfs_rq on this cpu: */ |
449 | struct list_head leaf_cfs_rq_list; | 458 | struct list_head leaf_cfs_rq_list; |
459 | #endif | ||
460 | #ifdef CONFIG_RT_GROUP_SCHED | ||
450 | struct list_head leaf_rt_rq_list; | 461 | struct list_head leaf_rt_rq_list; |
451 | #endif | 462 | #endif |
452 | 463 | ||
@@ -652,19 +663,21 @@ const_debug unsigned int sysctl_sched_features = | |||
652 | const_debug unsigned int sysctl_sched_nr_migrate = 32; | 663 | const_debug unsigned int sysctl_sched_nr_migrate = 32; |
653 | 664 | ||
654 | /* | 665 | /* |
655 | * period over which we measure -rt task cpu usage in ms. | 666 | * period over which we measure -rt task cpu usage in us. |
656 | * default: 1s | 667 | * default: 1s |
657 | */ | 668 | */ |
658 | const_debug unsigned int sysctl_sched_rt_period = 1000; | 669 | unsigned int sysctl_sched_rt_period = 1000000; |
659 | 670 | ||
660 | #define SCHED_RT_FRAC_SHIFT 16 | 671 | /* |
661 | #define SCHED_RT_FRAC (1UL << SCHED_RT_FRAC_SHIFT) | 672 | * part of the period that we allow rt tasks to run in us. |
673 | * default: 0.95s | ||
674 | */ | ||
675 | int sysctl_sched_rt_runtime = 950000; | ||
662 | 676 | ||
663 | /* | 677 | /* |
664 | * ratio of time -rt tasks may consume. | 678 | * single value that denotes runtime == period, ie unlimited time. |
665 | * default: 95% | ||
666 | */ | 679 | */ |
667 | const_debug unsigned int sysctl_sched_rt_ratio = 62259; | 680 | #define RUNTIME_INF ((u64)~0ULL) |
668 | 681 | ||
669 | /* | 682 | /* |
670 | * For kernel-internal use: high-speed (but slightly incorrect) per-cpu | 683 | * For kernel-internal use: high-speed (but slightly incorrect) per-cpu |
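The opaque ratio knob is replaced by two absolute quantities: a measurement period and a runtime budget within it, both in microseconds, with a runtime of -1 meaning no limit. A userspace sketch that reads the defaults back (hedged: the /proc paths are the ones listed in the new documentation file, not shown in this excerpt of kernel/sysctl.c):

    #include <stdio.h>

    /* Read a single decimal value from a /proc file; -1 on error. */
    static long read_long(const char *path)
    {
        long val = -1;
        FILE *f = fopen(path, "r");

        if (f) {
            if (fscanf(f, "%ld", &val) != 1)
                val = -1;
            fclose(f);
        }
        return val;
    }

    int main(void)
    {
        long period  = read_long("/proc/sys/kernel/sched_rt_period_us");
        long runtime = read_long("/proc/sys/kernel/sched_rt_runtime_us");

        /* defaults in this patch: 1000000 us period, 950000 us runtime (95%) */
        printf("rt tasks may use %ld us of every %ld us\n", runtime, period);
        return 0;
    }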
@@ -4571,6 +4584,15 @@ recheck: | |||
4571 | return -EPERM; | 4584 | return -EPERM; |
4572 | } | 4585 | } |
4573 | 4586 | ||
4587 | #ifdef CONFIG_RT_GROUP_SCHED | ||
4588 | /* | ||
4589 | * Do not allow realtime tasks into groups that have no runtime | ||
4590 | * assigned. | ||
4591 | */ | ||
4592 | if (rt_policy(policy) && task_group(p)->rt_runtime == 0) | ||
4593 | return -EPERM; | ||
4594 | #endif | ||
4595 | |||
4574 | retval = security_task_setscheduler(p, policy, param); | 4596 | retval = security_task_setscheduler(p, policy, param); |
4575 | if (retval) | 4597 | if (retval) |
4576 | return retval; | 4598 | return retval; |
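From userspace, the new check surfaces as a plain permission error. A hedged illustration (assumes CONFIG_RT_GROUP_SCHED and that the calling task sits in a group whose rt runtime is still 0, the default for new groups):

    #include <errno.h>
    #include <sched.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        struct sched_param sp = { .sched_priority = 10 };

        if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1)
            /* EPERM if our group has no rt runtime assigned (or no privilege) */
            printf("sched_setscheduler: %s\n", strerror(errno));
        else
            printf("now SCHED_FIFO\n");
        return 0;
    }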
@@ -7112,7 +7134,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) | |||
7112 | /* delimiter for bitsearch: */ | 7134 | /* delimiter for bitsearch: */ |
7113 | __set_bit(MAX_RT_PRIO, array->bitmap); | 7135 | __set_bit(MAX_RT_PRIO, array->bitmap); |
7114 | 7136 | ||
7115 | #if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED | 7137 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED |
7116 | rt_rq->highest_prio = MAX_RT_PRIO; | 7138 | rt_rq->highest_prio = MAX_RT_PRIO; |
7117 | #endif | 7139 | #endif |
7118 | #ifdef CONFIG_SMP | 7140 | #ifdef CONFIG_SMP |
@@ -7123,7 +7145,8 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) | |||
7123 | rt_rq->rt_time = 0; | 7145 | rt_rq->rt_time = 0; |
7124 | rt_rq->rt_throttled = 0; | 7146 | rt_rq->rt_throttled = 0; |
7125 | 7147 | ||
7126 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7148 | #ifdef CONFIG_RT_GROUP_SCHED |
7149 | rt_rq->rt_nr_boosted = 0; | ||
7127 | rt_rq->rq = rq; | 7150 | rt_rq->rq = rq; |
7128 | #endif | 7151 | #endif |
7129 | } | 7152 | } |
@@ -7146,7 +7169,9 @@ static void init_tg_cfs_entry(struct rq *rq, struct task_group *tg, | |||
7146 | se->load.inv_weight = div64_64(1ULL<<32, se->load.weight); | 7169 | se->load.inv_weight = div64_64(1ULL<<32, se->load.weight); |
7147 | se->parent = NULL; | 7170 | se->parent = NULL; |
7148 | } | 7171 | } |
7172 | #endif | ||
7149 | 7173 | ||
7174 | #ifdef CONFIG_RT_GROUP_SCHED | ||
7150 | static void init_tg_rt_entry(struct rq *rq, struct task_group *tg, | 7175 | static void init_tg_rt_entry(struct rq *rq, struct task_group *tg, |
7151 | struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, | 7176 | struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, |
7152 | int cpu, int add) | 7177 | int cpu, int add) |
@@ -7175,7 +7200,7 @@ void __init sched_init(void) | |||
7175 | init_defrootdomain(); | 7200 | init_defrootdomain(); |
7176 | #endif | 7201 | #endif |
7177 | 7202 | ||
7178 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7203 | #ifdef CONFIG_GROUP_SCHED |
7179 | list_add(&init_task_group.list, &task_groups); | 7204 | list_add(&init_task_group.list, &task_groups); |
7180 | #endif | 7205 | #endif |
7181 | 7206 | ||
@@ -7196,7 +7221,10 @@ void __init sched_init(void) | |||
7196 | &per_cpu(init_cfs_rq, i), | 7221 | &per_cpu(init_cfs_rq, i), |
7197 | &per_cpu(init_sched_entity, i), i, 1); | 7222 | &per_cpu(init_sched_entity, i), i, 1); |
7198 | 7223 | ||
7199 | init_task_group.rt_ratio = sysctl_sched_rt_ratio; /* XXX */ | 7224 | #endif |
7225 | #ifdef CONFIG_RT_GROUP_SCHED | ||
7226 | init_task_group.rt_runtime = | ||
7227 | sysctl_sched_rt_runtime * NSEC_PER_USEC; | ||
7200 | INIT_LIST_HEAD(&rq->leaf_rt_rq_list); | 7228 | INIT_LIST_HEAD(&rq->leaf_rt_rq_list); |
7201 | init_tg_rt_entry(rq, &init_task_group, | 7229 | init_tg_rt_entry(rq, &init_task_group, |
7202 | &per_cpu(init_rt_rq, i), | 7230 | &per_cpu(init_rt_rq, i), |
@@ -7303,7 +7331,7 @@ void normalize_rt_tasks(void) | |||
7303 | unsigned long flags; | 7331 | unsigned long flags; |
7304 | struct rq *rq; | 7332 | struct rq *rq; |
7305 | 7333 | ||
7306 | read_lock_irq(&tasklist_lock); | 7334 | read_lock_irqsave(&tasklist_lock, flags); |
7307 | do_each_thread(g, p) { | 7335 | do_each_thread(g, p) { |
7308 | /* | 7336 | /* |
7309 | * Only normalize user tasks: | 7337 | * Only normalize user tasks: |
@@ -7329,16 +7357,16 @@ void normalize_rt_tasks(void) | |||
7329 | continue; | 7357 | continue; |
7330 | } | 7358 | } |
7331 | 7359 | ||
7332 | spin_lock_irqsave(&p->pi_lock, flags); | 7360 | spin_lock(&p->pi_lock); |
7333 | rq = __task_rq_lock(p); | 7361 | rq = __task_rq_lock(p); |
7334 | 7362 | ||
7335 | normalize_task(rq, p); | 7363 | normalize_task(rq, p); |
7336 | 7364 | ||
7337 | __task_rq_unlock(rq); | 7365 | __task_rq_unlock(rq); |
7338 | spin_unlock_irqrestore(&p->pi_lock, flags); | 7366 | spin_unlock(&p->pi_lock); |
7339 | } while_each_thread(g, p); | 7367 | } while_each_thread(g, p); |
7340 | 7368 | ||
7341 | read_unlock_irq(&tasklist_lock); | 7369 | read_unlock_irqrestore(&tasklist_lock, flags); |
7342 | } | 7370 | } |
7343 | 7371 | ||
7344 | #endif /* CONFIG_MAGIC_SYSRQ */ | 7372 | #endif /* CONFIG_MAGIC_SYSRQ */ |
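The normalize_rt_tasks() fix follows a general rule: once an outer lock has disabled interrupts and saved the flags, inner locks must not save into the same flags variable again, or the final restore would use a clobbered value. A minimal sketch of the corrected nesting (hypothetical lock pair standing in for tasklist_lock and p->pi_lock):

    #include <linux/spinlock.h>

    static void nested_lock_sketch(rwlock_t *outer, spinlock_t *inner)
    {
        unsigned long flags;

        read_lock_irqsave(outer, flags);   /* irqs disabled, flags saved once */
        spin_lock(inner);                  /* plain lock: irqs are already off */
        /* ... critical section ... */
        spin_unlock(inner);
        read_unlock_irqrestore(outer, flags);
    }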
@@ -7387,9 +7415,9 @@ void set_curr_task(int cpu, struct task_struct *p) | |||
7387 | 7415 | ||
7388 | #endif | 7416 | #endif |
7389 | 7417 | ||
7390 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7418 | #ifdef CONFIG_GROUP_SCHED |
7391 | 7419 | ||
7392 | #ifdef CONFIG_SMP | 7420 | #if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP |
7393 | /* | 7421 | /* |
7394 | * distribute shares of all task groups among their schedulable entities, | 7422 | * distribute shares of all task groups among their schedulable entities, |
7395 | * to reflect load distribution across cpus. | 7423 | * to reflect load distribution across cpus. |
@@ -7540,7 +7568,8 @@ static int load_balance_monitor(void *unused) | |||
7540 | } | 7568 | } |
7541 | #endif /* CONFIG_SMP */ | 7569 | #endif /* CONFIG_SMP */ |
7542 | 7570 | ||
7543 | static void free_sched_group(struct task_group *tg) | 7571 | #ifdef CONFIG_FAIR_GROUP_SCHED |
7572 | static void free_fair_sched_group(struct task_group *tg) | ||
7544 | { | 7573 | { |
7545 | int i; | 7574 | int i; |
7546 | 7575 | ||
@@ -7549,49 +7578,27 @@ static void free_sched_group(struct task_group *tg) | |||
7549 | kfree(tg->cfs_rq[i]); | 7578 | kfree(tg->cfs_rq[i]); |
7550 | if (tg->se) | 7579 | if (tg->se) |
7551 | kfree(tg->se[i]); | 7580 | kfree(tg->se[i]); |
7552 | if (tg->rt_rq) | ||
7553 | kfree(tg->rt_rq[i]); | ||
7554 | if (tg->rt_se) | ||
7555 | kfree(tg->rt_se[i]); | ||
7556 | } | 7581 | } |
7557 | 7582 | ||
7558 | kfree(tg->cfs_rq); | 7583 | kfree(tg->cfs_rq); |
7559 | kfree(tg->se); | 7584 | kfree(tg->se); |
7560 | kfree(tg->rt_rq); | ||
7561 | kfree(tg->rt_se); | ||
7562 | kfree(tg); | ||
7563 | } | 7585 | } |
7564 | 7586 | ||
7565 | /* allocate runqueue etc for a new task group */ | 7587 | static int alloc_fair_sched_group(struct task_group *tg) |
7566 | struct task_group *sched_create_group(void) | ||
7567 | { | 7588 | { |
7568 | struct task_group *tg; | ||
7569 | struct cfs_rq *cfs_rq; | 7589 | struct cfs_rq *cfs_rq; |
7570 | struct sched_entity *se; | 7590 | struct sched_entity *se; |
7571 | struct rt_rq *rt_rq; | ||
7572 | struct sched_rt_entity *rt_se; | ||
7573 | struct rq *rq; | 7591 | struct rq *rq; |
7574 | int i; | 7592 | int i; |
7575 | 7593 | ||
7576 | tg = kzalloc(sizeof(*tg), GFP_KERNEL); | ||
7577 | if (!tg) | ||
7578 | return ERR_PTR(-ENOMEM); | ||
7579 | |||
7580 | tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL); | 7594 | tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL); |
7581 | if (!tg->cfs_rq) | 7595 | if (!tg->cfs_rq) |
7582 | goto err; | 7596 | goto err; |
7583 | tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL); | 7597 | tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL); |
7584 | if (!tg->se) | 7598 | if (!tg->se) |
7585 | goto err; | 7599 | goto err; |
7586 | tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL); | ||
7587 | if (!tg->rt_rq) | ||
7588 | goto err; | ||
7589 | tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL); | ||
7590 | if (!tg->rt_se) | ||
7591 | goto err; | ||
7592 | 7600 | ||
7593 | tg->shares = NICE_0_LOAD; | 7601 | tg->shares = NICE_0_LOAD; |
7594 | tg->rt_ratio = 0; /* XXX */ | ||
7595 | 7602 | ||
7596 | for_each_possible_cpu(i) { | 7603 | for_each_possible_cpu(i) { |
7597 | rq = cpu_rq(i); | 7604 | rq = cpu_rq(i); |
@@ -7606,6 +7613,79 @@ struct task_group *sched_create_group(void) | |||
7606 | if (!se) | 7613 | if (!se) |
7607 | goto err; | 7614 | goto err; |
7608 | 7615 | ||
7616 | init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0); | ||
7617 | } | ||
7618 | |||
7619 | return 1; | ||
7620 | |||
7621 | err: | ||
7622 | return 0; | ||
7623 | } | ||
7624 | |||
7625 | static inline void register_fair_sched_group(struct task_group *tg, int cpu) | ||
7626 | { | ||
7627 | list_add_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list, | ||
7628 | &cpu_rq(cpu)->leaf_cfs_rq_list); | ||
7629 | } | ||
7630 | |||
7631 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) | ||
7632 | { | ||
7633 | list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list); | ||
7634 | } | ||
7635 | #else | ||
7636 | static inline void free_fair_sched_group(struct task_group *tg) | ||
7637 | { | ||
7638 | } | ||
7639 | |||
7640 | static inline int alloc_fair_sched_group(struct task_group *tg) | ||
7641 | { | ||
7642 | return 1; | ||
7643 | } | ||
7644 | |||
7645 | static inline void register_fair_sched_group(struct task_group *tg, int cpu) | ||
7646 | { | ||
7647 | } | ||
7648 | |||
7649 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) | ||
7650 | { | ||
7651 | } | ||
7652 | #endif | ||
7653 | |||
7654 | #ifdef CONFIG_RT_GROUP_SCHED | ||
7655 | static void free_rt_sched_group(struct task_group *tg) | ||
7656 | { | ||
7657 | int i; | ||
7658 | |||
7659 | for_each_possible_cpu(i) { | ||
7660 | if (tg->rt_rq) | ||
7661 | kfree(tg->rt_rq[i]); | ||
7662 | if (tg->rt_se) | ||
7663 | kfree(tg->rt_se[i]); | ||
7664 | } | ||
7665 | |||
7666 | kfree(tg->rt_rq); | ||
7667 | kfree(tg->rt_se); | ||
7668 | } | ||
7669 | |||
7670 | static int alloc_rt_sched_group(struct task_group *tg) | ||
7671 | { | ||
7672 | struct rt_rq *rt_rq; | ||
7673 | struct sched_rt_entity *rt_se; | ||
7674 | struct rq *rq; | ||
7675 | int i; | ||
7676 | |||
7677 | tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL); | ||
7678 | if (!tg->rt_rq) | ||
7679 | goto err; | ||
7680 | tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL); | ||
7681 | if (!tg->rt_se) | ||
7682 | goto err; | ||
7683 | |||
7684 | tg->rt_runtime = 0; | ||
7685 | |||
7686 | for_each_possible_cpu(i) { | ||
7687 | rq = cpu_rq(i); | ||
7688 | |||
7609 | rt_rq = kmalloc_node(sizeof(struct rt_rq), | 7689 | rt_rq = kmalloc_node(sizeof(struct rt_rq), |
7610 | GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); | 7690 | GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); |
7611 | if (!rt_rq) | 7691 | if (!rt_rq) |
@@ -7616,20 +7696,75 @@ struct task_group *sched_create_group(void) | |||
7616 | if (!rt_se) | 7696 | if (!rt_se) |
7617 | goto err; | 7697 | goto err; |
7618 | 7698 | ||
7619 | init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0); | ||
7620 | init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0); | 7699 | init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0); |
7621 | } | 7700 | } |
7622 | 7701 | ||
7623 | lock_task_group_list(); | 7702 | return 1; |
7703 | |||
7704 | err: | ||
7705 | return 0; | ||
7706 | } | ||
7707 | |||
7708 | static inline void register_rt_sched_group(struct task_group *tg, int cpu) | ||
7709 | { | ||
7710 | list_add_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list, | ||
7711 | &cpu_rq(cpu)->leaf_rt_rq_list); | ||
7712 | } | ||
7713 | |||
7714 | static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) | ||
7715 | { | ||
7716 | list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list); | ||
7717 | } | ||
7718 | #else | ||
7719 | static inline void free_rt_sched_group(struct task_group *tg) | ||
7720 | { | ||
7721 | } | ||
7722 | |||
7723 | static inline int alloc_rt_sched_group(struct task_group *tg) | ||
7724 | { | ||
7725 | return 1; | ||
7726 | } | ||
7727 | |||
7728 | static inline void register_rt_sched_group(struct task_group *tg, int cpu) | ||
7729 | { | ||
7730 | } | ||
7731 | |||
7732 | static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) | ||
7733 | { | ||
7734 | } | ||
7735 | #endif | ||
7736 | |||
7737 | static void free_sched_group(struct task_group *tg) | ||
7738 | { | ||
7739 | free_fair_sched_group(tg); | ||
7740 | free_rt_sched_group(tg); | ||
7741 | kfree(tg); | ||
7742 | } | ||
7743 | |||
7744 | /* allocate runqueue etc for a new task group */ | ||
7745 | struct task_group *sched_create_group(void) | ||
7746 | { | ||
7747 | struct task_group *tg; | ||
7748 | unsigned long flags; | ||
7749 | int i; | ||
7750 | |||
7751 | tg = kzalloc(sizeof(*tg), GFP_KERNEL); | ||
7752 | if (!tg) | ||
7753 | return ERR_PTR(-ENOMEM); | ||
7754 | |||
7755 | if (!alloc_fair_sched_group(tg)) | ||
7756 | goto err; | ||
7757 | |||
7758 | if (!alloc_rt_sched_group(tg)) | ||
7759 | goto err; | ||
7760 | |||
7761 | spin_lock_irqsave(&task_group_lock, flags); | ||
7624 | for_each_possible_cpu(i) { | 7762 | for_each_possible_cpu(i) { |
7625 | rq = cpu_rq(i); | 7763 | register_fair_sched_group(tg, i); |
7626 | cfs_rq = tg->cfs_rq[i]; | 7764 | register_rt_sched_group(tg, i); |
7627 | list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list); | ||
7628 | rt_rq = tg->rt_rq[i]; | ||
7629 | list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list); | ||
7630 | } | 7765 | } |
7631 | list_add_rcu(&tg->list, &task_groups); | 7766 | list_add_rcu(&tg->list, &task_groups); |
7632 | unlock_task_group_list(); | 7767 | spin_unlock_irqrestore(&task_group_lock, flags); |
7633 | 7768 | ||
7634 | return tg; | 7769 | return tg; |
7635 | 7770 | ||
@@ -7648,21 +7783,16 @@ static void free_sched_group_rcu(struct rcu_head *rhp) | |||
7648 | /* Destroy runqueue etc associated with a task group */ | 7783 | /* Destroy runqueue etc associated with a task group */ |
7649 | void sched_destroy_group(struct task_group *tg) | 7784 | void sched_destroy_group(struct task_group *tg) |
7650 | { | 7785 | { |
7651 | struct cfs_rq *cfs_rq = NULL; | 7786 | unsigned long flags; |
7652 | struct rt_rq *rt_rq = NULL; | ||
7653 | int i; | 7787 | int i; |
7654 | 7788 | ||
7655 | lock_task_group_list(); | 7789 | spin_lock_irqsave(&task_group_lock, flags); |
7656 | for_each_possible_cpu(i) { | 7790 | for_each_possible_cpu(i) { |
7657 | cfs_rq = tg->cfs_rq[i]; | 7791 | unregister_fair_sched_group(tg, i); |
7658 | list_del_rcu(&cfs_rq->leaf_cfs_rq_list); | 7792 | unregister_rt_sched_group(tg, i); |
7659 | rt_rq = tg->rt_rq[i]; | ||
7660 | list_del_rcu(&rt_rq->leaf_rt_rq_list); | ||
7661 | } | 7793 | } |
7662 | list_del_rcu(&tg->list); | 7794 | list_del_rcu(&tg->list); |
7663 | unlock_task_group_list(); | 7795 | spin_unlock_irqrestore(&task_group_lock, flags); |
7664 | |||
7665 | BUG_ON(!cfs_rq); | ||
7666 | 7796 | ||
7667 | /* wait for possible concurrent references to cfs_rqs complete */ | 7797 | /* wait for possible concurrent references to cfs_rqs complete */ |
7668 | call_rcu(&tg->rcu, free_sched_group_rcu); | 7798 | call_rcu(&tg->rcu, free_sched_group_rcu); |
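With allocation split per scheduling class and registration moved under the new task_group_lock spinlock, the group lifecycle from a caller's point of view looks roughly like the hedged sketch below (the wrapper is hypothetical; the called functions are the ones defined in the hunks above):

    #include <linux/err.h>
    #include <linux/sched.h>

    static int make_example_group(void)
    {
        struct task_group *tg = sched_create_group();

        if (IS_ERR(tg))
            return PTR_ERR(tg);
    #ifdef CONFIG_FAIR_GROUP_SCHED
        sched_group_set_shares(tg, 2048);          /* example weight */
    #endif
    #ifdef CONFIG_RT_GROUP_SCHED
        sched_group_set_rt_runtime(tg, 100000);    /* 100 ms of rt time per period */
    #endif
        /* ... attach tasks via sched_move_task() ... */
        sched_destroy_group(tg);                   /* teardown is RCU-deferred */
        return 0;
    }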
@@ -7703,6 +7833,7 @@ void sched_move_task(struct task_struct *tsk) | |||
7703 | task_rq_unlock(rq, &flags); | 7833 | task_rq_unlock(rq, &flags); |
7704 | } | 7834 | } |
7705 | 7835 | ||
7836 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
7706 | /* rq->lock to be locked by caller */ | 7837 | /* rq->lock to be locked by caller */ |
7707 | static void set_se_shares(struct sched_entity *se, unsigned long shares) | 7838 | static void set_se_shares(struct sched_entity *se, unsigned long shares) |
7708 | { | 7839 | { |
@@ -7728,13 +7859,14 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares) | |||
7728 | } | 7859 | } |
7729 | } | 7860 | } |
7730 | 7861 | ||
7862 | static DEFINE_MUTEX(shares_mutex); | ||
7863 | |||
7731 | int sched_group_set_shares(struct task_group *tg, unsigned long shares) | 7864 | int sched_group_set_shares(struct task_group *tg, unsigned long shares) |
7732 | { | 7865 | { |
7733 | int i; | 7866 | int i; |
7734 | struct cfs_rq *cfs_rq; | 7867 | unsigned long flags; |
7735 | struct rq *rq; | ||
7736 | 7868 | ||
7737 | lock_task_group_list(); | 7869 | mutex_lock(&shares_mutex); |
7738 | if (tg->shares == shares) | 7870 | if (tg->shares == shares) |
7739 | goto done; | 7871 | goto done; |
7740 | 7872 | ||
@@ -7746,10 +7878,10 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
7746 | * load_balance_fair) from referring to this group first, | 7878 | * load_balance_fair) from referring to this group first, |
7747 | * by taking it off the rq->leaf_cfs_rq_list on each cpu. | 7879 | * by taking it off the rq->leaf_cfs_rq_list on each cpu. |
7748 | */ | 7880 | */ |
7749 | for_each_possible_cpu(i) { | 7881 | spin_lock_irqsave(&task_group_lock, flags); |
7750 | cfs_rq = tg->cfs_rq[i]; | 7882 | for_each_possible_cpu(i) |
7751 | list_del_rcu(&cfs_rq->leaf_cfs_rq_list); | 7883 | unregister_fair_sched_group(tg, i); |
7752 | } | 7884 | spin_unlock_irqrestore(&task_group_lock, flags); |
7753 | 7885 | ||
7754 | /* wait for any ongoing reference to this group to finish */ | 7886 | /* wait for any ongoing reference to this group to finish */ |
7755 | synchronize_sched(); | 7887 | synchronize_sched(); |
@@ -7769,13 +7901,12 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
7769 | * Enable load balance activity on this group, by inserting it back on | 7901 | * Enable load balance activity on this group, by inserting it back on |
7770 | * each cpu's rq->leaf_cfs_rq_list. | 7902 | * each cpu's rq->leaf_cfs_rq_list. |
7771 | */ | 7903 | */ |
7772 | for_each_possible_cpu(i) { | 7904 | spin_lock_irqsave(&task_group_lock, flags); |
7773 | rq = cpu_rq(i); | 7905 | for_each_possible_cpu(i) |
7774 | cfs_rq = tg->cfs_rq[i]; | 7906 | register_fair_sched_group(tg, i); |
7775 | list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list); | 7907 | spin_unlock_irqrestore(&task_group_lock, flags); |
7776 | } | ||
7777 | done: | 7908 | done: |
7778 | unlock_task_group_list(); | 7909 | mutex_unlock(&shares_mutex); |
7779 | return 0; | 7910 | return 0; |
7780 | } | 7911 | } |
7781 | 7912 | ||
@@ -7783,35 +7914,84 @@ unsigned long sched_group_shares(struct task_group *tg) | |||
7783 | { | 7914 | { |
7784 | return tg->shares; | 7915 | return tg->shares; |
7785 | } | 7916 | } |
7917 | #endif | ||
7786 | 7918 | ||
7919 | #ifdef CONFIG_RT_GROUP_SCHED | ||
7787 | /* | 7920 | /* |
7788 | * Ensure the total rt_ratio <= sysctl_sched_rt_ratio | 7921 | * Ensure that the real time constraints are schedulable. |
7789 | */ | 7922 | */ |
7790 | int sched_group_set_rt_ratio(struct task_group *tg, unsigned long rt_ratio) | 7923 | static DEFINE_MUTEX(rt_constraints_mutex); |
7924 | |||
7925 | static unsigned long to_ratio(u64 period, u64 runtime) | ||
7926 | { | ||
7927 | if (runtime == RUNTIME_INF) | ||
7928 | return 1ULL << 16; | ||
7929 | |||
7930 | runtime *= (1ULL << 16); | ||
7931 | div64_64(runtime, period); | ||
7932 | return runtime; | ||
7933 | } | ||
7934 | |||
7935 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) | ||
7791 | { | 7936 | { |
7792 | struct task_group *tgi; | 7937 | struct task_group *tgi; |
7793 | unsigned long total = 0; | 7938 | unsigned long total = 0; |
7939 | unsigned long global_ratio = | ||
7940 | to_ratio(sysctl_sched_rt_period, | ||
7941 | sysctl_sched_rt_runtime < 0 ? | ||
7942 | RUNTIME_INF : sysctl_sched_rt_runtime); | ||
7794 | 7943 | ||
7795 | rcu_read_lock(); | 7944 | rcu_read_lock(); |
7796 | list_for_each_entry_rcu(tgi, &task_groups, list) | 7945 | list_for_each_entry_rcu(tgi, &task_groups, list) { |
7797 | total += tgi->rt_ratio; | 7946 | if (tgi == tg) |
7798 | rcu_read_unlock(); | 7947 | continue; |
7799 | 7948 | ||
7800 | if (total + rt_ratio - tg->rt_ratio > sysctl_sched_rt_ratio) | 7949 | total += to_ratio(period, tgi->rt_runtime); |
7801 | return -EINVAL; | 7950 | } |
7951 | rcu_read_unlock(); | ||
7802 | 7952 | ||
7803 | tg->rt_ratio = rt_ratio; | 7953 | return total + to_ratio(period, runtime) < global_ratio; |
7804 | return 0; | ||
7805 | } | 7954 | } |
7806 | 7955 | ||
7807 | unsigned long sched_group_rt_ratio(struct task_group *tg) | 7956 | int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) |
7808 | { | 7957 | { |
7809 | return tg->rt_ratio; | 7958 | u64 rt_runtime, rt_period; |
7959 | int err = 0; | ||
7960 | |||
7961 | rt_period = sysctl_sched_rt_period * NSEC_PER_USEC; | ||
7962 | rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC; | ||
7963 | if (rt_runtime_us == -1) | ||
7964 | rt_runtime = rt_period; | ||
7965 | |||
7966 | mutex_lock(&rt_constraints_mutex); | ||
7967 | if (!__rt_schedulable(tg, rt_period, rt_runtime)) { | ||
7968 | err = -EINVAL; | ||
7969 | goto unlock; | ||
7970 | } | ||
7971 | if (rt_runtime_us == -1) | ||
7972 | rt_runtime = RUNTIME_INF; | ||
7973 | tg->rt_runtime = rt_runtime; | ||
7974 | unlock: | ||
7975 | mutex_unlock(&rt_constraints_mutex); | ||
7976 | |||
7977 | return err; | ||
7810 | } | 7978 | } |
7811 | 7979 | ||
7812 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 7980 | long sched_group_rt_runtime(struct task_group *tg) |
7981 | { | ||
7982 | u64 rt_runtime_us; | ||
7983 | |||
7984 | if (tg->rt_runtime == RUNTIME_INF) | ||
7985 | return -1; | ||
7986 | |||
7987 | rt_runtime_us = tg->rt_runtime; | ||
7988 | do_div(rt_runtime_us, NSEC_PER_USEC); | ||
7989 | return rt_runtime_us; | ||
7990 | } | ||
7991 | #endif | ||
7992 | #endif /* CONFIG_GROUP_SCHED */ | ||
7813 | 7993 | ||
7814 | #ifdef CONFIG_FAIR_CGROUP_SCHED | 7994 | #ifdef CONFIG_CGROUP_SCHED |
7815 | 7995 | ||
7816 | /* return corresponding task_group object of a cgroup */ | 7996 | /* return corresponding task_group object of a cgroup */ |
7817 | static inline struct task_group *cgroup_tg(struct cgroup *cgrp) | 7997 | static inline struct task_group *cgroup_tg(struct cgroup *cgrp) |
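The admission test above works in 16.16 fixed point: each group's runtime is scaled by 2^16 and divided by the period, and the sum over all other groups plus the requested value must stay below the ratio derived from the global runtime. A standalone userspace restatement (hedged; names and the two example budgets are illustrative):

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t to_ratio(uint64_t period, uint64_t runtime)
    {
        return (runtime << 16) / period;    /* 16.16 fixed-point ratio */
    }

    int main(void)
    {
        uint64_t period = 1000000;                      /* global period, us */
        uint64_t global_limit = to_ratio(period, 950000);
        uint64_t group_runtime[] = { 500000, 300000 };  /* two groups, us */
        uint64_t sum = 0;
        unsigned int i;

        for (i = 0; i < 2; i++)
            sum += to_ratio(period, group_runtime[i]);

        printf("sum=%llu limit=%llu -> %s\n",
               (unsigned long long)sum, (unsigned long long)global_limit,
               sum < global_limit ? "schedulable" : "rejected");
        return 0;
    }

With the default 1s period and 0.95s runtime the global limit works out to 62259, the same constant the removed sysctl_sched_rt_ratio default encoded.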
@@ -7857,9 +8037,15 @@ static int | |||
7857 | cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 8037 | cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
7858 | struct task_struct *tsk) | 8038 | struct task_struct *tsk) |
7859 | { | 8039 | { |
8040 | #ifdef CONFIG_RT_GROUP_SCHED | ||
8041 | /* Don't accept realtime tasks when there is no way for them to run */ | ||
8042 | if (rt_task(tsk) && cgroup_tg(cgrp)->rt_runtime == 0) | ||
8043 | return -EINVAL; | ||
8044 | #else | ||
7860 | /* We don't support RT-tasks being in separate groups */ | 8045 | /* We don't support RT-tasks being in separate groups */ |
7861 | if (tsk->sched_class != &fair_sched_class) | 8046 | if (tsk->sched_class != &fair_sched_class) |
7862 | return -EINVAL; | 8047 | return -EINVAL; |
8048 | #endif | ||
7863 | 8049 | ||
7864 | return 0; | 8050 | return 0; |
7865 | } | 8051 | } |
@@ -7871,6 +8057,7 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
7871 | sched_move_task(tsk); | 8057 | sched_move_task(tsk); |
7872 | } | 8058 | } |
7873 | 8059 | ||
8060 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
7874 | static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype, | 8061 | static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype, |
7875 | u64 shareval) | 8062 | u64 shareval) |
7876 | { | 8063 | { |
@@ -7883,31 +8070,70 @@ static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft) | |||
7883 | 8070 | ||
7884 | return (u64) tg->shares; | 8071 | return (u64) tg->shares; |
7885 | } | 8072 | } |
8073 | #endif | ||
7886 | 8074 | ||
7887 | static int cpu_rt_ratio_write_uint(struct cgroup *cgrp, struct cftype *cftype, | 8075 | #ifdef CONFIG_RT_GROUP_SCHED |
7888 | u64 rt_ratio_val) | 8076 | static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft, |
8077 | struct file *file, | ||
8078 | const char __user *userbuf, | ||
8079 | size_t nbytes, loff_t *unused_ppos) | ||
7889 | { | 8080 | { |
7890 | return sched_group_set_rt_ratio(cgroup_tg(cgrp), rt_ratio_val); | 8081 | char buffer[64]; |
8082 | int retval = 0; | ||
8083 | s64 val; | ||
8084 | char *end; | ||
8085 | |||
8086 | if (!nbytes) | ||
8087 | return -EINVAL; | ||
8088 | if (nbytes >= sizeof(buffer)) | ||
8089 | return -E2BIG; | ||
8090 | if (copy_from_user(buffer, userbuf, nbytes)) | ||
8091 | return -EFAULT; | ||
8092 | |||
8093 | buffer[nbytes] = 0; /* nul-terminate */ | ||
8094 | |||
8095 | /* strip newline if necessary */ | ||
8096 | if (nbytes && (buffer[nbytes-1] == '\n')) | ||
8097 | buffer[nbytes-1] = 0; | ||
8098 | val = simple_strtoll(buffer, &end, 0); | ||
8099 | if (*end) | ||
8100 | return -EINVAL; | ||
8101 | |||
8102 | /* Pass to subsystem */ | ||
8103 | retval = sched_group_set_rt_runtime(cgroup_tg(cgrp), val); | ||
8104 | if (!retval) | ||
8105 | retval = nbytes; | ||
8106 | return retval; | ||
7891 | } | 8107 | } |
7892 | 8108 | ||
7893 | static u64 cpu_rt_ratio_read_uint(struct cgroup *cgrp, struct cftype *cft) | 8109 | static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft, |
8110 | struct file *file, | ||
8111 | char __user *buf, size_t nbytes, | ||
8112 | loff_t *ppos) | ||
7894 | { | 8113 | { |
7895 | struct task_group *tg = cgroup_tg(cgrp); | 8114 | char tmp[64]; |
8115 | long val = sched_group_rt_runtime(cgroup_tg(cgrp)); | ||
8116 | int len = sprintf(tmp, "%ld\n", val); | ||
7896 | 8117 | ||
7897 | return (u64) tg->rt_ratio; | 8118 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); |
7898 | } | 8119 | } |
8120 | #endif | ||
7899 | 8121 | ||
7900 | static struct cftype cpu_files[] = { | 8122 | static struct cftype cpu_files[] = { |
8123 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
7901 | { | 8124 | { |
7902 | .name = "shares", | 8125 | .name = "shares", |
7903 | .read_uint = cpu_shares_read_uint, | 8126 | .read_uint = cpu_shares_read_uint, |
7904 | .write_uint = cpu_shares_write_uint, | 8127 | .write_uint = cpu_shares_write_uint, |
7905 | }, | 8128 | }, |
8129 | #endif | ||
8130 | #ifdef CONFIG_RT_GROUP_SCHED | ||
7906 | { | 8131 | { |
7907 | .name = "rt_ratio", | 8132 | .name = "rt_runtime_us", |
7908 | .read_uint = cpu_rt_ratio_read_uint, | 8133 | .read = cpu_rt_runtime_read, |
7909 | .write_uint = cpu_rt_ratio_write_uint, | 8134 | .write = cpu_rt_runtime_write, |
7910 | }, | 8135 | }, |
8136 | #endif | ||
7911 | }; | 8137 | }; |
7912 | 8138 | ||
7913 | static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont) | 8139 | static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont) |
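From userspace the new cgroup file behaves like any other single-value control file. A hedged sketch (the /cgroup mount point and the "mygroup" directory are assumptions; the file name cpu.rt_runtime_us and the meaning of -1 come from the hunk above):

    #include <stdio.h>

    int main(void)
    {
        const char *path = "/cgroup/mygroup/cpu.rt_runtime_us"; /* assumed mount */
        FILE *f;
        long val = 0;

        if ((f = fopen(path, "w"))) {
            fprintf(f, "300000\n");       /* 300 ms of rt time per period */
            fclose(f);
        }
        if ((f = fopen(path, "r"))) {
            if (fscanf(f, "%ld", &val) != 1)
                val = -1;
            fclose(f);
        }
        printf("cpu.rt_runtime_us = %ld\n", val);
        return 0;
    }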
@@ -7926,7 +8152,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { | |||
7926 | .early_init = 1, | 8152 | .early_init = 1, |
7927 | }; | 8153 | }; |
7928 | 8154 | ||
7929 | #endif /* CONFIG_FAIR_CGROUP_SCHED */ | 8155 | #endif /* CONFIG_CGROUP_SCHED */ |
7930 | 8156 | ||
7931 | #ifdef CONFIG_CGROUP_CPUACCT | 8157 | #ifdef CONFIG_CGROUP_CPUACCT |
7932 | 8158 | ||
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 274b40d7bef2..f54792b175b2 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -55,14 +55,14 @@ static inline int on_rt_rq(struct sched_rt_entity *rt_se) | |||
55 | return !list_empty(&rt_se->run_list); | 55 | return !list_empty(&rt_se->run_list); |
56 | } | 56 | } |
57 | 57 | ||
58 | #ifdef CONFIG_FAIR_GROUP_SCHED | 58 | #ifdef CONFIG_RT_GROUP_SCHED |
59 | 59 | ||
60 | static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq) | 60 | static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) |
61 | { | 61 | { |
62 | if (!rt_rq->tg) | 62 | if (!rt_rq->tg) |
63 | return SCHED_RT_FRAC; | 63 | return RUNTIME_INF; |
64 | 64 | ||
65 | return rt_rq->tg->rt_ratio; | 65 | return rt_rq->tg->rt_runtime; |
66 | } | 66 | } |
67 | 67 | ||
68 | #define for_each_leaf_rt_rq(rt_rq, rq) \ | 68 | #define for_each_leaf_rt_rq(rt_rq, rq) \ |
@@ -89,7 +89,7 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) | |||
89 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se); | 89 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se); |
90 | static void dequeue_rt_entity(struct sched_rt_entity *rt_se); | 90 | static void dequeue_rt_entity(struct sched_rt_entity *rt_se); |
91 | 91 | ||
92 | static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq) | 92 | static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) |
93 | { | 93 | { |
94 | struct sched_rt_entity *rt_se = rt_rq->rt_se; | 94 | struct sched_rt_entity *rt_se = rt_rq->rt_se; |
95 | 95 | ||
@@ -102,7 +102,7 @@ static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq) | |||
102 | } | 102 | } |
103 | } | 103 | } |
104 | 104 | ||
105 | static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq) | 105 | static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) |
106 | { | 106 | { |
107 | struct sched_rt_entity *rt_se = rt_rq->rt_se; | 107 | struct sched_rt_entity *rt_se = rt_rq->rt_se; |
108 | 108 | ||
@@ -110,11 +110,31 @@ static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq) | |||
110 | dequeue_rt_entity(rt_se); | 110 | dequeue_rt_entity(rt_se); |
111 | } | 111 | } |
112 | 112 | ||
113 | static inline int rt_rq_throttled(struct rt_rq *rt_rq) | ||
114 | { | ||
115 | return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted; | ||
116 | } | ||
117 | |||
118 | static int rt_se_boosted(struct sched_rt_entity *rt_se) | ||
119 | { | ||
120 | struct rt_rq *rt_rq = group_rt_rq(rt_se); | ||
121 | struct task_struct *p; | ||
122 | |||
123 | if (rt_rq) | ||
124 | return !!rt_rq->rt_nr_boosted; | ||
125 | |||
126 | p = rt_task_of(rt_se); | ||
127 | return p->prio != p->normal_prio; | ||
128 | } | ||
129 | |||
113 | #else | 130 | #else |
114 | 131 | ||
115 | static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq) | 132 | static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) |
116 | { | 133 | { |
117 | return sysctl_sched_rt_ratio; | 134 | if (sysctl_sched_rt_runtime == -1) |
135 | return RUNTIME_INF; | ||
136 | |||
137 | return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; | ||
118 | } | 138 | } |
119 | 139 | ||
120 | #define for_each_leaf_rt_rq(rt_rq, rq) \ | 140 | #define for_each_leaf_rt_rq(rt_rq, rq) \ |
@@ -141,19 +161,23 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) | |||
141 | return NULL; | 161 | return NULL; |
142 | } | 162 | } |
143 | 163 | ||
144 | static inline void sched_rt_ratio_enqueue(struct rt_rq *rt_rq) | 164 | static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) |
145 | { | 165 | { |
146 | } | 166 | } |
147 | 167 | ||
148 | static inline void sched_rt_ratio_dequeue(struct rt_rq *rt_rq) | 168 | static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) |
149 | { | 169 | { |
150 | } | 170 | } |
151 | 171 | ||
172 | static inline int rt_rq_throttled(struct rt_rq *rt_rq) | ||
173 | { | ||
174 | return rt_rq->rt_throttled; | ||
175 | } | ||
152 | #endif | 176 | #endif |
153 | 177 | ||
154 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) | 178 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) |
155 | { | 179 | { |
156 | #ifdef CONFIG_FAIR_GROUP_SCHED | 180 | #ifdef CONFIG_RT_GROUP_SCHED |
157 | struct rt_rq *rt_rq = group_rt_rq(rt_se); | 181 | struct rt_rq *rt_rq = group_rt_rq(rt_se); |
158 | 182 | ||
159 | if (rt_rq) | 183 | if (rt_rq) |
@@ -163,28 +187,26 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se) | |||
163 | return rt_task_of(rt_se)->prio; | 187 | return rt_task_of(rt_se)->prio; |
164 | } | 188 | } |
165 | 189 | ||
166 | static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq) | 190 | static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) |
167 | { | 191 | { |
168 | unsigned int rt_ratio = sched_rt_ratio(rt_rq); | 192 | u64 runtime = sched_rt_runtime(rt_rq); |
169 | u64 period, ratio; | ||
170 | 193 | ||
171 | if (rt_ratio == SCHED_RT_FRAC) | 194 | if (runtime == RUNTIME_INF) |
172 | return 0; | 195 | return 0; |
173 | 196 | ||
174 | if (rt_rq->rt_throttled) | 197 | if (rt_rq->rt_throttled) |
175 | return 1; | 198 | return rt_rq_throttled(rt_rq); |
176 | |||
177 | period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC; | ||
178 | ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT; | ||
179 | 199 | ||
180 | if (rt_rq->rt_time > ratio) { | 200 | if (rt_rq->rt_time > runtime) { |
181 | struct rq *rq = rq_of_rt_rq(rt_rq); | 201 | struct rq *rq = rq_of_rt_rq(rt_rq); |
182 | 202 | ||
183 | rq->rt_throttled = 1; | 203 | rq->rt_throttled = 1; |
184 | rt_rq->rt_throttled = 1; | 204 | rt_rq->rt_throttled = 1; |
185 | 205 | ||
186 | sched_rt_ratio_dequeue(rt_rq); | 206 | if (rt_rq_throttled(rt_rq)) { |
187 | return 1; | 207 | sched_rt_rq_dequeue(rt_rq); |
208 | return 1; | ||
209 | } | ||
188 | } | 210 | } |
189 | 211 | ||
190 | return 0; | 212 | return 0; |
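The throttling rule that the hunk above arrives at: a group is marked throttled once its accumulated rt_time exceeds its runtime, but a throttled group that still holds PI-boosted tasks keeps running so that priority inheritance is not defeated. A condensed restatement as a hypothetical, userspace-compilable helper (u64 spelled as unsigned long long; the decision mirrors sched_rt_runtime_exceeded() plus rt_rq_throttled()):

    #define RUNTIME_INF (~0ULL)

    static int rt_rq_should_dequeue(unsigned long long rt_time,
                                    unsigned long long runtime,
                                    unsigned long nr_boosted)
    {
        if (runtime == RUNTIME_INF)
            return 0;                  /* no limit configured */
        if (rt_time <= runtime)
            return 0;                  /* budget not yet used up */
        return nr_boosted == 0;        /* keep PI-boosted groups running */
    }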
@@ -196,17 +218,16 @@ static void update_sched_rt_period(struct rq *rq) | |||
196 | u64 period; | 218 | u64 period; |
197 | 219 | ||
198 | while (rq->clock > rq->rt_period_expire) { | 220 | while (rq->clock > rq->rt_period_expire) { |
199 | period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC; | 221 | period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC; |
200 | rq->rt_period_expire += period; | 222 | rq->rt_period_expire += period; |
201 | 223 | ||
202 | for_each_leaf_rt_rq(rt_rq, rq) { | 224 | for_each_leaf_rt_rq(rt_rq, rq) { |
203 | unsigned long rt_ratio = sched_rt_ratio(rt_rq); | 225 | u64 runtime = sched_rt_runtime(rt_rq); |
204 | u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT; | ||
205 | 226 | ||
206 | rt_rq->rt_time -= min(rt_rq->rt_time, ratio); | 227 | rt_rq->rt_time -= min(rt_rq->rt_time, runtime); |
207 | if (rt_rq->rt_throttled) { | 228 | if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { |
208 | rt_rq->rt_throttled = 0; | 229 | rt_rq->rt_throttled = 0; |
209 | sched_rt_ratio_enqueue(rt_rq); | 230 | sched_rt_rq_enqueue(rt_rq); |
210 | } | 231 | } |
211 | } | 232 | } |
212 | 233 | ||
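
[ update_sched_rt_period() now works in the same absolute units: on each period rollover the accrued rt_time is decayed by at most one budget's worth, and a throttled rt_rq is re-enqueued once it drops back under its budget. Below is a minimal sketch of one rollover for a single group; it is a toy model with made-up names, not the kernel loop, which walks every leaf rt_rq of the runqueue. ]

    /* toy_replenish.c - simplified model of one period rollover; not kernel code */
    #include <stdio.h>
    #include <stdint.h>

    struct toy_rt_rq {
        uint64_t rt_time;       /* ns consumed in the current period */
        uint64_t rt_runtime;    /* ns allowed per period             */
        int      rt_throttled;
    };

    static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }

    static void toy_period_rollover(struct toy_rt_rq *rt_rq)
    {
        rt_rq->rt_time -= min_u64(rt_rq->rt_time, rt_rq->rt_runtime);
        if (rt_rq->rt_throttled && rt_rq->rt_time < rt_rq->rt_runtime) {
            rt_rq->rt_throttled = 0;             /* kernel would re-enqueue the rt_rq here */
            printf("unthrottled, %llu ns carried over\n",
                   (unsigned long long)rt_rq->rt_time);
        }
    }

    int main(void)
    {
        /* group overran its 950 ms budget by 10 ms and was throttled */
        struct toy_rt_rq rq = { 960000000ULL, 950000000ULL, 1 };
        toy_period_rollover(&rq);   /* debt is paid down, group runs again */
        return 0;
    }
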
@@ -239,12 +260,7 @@ static void update_curr_rt(struct rq *rq) | |||
239 | cpuacct_charge(curr, delta_exec); | 260 | cpuacct_charge(curr, delta_exec); |
240 | 261 | ||
241 | rt_rq->rt_time += delta_exec; | 262 | rt_rq->rt_time += delta_exec; |
242 | /* | 263 | if (sched_rt_runtime_exceeded(rt_rq)) |
243 | * might make it a tad more accurate: | ||
244 | * | ||
245 | * update_sched_rt_period(rq); | ||
246 | */ | ||
247 | if (sched_rt_ratio_exceeded(rt_rq)) | ||
248 | resched_task(curr); | 264 | resched_task(curr); |
249 | } | 265 | } |
250 | 266 | ||
@@ -253,7 +269,7 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
253 | { | 269 | { |
254 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); | 270 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); |
255 | rt_rq->rt_nr_running++; | 271 | rt_rq->rt_nr_running++; |
256 | #if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED | 272 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED |
257 | if (rt_se_prio(rt_se) < rt_rq->highest_prio) | 273 | if (rt_se_prio(rt_se) < rt_rq->highest_prio) |
258 | rt_rq->highest_prio = rt_se_prio(rt_se); | 274 | rt_rq->highest_prio = rt_se_prio(rt_se); |
259 | #endif | 275 | #endif |
@@ -265,6 +281,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
265 | 281 | ||
266 | update_rt_migration(rq_of_rt_rq(rt_rq)); | 282 | update_rt_migration(rq_of_rt_rq(rt_rq)); |
267 | #endif | 283 | #endif |
284 | #ifdef CONFIG_RT_GROUP_SCHED | ||
285 | if (rt_se_boosted(rt_se)) | ||
286 | rt_rq->rt_nr_boosted++; | ||
287 | #endif | ||
268 | } | 288 | } |
269 | 289 | ||
270 | static inline | 290 | static inline |
@@ -273,7 +293,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
273 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); | 293 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); |
274 | WARN_ON(!rt_rq->rt_nr_running); | 294 | WARN_ON(!rt_rq->rt_nr_running); |
275 | rt_rq->rt_nr_running--; | 295 | rt_rq->rt_nr_running--; |
276 | #if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED | 296 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED |
277 | if (rt_rq->rt_nr_running) { | 297 | if (rt_rq->rt_nr_running) { |
278 | struct rt_prio_array *array; | 298 | struct rt_prio_array *array; |
279 | 299 | ||
@@ -295,6 +315,12 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
295 | 315 | ||
296 | update_rt_migration(rq_of_rt_rq(rt_rq)); | 316 | update_rt_migration(rq_of_rt_rq(rt_rq)); |
297 | #endif /* CONFIG_SMP */ | 317 | #endif /* CONFIG_SMP */ |
318 | #ifdef CONFIG_RT_GROUP_SCHED | ||
319 | if (rt_se_boosted(rt_se)) | ||
320 | rt_rq->rt_nr_boosted--; | ||
321 | |||
322 | WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted); | ||
323 | #endif | ||
298 | } | 324 | } |
299 | 325 | ||
300 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se) | 326 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se) |
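
[ inc_rt_tasks()/dec_rt_tasks() additionally count entities running at a priority inherited through PI (rt_nr_boosted). The group-scheduling variant of rt_rq_throttled() is not part of this hunk, but the accounting only makes sense if that helper refuses to report a group as throttled while it still holds a boosted task, so a lock owner cannot be starved behind the throttle. A sketch under that assumption; hypothetical and simplified. ]

    /* Hypothetical sketch of the group-scheduling predicate; the real helper
     * lives elsewhere in kernel/sched_rt.c and is not shown in this hunk. */
    #include <stdio.h>

    struct toy_rt_rq {
        int rt_throttled;
        int rt_nr_boosted;   /* entities queued at an inherited (boosted) prio */
    };

    static int toy_rt_rq_throttled(struct toy_rt_rq *rt_rq)
    {
        /* never treat the group as throttled while a PI-boosted task is queued */
        return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
    }

    int main(void)
    {
        struct toy_rt_rq over_budget         = { 1, 0 };
        struct toy_rt_rq over_budget_boosted = { 1, 1 };

        printf("plain group throttled:   %d\n", toy_rt_rq_throttled(&over_budget));          /* 1 */
        printf("boosted group throttled: %d\n", toy_rt_rq_throttled(&over_budget_boosted));  /* 0 */
        return 0;
    }
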
@@ -303,7 +329,7 @@ static void enqueue_rt_entity(struct sched_rt_entity *rt_se) | |||
303 | struct rt_prio_array *array = &rt_rq->active; | 329 | struct rt_prio_array *array = &rt_rq->active; |
304 | struct rt_rq *group_rq = group_rt_rq(rt_se); | 330 | struct rt_rq *group_rq = group_rt_rq(rt_se); |
305 | 331 | ||
306 | if (group_rq && group_rq->rt_throttled) | 332 | if (group_rq && rt_rq_throttled(group_rq)) |
307 | return; | 333 | return; |
308 | 334 | ||
309 | list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); | 335 | list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); |
@@ -496,7 +522,7 @@ static struct task_struct *pick_next_task_rt(struct rq *rq) | |||
496 | if (unlikely(!rt_rq->rt_nr_running)) | 522 | if (unlikely(!rt_rq->rt_nr_running)) |
497 | return NULL; | 523 | return NULL; |
498 | 524 | ||
499 | if (sched_rt_ratio_exceeded(rt_rq)) | 525 | if (rt_rq_throttled(rt_rq)) |
500 | return NULL; | 526 | return NULL; |
501 | 527 | ||
502 | do { | 528 | do { |
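
[ pick_next_task_rt() now consults rt_rq_throttled() instead of redoing the budget check. Returning NULL from the RT class is what lets a throttled group fall through to the fair class, so ordinary tasks keep the CPU for the rest of the period. A toy model of that class-iteration idea; this is just the shape of it, not the scheduler core. ]

    /* toy_pick.c - why returning NULL from the RT pick matters; not kernel code */
    #include <stdio.h>
    #include <stddef.h>

    static int rt_throttled = 1;   /* pretend the RT group blew its budget */

    static const char *pick_rt(void)
    {
        return rt_throttled ? NULL : "rt_worker";   /* throttled: offer nothing */
    }

    static const char *pick_fair(void)
    {
        return "cfs_worker";                        /* fair class always has work here */
    }

    int main(void)
    {
        /* the core asks each class in priority order, highest first */
        const char *(*classes[])(void) = { pick_rt, pick_fair };

        for (size_t i = 0; i < sizeof(classes) / sizeof(classes[0]); i++) {
            const char *task = classes[i]();
            if (task) {
                printf("running %s\n", task);       /* prints: running cfs_worker */
                break;
            }
        }
        return 0;
    }
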
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d41ef6b4cf72..924c674b76ea 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -311,22 +311,6 @@ static struct ctl_table kern_table[] = { | |||
311 | .mode = 0644, | 311 | .mode = 0644, |
312 | .proc_handler = &proc_dointvec, | 312 | .proc_handler = &proc_dointvec, |
313 | }, | 313 | }, |
314 | { | ||
315 | .ctl_name = CTL_UNNUMBERED, | ||
316 | .procname = "sched_rt_period_ms", | ||
317 | .data = &sysctl_sched_rt_period, | ||
318 | .maxlen = sizeof(unsigned int), | ||
319 | .mode = 0644, | ||
320 | .proc_handler = &proc_dointvec, | ||
321 | }, | ||
322 | { | ||
323 | .ctl_name = CTL_UNNUMBERED, | ||
324 | .procname = "sched_rt_ratio", | ||
325 | .data = &sysctl_sched_rt_ratio, | ||
326 | .maxlen = sizeof(unsigned int), | ||
327 | .mode = 0644, | ||
328 | .proc_handler = &proc_dointvec, | ||
329 | }, | ||
330 | #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) | 314 | #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) |
331 | { | 315 | { |
332 | .ctl_name = CTL_UNNUMBERED, | 316 | .ctl_name = CTL_UNNUMBERED, |
@@ -348,6 +332,22 @@ static struct ctl_table kern_table[] = { | |||
348 | #endif | 332 | #endif |
349 | { | 333 | { |
350 | .ctl_name = CTL_UNNUMBERED, | 334 | .ctl_name = CTL_UNNUMBERED, |
335 | .procname = "sched_rt_period_us", | ||
336 | .data = &sysctl_sched_rt_period, | ||
337 | .maxlen = sizeof(unsigned int), | ||
338 | .mode = 0644, | ||
339 | .proc_handler = &proc_dointvec, | ||
340 | }, | ||
341 | { | ||
342 | .ctl_name = CTL_UNNUMBERED, | ||
343 | .procname = "sched_rt_runtime_us", | ||
344 | .data = &sysctl_sched_rt_runtime, | ||
345 | .maxlen = sizeof(int), | ||
346 | .mode = 0644, | ||
347 | .proc_handler = &proc_dointvec, | ||
348 | }, | ||
349 | { | ||
350 | .ctl_name = CTL_UNNUMBERED, | ||
351 | .procname = "sched_compat_yield", | 351 | .procname = "sched_compat_yield", |
352 | .data = &sysctl_sched_compat_yield, | 352 | .data = &sysctl_sched_compat_yield, |
353 | .maxlen = sizeof(unsigned int), | 353 | .maxlen = sizeof(unsigned int), |
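
[ On the sysctl side the millisecond period and the fixed-point ratio are dropped in favour of sched_rt_period_us and sched_rt_runtime_us, both plain proc_dointvec entries. Reading them from userspace is straightforward; the paths are exactly as registered above, and writing works the same way but needs root. ]

    /* rt_sysctl.c - print the global RT period and runtime, in microseconds */
    #include <stdio.h>

    static long read_long(const char *path)
    {
        long val = -1;
        FILE *f = fopen(path, "r");

        if (f) {
            if (fscanf(f, "%ld", &val) != 1)
                val = -1;
            fclose(f);
        }
        return val;
    }

    int main(void)
    {
        printf("sched_rt_period_us  = %ld\n",
               read_long("/proc/sys/kernel/sched_rt_period_us"));
        printf("sched_rt_runtime_us = %ld\n",
               read_long("/proc/sys/kernel/sched_rt_runtime_us"));
        return 0;
    }
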
diff --git a/kernel/user.c b/kernel/user.c index 7d7900c5a1fd..7132022a040c 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -57,7 +57,7 @@ struct user_struct root_user = { | |||
57 | .uid_keyring = &root_user_keyring, | 57 | .uid_keyring = &root_user_keyring, |
58 | .session_keyring = &root_session_keyring, | 58 | .session_keyring = &root_session_keyring, |
59 | #endif | 59 | #endif |
60 | #ifdef CONFIG_FAIR_USER_SCHED | 60 | #ifdef CONFIG_USER_SCHED |
61 | .tg = &init_task_group, | 61 | .tg = &init_task_group, |
62 | #endif | 62 | #endif |
63 | }; | 63 | }; |
@@ -90,7 +90,7 @@ static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) | |||
90 | return NULL; | 90 | return NULL; |
91 | } | 91 | } |
92 | 92 | ||
93 | #ifdef CONFIG_FAIR_USER_SCHED | 93 | #ifdef CONFIG_USER_SCHED |
94 | 94 | ||
95 | static void sched_destroy_user(struct user_struct *up) | 95 | static void sched_destroy_user(struct user_struct *up) |
96 | { | 96 | { |
@@ -113,15 +113,15 @@ static void sched_switch_user(struct task_struct *p) | |||
113 | sched_move_task(p); | 113 | sched_move_task(p); |
114 | } | 114 | } |
115 | 115 | ||
116 | #else /* CONFIG_FAIR_USER_SCHED */ | 116 | #else /* CONFIG_USER_SCHED */ |
117 | 117 | ||
118 | static void sched_destroy_user(struct user_struct *up) { } | 118 | static void sched_destroy_user(struct user_struct *up) { } |
119 | static int sched_create_user(struct user_struct *up) { return 0; } | 119 | static int sched_create_user(struct user_struct *up) { return 0; } |
120 | static void sched_switch_user(struct task_struct *p) { } | 120 | static void sched_switch_user(struct task_struct *p) { } |
121 | 121 | ||
122 | #endif /* CONFIG_FAIR_USER_SCHED */ | 122 | #endif /* CONFIG_USER_SCHED */ |
123 | 123 | ||
124 | #if defined(CONFIG_FAIR_USER_SCHED) && defined(CONFIG_SYSFS) | 124 | #if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS) |
125 | 125 | ||
126 | static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */ | 126 | static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */ |
127 | static DEFINE_MUTEX(uids_mutex); | 127 | static DEFINE_MUTEX(uids_mutex); |
@@ -137,6 +137,7 @@ static inline void uids_mutex_unlock(void) | |||
137 | } | 137 | } |
138 | 138 | ||
139 | /* uid directory attributes */ | 139 | /* uid directory attributes */ |
140 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
140 | static ssize_t cpu_shares_show(struct kobject *kobj, | 141 | static ssize_t cpu_shares_show(struct kobject *kobj, |
141 | struct kobj_attribute *attr, | 142 | struct kobj_attribute *attr, |
142 | char *buf) | 143 | char *buf) |
@@ -163,10 +164,45 @@ static ssize_t cpu_shares_store(struct kobject *kobj, | |||
163 | 164 | ||
164 | static struct kobj_attribute cpu_share_attr = | 165 | static struct kobj_attribute cpu_share_attr = |
165 | __ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store); | 166 | __ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store); |
167 | #endif | ||
168 | |||
169 | #ifdef CONFIG_RT_GROUP_SCHED | ||
170 | static ssize_t cpu_rt_runtime_show(struct kobject *kobj, | ||
171 | struct kobj_attribute *attr, | ||
172 | char *buf) | ||
173 | { | ||
174 | struct user_struct *up = container_of(kobj, struct user_struct, kobj); | ||
175 | |||
176 | return sprintf(buf, "%lu\n", sched_group_rt_runtime(up->tg)); | ||
177 | } | ||
178 | |||
179 | static ssize_t cpu_rt_runtime_store(struct kobject *kobj, | ||
180 | struct kobj_attribute *attr, | ||
181 | const char *buf, size_t size) | ||
182 | { | ||
183 | struct user_struct *up = container_of(kobj, struct user_struct, kobj); | ||
184 | unsigned long rt_runtime; | ||
185 | int rc; | ||
186 | |||
187 | sscanf(buf, "%lu", &rt_runtime); | ||
188 | |||
189 | rc = sched_group_set_rt_runtime(up->tg, rt_runtime); | ||
190 | |||
191 | return (rc ? rc : size); | ||
192 | } | ||
193 | |||
194 | static struct kobj_attribute cpu_rt_runtime_attr = | ||
195 | __ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store); | ||
196 | #endif | ||
166 | 197 | ||
167 | /* default attributes per uid directory */ | 198 | /* default attributes per uid directory */ |
168 | static struct attribute *uids_attributes[] = { | 199 | static struct attribute *uids_attributes[] = { |
200 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
169 | &cpu_share_attr.attr, | 201 | &cpu_share_attr.attr, |
202 | #endif | ||
203 | #ifdef CONFIG_RT_GROUP_SCHED | ||
204 | &cpu_rt_runtime_attr.attr, | ||
205 | #endif | ||
170 | NULL | 206 | NULL |
171 | }; | 207 | }; |
172 | 208 | ||
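
[ With CONFIG_USER_SCHED plus CONFIG_RT_GROUP_SCHED each per-uid directory gains a cpu_rt_runtime attribute next to cpu_share, backed by sched_group_rt_runtime()/sched_group_set_rt_runtime(). A small reader for it; the /sys/kernel/uids/<uid>/ location is taken from the kset comment earlier in this file, and the program only assumes that directory exists for the calling uid. ]

    /* uid_rt_runtime.c - read the RT runtime knob of the calling user's group */
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        char path[128], buf[64];

        snprintf(path, sizeof(path), "/sys/kernel/uids/%u/cpu_rt_runtime",
                 (unsigned int)getuid());

        FILE *f = fopen(path, "r");
        if (!f) {
            perror(path);   /* e.g. kernel built without CONFIG_USER_SCHED */
            return 1;
        }
        if (fgets(buf, sizeof(buf), f))
            printf("%s: %s", path, buf);   /* sysfs value already ends in '\n' */
        fclose(f);
        return 0;
    }
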
@@ -269,7 +305,7 @@ static inline void free_user(struct user_struct *up, unsigned long flags) | |||
269 | schedule_work(&up->work); | 305 | schedule_work(&up->work); |
270 | } | 306 | } |
271 | 307 | ||
272 | #else /* CONFIG_FAIR_USER_SCHED && CONFIG_SYSFS */ | 308 | #else /* CONFIG_USER_SCHED && CONFIG_SYSFS */ |
273 | 309 | ||
274 | int uids_sysfs_init(void) { return 0; } | 310 | int uids_sysfs_init(void) { return 0; } |
275 | static inline int uids_user_create(struct user_struct *up) { return 0; } | 311 | static inline int uids_user_create(struct user_struct *up) { return 0; } |
@@ -373,7 +409,7 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) | |||
373 | spin_lock_irq(&uidhash_lock); | 409 | spin_lock_irq(&uidhash_lock); |
374 | up = uid_hash_find(uid, hashent); | 410 | up = uid_hash_find(uid, hashent); |
375 | if (up) { | 411 | if (up) { |
376 | /* This case is not possible when CONFIG_FAIR_USER_SCHED | 412 | /* This case is not possible when CONFIG_USER_SCHED |
377 | * is defined, since we serialize alloc_uid() using | 413 | * is defined, since we serialize alloc_uid() using |
378 | * uids_mutex. Hence no need to call | 414 | * uids_mutex. Hence no need to call |
379 | * sched_destroy_user() or remove_user_sysfs_dir(). | 415 | * sched_destroy_user() or remove_user_sysfs_dir(). |