Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cpu.c                    |  18
-rw-r--r--  kernel/fork.c                   |   5
-rw-r--r--  kernel/irq/manage.c             |   4
-rw-r--r--  kernel/kthread.c                |   2
-rw-r--r--  kernel/mutex.c                  |   2
-rw-r--r--  kernel/printk.c                 |   8
-rw-r--r--  kernel/sched.c                  | 561
-rw-r--r--  kernel/sched_autogroup.c        | 229
-rw-r--r--  kernel/sched_autogroup.h        |  32
-rw-r--r--  kernel/sched_clock.c            |   2
-rw-r--r--  kernel/sched_debug.c            |  91
-rw-r--r--  kernel/sched_fair.c             | 305
-rw-r--r--  kernel/sched_features.h         |   2
-rw-r--r--  kernel/sched_rt.c               |  24
-rw-r--r--  kernel/softirq.c                |   4
-rw-r--r--  kernel/sys.c                    |   4
-rw-r--r--  kernel/sysctl.c                 |  37
-rw-r--r--  kernel/trace/trace_selftest.c   |   2
-rw-r--r--  kernel/watchdog.c               |   2
19 files changed, 756 insertions, 578 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f6e726f18491..cb7a1efa9c2b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -189,7 +189,6 @@ static inline void check_for_tasks(int cpu) | |||
189 | } | 189 | } |
190 | 190 | ||
191 | struct take_cpu_down_param { | 191 | struct take_cpu_down_param { |
192 | struct task_struct *caller; | ||
193 | unsigned long mod; | 192 | unsigned long mod; |
194 | void *hcpu; | 193 | void *hcpu; |
195 | }; | 194 | }; |
@@ -198,7 +197,6 @@ struct take_cpu_down_param { | |||
198 | static int __ref take_cpu_down(void *_param) | 197 | static int __ref take_cpu_down(void *_param) |
199 | { | 198 | { |
200 | struct take_cpu_down_param *param = _param; | 199 | struct take_cpu_down_param *param = _param; |
201 | unsigned int cpu = (unsigned long)param->hcpu; | ||
202 | int err; | 200 | int err; |
203 | 201 | ||
204 | /* Ensure this CPU doesn't handle any more interrupts. */ | 202 | /* Ensure this CPU doesn't handle any more interrupts. */ |
@@ -208,11 +206,6 @@ static int __ref take_cpu_down(void *_param) | |||
208 | 206 | ||
209 | cpu_notify(CPU_DYING | param->mod, param->hcpu); | 207 | cpu_notify(CPU_DYING | param->mod, param->hcpu); |
210 | 208 | ||
211 | if (task_cpu(param->caller) == cpu) | ||
212 | move_task_off_dead_cpu(cpu, param->caller); | ||
213 | /* Force idle task to run as soon as we yield: it should | ||
214 | immediately notice cpu is offline and die quickly. */ | ||
215 | sched_idle_next(); | ||
216 | return 0; | 209 | return 0; |
217 | } | 210 | } |
218 | 211 | ||
@@ -223,7 +216,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) | |||
223 | void *hcpu = (void *)(long)cpu; | 216 | void *hcpu = (void *)(long)cpu; |
224 | unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; | 217 | unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; |
225 | struct take_cpu_down_param tcd_param = { | 218 | struct take_cpu_down_param tcd_param = { |
226 | .caller = current, | ||
227 | .mod = mod, | 219 | .mod = mod, |
228 | .hcpu = hcpu, | 220 | .hcpu = hcpu, |
229 | }; | 221 | }; |
@@ -253,9 +245,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) | |||
253 | } | 245 | } |
254 | BUG_ON(cpu_online(cpu)); | 246 | BUG_ON(cpu_online(cpu)); |
255 | 247 | ||
256 | /* Wait for it to sleep (leaving idle task). */ | 248 | /* |
249 | * The migration_call() CPU_DYING callback will have removed all | ||
250 | * runnable tasks from the cpu, there's only the idle task left now | ||
251 | * that the migration thread is done doing the stop_machine thing. | ||
252 | * | ||
253 | * Wait for the stop thread to go away. | ||
254 | */ | ||
257 | while (!idle_cpu(cpu)) | 255 | while (!idle_cpu(cpu)) |
258 | yield(); | 256 | cpu_relax(); |
259 | 257 | ||
260 | /* This actually kills the CPU. */ | 258 | /* This actually kills the CPU. */ |
261 | __cpu_die(cpu); | 259 | __cpu_die(cpu); |
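For orientation, here is a condensed view of the CPU offline path after this change, pieced together from this hunk and the kernel/sched.c hotplug hunks further down; the call flow is approximate, not a literal transcript:

    /*
     * _cpu_down(cpu)
     *   stop_machine runs take_cpu_down() on the dying cpu
     *     cpu_notify(CPU_DYING | mod, hcpu)
     *       migration_call(CPU_DYING): migrate_tasks(cpu)
     *         every runnable task is pushed to a fallback runqueue;
     *         only the stop/migration thread and the idle task remain
     *   while (!idle_cpu(cpu))
     *           cpu_relax();        wait for the stop thread to go away
     *   __cpu_die(cpu);
     */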
diff --git a/kernel/fork.c b/kernel/fork.c
index 3b159c5991b7..b6f2475f1e83 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -174,8 +174,10 @@ static inline void free_signal_struct(struct signal_struct *sig) | |||
174 | 174 | ||
175 | static inline void put_signal_struct(struct signal_struct *sig) | 175 | static inline void put_signal_struct(struct signal_struct *sig) |
176 | { | 176 | { |
177 | if (atomic_dec_and_test(&sig->sigcnt)) | 177 | if (atomic_dec_and_test(&sig->sigcnt)) { |
178 | sched_autogroup_exit(sig); | ||
178 | free_signal_struct(sig); | 179 | free_signal_struct(sig); |
180 | } | ||
179 | } | 181 | } |
180 | 182 | ||
181 | void __put_task_struct(struct task_struct *tsk) | 183 | void __put_task_struct(struct task_struct *tsk) |
@@ -904,6 +906,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
904 | posix_cpu_timers_init_group(sig); | 906 | posix_cpu_timers_init_group(sig); |
905 | 907 | ||
906 | tty_audit_fork(sig); | 908 | tty_audit_fork(sig); |
909 | sched_autogroup_fork(sig); | ||
907 | 910 | ||
908 | sig->oom_adj = current->signal->oom_adj; | 911 | sig->oom_adj = current->signal->oom_adj; |
909 | sig->oom_score_adj = current->signal->oom_score_adj; | 912 | sig->oom_score_adj = current->signal->oom_score_adj; |
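The two calls added above are the lifetime hooks for the per-session autogroup introduced by this patch. Condensed from the kernel/sched_autogroup.c hunks below (comments added for clarity), the reference pairing looks like this:

    void sched_autogroup_fork(struct signal_struct *sig)
    {
            struct task_struct *p = current;

            spin_lock_irq(&p->sighand->siglock);
            sig->autogroup = autogroup_kref_get(p->signal->autogroup); /* +1 ref */
            spin_unlock_irq(&p->sighand->siglock);
    }

    void sched_autogroup_exit(struct signal_struct *sig)
    {
            /* -1 ref; the final put runs autogroup_destroy() -> sched_destroy_group() */
            autogroup_kref_put(sig->autogroup);
    }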
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 5f92acc5f952..91a5fa25054e 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -577,7 +577,9 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } | |||
577 | */ | 577 | */ |
578 | static int irq_thread(void *data) | 578 | static int irq_thread(void *data) |
579 | { | 579 | { |
580 | struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, }; | 580 | static struct sched_param param = { |
581 | .sched_priority = MAX_USER_RT_PRIO/2, | ||
582 | }; | ||
581 | struct irqaction *action = data; | 583 | struct irqaction *action = data; |
582 | struct irq_desc *desc = irq_to_desc(action->irq); | 584 | struct irq_desc *desc = irq_to_desc(action->irq); |
583 | int wake, oneshot = desc->status & IRQ_ONESHOT; | 585 | int wake, oneshot = desc->status & IRQ_ONESHOT; |
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 2dc3786349d1..74cf6f5e7ade 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -148,7 +148,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), | |||
148 | wait_for_completion(&create.done); | 148 | wait_for_completion(&create.done); |
149 | 149 | ||
150 | if (!IS_ERR(create.result)) { | 150 | if (!IS_ERR(create.result)) { |
151 | struct sched_param param = { .sched_priority = 0 }; | 151 | static struct sched_param param = { .sched_priority = 0 }; |
152 | va_list args; | 152 | va_list args; |
153 | 153 | ||
154 | va_start(args, namefmt); | 154 | va_start(args, namefmt); |
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 200407c1502f..a5889fb28ecf 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -199,7 +199,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
199 | * memory barriers as we'll eventually observe the right | 199 | * memory barriers as we'll eventually observe the right |
200 | * values at the cost of a few extra spins. | 200 | * values at the cost of a few extra spins. |
201 | */ | 201 | */ |
202 | cpu_relax(); | 202 | arch_mutex_cpu_relax(); |
203 | } | 203 | } |
204 | #endif | 204 | #endif |
205 | spin_lock_mutex(&lock->wait_lock, flags); | 205 | spin_lock_mutex(&lock->wait_lock, flags); |
diff --git a/kernel/printk.c b/kernel/printk.c
index a23315dc4498..ab3ffc5b3b64 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1074,17 +1074,17 @@ static DEFINE_PER_CPU(int, printk_pending); | |||
1074 | 1074 | ||
1075 | void printk_tick(void) | 1075 | void printk_tick(void) |
1076 | { | 1076 | { |
1077 | if (__get_cpu_var(printk_pending)) { | 1077 | if (__this_cpu_read(printk_pending)) { |
1078 | __get_cpu_var(printk_pending) = 0; | 1078 | __this_cpu_write(printk_pending, 0); |
1079 | wake_up_interruptible(&log_wait); | 1079 | wake_up_interruptible(&log_wait); |
1080 | } | 1080 | } |
1081 | } | 1081 | } |
1082 | 1082 | ||
1083 | int printk_needs_cpu(int cpu) | 1083 | int printk_needs_cpu(int cpu) |
1084 | { | 1084 | { |
1085 | if (unlikely(cpu_is_offline(cpu))) | 1085 | if (cpu_is_offline(cpu)) |
1086 | printk_tick(); | 1086 | printk_tick(); |
1087 | return per_cpu(printk_pending, cpu); | 1087 | return __this_cpu_read(printk_pending); |
1088 | } | 1088 | } |
1089 | 1089 | ||
1090 | void wake_up_klogd(void) | 1090 | void wake_up_klogd(void) |
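A minimal before/after sketch of the per-cpu accessor change in this hunk; the surrounding printk machinery (log_wait, the tick hook) is taken as given:

    static DEFINE_PER_CPU(int, printk_pending);

    /* before: __get_cpu_var() forms this cpu's slot address, then dereferences it */
    void printk_tick_before(void)
    {
            if (__get_cpu_var(printk_pending)) {
                    __get_cpu_var(printk_pending) = 0;
                    wake_up_interruptible(&log_wait);
            }
    }

    /* after: __this_cpu_read()/__this_cpu_write() are single per-cpu operations,
     * which e.g. x86 can emit as one segment-prefixed instruction each */
    void printk_tick_after(void)
    {
            if (__this_cpu_read(printk_pending)) {
                    __this_cpu_write(printk_pending, 0);
                    wake_up_interruptible(&log_wait);
            }
    }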
diff --git a/kernel/sched.c b/kernel/sched.c
index dc91a4d09ac3..3925a1bbf5dd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -75,9 +75,11 @@ | |||
75 | 75 | ||
76 | #include <asm/tlb.h> | 76 | #include <asm/tlb.h> |
77 | #include <asm/irq_regs.h> | 77 | #include <asm/irq_regs.h> |
78 | #include <asm/mutex.h> | ||
78 | 79 | ||
79 | #include "sched_cpupri.h" | 80 | #include "sched_cpupri.h" |
80 | #include "workqueue_sched.h" | 81 | #include "workqueue_sched.h" |
82 | #include "sched_autogroup.h" | ||
81 | 83 | ||
82 | #define CREATE_TRACE_POINTS | 84 | #define CREATE_TRACE_POINTS |
83 | #include <trace/events/sched.h> | 85 | #include <trace/events/sched.h> |
@@ -253,6 +255,8 @@ struct task_group { | |||
253 | /* runqueue "owned" by this group on each cpu */ | 255 | /* runqueue "owned" by this group on each cpu */ |
254 | struct cfs_rq **cfs_rq; | 256 | struct cfs_rq **cfs_rq; |
255 | unsigned long shares; | 257 | unsigned long shares; |
258 | |||
259 | atomic_t load_weight; | ||
256 | #endif | 260 | #endif |
257 | 261 | ||
258 | #ifdef CONFIG_RT_GROUP_SCHED | 262 | #ifdef CONFIG_RT_GROUP_SCHED |
@@ -268,24 +272,19 @@ struct task_group { | |||
268 | struct task_group *parent; | 272 | struct task_group *parent; |
269 | struct list_head siblings; | 273 | struct list_head siblings; |
270 | struct list_head children; | 274 | struct list_head children; |
275 | |||
276 | #ifdef CONFIG_SCHED_AUTOGROUP | ||
277 | struct autogroup *autogroup; | ||
278 | #endif | ||
271 | }; | 279 | }; |
272 | 280 | ||
273 | #define root_task_group init_task_group | 281 | #define root_task_group init_task_group |
274 | 282 | ||
275 | /* task_group_lock serializes add/remove of task groups and also changes to | 283 | /* task_group_lock serializes the addition/removal of task groups */ |
276 | * a task group's cpu shares. | ||
277 | */ | ||
278 | static DEFINE_SPINLOCK(task_group_lock); | 284 | static DEFINE_SPINLOCK(task_group_lock); |
279 | 285 | ||
280 | #ifdef CONFIG_FAIR_GROUP_SCHED | 286 | #ifdef CONFIG_FAIR_GROUP_SCHED |
281 | 287 | ||
282 | #ifdef CONFIG_SMP | ||
283 | static int root_task_group_empty(void) | ||
284 | { | ||
285 | return list_empty(&root_task_group.children); | ||
286 | } | ||
287 | #endif | ||
288 | |||
289 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD | 288 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD |
290 | 289 | ||
291 | /* | 290 | /* |
@@ -342,6 +341,7 @@ struct cfs_rq { | |||
342 | * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This | 341 | * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This |
343 | * list is used during load balance. | 342 | * list is used during load balance. |
344 | */ | 343 | */ |
344 | int on_list; | ||
345 | struct list_head leaf_cfs_rq_list; | 345 | struct list_head leaf_cfs_rq_list; |
346 | struct task_group *tg; /* group that "owns" this runqueue */ | 346 | struct task_group *tg; /* group that "owns" this runqueue */ |
347 | 347 | ||
@@ -360,14 +360,17 @@ struct cfs_rq { | |||
360 | unsigned long h_load; | 360 | unsigned long h_load; |
361 | 361 | ||
362 | /* | 362 | /* |
363 | * this cpu's part of tg->shares | 363 | * Maintaining per-cpu shares distribution for group scheduling |
364 | * | ||
365 | * load_stamp is the last time we updated the load average | ||
366 | * load_last is the last time we updated the load average and saw load | ||
367 | * load_unacc_exec_time is currently unaccounted execution time | ||
364 | */ | 368 | */ |
365 | unsigned long shares; | 369 | u64 load_avg; |
370 | u64 load_period; | ||
371 | u64 load_stamp, load_last, load_unacc_exec_time; | ||
366 | 372 | ||
367 | /* | 373 | unsigned long load_contribution; |
368 | * load.weight at the time we set shares | ||
369 | */ | ||
370 | unsigned long rq_weight; | ||
371 | #endif | 374 | #endif |
372 | #endif | 375 | #endif |
373 | }; | 376 | }; |
@@ -605,11 +608,14 @@ static inline int cpu_of(struct rq *rq) | |||
605 | */ | 608 | */ |
606 | static inline struct task_group *task_group(struct task_struct *p) | 609 | static inline struct task_group *task_group(struct task_struct *p) |
607 | { | 610 | { |
611 | struct task_group *tg; | ||
608 | struct cgroup_subsys_state *css; | 612 | struct cgroup_subsys_state *css; |
609 | 613 | ||
610 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, | 614 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, |
611 | lockdep_is_held(&task_rq(p)->lock)); | 615 | lockdep_is_held(&task_rq(p)->lock)); |
612 | return container_of(css, struct task_group, css); | 616 | tg = container_of(css, struct task_group, css); |
617 | |||
618 | return autogroup_task_group(p, tg); | ||
613 | } | 619 | } |
614 | 620 | ||
615 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ | 621 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ |
@@ -797,20 +803,6 @@ late_initcall(sched_init_debug); | |||
797 | const_debug unsigned int sysctl_sched_nr_migrate = 32; | 803 | const_debug unsigned int sysctl_sched_nr_migrate = 32; |
798 | 804 | ||
799 | /* | 805 | /* |
800 | * ratelimit for updating the group shares. | ||
801 | * default: 0.25ms | ||
802 | */ | ||
803 | unsigned int sysctl_sched_shares_ratelimit = 250000; | ||
804 | unsigned int normalized_sysctl_sched_shares_ratelimit = 250000; | ||
805 | |||
806 | /* | ||
807 | * Inject some fuzzyness into changing the per-cpu group shares | ||
808 | * this avoids remote rq-locks at the expense of fairness. | ||
809 | * default: 4 | ||
810 | */ | ||
811 | unsigned int sysctl_sched_shares_thresh = 4; | ||
812 | |||
813 | /* | ||
814 | * period over which we average the RT time consumption, measured | 806 | * period over which we average the RT time consumption, measured |
815 | * in ms. | 807 | * in ms. |
816 | * | 808 | * |
@@ -1359,6 +1351,12 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec) | |||
1359 | lw->inv_weight = 0; | 1351 | lw->inv_weight = 0; |
1360 | } | 1352 | } |
1361 | 1353 | ||
1354 | static inline void update_load_set(struct load_weight *lw, unsigned long w) | ||
1355 | { | ||
1356 | lw->weight = w; | ||
1357 | lw->inv_weight = 0; | ||
1358 | } | ||
1359 | |||
1362 | /* | 1360 | /* |
1363 | * To aid in avoiding the subversion of "niceness" due to uneven distribution | 1361 | * To aid in avoiding the subversion of "niceness" due to uneven distribution |
1364 | * of tasks with abnormal "nice" values across CPUs the contribution that | 1362 | * of tasks with abnormal "nice" values across CPUs the contribution that |
@@ -1547,101 +1545,6 @@ static unsigned long cpu_avg_load_per_task(int cpu) | |||
1547 | 1545 | ||
1548 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1546 | #ifdef CONFIG_FAIR_GROUP_SCHED |
1549 | 1547 | ||
1550 | static __read_mostly unsigned long __percpu *update_shares_data; | ||
1551 | |||
1552 | static void __set_se_shares(struct sched_entity *se, unsigned long shares); | ||
1553 | |||
1554 | /* | ||
1555 | * Calculate and set the cpu's group shares. | ||
1556 | */ | ||
1557 | static void update_group_shares_cpu(struct task_group *tg, int cpu, | ||
1558 | unsigned long sd_shares, | ||
1559 | unsigned long sd_rq_weight, | ||
1560 | unsigned long *usd_rq_weight) | ||
1561 | { | ||
1562 | unsigned long shares, rq_weight; | ||
1563 | int boost = 0; | ||
1564 | |||
1565 | rq_weight = usd_rq_weight[cpu]; | ||
1566 | if (!rq_weight) { | ||
1567 | boost = 1; | ||
1568 | rq_weight = NICE_0_LOAD; | ||
1569 | } | ||
1570 | |||
1571 | /* | ||
1572 | * \Sum_j shares_j * rq_weight_i | ||
1573 | * shares_i = ----------------------------- | ||
1574 | * \Sum_j rq_weight_j | ||
1575 | */ | ||
1576 | shares = (sd_shares * rq_weight) / sd_rq_weight; | ||
1577 | shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES); | ||
1578 | |||
1579 | if (abs(shares - tg->se[cpu]->load.weight) > | ||
1580 | sysctl_sched_shares_thresh) { | ||
1581 | struct rq *rq = cpu_rq(cpu); | ||
1582 | unsigned long flags; | ||
1583 | |||
1584 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
1585 | tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight; | ||
1586 | tg->cfs_rq[cpu]->shares = boost ? 0 : shares; | ||
1587 | __set_se_shares(tg->se[cpu], shares); | ||
1588 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
1589 | } | ||
1590 | } | ||
1591 | |||
1592 | /* | ||
1593 | * Re-compute the task group their per cpu shares over the given domain. | ||
1594 | * This needs to be done in a bottom-up fashion because the rq weight of a | ||
1595 | * parent group depends on the shares of its child groups. | ||
1596 | */ | ||
1597 | static int tg_shares_up(struct task_group *tg, void *data) | ||
1598 | { | ||
1599 | unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0; | ||
1600 | unsigned long *usd_rq_weight; | ||
1601 | struct sched_domain *sd = data; | ||
1602 | unsigned long flags; | ||
1603 | int i; | ||
1604 | |||
1605 | if (!tg->se[0]) | ||
1606 | return 0; | ||
1607 | |||
1608 | local_irq_save(flags); | ||
1609 | usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id()); | ||
1610 | |||
1611 | for_each_cpu(i, sched_domain_span(sd)) { | ||
1612 | weight = tg->cfs_rq[i]->load.weight; | ||
1613 | usd_rq_weight[i] = weight; | ||
1614 | |||
1615 | rq_weight += weight; | ||
1616 | /* | ||
1617 | * If there are currently no tasks on the cpu pretend there | ||
1618 | * is one of average load so that when a new task gets to | ||
1619 | * run here it will not get delayed by group starvation. | ||
1620 | */ | ||
1621 | if (!weight) | ||
1622 | weight = NICE_0_LOAD; | ||
1623 | |||
1624 | sum_weight += weight; | ||
1625 | shares += tg->cfs_rq[i]->shares; | ||
1626 | } | ||
1627 | |||
1628 | if (!rq_weight) | ||
1629 | rq_weight = sum_weight; | ||
1630 | |||
1631 | if ((!shares && rq_weight) || shares > tg->shares) | ||
1632 | shares = tg->shares; | ||
1633 | |||
1634 | if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) | ||
1635 | shares = tg->shares; | ||
1636 | |||
1637 | for_each_cpu(i, sched_domain_span(sd)) | ||
1638 | update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight); | ||
1639 | |||
1640 | local_irq_restore(flags); | ||
1641 | |||
1642 | return 0; | ||
1643 | } | ||
1644 | |||
1645 | /* | 1548 | /* |
1646 | * Compute the cpu's hierarchical load factor for each task group. | 1549 | * Compute the cpu's hierarchical load factor for each task group. |
1647 | * This needs to be done in a top-down fashion because the load of a child | 1550 | * This needs to be done in a top-down fashion because the load of a child |
@@ -1656,7 +1559,7 @@ static int tg_load_down(struct task_group *tg, void *data) | |||
1656 | load = cpu_rq(cpu)->load.weight; | 1559 | load = cpu_rq(cpu)->load.weight; |
1657 | } else { | 1560 | } else { |
1658 | load = tg->parent->cfs_rq[cpu]->h_load; | 1561 | load = tg->parent->cfs_rq[cpu]->h_load; |
1659 | load *= tg->cfs_rq[cpu]->shares; | 1562 | load *= tg->se[cpu]->load.weight; |
1660 | load /= tg->parent->cfs_rq[cpu]->load.weight + 1; | 1563 | load /= tg->parent->cfs_rq[cpu]->load.weight + 1; |
1661 | } | 1564 | } |
1662 | 1565 | ||
@@ -1665,34 +1568,11 @@ static int tg_load_down(struct task_group *tg, void *data) | |||
1665 | return 0; | 1568 | return 0; |
1666 | } | 1569 | } |
1667 | 1570 | ||
1668 | static void update_shares(struct sched_domain *sd) | ||
1669 | { | ||
1670 | s64 elapsed; | ||
1671 | u64 now; | ||
1672 | |||
1673 | if (root_task_group_empty()) | ||
1674 | return; | ||
1675 | |||
1676 | now = local_clock(); | ||
1677 | elapsed = now - sd->last_update; | ||
1678 | |||
1679 | if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) { | ||
1680 | sd->last_update = now; | ||
1681 | walk_tg_tree(tg_nop, tg_shares_up, sd); | ||
1682 | } | ||
1683 | } | ||
1684 | |||
1685 | static void update_h_load(long cpu) | 1571 | static void update_h_load(long cpu) |
1686 | { | 1572 | { |
1687 | walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); | 1573 | walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); |
1688 | } | 1574 | } |
1689 | 1575 | ||
1690 | #else | ||
1691 | |||
1692 | static inline void update_shares(struct sched_domain *sd) | ||
1693 | { | ||
1694 | } | ||
1695 | |||
1696 | #endif | 1576 | #endif |
1697 | 1577 | ||
1698 | #ifdef CONFIG_PREEMPT | 1578 | #ifdef CONFIG_PREEMPT |
@@ -1814,15 +1694,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) | |||
1814 | 1694 | ||
1815 | #endif | 1695 | #endif |
1816 | 1696 | ||
1817 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
1818 | static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | ||
1819 | { | ||
1820 | #ifdef CONFIG_SMP | ||
1821 | cfs_rq->shares = shares; | ||
1822 | #endif | ||
1823 | } | ||
1824 | #endif | ||
1825 | |||
1826 | static void calc_load_account_idle(struct rq *this_rq); | 1697 | static void calc_load_account_idle(struct rq *this_rq); |
1827 | static void update_sysctl(void); | 1698 | static void update_sysctl(void); |
1828 | static int get_update_sysctl_factor(void); | 1699 | static int get_update_sysctl_factor(void); |
@@ -2006,6 +1877,7 @@ static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { } | |||
2006 | #include "sched_idletask.c" | 1877 | #include "sched_idletask.c" |
2007 | #include "sched_fair.c" | 1878 | #include "sched_fair.c" |
2008 | #include "sched_rt.c" | 1879 | #include "sched_rt.c" |
1880 | #include "sched_autogroup.c" | ||
2009 | #include "sched_stoptask.c" | 1881 | #include "sched_stoptask.c" |
2010 | #ifdef CONFIG_SCHED_DEBUG | 1882 | #ifdef CONFIG_SCHED_DEBUG |
2011 | # include "sched_debug.c" | 1883 | # include "sched_debug.c" |
@@ -2198,10 +2070,8 @@ static int migration_cpu_stop(void *data); | |||
2198 | * The task's runqueue lock must be held. | 2070 | * The task's runqueue lock must be held. |
2199 | * Returns true if you have to wait for migration thread. | 2071 | * Returns true if you have to wait for migration thread. |
2200 | */ | 2072 | */ |
2201 | static bool migrate_task(struct task_struct *p, int dest_cpu) | 2073 | static bool migrate_task(struct task_struct *p, struct rq *rq) |
2202 | { | 2074 | { |
2203 | struct rq *rq = task_rq(p); | ||
2204 | |||
2205 | /* | 2075 | /* |
2206 | * If the task is not on a runqueue (and not running), then | 2076 | * If the task is not on a runqueue (and not running), then |
2207 | * the next wake-up will properly place the task. | 2077 | * the next wake-up will properly place the task. |
@@ -2381,18 +2251,15 @@ static int select_fallback_rq(int cpu, struct task_struct *p) | |||
2381 | return dest_cpu; | 2251 | return dest_cpu; |
2382 | 2252 | ||
2383 | /* No more Mr. Nice Guy. */ | 2253 | /* No more Mr. Nice Guy. */ |
2384 | if (unlikely(dest_cpu >= nr_cpu_ids)) { | 2254 | dest_cpu = cpuset_cpus_allowed_fallback(p); |
2385 | dest_cpu = cpuset_cpus_allowed_fallback(p); | 2255 | /* |
2386 | /* | 2256 | * Don't tell them about moving exiting tasks or |
2387 | * Don't tell them about moving exiting tasks or | 2257 | * kernel threads (both mm NULL), since they never |
2388 | * kernel threads (both mm NULL), since they never | 2258 | * leave kernel. |
2389 | * leave kernel. | 2259 | */ |
2390 | */ | 2260 | if (p->mm && printk_ratelimit()) { |
2391 | if (p->mm && printk_ratelimit()) { | 2261 | printk(KERN_INFO "process %d (%s) no longer affine to cpu%d\n", |
2392 | printk(KERN_INFO "process %d (%s) no " | 2262 | task_pid_nr(p), p->comm, cpu); |
2393 | "longer affine to cpu%d\n", | ||
2394 | task_pid_nr(p), p->comm, cpu); | ||
2395 | } | ||
2396 | } | 2263 | } |
2397 | 2264 | ||
2398 | return dest_cpu; | 2265 | return dest_cpu; |
@@ -2728,7 +2595,9 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2728 | /* Want to start with kernel preemption disabled. */ | 2595 | /* Want to start with kernel preemption disabled. */ |
2729 | task_thread_info(p)->preempt_count = 1; | 2596 | task_thread_info(p)->preempt_count = 1; |
2730 | #endif | 2597 | #endif |
2598 | #ifdef CONFIG_SMP | ||
2731 | plist_node_init(&p->pushable_tasks, MAX_PRIO); | 2599 | plist_node_init(&p->pushable_tasks, MAX_PRIO); |
2600 | #endif | ||
2732 | 2601 | ||
2733 | put_cpu(); | 2602 | put_cpu(); |
2734 | } | 2603 | } |
@@ -3364,7 +3233,7 @@ void sched_exec(void) | |||
3364 | * select_task_rq() can race against ->cpus_allowed | 3233 | * select_task_rq() can race against ->cpus_allowed |
3365 | */ | 3234 | */ |
3366 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) && | 3235 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) && |
3367 | likely(cpu_active(dest_cpu)) && migrate_task(p, dest_cpu)) { | 3236 | likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) { |
3368 | struct migration_arg arg = { p, dest_cpu }; | 3237 | struct migration_arg arg = { p, dest_cpu }; |
3369 | 3238 | ||
3370 | task_rq_unlock(rq, &flags); | 3239 | task_rq_unlock(rq, &flags); |
@@ -4029,7 +3898,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner) | |||
4029 | if (task_thread_info(rq->curr) != owner || need_resched()) | 3898 | if (task_thread_info(rq->curr) != owner || need_resched()) |
4030 | return 0; | 3899 | return 0; |
4031 | 3900 | ||
4032 | cpu_relax(); | 3901 | arch_mutex_cpu_relax(); |
4033 | } | 3902 | } |
4034 | 3903 | ||
4035 | return 1; | 3904 | return 1; |
@@ -4716,7 +4585,7 @@ static bool check_same_owner(struct task_struct *p) | |||
4716 | } | 4585 | } |
4717 | 4586 | ||
4718 | static int __sched_setscheduler(struct task_struct *p, int policy, | 4587 | static int __sched_setscheduler(struct task_struct *p, int policy, |
4719 | struct sched_param *param, bool user) | 4588 | const struct sched_param *param, bool user) |
4720 | { | 4589 | { |
4721 | int retval, oldprio, oldpolicy = -1, on_rq, running; | 4590 | int retval, oldprio, oldpolicy = -1, on_rq, running; |
4722 | unsigned long flags; | 4591 | unsigned long flags; |
@@ -4871,7 +4740,7 @@ recheck: | |||
4871 | * NOTE that the task may be already dead. | 4740 | * NOTE that the task may be already dead. |
4872 | */ | 4741 | */ |
4873 | int sched_setscheduler(struct task_struct *p, int policy, | 4742 | int sched_setscheduler(struct task_struct *p, int policy, |
4874 | struct sched_param *param) | 4743 | const struct sched_param *param) |
4875 | { | 4744 | { |
4876 | return __sched_setscheduler(p, policy, param, true); | 4745 | return __sched_setscheduler(p, policy, param, true); |
4877 | } | 4746 | } |
@@ -4889,7 +4758,7 @@ EXPORT_SYMBOL_GPL(sched_setscheduler); | |||
4889 | * but our caller might not have that capability. | 4758 | * but our caller might not have that capability. |
4890 | */ | 4759 | */ |
4891 | int sched_setscheduler_nocheck(struct task_struct *p, int policy, | 4760 | int sched_setscheduler_nocheck(struct task_struct *p, int policy, |
4892 | struct sched_param *param) | 4761 | const struct sched_param *param) |
4893 | { | 4762 | { |
4894 | return __sched_setscheduler(p, policy, param, false); | 4763 | return __sched_setscheduler(p, policy, param, false); |
4895 | } | 4764 | } |
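With sched_setscheduler() and sched_setscheduler_nocheck() now taking a const struct sched_param *, callers can keep the parameter block in shared read-only storage rather than building it on the stack each time. A minimal sketch; the thread function below is hypothetical, only the setter API comes from this patch:

    static const struct sched_param fifo_half = {
            .sched_priority = MAX_USER_RT_PRIO / 2,
    };

    /* hypothetical kernel thread that promotes itself to FIFO priority */
    static int my_rt_thread(void *unused)
    {
            sched_setscheduler_nocheck(current, SCHED_FIFO, &fifo_half);

            while (!kthread_should_stop()) {
                    set_current_state(TASK_INTERRUPTIBLE);
                    schedule();
            }
            return 0;
    }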
@@ -5405,7 +5274,7 @@ void sched_show_task(struct task_struct *p) | |||
5405 | unsigned state; | 5274 | unsigned state; |
5406 | 5275 | ||
5407 | state = p->state ? __ffs(p->state) + 1 : 0; | 5276 | state = p->state ? __ffs(p->state) + 1 : 0; |
5408 | printk(KERN_INFO "%-13.13s %c", p->comm, | 5277 | printk(KERN_INFO "%-15.15s %c", p->comm, |
5409 | state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'); | 5278 | state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'); |
5410 | #if BITS_PER_LONG == 32 | 5279 | #if BITS_PER_LONG == 32 |
5411 | if (state == TASK_RUNNING) | 5280 | if (state == TASK_RUNNING) |
@@ -5569,7 +5438,6 @@ static void update_sysctl(void) | |||
5569 | SET_SYSCTL(sched_min_granularity); | 5438 | SET_SYSCTL(sched_min_granularity); |
5570 | SET_SYSCTL(sched_latency); | 5439 | SET_SYSCTL(sched_latency); |
5571 | SET_SYSCTL(sched_wakeup_granularity); | 5440 | SET_SYSCTL(sched_wakeup_granularity); |
5572 | SET_SYSCTL(sched_shares_ratelimit); | ||
5573 | #undef SET_SYSCTL | 5441 | #undef SET_SYSCTL |
5574 | } | 5442 | } |
5575 | 5443 | ||
@@ -5645,7 +5513,7 @@ again: | |||
5645 | goto out; | 5513 | goto out; |
5646 | 5514 | ||
5647 | dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); | 5515 | dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); |
5648 | if (migrate_task(p, dest_cpu)) { | 5516 | if (migrate_task(p, rq)) { |
5649 | struct migration_arg arg = { p, dest_cpu }; | 5517 | struct migration_arg arg = { p, dest_cpu }; |
5650 | /* Need help from migration thread: drop lock and wait. */ | 5518 | /* Need help from migration thread: drop lock and wait. */ |
5651 | task_rq_unlock(rq, &flags); | 5519 | task_rq_unlock(rq, &flags); |
@@ -5727,29 +5595,20 @@ static int migration_cpu_stop(void *data) | |||
5727 | } | 5595 | } |
5728 | 5596 | ||
5729 | #ifdef CONFIG_HOTPLUG_CPU | 5597 | #ifdef CONFIG_HOTPLUG_CPU |
5598 | |||
5730 | /* | 5599 | /* |
5731 | * Figure out where task on dead CPU should go, use force if necessary. | 5600 | * Ensures that the idle task is using init_mm right before its cpu goes |
5601 | * offline. | ||
5732 | */ | 5602 | */ |
5733 | void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) | 5603 | void idle_task_exit(void) |
5734 | { | 5604 | { |
5735 | struct rq *rq = cpu_rq(dead_cpu); | 5605 | struct mm_struct *mm = current->active_mm; |
5736 | int needs_cpu, uninitialized_var(dest_cpu); | ||
5737 | unsigned long flags; | ||
5738 | 5606 | ||
5739 | local_irq_save(flags); | 5607 | BUG_ON(cpu_online(smp_processor_id())); |
5740 | 5608 | ||
5741 | raw_spin_lock(&rq->lock); | 5609 | if (mm != &init_mm) |
5742 | needs_cpu = (task_cpu(p) == dead_cpu) && (p->state != TASK_WAKING); | 5610 | switch_mm(mm, &init_mm, current); |
5743 | if (needs_cpu) | 5611 | mmdrop(mm); |
5744 | dest_cpu = select_fallback_rq(dead_cpu, p); | ||
5745 | raw_spin_unlock(&rq->lock); | ||
5746 | /* | ||
5747 | * It can only fail if we race with set_cpus_allowed(), | ||
5748 | * in the racer should migrate the task anyway. | ||
5749 | */ | ||
5750 | if (needs_cpu) | ||
5751 | __migrate_task(p, dead_cpu, dest_cpu); | ||
5752 | local_irq_restore(flags); | ||
5753 | } | 5612 | } |
5754 | 5613 | ||
5755 | /* | 5614 | /* |
@@ -5762,128 +5621,69 @@ void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) | |||
5762 | static void migrate_nr_uninterruptible(struct rq *rq_src) | 5621 | static void migrate_nr_uninterruptible(struct rq *rq_src) |
5763 | { | 5622 | { |
5764 | struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask)); | 5623 | struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask)); |
5765 | unsigned long flags; | ||
5766 | 5624 | ||
5767 | local_irq_save(flags); | ||
5768 | double_rq_lock(rq_src, rq_dest); | ||
5769 | rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible; | 5625 | rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible; |
5770 | rq_src->nr_uninterruptible = 0; | 5626 | rq_src->nr_uninterruptible = 0; |
5771 | double_rq_unlock(rq_src, rq_dest); | ||
5772 | local_irq_restore(flags); | ||
5773 | } | ||
5774 | |||
5775 | /* Run through task list and migrate tasks from the dead cpu. */ | ||
5776 | static void migrate_live_tasks(int src_cpu) | ||
5777 | { | ||
5778 | struct task_struct *p, *t; | ||
5779 | |||
5780 | read_lock(&tasklist_lock); | ||
5781 | |||
5782 | do_each_thread(t, p) { | ||
5783 | if (p == current) | ||
5784 | continue; | ||
5785 | |||
5786 | if (task_cpu(p) == src_cpu) | ||
5787 | move_task_off_dead_cpu(src_cpu, p); | ||
5788 | } while_each_thread(t, p); | ||
5789 | |||
5790 | read_unlock(&tasklist_lock); | ||
5791 | } | 5627 | } |
5792 | 5628 | ||
5793 | /* | 5629 | /* |
5794 | * Schedules idle task to be the next runnable task on current CPU. | 5630 | * remove the tasks which were accounted by rq from calc_load_tasks. |
5795 | * It does so by boosting its priority to highest possible. | ||
5796 | * Used by CPU offline code. | ||
5797 | */ | 5631 | */ |
5798 | void sched_idle_next(void) | 5632 | static void calc_global_load_remove(struct rq *rq) |
5799 | { | 5633 | { |
5800 | int this_cpu = smp_processor_id(); | 5634 | atomic_long_sub(rq->calc_load_active, &calc_load_tasks); |
5801 | struct rq *rq = cpu_rq(this_cpu); | 5635 | rq->calc_load_active = 0; |
5802 | struct task_struct *p = rq->idle; | ||
5803 | unsigned long flags; | ||
5804 | |||
5805 | /* cpu has to be offline */ | ||
5806 | BUG_ON(cpu_online(this_cpu)); | ||
5807 | |||
5808 | /* | ||
5809 | * Strictly not necessary since rest of the CPUs are stopped by now | ||
5810 | * and interrupts disabled on the current cpu. | ||
5811 | */ | ||
5812 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
5813 | |||
5814 | __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); | ||
5815 | |||
5816 | activate_task(rq, p, 0); | ||
5817 | |||
5818 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
5819 | } | 5636 | } |
5820 | 5637 | ||
5821 | /* | 5638 | /* |
5822 | * Ensures that the idle task is using init_mm right before its cpu goes | 5639 | * Migrate all tasks from the rq, sleeping tasks will be migrated by |
5823 | * offline. | 5640 | * try_to_wake_up()->select_task_rq(). |
5641 | * | ||
5642 | * Called with rq->lock held even though we're in stop_machine() and | ||
5643 | * there's no concurrency possible, we hold the required locks anyway | ||
5644 | * because of lock validation efforts. | ||
5824 | */ | 5645 | */ |
5825 | void idle_task_exit(void) | 5646 | static void migrate_tasks(unsigned int dead_cpu) |
5826 | { | ||
5827 | struct mm_struct *mm = current->active_mm; | ||
5828 | |||
5829 | BUG_ON(cpu_online(smp_processor_id())); | ||
5830 | |||
5831 | if (mm != &init_mm) | ||
5832 | switch_mm(mm, &init_mm, current); | ||
5833 | mmdrop(mm); | ||
5834 | } | ||
5835 | |||
5836 | /* called under rq->lock with disabled interrupts */ | ||
5837 | static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) | ||
5838 | { | 5647 | { |
5839 | struct rq *rq = cpu_rq(dead_cpu); | 5648 | struct rq *rq = cpu_rq(dead_cpu); |
5840 | 5649 | struct task_struct *next, *stop = rq->stop; | |
5841 | /* Must be exiting, otherwise would be on tasklist. */ | 5650 | int dest_cpu; |
5842 | BUG_ON(!p->exit_state); | ||
5843 | |||
5844 | /* Cannot have done final schedule yet: would have vanished. */ | ||
5845 | BUG_ON(p->state == TASK_DEAD); | ||
5846 | |||
5847 | get_task_struct(p); | ||
5848 | 5651 | ||
5849 | /* | 5652 | /* |
5850 | * Drop lock around migration; if someone else moves it, | 5653 | * Fudge the rq selection such that the below task selection loop |
5851 | * that's OK. No task can be added to this CPU, so iteration is | 5654 | * doesn't get stuck on the currently eligible stop task. |
5852 | * fine. | 5655 | * |
5656 | * We're currently inside stop_machine() and the rq is either stuck | ||
5657 | * in the stop_machine_cpu_stop() loop, or we're executing this code, | ||
5658 | * either way we should never end up calling schedule() until we're | ||
5659 | * done here. | ||
5853 | */ | 5660 | */ |
5854 | raw_spin_unlock_irq(&rq->lock); | 5661 | rq->stop = NULL; |
5855 | move_task_off_dead_cpu(dead_cpu, p); | ||
5856 | raw_spin_lock_irq(&rq->lock); | ||
5857 | |||
5858 | put_task_struct(p); | ||
5859 | } | ||
5860 | |||
5861 | /* release_task() removes task from tasklist, so we won't find dead tasks. */ | ||
5862 | static void migrate_dead_tasks(unsigned int dead_cpu) | ||
5863 | { | ||
5864 | struct rq *rq = cpu_rq(dead_cpu); | ||
5865 | struct task_struct *next; | ||
5866 | 5662 | ||
5867 | for ( ; ; ) { | 5663 | for ( ; ; ) { |
5868 | if (!rq->nr_running) | 5664 | /* |
5665 | * There's this thread running, bail when that's the only | ||
5666 | * remaining thread. | ||
5667 | */ | ||
5668 | if (rq->nr_running == 1) | ||
5869 | break; | 5669 | break; |
5670 | |||
5870 | next = pick_next_task(rq); | 5671 | next = pick_next_task(rq); |
5871 | if (!next) | 5672 | BUG_ON(!next); |
5872 | break; | ||
5873 | next->sched_class->put_prev_task(rq, next); | 5673 | next->sched_class->put_prev_task(rq, next); |
5874 | migrate_dead(dead_cpu, next); | ||
5875 | 5674 | ||
5675 | /* Find suitable destination for @next, with force if needed. */ | ||
5676 | dest_cpu = select_fallback_rq(dead_cpu, next); | ||
5677 | raw_spin_unlock(&rq->lock); | ||
5678 | |||
5679 | __migrate_task(next, dead_cpu, dest_cpu); | ||
5680 | |||
5681 | raw_spin_lock(&rq->lock); | ||
5876 | } | 5682 | } |
5877 | } | ||
5878 | 5683 | ||
5879 | /* | 5684 | rq->stop = stop; |
5880 | * remove the tasks which were accounted by rq from calc_load_tasks. | ||
5881 | */ | ||
5882 | static void calc_global_load_remove(struct rq *rq) | ||
5883 | { | ||
5884 | atomic_long_sub(rq->calc_load_active, &calc_load_tasks); | ||
5885 | rq->calc_load_active = 0; | ||
5886 | } | 5685 | } |
5686 | |||
5887 | #endif /* CONFIG_HOTPLUG_CPU */ | 5687 | #endif /* CONFIG_HOTPLUG_CPU */ |
5888 | 5688 | ||
5889 | #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) | 5689 | #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) |
@@ -6093,15 +5893,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
6093 | unsigned long flags; | 5893 | unsigned long flags; |
6094 | struct rq *rq = cpu_rq(cpu); | 5894 | struct rq *rq = cpu_rq(cpu); |
6095 | 5895 | ||
6096 | switch (action) { | 5896 | switch (action & ~CPU_TASKS_FROZEN) { |
6097 | 5897 | ||
6098 | case CPU_UP_PREPARE: | 5898 | case CPU_UP_PREPARE: |
6099 | case CPU_UP_PREPARE_FROZEN: | ||
6100 | rq->calc_load_update = calc_load_update; | 5899 | rq->calc_load_update = calc_load_update; |
6101 | break; | 5900 | break; |
6102 | 5901 | ||
6103 | case CPU_ONLINE: | 5902 | case CPU_ONLINE: |
6104 | case CPU_ONLINE_FROZEN: | ||
6105 | /* Update our root-domain */ | 5903 | /* Update our root-domain */ |
6106 | raw_spin_lock_irqsave(&rq->lock, flags); | 5904 | raw_spin_lock_irqsave(&rq->lock, flags); |
6107 | if (rq->rd) { | 5905 | if (rq->rd) { |
@@ -6113,30 +5911,19 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
6113 | break; | 5911 | break; |
6114 | 5912 | ||
6115 | #ifdef CONFIG_HOTPLUG_CPU | 5913 | #ifdef CONFIG_HOTPLUG_CPU |
6116 | case CPU_DEAD: | ||
6117 | case CPU_DEAD_FROZEN: | ||
6118 | migrate_live_tasks(cpu); | ||
6119 | /* Idle task back to normal (off runqueue, low prio) */ | ||
6120 | raw_spin_lock_irq(&rq->lock); | ||
6121 | deactivate_task(rq, rq->idle, 0); | ||
6122 | __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); | ||
6123 | rq->idle->sched_class = &idle_sched_class; | ||
6124 | migrate_dead_tasks(cpu); | ||
6125 | raw_spin_unlock_irq(&rq->lock); | ||
6126 | migrate_nr_uninterruptible(rq); | ||
6127 | BUG_ON(rq->nr_running != 0); | ||
6128 | calc_global_load_remove(rq); | ||
6129 | break; | ||
6130 | |||
6131 | case CPU_DYING: | 5914 | case CPU_DYING: |
6132 | case CPU_DYING_FROZEN: | ||
6133 | /* Update our root-domain */ | 5915 | /* Update our root-domain */ |
6134 | raw_spin_lock_irqsave(&rq->lock, flags); | 5916 | raw_spin_lock_irqsave(&rq->lock, flags); |
6135 | if (rq->rd) { | 5917 | if (rq->rd) { |
6136 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); | 5918 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); |
6137 | set_rq_offline(rq); | 5919 | set_rq_offline(rq); |
6138 | } | 5920 | } |
5921 | migrate_tasks(cpu); | ||
5922 | BUG_ON(rq->nr_running != 1); /* the migration thread */ | ||
6139 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 5923 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
5924 | |||
5925 | migrate_nr_uninterruptible(rq); | ||
5926 | calc_global_load_remove(rq); | ||
6140 | break; | 5927 | break; |
6141 | #endif | 5928 | #endif |
6142 | } | 5929 | } |
@@ -7867,15 +7654,13 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) | |||
7867 | 7654 | ||
7868 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7655 | #ifdef CONFIG_FAIR_GROUP_SCHED |
7869 | static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, | 7656 | static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, |
7870 | struct sched_entity *se, int cpu, int add, | 7657 | struct sched_entity *se, int cpu, |
7871 | struct sched_entity *parent) | 7658 | struct sched_entity *parent) |
7872 | { | 7659 | { |
7873 | struct rq *rq = cpu_rq(cpu); | 7660 | struct rq *rq = cpu_rq(cpu); |
7874 | tg->cfs_rq[cpu] = cfs_rq; | 7661 | tg->cfs_rq[cpu] = cfs_rq; |
7875 | init_cfs_rq(cfs_rq, rq); | 7662 | init_cfs_rq(cfs_rq, rq); |
7876 | cfs_rq->tg = tg; | 7663 | cfs_rq->tg = tg; |
7877 | if (add) | ||
7878 | list_add(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list); | ||
7879 | 7664 | ||
7880 | tg->se[cpu] = se; | 7665 | tg->se[cpu] = se; |
7881 | /* se could be NULL for init_task_group */ | 7666 | /* se could be NULL for init_task_group */ |
@@ -7888,15 +7673,14 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, | |||
7888 | se->cfs_rq = parent->my_q; | 7673 | se->cfs_rq = parent->my_q; |
7889 | 7674 | ||
7890 | se->my_q = cfs_rq; | 7675 | se->my_q = cfs_rq; |
7891 | se->load.weight = tg->shares; | 7676 | update_load_set(&se->load, 0); |
7892 | se->load.inv_weight = 0; | ||
7893 | se->parent = parent; | 7677 | se->parent = parent; |
7894 | } | 7678 | } |
7895 | #endif | 7679 | #endif |
7896 | 7680 | ||
7897 | #ifdef CONFIG_RT_GROUP_SCHED | 7681 | #ifdef CONFIG_RT_GROUP_SCHED |
7898 | static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, | 7682 | static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, |
7899 | struct sched_rt_entity *rt_se, int cpu, int add, | 7683 | struct sched_rt_entity *rt_se, int cpu, |
7900 | struct sched_rt_entity *parent) | 7684 | struct sched_rt_entity *parent) |
7901 | { | 7685 | { |
7902 | struct rq *rq = cpu_rq(cpu); | 7686 | struct rq *rq = cpu_rq(cpu); |
@@ -7905,8 +7689,6 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, | |||
7905 | init_rt_rq(rt_rq, rq); | 7689 | init_rt_rq(rt_rq, rq); |
7906 | rt_rq->tg = tg; | 7690 | rt_rq->tg = tg; |
7907 | rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime; | 7691 | rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime; |
7908 | if (add) | ||
7909 | list_add(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list); | ||
7910 | 7692 | ||
7911 | tg->rt_se[cpu] = rt_se; | 7693 | tg->rt_se[cpu] = rt_se; |
7912 | if (!rt_se) | 7694 | if (!rt_se) |
@@ -7979,13 +7761,9 @@ void __init sched_init(void) | |||
7979 | #ifdef CONFIG_CGROUP_SCHED | 7761 | #ifdef CONFIG_CGROUP_SCHED |
7980 | list_add(&init_task_group.list, &task_groups); | 7762 | list_add(&init_task_group.list, &task_groups); |
7981 | INIT_LIST_HEAD(&init_task_group.children); | 7763 | INIT_LIST_HEAD(&init_task_group.children); |
7982 | 7764 | autogroup_init(&init_task); | |
7983 | #endif /* CONFIG_CGROUP_SCHED */ | 7765 | #endif /* CONFIG_CGROUP_SCHED */ |
7984 | 7766 | ||
7985 | #if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP | ||
7986 | update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long), | ||
7987 | __alignof__(unsigned long)); | ||
7988 | #endif | ||
7989 | for_each_possible_cpu(i) { | 7767 | for_each_possible_cpu(i) { |
7990 | struct rq *rq; | 7768 | struct rq *rq; |
7991 | 7769 | ||
@@ -8019,7 +7797,7 @@ void __init sched_init(void) | |||
8019 | * We achieve this by letting init_task_group's tasks sit | 7797 | * We achieve this by letting init_task_group's tasks sit |
8020 | * directly in rq->cfs (i.e init_task_group->se[] = NULL). | 7798 | * directly in rq->cfs (i.e init_task_group->se[] = NULL). |
8021 | */ | 7799 | */ |
8022 | init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, 1, NULL); | 7800 | init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, NULL); |
8023 | #endif | 7801 | #endif |
8024 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 7802 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
8025 | 7803 | ||
@@ -8027,7 +7805,7 @@ void __init sched_init(void) | |||
8027 | #ifdef CONFIG_RT_GROUP_SCHED | 7805 | #ifdef CONFIG_RT_GROUP_SCHED |
8028 | INIT_LIST_HEAD(&rq->leaf_rt_rq_list); | 7806 | INIT_LIST_HEAD(&rq->leaf_rt_rq_list); |
8029 | #ifdef CONFIG_CGROUP_SCHED | 7807 | #ifdef CONFIG_CGROUP_SCHED |
8030 | init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, 1, NULL); | 7808 | init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, NULL); |
8031 | #endif | 7809 | #endif |
8032 | #endif | 7810 | #endif |
8033 | 7811 | ||
@@ -8303,7 +8081,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
8303 | if (!se) | 8081 | if (!se) |
8304 | goto err_free_rq; | 8082 | goto err_free_rq; |
8305 | 8083 | ||
8306 | init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); | 8084 | init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]); |
8307 | } | 8085 | } |
8308 | 8086 | ||
8309 | return 1; | 8087 | return 1; |
@@ -8314,15 +8092,21 @@ err: | |||
8314 | return 0; | 8092 | return 0; |
8315 | } | 8093 | } |
8316 | 8094 | ||
8317 | static inline void register_fair_sched_group(struct task_group *tg, int cpu) | ||
8318 | { | ||
8319 | list_add_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list, | ||
8320 | &cpu_rq(cpu)->leaf_cfs_rq_list); | ||
8321 | } | ||
8322 | |||
8323 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) | 8095 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) |
8324 | { | 8096 | { |
8325 | list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list); | 8097 | struct rq *rq = cpu_rq(cpu); |
8098 | unsigned long flags; | ||
8099 | |||
8100 | /* | ||
8101 | * Only empty task groups can be destroyed; so we can speculatively | ||
8102 | * check on_list without danger of it being re-added. | ||
8103 | */ | ||
8104 | if (!tg->cfs_rq[cpu]->on_list) | ||
8105 | return; | ||
8106 | |||
8107 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
8108 | list_del_leaf_cfs_rq(tg->cfs_rq[cpu]); | ||
8109 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
8326 | } | 8110 | } |
8327 | #else /* !CONFIG_FAIR_GROUP_SCHED */ | 8111 | #else /* !CONFIG_FAIR_GROUP_SCHED */ |
8328 | static inline void free_fair_sched_group(struct task_group *tg) | 8112 | static inline void free_fair_sched_group(struct task_group *tg) |
@@ -8335,10 +8119,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
8335 | return 1; | 8119 | return 1; |
8336 | } | 8120 | } |
8337 | 8121 | ||
8338 | static inline void register_fair_sched_group(struct task_group *tg, int cpu) | ||
8339 | { | ||
8340 | } | ||
8341 | |||
8342 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) | 8122 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) |
8343 | { | 8123 | { |
8344 | } | 8124 | } |
@@ -8393,7 +8173,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | |||
8393 | if (!rt_se) | 8173 | if (!rt_se) |
8394 | goto err_free_rq; | 8174 | goto err_free_rq; |
8395 | 8175 | ||
8396 | init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); | 8176 | init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]); |
8397 | } | 8177 | } |
8398 | 8178 | ||
8399 | return 1; | 8179 | return 1; |
@@ -8403,17 +8183,6 @@ err_free_rq: | |||
8403 | err: | 8183 | err: |
8404 | return 0; | 8184 | return 0; |
8405 | } | 8185 | } |
8406 | |||
8407 | static inline void register_rt_sched_group(struct task_group *tg, int cpu) | ||
8408 | { | ||
8409 | list_add_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list, | ||
8410 | &cpu_rq(cpu)->leaf_rt_rq_list); | ||
8411 | } | ||
8412 | |||
8413 | static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) | ||
8414 | { | ||
8415 | list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list); | ||
8416 | } | ||
8417 | #else /* !CONFIG_RT_GROUP_SCHED */ | 8186 | #else /* !CONFIG_RT_GROUP_SCHED */ |
8418 | static inline void free_rt_sched_group(struct task_group *tg) | 8187 | static inline void free_rt_sched_group(struct task_group *tg) |
8419 | { | 8188 | { |
@@ -8424,14 +8193,6 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | |||
8424 | { | 8193 | { |
8425 | return 1; | 8194 | return 1; |
8426 | } | 8195 | } |
8427 | |||
8428 | static inline void register_rt_sched_group(struct task_group *tg, int cpu) | ||
8429 | { | ||
8430 | } | ||
8431 | |||
8432 | static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) | ||
8433 | { | ||
8434 | } | ||
8435 | #endif /* CONFIG_RT_GROUP_SCHED */ | 8196 | #endif /* CONFIG_RT_GROUP_SCHED */ |
8436 | 8197 | ||
8437 | #ifdef CONFIG_CGROUP_SCHED | 8198 | #ifdef CONFIG_CGROUP_SCHED |
@@ -8447,7 +8208,6 @@ struct task_group *sched_create_group(struct task_group *parent) | |||
8447 | { | 8208 | { |
8448 | struct task_group *tg; | 8209 | struct task_group *tg; |
8449 | unsigned long flags; | 8210 | unsigned long flags; |
8450 | int i; | ||
8451 | 8211 | ||
8452 | tg = kzalloc(sizeof(*tg), GFP_KERNEL); | 8212 | tg = kzalloc(sizeof(*tg), GFP_KERNEL); |
8453 | if (!tg) | 8213 | if (!tg) |
@@ -8460,10 +8220,6 @@ struct task_group *sched_create_group(struct task_group *parent) | |||
8460 | goto err; | 8220 | goto err; |
8461 | 8221 | ||
8462 | spin_lock_irqsave(&task_group_lock, flags); | 8222 | spin_lock_irqsave(&task_group_lock, flags); |
8463 | for_each_possible_cpu(i) { | ||
8464 | register_fair_sched_group(tg, i); | ||
8465 | register_rt_sched_group(tg, i); | ||
8466 | } | ||
8467 | list_add_rcu(&tg->list, &task_groups); | 8223 | list_add_rcu(&tg->list, &task_groups); |
8468 | 8224 | ||
8469 | WARN_ON(!parent); /* root should already exist */ | 8225 | WARN_ON(!parent); /* root should already exist */ |
@@ -8493,11 +8249,11 @@ void sched_destroy_group(struct task_group *tg) | |||
8493 | unsigned long flags; | 8249 | unsigned long flags; |
8494 | int i; | 8250 | int i; |
8495 | 8251 | ||
8496 | spin_lock_irqsave(&task_group_lock, flags); | 8252 | /* end participation in shares distribution */ |
8497 | for_each_possible_cpu(i) { | 8253 | for_each_possible_cpu(i) |
8498 | unregister_fair_sched_group(tg, i); | 8254 | unregister_fair_sched_group(tg, i); |
8499 | unregister_rt_sched_group(tg, i); | 8255 | |
8500 | } | 8256 | spin_lock_irqsave(&task_group_lock, flags); |
8501 | list_del_rcu(&tg->list); | 8257 | list_del_rcu(&tg->list); |
8502 | list_del_rcu(&tg->siblings); | 8258 | list_del_rcu(&tg->siblings); |
8503 | spin_unlock_irqrestore(&task_group_lock, flags); | 8259 | spin_unlock_irqrestore(&task_group_lock, flags); |
@@ -8544,33 +8300,6 @@ void sched_move_task(struct task_struct *tsk) | |||
8544 | #endif /* CONFIG_CGROUP_SCHED */ | 8300 | #endif /* CONFIG_CGROUP_SCHED */ |
8545 | 8301 | ||
8546 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8302 | #ifdef CONFIG_FAIR_GROUP_SCHED |
8547 | static void __set_se_shares(struct sched_entity *se, unsigned long shares) | ||
8548 | { | ||
8549 | struct cfs_rq *cfs_rq = se->cfs_rq; | ||
8550 | int on_rq; | ||
8551 | |||
8552 | on_rq = se->on_rq; | ||
8553 | if (on_rq) | ||
8554 | dequeue_entity(cfs_rq, se, 0); | ||
8555 | |||
8556 | se->load.weight = shares; | ||
8557 | se->load.inv_weight = 0; | ||
8558 | |||
8559 | if (on_rq) | ||
8560 | enqueue_entity(cfs_rq, se, 0); | ||
8561 | } | ||
8562 | |||
8563 | static void set_se_shares(struct sched_entity *se, unsigned long shares) | ||
8564 | { | ||
8565 | struct cfs_rq *cfs_rq = se->cfs_rq; | ||
8566 | struct rq *rq = cfs_rq->rq; | ||
8567 | unsigned long flags; | ||
8568 | |||
8569 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
8570 | __set_se_shares(se, shares); | ||
8571 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
8572 | } | ||
8573 | |||
8574 | static DEFINE_MUTEX(shares_mutex); | 8303 | static DEFINE_MUTEX(shares_mutex); |
8575 | 8304 | ||
8576 | int sched_group_set_shares(struct task_group *tg, unsigned long shares) | 8305 | int sched_group_set_shares(struct task_group *tg, unsigned long shares) |
@@ -8593,37 +8322,19 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
8593 | if (tg->shares == shares) | 8322 | if (tg->shares == shares) |
8594 | goto done; | 8323 | goto done; |
8595 | 8324 | ||
8596 | spin_lock_irqsave(&task_group_lock, flags); | ||
8597 | for_each_possible_cpu(i) | ||
8598 | unregister_fair_sched_group(tg, i); | ||
8599 | list_del_rcu(&tg->siblings); | ||
8600 | spin_unlock_irqrestore(&task_group_lock, flags); | ||
8601 | |||
8602 | /* wait for any ongoing reference to this group to finish */ | ||
8603 | synchronize_sched(); | ||
8604 | |||
8605 | /* | ||
8606 | * Now we are free to modify the group's share on each cpu | ||
8607 | * w/o tripping rebalance_share or load_balance_fair. | ||
8608 | */ | ||
8609 | tg->shares = shares; | 8325 | tg->shares = shares; |
8610 | for_each_possible_cpu(i) { | 8326 | for_each_possible_cpu(i) { |
8611 | /* | 8327 | struct rq *rq = cpu_rq(i); |
8612 | * force a rebalance | 8328 | struct sched_entity *se; |
8613 | */ | 8329 | |
8614 | cfs_rq_set_shares(tg->cfs_rq[i], 0); | 8330 | se = tg->se[i]; |
8615 | set_se_shares(tg->se[i], shares); | 8331 | /* Propagate contribution to hierarchy */ |
8332 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
8333 | for_each_sched_entity(se) | ||
8334 | update_cfs_shares(group_cfs_rq(se), 0); | ||
8335 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
8616 | } | 8336 | } |
8617 | 8337 | ||
8618 | /* | ||
8619 | * Enable load balance activity on this group, by inserting it back on | ||
8620 | * each cpu's rq->leaf_cfs_rq_list. | ||
8621 | */ | ||
8622 | spin_lock_irqsave(&task_group_lock, flags); | ||
8623 | for_each_possible_cpu(i) | ||
8624 | register_fair_sched_group(tg, i); | ||
8625 | list_add_rcu(&tg->siblings, &tg->parent->children); | ||
8626 | spin_unlock_irqrestore(&task_group_lock, flags); | ||
8627 | done: | 8338 | done: |
8628 | mutex_unlock(&shares_mutex); | 8339 | mutex_unlock(&shares_mutex); |
8629 | return 0; | 8340 | return 0; |
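The new cfs_rq->on_list flag and the list_del_leaf_cfs_rq() call used above belong to the on-demand leaf-list handling added in kernel/sched_fair.c, which is outside this section. A hypothetical sketch of that pattern, using only the fields visible in the hunks above:

    static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
    {
            /* register lazily, the first time the cfs_rq actually gets load */
            if (!cfs_rq->on_list) {
                    list_add_rcu(&cfs_rq->leaf_cfs_rq_list,
                                 &rq_of(cfs_rq)->leaf_cfs_rq_list);
                    cfs_rq->on_list = 1;
            }
    }

    static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
    {
            if (cfs_rq->on_list) {
                    list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
                    cfs_rq->on_list = 0;
            }
    }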
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
new file mode 100644
index 000000000000..57a7ac286a02
--- /dev/null
+++ b/kernel/sched_autogroup.c
@@ -0,0 +1,229 @@ | |||
1 | #ifdef CONFIG_SCHED_AUTOGROUP | ||
2 | |||
3 | #include <linux/proc_fs.h> | ||
4 | #include <linux/seq_file.h> | ||
5 | #include <linux/kallsyms.h> | ||
6 | #include <linux/utsname.h> | ||
7 | |||
8 | unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1; | ||
9 | static struct autogroup autogroup_default; | ||
10 | static atomic_t autogroup_seq_nr; | ||
11 | |||
12 | static void autogroup_init(struct task_struct *init_task) | ||
13 | { | ||
14 | autogroup_default.tg = &init_task_group; | ||
15 | init_task_group.autogroup = &autogroup_default; | ||
16 | kref_init(&autogroup_default.kref); | ||
17 | init_rwsem(&autogroup_default.lock); | ||
18 | init_task->signal->autogroup = &autogroup_default; | ||
19 | } | ||
20 | |||
21 | static inline void autogroup_free(struct task_group *tg) | ||
22 | { | ||
23 | kfree(tg->autogroup); | ||
24 | } | ||
25 | |||
26 | static inline void autogroup_destroy(struct kref *kref) | ||
27 | { | ||
28 | struct autogroup *ag = container_of(kref, struct autogroup, kref); | ||
29 | |||
30 | sched_destroy_group(ag->tg); | ||
31 | } | ||
32 | |||
33 | static inline void autogroup_kref_put(struct autogroup *ag) | ||
34 | { | ||
35 | kref_put(&ag->kref, autogroup_destroy); | ||
36 | } | ||
37 | |||
38 | static inline struct autogroup *autogroup_kref_get(struct autogroup *ag) | ||
39 | { | ||
40 | kref_get(&ag->kref); | ||
41 | return ag; | ||
42 | } | ||
43 | |||
44 | static inline struct autogroup *autogroup_create(void) | ||
45 | { | ||
46 | struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); | ||
47 | struct task_group *tg; | ||
48 | |||
49 | if (!ag) | ||
50 | goto out_fail; | ||
51 | |||
52 | tg = sched_create_group(&init_task_group); | ||
53 | |||
54 | if (IS_ERR(tg)) | ||
55 | goto out_free; | ||
56 | |||
57 | kref_init(&ag->kref); | ||
58 | init_rwsem(&ag->lock); | ||
59 | ag->id = atomic_inc_return(&autogroup_seq_nr); | ||
60 | ag->tg = tg; | ||
61 | tg->autogroup = ag; | ||
62 | |||
63 | return ag; | ||
64 | |||
65 | out_free: | ||
66 | kfree(ag); | ||
67 | out_fail: | ||
68 | if (printk_ratelimit()) { | ||
69 | printk(KERN_WARNING "autogroup_create: %s failure.\n", | ||
70 | ag ? "sched_create_group()" : "kmalloc()"); | ||
71 | } | ||
72 | |||
73 | return autogroup_kref_get(&autogroup_default); | ||
74 | } | ||
75 | |||
76 | static inline bool | ||
77 | task_wants_autogroup(struct task_struct *p, struct task_group *tg) | ||
78 | { | ||
79 | if (tg != &root_task_group) | ||
80 | return false; | ||
81 | |||
82 | if (p->sched_class != &fair_sched_class) | ||
83 | return false; | ||
84 | |||
85 | /* | ||
86 | * We can only assume the task group can't go away on us if | ||
87 | * autogroup_move_group() can see us on ->thread_group list. | ||
88 | */ | ||
89 | if (p->flags & PF_EXITING) | ||
90 | return false; | ||
91 | |||
92 | return true; | ||
93 | } | ||
94 | |||
95 | static inline struct task_group * | ||
96 | autogroup_task_group(struct task_struct *p, struct task_group *tg) | ||
97 | { | ||
98 | int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled); | ||
99 | |||
100 | if (enabled && task_wants_autogroup(p, tg)) | ||
101 | return p->signal->autogroup->tg; | ||
102 | |||
103 | return tg; | ||
104 | } | ||
105 | |||
106 | static void | ||
107 | autogroup_move_group(struct task_struct *p, struct autogroup *ag) | ||
108 | { | ||
109 | struct autogroup *prev; | ||
110 | struct task_struct *t; | ||
111 | unsigned long flags; | ||
112 | |||
113 | BUG_ON(!lock_task_sighand(p, &flags)); | ||
114 | |||
115 | prev = p->signal->autogroup; | ||
116 | if (prev == ag) { | ||
117 | unlock_task_sighand(p, &flags); | ||
118 | return; | ||
119 | } | ||
120 | |||
121 | p->signal->autogroup = autogroup_kref_get(ag); | ||
122 | |||
123 | t = p; | ||
124 | do { | ||
125 | sched_move_task(t); | ||
126 | } while_each_thread(p, t); | ||
127 | |||
128 | unlock_task_sighand(p, &flags); | ||
129 | autogroup_kref_put(prev); | ||
130 | } | ||
131 | |||
132 | /* Allocates with GFP_KERNEL, cannot be called under any spinlock */ | ||
133 | void sched_autogroup_create_attach(struct task_struct *p) | ||
134 | { | ||
135 | struct autogroup *ag = autogroup_create(); | ||
136 | |||
137 | autogroup_move_group(p, ag); | ||
138 | /* drop the extra reference added by autogroup_create() */ | ||
139 | autogroup_kref_put(ag); | ||
140 | } | ||
141 | EXPORT_SYMBOL(sched_autogroup_create_attach); | ||
142 | |||
143 | /* Cannot be called under siglock. Currently has no users */ | ||
144 | void sched_autogroup_detach(struct task_struct *p) | ||
145 | { | ||
146 | autogroup_move_group(p, &autogroup_default); | ||
147 | } | ||
148 | EXPORT_SYMBOL(sched_autogroup_detach); | ||
149 | |||
150 | void sched_autogroup_fork(struct signal_struct *sig) | ||
151 | { | ||
152 | struct task_struct *p = current; | ||
153 | |||
154 | spin_lock_irq(&p->sighand->siglock); | ||
155 | sig->autogroup = autogroup_kref_get(p->signal->autogroup); | ||
156 | spin_unlock_irq(&p->sighand->siglock); | ||
157 | } | ||
158 | |||
159 | void sched_autogroup_exit(struct signal_struct *sig) | ||
160 | { | ||
161 | autogroup_kref_put(sig->autogroup); | ||
162 | } | ||
163 | |||
164 | static int __init setup_autogroup(char *str) | ||
165 | { | ||
166 | sysctl_sched_autogroup_enabled = 0; | ||
167 | |||
168 | return 1; | ||
169 | } | ||
170 | |||
171 | __setup("noautogroup", setup_autogroup); | ||
172 | |||
173 | #ifdef CONFIG_PROC_FS | ||
174 | |||
175 | /* Called with siglock held. */ | ||
176 | int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice) | ||
177 | { | ||
178 | static unsigned long next = INITIAL_JIFFIES; | ||
179 | struct autogroup *ag; | ||
180 | int err; | ||
181 | |||
182 | if (*nice < -20 || *nice > 19) | ||
183 | return -EINVAL; | ||
184 | |||
185 | err = security_task_setnice(current, *nice); | ||
186 | if (err) | ||
187 | return err; | ||
188 | |||
189 | if (*nice < 0 && !can_nice(current, *nice)) | ||
190 | return -EPERM; | ||
191 | |||
192 | /* this is a heavy operation taking global locks */ | ||
193 | if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next)) | ||
194 | return -EAGAIN; | ||
195 | |||
196 | next = HZ / 10 + jiffies; | ||
197 | ag = autogroup_kref_get(p->signal->autogroup); | ||
198 | |||
199 | down_write(&ag->lock); | ||
200 | err = sched_group_set_shares(ag->tg, prio_to_weight[*nice + 20]); | ||
201 | if (!err) | ||
202 | ag->nice = *nice; | ||
203 | up_write(&ag->lock); | ||
204 | |||
205 | autogroup_kref_put(ag); | ||
206 | |||
207 | return err; | ||
208 | } | ||
209 | |||
210 | void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m) | ||
211 | { | ||
212 | struct autogroup *ag = autogroup_kref_get(p->signal->autogroup); | ||
213 | |||
214 | down_read(&ag->lock); | ||
215 | seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice); | ||
216 | up_read(&ag->lock); | ||
217 | |||
218 | autogroup_kref_put(ag); | ||
219 | } | ||
220 | #endif /* CONFIG_PROC_FS */ | ||
221 | |||
222 | #ifdef CONFIG_SCHED_DEBUG | ||
223 | static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) | ||
224 | { | ||
225 | return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); | ||
226 | } | ||
227 | #endif /* CONFIG_SCHED_DEBUG */ | ||
228 | |||
229 | #endif /* CONFIG_SCHED_AUTOGROUP */ | ||
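The reference counting in the new file follows a common hand-off pattern: autogroup_create() returns with the kref held once, autogroup_move_group() takes its own reference for the signal_struct, and sched_autogroup_create_attach() then drops the creation reference. A minimal user-space sketch of that hand-off, using hypothetical names and a plain counter instead of a kref:

#include <stdio.h>
#include <stdlib.h>

/* toy stand-in for struct autogroup + kref */
struct group {
	int refcount;
};

static struct group *group_create(void)
{
	struct group *g = calloc(1, sizeof(*g));

	if (!g)
		return NULL;
	g->refcount = 1;		/* creation reference */
	return g;
}

static void group_get(struct group *g) { g->refcount++; }

static void group_put(struct group *g)
{
	if (--g->refcount == 0) {
		printf("destroying group\n");
		free(g);
	}
}

int main(void)
{
	struct group *g = group_create();	/* refcount = 1 */
	struct group *attached;

	if (!g)
		return 1;
	group_get(g);				/* attach takes its own reference, refcount = 2 */
	attached = g;
	group_put(g);				/* drop the creation reference, refcount = 1 */

	group_put(attached);			/* final detach frees the group */
	return 0;
}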
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h new file mode 100644 index 000000000000..5358e241cb20 --- /dev/null +++ b/kernel/sched_autogroup.h | |||
@@ -0,0 +1,32 @@ | |||
1 | #ifdef CONFIG_SCHED_AUTOGROUP | ||
2 | |||
3 | struct autogroup { | ||
4 | struct kref kref; | ||
5 | struct task_group *tg; | ||
6 | struct rw_semaphore lock; | ||
7 | unsigned long id; | ||
8 | int nice; | ||
9 | }; | ||
10 | |||
11 | static inline struct task_group * | ||
12 | autogroup_task_group(struct task_struct *p, struct task_group *tg); | ||
13 | |||
14 | #else /* !CONFIG_SCHED_AUTOGROUP */ | ||
15 | |||
16 | static inline void autogroup_init(struct task_struct *init_task) { } | ||
17 | static inline void autogroup_free(struct task_group *tg) { } | ||
18 | |||
19 | static inline struct task_group * | ||
20 | autogroup_task_group(struct task_struct *p, struct task_group *tg) | ||
21 | { | ||
22 | return tg; | ||
23 | } | ||
24 | |||
25 | #ifdef CONFIG_SCHED_DEBUG | ||
26 | static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) | ||
27 | { | ||
28 | return 0; | ||
29 | } | ||
30 | #endif | ||
31 | |||
32 | #endif /* CONFIG_SCHED_AUTOGROUP */ | ||
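The header relies on the usual config-stub idiom: when CONFIG_SCHED_AUTOGROUP is off, autogroup_init(), autogroup_free() and autogroup_task_group() compile to empty or pass-through static inlines, so the call sites in sched.c stay unconditional. A stripped-down illustration of the same idiom, with a hypothetical FEATURE_AUTOGROUP macro and toy types:

#include <stdio.h>

struct task_group { const char *name; };

#ifdef FEATURE_AUTOGROUP
struct task_group *feature_task_group(struct task_group *tg);
#else
/* stubs keep the callers unconditional when the feature is compiled out */
static inline struct task_group *
feature_task_group(struct task_group *tg)
{
	return tg;	/* pass-through: behave as if the feature did not exist */
}
#endif

int main(void)
{
	struct task_group root = { "root" };

	printf("%s\n", feature_task_group(&root)->name);
	return 0;
}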
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 52f1a149bfb1..9d8af0b3fb64 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c | |||
@@ -79,7 +79,7 @@ unsigned long long __attribute__((weak)) sched_clock(void) | |||
79 | } | 79 | } |
80 | EXPORT_SYMBOL_GPL(sched_clock); | 80 | EXPORT_SYMBOL_GPL(sched_clock); |
81 | 81 | ||
82 | static __read_mostly int sched_clock_running; | 82 | __read_mostly int sched_clock_running; |
83 | 83 | ||
84 | #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK | 84 | #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK |
85 | __read_mostly int sched_clock_stable; | 85 | __read_mostly int sched_clock_stable; |
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 2e1b0d17dd9b..1dfae3d014b5 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
@@ -54,8 +54,7 @@ static unsigned long nsec_low(unsigned long long nsec) | |||
54 | #define SPLIT_NS(x) nsec_high(x), nsec_low(x) | 54 | #define SPLIT_NS(x) nsec_high(x), nsec_low(x) |
55 | 55 | ||
56 | #ifdef CONFIG_FAIR_GROUP_SCHED | 56 | #ifdef CONFIG_FAIR_GROUP_SCHED |
57 | static void print_cfs_group_stats(struct seq_file *m, int cpu, | 57 | static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg) |
58 | struct task_group *tg) | ||
59 | { | 58 | { |
60 | struct sched_entity *se = tg->se[cpu]; | 59 | struct sched_entity *se = tg->se[cpu]; |
61 | if (!se) | 60 | if (!se) |
@@ -110,16 +109,6 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) | |||
110 | 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L); | 109 | 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L); |
111 | #endif | 110 | #endif |
112 | 111 | ||
113 | #ifdef CONFIG_CGROUP_SCHED | ||
114 | { | ||
115 | char path[64]; | ||
116 | |||
117 | rcu_read_lock(); | ||
118 | cgroup_path(task_group(p)->css.cgroup, path, sizeof(path)); | ||
119 | rcu_read_unlock(); | ||
120 | SEQ_printf(m, " %s", path); | ||
121 | } | ||
122 | #endif | ||
123 | SEQ_printf(m, "\n"); | 112 | SEQ_printf(m, "\n"); |
124 | } | 113 | } |
125 | 114 | ||
@@ -147,19 +136,6 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) | |||
147 | read_unlock_irqrestore(&tasklist_lock, flags); | 136 | read_unlock_irqrestore(&tasklist_lock, flags); |
148 | } | 137 | } |
149 | 138 | ||
150 | #if defined(CONFIG_CGROUP_SCHED) && \ | ||
151 | (defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)) | ||
152 | static void task_group_path(struct task_group *tg, char *buf, int buflen) | ||
153 | { | ||
154 | /* may be NULL if the underlying cgroup isn't fully-created yet */ | ||
155 | if (!tg->css.cgroup) { | ||
156 | buf[0] = '\0'; | ||
157 | return; | ||
158 | } | ||
159 | cgroup_path(tg->css.cgroup, buf, buflen); | ||
160 | } | ||
161 | #endif | ||
162 | |||
163 | void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | 139 | void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) |
164 | { | 140 | { |
165 | s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, | 141 | s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, |
@@ -168,16 +144,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
168 | struct sched_entity *last; | 144 | struct sched_entity *last; |
169 | unsigned long flags; | 145 | unsigned long flags; |
170 | 146 | ||
171 | #if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED) | ||
172 | char path[128]; | ||
173 | struct task_group *tg = cfs_rq->tg; | ||
174 | |||
175 | task_group_path(tg, path, sizeof(path)); | ||
176 | |||
177 | SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path); | ||
178 | #else | ||
179 | SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); | 147 | SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); |
180 | #endif | ||
181 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", | 148 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", |
182 | SPLIT_NS(cfs_rq->exec_clock)); | 149 | SPLIT_NS(cfs_rq->exec_clock)); |
183 | 150 | ||
@@ -202,32 +169,29 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
202 | spread0 = min_vruntime - rq0_min_vruntime; | 169 | spread0 = min_vruntime - rq0_min_vruntime; |
203 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0", | 170 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0", |
204 | SPLIT_NS(spread0)); | 171 | SPLIT_NS(spread0)); |
205 | SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); | ||
206 | SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); | ||
207 | |||
208 | SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over", | 172 | SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over", |
209 | cfs_rq->nr_spread_over); | 173 | cfs_rq->nr_spread_over); |
174 | SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); | ||
175 | SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); | ||
210 | #ifdef CONFIG_FAIR_GROUP_SCHED | 176 | #ifdef CONFIG_FAIR_GROUP_SCHED |
211 | #ifdef CONFIG_SMP | 177 | #ifdef CONFIG_SMP |
212 | SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares); | 178 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_avg", |
179 | SPLIT_NS(cfs_rq->load_avg)); | ||
180 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_period", | ||
181 | SPLIT_NS(cfs_rq->load_period)); | ||
182 | SEQ_printf(m, " .%-30s: %ld\n", "load_contrib", | ||
183 | cfs_rq->load_contribution); | ||
184 | SEQ_printf(m, " .%-30s: %d\n", "load_tg", | ||
185 | atomic_read(&cfs_rq->tg->load_weight)); | ||
213 | #endif | 186 | #endif |
187 | |||
214 | print_cfs_group_stats(m, cpu, cfs_rq->tg); | 188 | print_cfs_group_stats(m, cpu, cfs_rq->tg); |
215 | #endif | 189 | #endif |
216 | } | 190 | } |
217 | 191 | ||
218 | void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) | 192 | void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) |
219 | { | 193 | { |
220 | #if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED) | ||
221 | char path[128]; | ||
222 | struct task_group *tg = rt_rq->tg; | ||
223 | |||
224 | task_group_path(tg, path, sizeof(path)); | ||
225 | |||
226 | SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path); | ||
227 | #else | ||
228 | SEQ_printf(m, "\nrt_rq[%d]:\n", cpu); | 194 | SEQ_printf(m, "\nrt_rq[%d]:\n", cpu); |
229 | #endif | ||
230 | |||
231 | 195 | ||
232 | #define P(x) \ | 196 | #define P(x) \ |
233 | SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) | 197 | SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) |
@@ -243,6 +207,8 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) | |||
243 | #undef P | 207 | #undef P |
244 | } | 208 | } |
245 | 209 | ||
210 | extern __read_mostly int sched_clock_running; | ||
211 | |||
246 | static void print_cpu(struct seq_file *m, int cpu) | 212 | static void print_cpu(struct seq_file *m, int cpu) |
247 | { | 213 | { |
248 | struct rq *rq = cpu_rq(cpu); | 214 | struct rq *rq = cpu_rq(cpu); |
@@ -314,21 +280,42 @@ static const char *sched_tunable_scaling_names[] = { | |||
314 | 280 | ||
315 | static int sched_debug_show(struct seq_file *m, void *v) | 281 | static int sched_debug_show(struct seq_file *m, void *v) |
316 | { | 282 | { |
317 | u64 now = ktime_to_ns(ktime_get()); | 283 | u64 ktime, sched_clk, cpu_clk; |
284 | unsigned long flags; | ||
318 | int cpu; | 285 | int cpu; |
319 | 286 | ||
320 | SEQ_printf(m, "Sched Debug Version: v0.09, %s %.*s\n", | 287 | local_irq_save(flags); |
288 | ktime = ktime_to_ns(ktime_get()); | ||
289 | sched_clk = sched_clock(); | ||
290 | cpu_clk = local_clock(); | ||
291 | local_irq_restore(flags); | ||
292 | |||
293 | SEQ_printf(m, "Sched Debug Version: v0.10, %s %.*s\n", | ||
321 | init_utsname()->release, | 294 | init_utsname()->release, |
322 | (int)strcspn(init_utsname()->version, " "), | 295 | (int)strcspn(init_utsname()->version, " "), |
323 | init_utsname()->version); | 296 | init_utsname()->version); |
324 | 297 | ||
325 | SEQ_printf(m, "now at %Lu.%06ld msecs\n", SPLIT_NS(now)); | 298 | #define P(x) \ |
299 | SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x)) | ||
300 | #define PN(x) \ | ||
301 | SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) | ||
302 | PN(ktime); | ||
303 | PN(sched_clk); | ||
304 | PN(cpu_clk); | ||
305 | P(jiffies); | ||
306 | #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK | ||
307 | P(sched_clock_stable); | ||
308 | #endif | ||
309 | #undef PN | ||
310 | #undef P | ||
311 | |||
312 | SEQ_printf(m, "\n"); | ||
313 | SEQ_printf(m, "sysctl_sched\n"); | ||
326 | 314 | ||
327 | #define P(x) \ | 315 | #define P(x) \ |
328 | SEQ_printf(m, " .%-40s: %Ld\n", #x, (long long)(x)) | 316 | SEQ_printf(m, " .%-40s: %Ld\n", #x, (long long)(x)) |
329 | #define PN(x) \ | 317 | #define PN(x) \ |
330 | SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) | 318 | SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) |
331 | P(jiffies); | ||
332 | PN(sysctl_sched_latency); | 319 | PN(sysctl_sched_latency); |
333 | PN(sysctl_sched_min_granularity); | 320 | PN(sysctl_sched_min_granularity); |
334 | PN(sysctl_sched_wakeup_granularity); | 321 | PN(sysctl_sched_wakeup_granularity); |
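The new P()/PN() output in sched_debug_show() prints raw values and nanosecond clocks; SPLIT_NS() splits a nanosecond count into a millisecond part and a six-digit remainder, which is what produces the "%Ld.%06ld" lines. A small sketch of that formatting in user space (helper bodies are an assumption here and ignore the negative-value handling the kernel versions perform):

#include <stdio.h>

/* split a nanosecond count into msec and the 6-digit sub-msec remainder */
static long long nsec_high(long long nsec) { return nsec / 1000000; }
static long nsec_low(long long nsec)       { return nsec % 1000000; }

#define SPLIT_NS(x) nsec_high(x), nsec_low(x)

int main(void)
{
	long long now = 123456789012LL;	/* roughly 123.45 seconds in nanoseconds */

	printf("ktime: %lld.%06ld\n", SPLIT_NS(now));
	return 0;
}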
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 00ebd7686676..c88671718bc9 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -89,6 +89,13 @@ unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; | |||
89 | 89 | ||
90 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; | 90 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; |
91 | 91 | ||
92 | /* | ||
93 | * The exponential sliding window over which load is averaged for shares | ||
94 | * distribution. | ||
95 | * (default: 10msec) | ||
96 | */ | ||
97 | unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL; | ||
98 | |||
92 | static const struct sched_class fair_sched_class; | 99 | static const struct sched_class fair_sched_class; |
93 | 100 | ||
94 | /************************************************************** | 101 | /************************************************************** |
@@ -143,6 +150,36 @@ static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu) | |||
143 | return cfs_rq->tg->cfs_rq[this_cpu]; | 150 | return cfs_rq->tg->cfs_rq[this_cpu]; |
144 | } | 151 | } |
145 | 152 | ||
153 | static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) | ||
154 | { | ||
155 | if (!cfs_rq->on_list) { | ||
156 | /* | ||
157 | * Ensure we either appear before our parent (if already | ||
158 | * enqueued) or force our parent to appear after us when it is | ||
159 | * enqueued. The fact that we always enqueue bottom-up | ||
160 | * reduces this to two cases. | ||
161 | */ | ||
162 | if (cfs_rq->tg->parent && | ||
163 | cfs_rq->tg->parent->cfs_rq[cpu_of(rq_of(cfs_rq))]->on_list) { | ||
164 | list_add_rcu(&cfs_rq->leaf_cfs_rq_list, | ||
165 | &rq_of(cfs_rq)->leaf_cfs_rq_list); | ||
166 | } else { | ||
167 | list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list, | ||
168 | &rq_of(cfs_rq)->leaf_cfs_rq_list); | ||
169 | } | ||
170 | |||
171 | cfs_rq->on_list = 1; | ||
172 | } | ||
173 | } | ||
174 | |||
175 | static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq) | ||
176 | { | ||
177 | if (cfs_rq->on_list) { | ||
178 | list_del_rcu(&cfs_rq->leaf_cfs_rq_list); | ||
179 | cfs_rq->on_list = 0; | ||
180 | } | ||
181 | } | ||
182 | |||
146 | /* Iterate thr' all leaf cfs_rq's on a runqueue */ | 183 | /* Iterate thr' all leaf cfs_rq's on a runqueue */ |
147 | #define for_each_leaf_cfs_rq(rq, cfs_rq) \ | 184 | #define for_each_leaf_cfs_rq(rq, cfs_rq) \ |
148 | list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list) | 185 | list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list) |
@@ -246,6 +283,14 @@ static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu) | |||
246 | return &cpu_rq(this_cpu)->cfs; | 283 | return &cpu_rq(this_cpu)->cfs; |
247 | } | 284 | } |
248 | 285 | ||
286 | static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) | ||
287 | { | ||
288 | } | ||
289 | |||
290 | static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq) | ||
291 | { | ||
292 | } | ||
293 | |||
249 | #define for_each_leaf_cfs_rq(rq, cfs_rq) \ | 294 | #define for_each_leaf_cfs_rq(rq, cfs_rq) \ |
250 | for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL) | 295 | for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL) |
251 | 296 | ||
@@ -417,7 +462,6 @@ int sched_proc_update_handler(struct ctl_table *table, int write, | |||
417 | WRT_SYSCTL(sched_min_granularity); | 462 | WRT_SYSCTL(sched_min_granularity); |
418 | WRT_SYSCTL(sched_latency); | 463 | WRT_SYSCTL(sched_latency); |
419 | WRT_SYSCTL(sched_wakeup_granularity); | 464 | WRT_SYSCTL(sched_wakeup_granularity); |
420 | WRT_SYSCTL(sched_shares_ratelimit); | ||
421 | #undef WRT_SYSCTL | 465 | #undef WRT_SYSCTL |
422 | 466 | ||
423 | return 0; | 467 | return 0; |
@@ -495,6 +539,9 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
495 | return calc_delta_fair(sched_slice(cfs_rq, se), se); | 539 | return calc_delta_fair(sched_slice(cfs_rq, se), se); |
496 | } | 540 | } |
497 | 541 | ||
542 | static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update); | ||
543 | static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta); | ||
544 | |||
498 | /* | 545 | /* |
499 | * Update the current task's runtime statistics. Skip current tasks that | 546 | * Update the current task's runtime statistics. Skip current tasks that |
500 | * are not in our scheduling class. | 547 | * are not in our scheduling class. |
@@ -514,6 +561,14 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, | |||
514 | 561 | ||
515 | curr->vruntime += delta_exec_weighted; | 562 | curr->vruntime += delta_exec_weighted; |
516 | update_min_vruntime(cfs_rq); | 563 | update_min_vruntime(cfs_rq); |
564 | |||
565 | #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED | ||
566 | cfs_rq->load_unacc_exec_time += delta_exec; | ||
567 | if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { | ||
568 | update_cfs_load(cfs_rq, 0); | ||
569 | update_cfs_shares(cfs_rq, 0); | ||
570 | } | ||
571 | #endif | ||
517 | } | 572 | } |
518 | 573 | ||
519 | static void update_curr(struct cfs_rq *cfs_rq) | 574 | static void update_curr(struct cfs_rq *cfs_rq) |
@@ -633,7 +688,6 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
633 | list_add(&se->group_node, &cfs_rq->tasks); | 688 | list_add(&se->group_node, &cfs_rq->tasks); |
634 | } | 689 | } |
635 | cfs_rq->nr_running++; | 690 | cfs_rq->nr_running++; |
636 | se->on_rq = 1; | ||
637 | } | 691 | } |
638 | 692 | ||
639 | static void | 693 | static void |
@@ -647,9 +701,124 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
647 | list_del_init(&se->group_node); | 701 | list_del_init(&se->group_node); |
648 | } | 702 | } |
649 | cfs_rq->nr_running--; | 703 | cfs_rq->nr_running--; |
650 | se->on_rq = 0; | ||
651 | } | 704 | } |
652 | 705 | ||
706 | #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED | ||
707 | static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq, | ||
708 | int global_update) | ||
709 | { | ||
710 | struct task_group *tg = cfs_rq->tg; | ||
711 | long load_avg; | ||
712 | |||
713 | load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1); | ||
714 | load_avg -= cfs_rq->load_contribution; | ||
715 | |||
716 | if (global_update || abs(load_avg) > cfs_rq->load_contribution / 8) { | ||
717 | atomic_add(load_avg, &tg->load_weight); | ||
718 | cfs_rq->load_contribution += load_avg; | ||
719 | } | ||
720 | } | ||
721 | |||
722 | static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | ||
723 | { | ||
724 | u64 period = sysctl_sched_shares_window; | ||
725 | u64 now, delta; | ||
726 | unsigned long load = cfs_rq->load.weight; | ||
727 | |||
728 | if (!cfs_rq) | ||
729 | return; | ||
730 | |||
731 | now = rq_of(cfs_rq)->clock; | ||
732 | delta = now - cfs_rq->load_stamp; | ||
733 | |||
734 | /* truncate load history at 4 idle periods */ | ||
735 | if (cfs_rq->load_stamp > cfs_rq->load_last && | ||
736 | now - cfs_rq->load_last > 4 * period) { | ||
737 | cfs_rq->load_period = 0; | ||
738 | cfs_rq->load_avg = 0; | ||
739 | } | ||
740 | |||
741 | cfs_rq->load_stamp = now; | ||
742 | cfs_rq->load_unacc_exec_time = 0; | ||
743 | cfs_rq->load_period += delta; | ||
744 | if (load) { | ||
745 | cfs_rq->load_last = now; | ||
746 | cfs_rq->load_avg += delta * load; | ||
747 | } | ||
748 | |||
749 | /* consider updating load contribution on each fold or truncate */ | ||
750 | if (global_update || cfs_rq->load_period > period | ||
751 | || !cfs_rq->load_period) | ||
752 | update_cfs_rq_load_contribution(cfs_rq, global_update); | ||
753 | |||
754 | while (cfs_rq->load_period > period) { | ||
755 | /* | ||
756 | * Inline assembly required to prevent the compiler | ||
757 | * optimising this loop into a divmod call. | ||
758 | * See __iter_div_u64_rem() for another example of this. | ||
759 | */ | ||
760 | asm("" : "+rm" (cfs_rq->load_period)); | ||
761 | cfs_rq->load_period /= 2; | ||
762 | cfs_rq->load_avg /= 2; | ||
763 | } | ||
764 | |||
765 | if (!cfs_rq->curr && !cfs_rq->nr_running && !cfs_rq->load_avg) | ||
766 | list_del_leaf_cfs_rq(cfs_rq); | ||
767 | } | ||
768 | |||
769 | static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, | ||
770 | unsigned long weight) | ||
771 | { | ||
772 | if (se->on_rq) | ||
773 | account_entity_dequeue(cfs_rq, se); | ||
774 | |||
775 | update_load_set(&se->load, weight); | ||
776 | |||
777 | if (se->on_rq) | ||
778 | account_entity_enqueue(cfs_rq, se); | ||
779 | } | ||
780 | |||
781 | static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) | ||
782 | { | ||
783 | struct task_group *tg; | ||
784 | struct sched_entity *se; | ||
785 | long load_weight, load, shares; | ||
786 | |||
787 | if (!cfs_rq) | ||
788 | return; | ||
789 | |||
790 | tg = cfs_rq->tg; | ||
791 | se = tg->se[cpu_of(rq_of(cfs_rq))]; | ||
792 | if (!se) | ||
793 | return; | ||
794 | |||
795 | load = cfs_rq->load.weight + weight_delta; | ||
796 | |||
797 | load_weight = atomic_read(&tg->load_weight); | ||
798 | load_weight -= cfs_rq->load_contribution; | ||
799 | load_weight += load; | ||
800 | |||
801 | shares = (tg->shares * load); | ||
802 | if (load_weight) | ||
803 | shares /= load_weight; | ||
804 | |||
805 | if (shares < MIN_SHARES) | ||
806 | shares = MIN_SHARES; | ||
807 | if (shares > tg->shares) | ||
808 | shares = tg->shares; | ||
809 | |||
810 | reweight_entity(cfs_rq_of(se), se, shares); | ||
811 | } | ||
812 | #else /* CONFIG_FAIR_GROUP_SCHED */ | ||
813 | static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | ||
814 | { | ||
815 | } | ||
816 | |||
817 | static inline void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) | ||
818 | { | ||
819 | } | ||
820 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | ||
821 | |||
653 | static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) | 822 | static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) |
654 | { | 823 | { |
655 | #ifdef CONFIG_SCHEDSTATS | 824 | #ifdef CONFIG_SCHEDSTATS |
@@ -771,6 +940,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
771 | * Update run-time statistics of the 'current'. | 940 | * Update run-time statistics of the 'current'. |
772 | */ | 941 | */ |
773 | update_curr(cfs_rq); | 942 | update_curr(cfs_rq); |
943 | update_cfs_load(cfs_rq, 0); | ||
944 | update_cfs_shares(cfs_rq, se->load.weight); | ||
774 | account_entity_enqueue(cfs_rq, se); | 945 | account_entity_enqueue(cfs_rq, se); |
775 | 946 | ||
776 | if (flags & ENQUEUE_WAKEUP) { | 947 | if (flags & ENQUEUE_WAKEUP) { |
@@ -782,6 +953,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
782 | check_spread(cfs_rq, se); | 953 | check_spread(cfs_rq, se); |
783 | if (se != cfs_rq->curr) | 954 | if (se != cfs_rq->curr) |
784 | __enqueue_entity(cfs_rq, se); | 955 | __enqueue_entity(cfs_rq, se); |
956 | se->on_rq = 1; | ||
957 | |||
958 | if (cfs_rq->nr_running == 1) | ||
959 | list_add_leaf_cfs_rq(cfs_rq); | ||
785 | } | 960 | } |
786 | 961 | ||
787 | static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) | 962 | static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) |
@@ -825,8 +1000,11 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
825 | 1000 | ||
826 | if (se != cfs_rq->curr) | 1001 | if (se != cfs_rq->curr) |
827 | __dequeue_entity(cfs_rq, se); | 1002 | __dequeue_entity(cfs_rq, se); |
1003 | se->on_rq = 0; | ||
1004 | update_cfs_load(cfs_rq, 0); | ||
828 | account_entity_dequeue(cfs_rq, se); | 1005 | account_entity_dequeue(cfs_rq, se); |
829 | update_min_vruntime(cfs_rq); | 1006 | update_min_vruntime(cfs_rq); |
1007 | update_cfs_shares(cfs_rq, 0); | ||
830 | 1008 | ||
831 | /* | 1009 | /* |
832 | * Normalize the entity after updating the min_vruntime because the | 1010 | * Normalize the entity after updating the min_vruntime because the |
@@ -1055,6 +1233,13 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
1055 | flags = ENQUEUE_WAKEUP; | 1233 | flags = ENQUEUE_WAKEUP; |
1056 | } | 1234 | } |
1057 | 1235 | ||
1236 | for_each_sched_entity(se) { | ||
1237 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | ||
1238 | |||
1239 | update_cfs_load(cfs_rq, 0); | ||
1240 | update_cfs_shares(cfs_rq, 0); | ||
1241 | } | ||
1242 | |||
1058 | hrtick_update(rq); | 1243 | hrtick_update(rq); |
1059 | } | 1244 | } |
1060 | 1245 | ||
@@ -1071,12 +1256,20 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
1071 | for_each_sched_entity(se) { | 1256 | for_each_sched_entity(se) { |
1072 | cfs_rq = cfs_rq_of(se); | 1257 | cfs_rq = cfs_rq_of(se); |
1073 | dequeue_entity(cfs_rq, se, flags); | 1258 | dequeue_entity(cfs_rq, se, flags); |
1259 | |||
1074 | /* Don't dequeue parent if it has other entities besides us */ | 1260 | /* Don't dequeue parent if it has other entities besides us */ |
1075 | if (cfs_rq->load.weight) | 1261 | if (cfs_rq->load.weight) |
1076 | break; | 1262 | break; |
1077 | flags |= DEQUEUE_SLEEP; | 1263 | flags |= DEQUEUE_SLEEP; |
1078 | } | 1264 | } |
1079 | 1265 | ||
1266 | for_each_sched_entity(se) { | ||
1267 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | ||
1268 | |||
1269 | update_cfs_load(cfs_rq, 0); | ||
1270 | update_cfs_shares(cfs_rq, 0); | ||
1271 | } | ||
1272 | |||
1080 | hrtick_update(rq); | 1273 | hrtick_update(rq); |
1081 | } | 1274 | } |
1082 | 1275 | ||
@@ -1143,51 +1336,20 @@ static void task_waking_fair(struct rq *rq, struct task_struct *p) | |||
1143 | * Adding load to a group doesn't make a group heavier, but can cause movement | 1336 | * Adding load to a group doesn't make a group heavier, but can cause movement |
1144 | * of group shares between cpus. Assuming the shares were perfectly aligned one | 1337 | * of group shares between cpus. Assuming the shares were perfectly aligned one |
1145 | * can calculate the shift in shares. | 1338 | * can calculate the shift in shares. |
1146 | * | ||
1147 | * The problem is that perfectly aligning the shares is rather expensive, hence | ||
1148 | * we try to avoid doing that too often - see update_shares(), which ratelimits | ||
1149 | * this change. | ||
1150 | * | ||
1151 | * We compensate this by not only taking the current delta into account, but | ||
1152 | * also considering the delta between when the shares were last adjusted and | ||
1153 | * now. | ||
1154 | * | ||
1155 | * We still saw a performance dip, some tracing learned us that between | ||
1156 | * cgroup:/ and cgroup:/foo balancing the number of affine wakeups increased | ||
1157 | * significantly. Therefore try to bias the error in direction of failing | ||
1158 | * the affine wakeup. | ||
1159 | * | ||
1160 | */ | 1339 | */ |
1161 | static long effective_load(struct task_group *tg, int cpu, | 1340 | static long effective_load(struct task_group *tg, int cpu, long wl, long wg) |
1162 | long wl, long wg) | ||
1163 | { | 1341 | { |
1164 | struct sched_entity *se = tg->se[cpu]; | 1342 | struct sched_entity *se = tg->se[cpu]; |
1165 | 1343 | ||
1166 | if (!tg->parent) | 1344 | if (!tg->parent) |
1167 | return wl; | 1345 | return wl; |
1168 | 1346 | ||
1169 | /* | ||
1170 | * By not taking the decrease of shares on the other cpu into | ||
1171 | * account our error leans towards reducing the affine wakeups. | ||
1172 | */ | ||
1173 | if (!wl && sched_feat(ASYM_EFF_LOAD)) | ||
1174 | return wl; | ||
1175 | |||
1176 | for_each_sched_entity(se) { | 1347 | for_each_sched_entity(se) { |
1177 | long S, rw, s, a, b; | 1348 | long S, rw, s, a, b; |
1178 | long more_w; | ||
1179 | |||
1180 | /* | ||
1181 | * Instead of using this increment, also add the difference | ||
1182 | * between when the shares were last updated and now. | ||
1183 | */ | ||
1184 | more_w = se->my_q->load.weight - se->my_q->rq_weight; | ||
1185 | wl += more_w; | ||
1186 | wg += more_w; | ||
1187 | 1349 | ||
1188 | S = se->my_q->tg->shares; | 1350 | S = se->my_q->tg->shares; |
1189 | s = se->my_q->shares; | 1351 | s = se->load.weight; |
1190 | rw = se->my_q->rq_weight; | 1352 | rw = se->my_q->load.weight; |
1191 | 1353 | ||
1192 | a = S*(rw + wl); | 1354 | a = S*(rw + wl); |
1193 | b = S*rw + s*wg; | 1355 | b = S*rw + s*wg; |
@@ -1508,23 +1670,6 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_ | |||
1508 | sd = tmp; | 1670 | sd = tmp; |
1509 | } | 1671 | } |
1510 | 1672 | ||
1511 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
1512 | if (sched_feat(LB_SHARES_UPDATE)) { | ||
1513 | /* | ||
1514 | * Pick the largest domain to update shares over | ||
1515 | */ | ||
1516 | tmp = sd; | ||
1517 | if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight)) | ||
1518 | tmp = affine_sd; | ||
1519 | |||
1520 | if (tmp) { | ||
1521 | raw_spin_unlock(&rq->lock); | ||
1522 | update_shares(tmp); | ||
1523 | raw_spin_lock(&rq->lock); | ||
1524 | } | ||
1525 | } | ||
1526 | #endif | ||
1527 | |||
1528 | if (affine_sd) { | 1673 | if (affine_sd) { |
1529 | if (cpu == prev_cpu || wake_affine(affine_sd, p, sync)) | 1674 | if (cpu == prev_cpu || wake_affine(affine_sd, p, sync)) |
1530 | return select_idle_sibling(p, cpu); | 1675 | return select_idle_sibling(p, cpu); |
@@ -1909,6 +2054,48 @@ out: | |||
1909 | } | 2054 | } |
1910 | 2055 | ||
1911 | #ifdef CONFIG_FAIR_GROUP_SCHED | 2056 | #ifdef CONFIG_FAIR_GROUP_SCHED |
2057 | /* | ||
2058 | * update tg->load_weight by folding this cpu's load_avg | ||
2059 | */ | ||
2060 | static int update_shares_cpu(struct task_group *tg, int cpu) | ||
2061 | { | ||
2062 | struct cfs_rq *cfs_rq; | ||
2063 | unsigned long flags; | ||
2064 | struct rq *rq; | ||
2065 | |||
2066 | if (!tg->se[cpu]) | ||
2067 | return 0; | ||
2068 | |||
2069 | rq = cpu_rq(cpu); | ||
2070 | cfs_rq = tg->cfs_rq[cpu]; | ||
2071 | |||
2072 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
2073 | |||
2074 | update_rq_clock(rq); | ||
2075 | update_cfs_load(cfs_rq, 1); | ||
2076 | |||
2077 | /* | ||
2078 | * We need to update shares after updating tg->load_weight in | ||
2079 | * order to adjust the weight of groups with long running tasks. | ||
2080 | */ | ||
2081 | update_cfs_shares(cfs_rq, 0); | ||
2082 | |||
2083 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
2084 | |||
2085 | return 0; | ||
2086 | } | ||
2087 | |||
2088 | static void update_shares(int cpu) | ||
2089 | { | ||
2090 | struct cfs_rq *cfs_rq; | ||
2091 | struct rq *rq = cpu_rq(cpu); | ||
2092 | |||
2093 | rcu_read_lock(); | ||
2094 | for_each_leaf_cfs_rq(rq, cfs_rq) | ||
2095 | update_shares_cpu(cfs_rq->tg, cpu); | ||
2096 | rcu_read_unlock(); | ||
2097 | } | ||
2098 | |||
1912 | static unsigned long | 2099 | static unsigned long |
1913 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | 2100 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, |
1914 | unsigned long max_load_move, | 2101 | unsigned long max_load_move, |
@@ -1956,6 +2143,10 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
1956 | return max_load_move - rem_load_move; | 2143 | return max_load_move - rem_load_move; |
1957 | } | 2144 | } |
1958 | #else | 2145 | #else |
2146 | static inline void update_shares(int cpu) | ||
2147 | { | ||
2148 | } | ||
2149 | |||
1959 | static unsigned long | 2150 | static unsigned long |
1960 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | 2151 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, |
1961 | unsigned long max_load_move, | 2152 | unsigned long max_load_move, |
@@ -3032,7 +3223,6 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
3032 | schedstat_inc(sd, lb_count[idle]); | 3223 | schedstat_inc(sd, lb_count[idle]); |
3033 | 3224 | ||
3034 | redo: | 3225 | redo: |
3035 | update_shares(sd); | ||
3036 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, | 3226 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, |
3037 | cpus, balance); | 3227 | cpus, balance); |
3038 | 3228 | ||
@@ -3174,8 +3364,6 @@ out_one_pinned: | |||
3174 | else | 3364 | else |
3175 | ld_moved = 0; | 3365 | ld_moved = 0; |
3176 | out: | 3366 | out: |
3177 | if (ld_moved) | ||
3178 | update_shares(sd); | ||
3179 | return ld_moved; | 3367 | return ld_moved; |
3180 | } | 3368 | } |
3181 | 3369 | ||
@@ -3199,6 +3387,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
3199 | */ | 3387 | */ |
3200 | raw_spin_unlock(&this_rq->lock); | 3388 | raw_spin_unlock(&this_rq->lock); |
3201 | 3389 | ||
3390 | update_shares(this_cpu); | ||
3202 | for_each_domain(this_cpu, sd) { | 3391 | for_each_domain(this_cpu, sd) { |
3203 | unsigned long interval; | 3392 | unsigned long interval; |
3204 | int balance = 1; | 3393 | int balance = 1; |
@@ -3569,6 +3758,8 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3569 | int update_next_balance = 0; | 3758 | int update_next_balance = 0; |
3570 | int need_serialize; | 3759 | int need_serialize; |
3571 | 3760 | ||
3761 | update_shares(cpu); | ||
3762 | |||
3572 | for_each_domain(cpu, sd) { | 3763 | for_each_domain(cpu, sd) { |
3573 | if (!(sd->flags & SD_LOAD_BALANCE)) | 3764 | if (!(sd->flags & SD_LOAD_BALANCE)) |
3574 | continue; | 3765 | continue; |
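In the sched_fair.c changes, update_cfs_load() accumulates weight*time into load_avg and, once load_period outgrows sysctl_sched_shares_window, halves both repeatedly so older windows decay geometrically; update_cfs_shares() then sizes this cpu's entity as tg->shares scaled by the local load over the group total, clamped between MIN_SHARES and tg->shares. A condensed user-space sketch of both steps, with made-up numbers, an assumed MIN_SHARES value, and none of the locking or contribution ratelimiting above:

#include <stdio.h>

#define WINDOW		10000000ULL	/* 10ms, mirroring sysctl_sched_shares_window */
#define MIN_SHARES	2		/* assumed floor for the sketch */

static unsigned long long load_avg, load_period;

/* fold weight*time into the sliding window, decaying old windows by half */
static void fold_load(unsigned long long delta_ns, unsigned long weight)
{
	load_period += delta_ns;
	load_avg += delta_ns * weight;

	while (load_period > WINDOW) {
		load_period /= 2;
		load_avg /= 2;
	}
}

/* shares for this cpu: group shares scaled by local/total load, then clamped */
static long calc_shares(long tg_shares, long local_load, long total_load)
{
	long shares = total_load ? tg_shares * local_load / total_load : tg_shares;

	if (shares < MIN_SHARES)
		shares = MIN_SHARES;
	if (shares > tg_shares)
		shares = tg_shares;
	return shares;
}

int main(void)
{
	fold_load(4000000, 1024);	/* 4ms of a nice-0 weight */
	fold_load(8000000, 2048);	/* 8ms at double weight: window folds once */
	printf("load_avg=%llu load_period=%llu\n", load_avg, load_period);
	printf("shares=%ld\n", calc_shares(1024, 512, 2048));
	return 0;
}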
diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 185f920ec1a2..68e69acc29b9 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h | |||
@@ -52,8 +52,6 @@ SCHED_FEAT(ARCH_POWER, 0) | |||
52 | SCHED_FEAT(HRTICK, 0) | 52 | SCHED_FEAT(HRTICK, 0) |
53 | SCHED_FEAT(DOUBLE_TICK, 0) | 53 | SCHED_FEAT(DOUBLE_TICK, 0) |
54 | SCHED_FEAT(LB_BIAS, 1) | 54 | SCHED_FEAT(LB_BIAS, 1) |
55 | SCHED_FEAT(LB_SHARES_UPDATE, 1) | ||
56 | SCHED_FEAT(ASYM_EFF_LOAD, 1) | ||
57 | 55 | ||
58 | /* | 56 | /* |
59 | * Spin-wait on mutex acquisition when the mutex owner is running on | 57 | * Spin-wait on mutex acquisition when the mutex owner is running on |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index bea7d79f7e9c..c914ec747ca6 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -183,6 +183,17 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq) | |||
183 | return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period); | 183 | return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period); |
184 | } | 184 | } |
185 | 185 | ||
186 | static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq) | ||
187 | { | ||
188 | list_add_rcu(&rt_rq->leaf_rt_rq_list, | ||
189 | &rq_of_rt_rq(rt_rq)->leaf_rt_rq_list); | ||
190 | } | ||
191 | |||
192 | static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq) | ||
193 | { | ||
194 | list_del_rcu(&rt_rq->leaf_rt_rq_list); | ||
195 | } | ||
196 | |||
186 | #define for_each_leaf_rt_rq(rt_rq, rq) \ | 197 | #define for_each_leaf_rt_rq(rt_rq, rq) \ |
187 | list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list) | 198 | list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list) |
188 | 199 | ||
@@ -276,6 +287,14 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq) | |||
276 | return ktime_to_ns(def_rt_bandwidth.rt_period); | 287 | return ktime_to_ns(def_rt_bandwidth.rt_period); |
277 | } | 288 | } |
278 | 289 | ||
290 | static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq) | ||
291 | { | ||
292 | } | ||
293 | |||
294 | static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq) | ||
295 | { | ||
296 | } | ||
297 | |||
279 | #define for_each_leaf_rt_rq(rt_rq, rq) \ | 298 | #define for_each_leaf_rt_rq(rt_rq, rq) \ |
280 | for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL) | 299 | for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL) |
281 | 300 | ||
@@ -825,6 +844,9 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head) | |||
825 | if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) | 844 | if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) |
826 | return; | 845 | return; |
827 | 846 | ||
847 | if (!rt_rq->rt_nr_running) | ||
848 | list_add_leaf_rt_rq(rt_rq); | ||
849 | |||
828 | if (head) | 850 | if (head) |
829 | list_add(&rt_se->run_list, queue); | 851 | list_add(&rt_se->run_list, queue); |
830 | else | 852 | else |
@@ -844,6 +866,8 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se) | |||
844 | __clear_bit(rt_se_prio(rt_se), array->bitmap); | 866 | __clear_bit(rt_se_prio(rt_se), array->bitmap); |
845 | 867 | ||
846 | dec_rt_tasks(rt_se, rt_rq); | 868 | dec_rt_tasks(rt_se, rt_rq); |
869 | if (!rt_rq->rt_nr_running) | ||
870 | list_del_leaf_rt_rq(rt_rq); | ||
847 | } | 871 | } |
848 | 872 | ||
849 | /* | 873 | /* |
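The rt changes mirror the new cfs leaf-list handling: an rt_rq is linked onto the per-rq leaf list when its first entity is enqueued and unlinked again once the last one leaves, so the balancer only walks groups that actually have runnable tasks. A tiny sketch of that "link on 0 to 1, unlink on 1 to 0" bookkeeping, using a plain flag instead of the kernel's RCU list:

#include <stdio.h>

struct runqueue_leaf {
	int nr_running;
	int on_list;
};

static void enqueue(struct runqueue_leaf *q)
{
	if (!q->nr_running && !q->on_list) {	/* 0 -> 1: join the leaf list */
		q->on_list = 1;
		printf("linked onto leaf list\n");
	}
	q->nr_running++;
}

static void dequeue(struct runqueue_leaf *q)
{
	q->nr_running--;
	if (!q->nr_running && q->on_list) {	/* 1 -> 0: leave the leaf list */
		q->on_list = 0;
		printf("unlinked from leaf list\n");
	}
}

int main(void)
{
	struct runqueue_leaf q = { 0, 0 };

	enqueue(&q);
	enqueue(&q);
	dequeue(&q);
	dequeue(&q);
	return 0;
}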
diff --git a/kernel/softirq.c b/kernel/softirq.c index 18f4be0d5fe0..d4d918a91881 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -853,7 +853,9 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, | |||
853 | cpumask_any(cpu_online_mask)); | 853 | cpumask_any(cpu_online_mask)); |
854 | case CPU_DEAD: | 854 | case CPU_DEAD: |
855 | case CPU_DEAD_FROZEN: { | 855 | case CPU_DEAD_FROZEN: { |
856 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | 856 | static struct sched_param param = { |
857 | .sched_priority = MAX_RT_PRIO-1 | ||
858 | }; | ||
857 | 859 | ||
858 | p = per_cpu(ksoftirqd, hotcpu); | 860 | p = per_cpu(ksoftirqd, hotcpu); |
859 | per_cpu(ksoftirqd, hotcpu) = NULL; | 861 | per_cpu(ksoftirqd, hotcpu) = NULL; |
diff --git a/kernel/sys.c b/kernel/sys.c index 7f5a0cd296a9..2745dcdb6c6c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -1080,8 +1080,10 @@ SYSCALL_DEFINE0(setsid) | |||
1080 | err = session; | 1080 | err = session; |
1081 | out: | 1081 | out: |
1082 | write_unlock_irq(&tasklist_lock); | 1082 | write_unlock_irq(&tasklist_lock); |
1083 | if (err > 0) | 1083 | if (err > 0) { |
1084 | proc_sid_connector(group_leader); | 1084 | proc_sid_connector(group_leader); |
1085 | sched_autogroup_create_attach(group_leader); | ||
1086 | } | ||
1085 | return err; | 1087 | return err; |
1086 | } | 1088 | } |
1087 | 1089 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 5abfa1518554..121e4fff03d1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -259,8 +259,6 @@ static int min_wakeup_granularity_ns; /* 0 usecs */ | |||
259 | static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ | 259 | static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ |
260 | static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; | 260 | static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; |
261 | static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; | 261 | static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; |
262 | static int min_sched_shares_ratelimit = 100000; /* 100 usec */ | ||
263 | static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */ | ||
264 | #endif | 262 | #endif |
265 | 263 | ||
266 | #ifdef CONFIG_COMPACTION | 264 | #ifdef CONFIG_COMPACTION |
@@ -305,15 +303,6 @@ static struct ctl_table kern_table[] = { | |||
305 | .extra2 = &max_wakeup_granularity_ns, | 303 | .extra2 = &max_wakeup_granularity_ns, |
306 | }, | 304 | }, |
307 | { | 305 | { |
308 | .procname = "sched_shares_ratelimit", | ||
309 | .data = &sysctl_sched_shares_ratelimit, | ||
310 | .maxlen = sizeof(unsigned int), | ||
311 | .mode = 0644, | ||
312 | .proc_handler = sched_proc_update_handler, | ||
313 | .extra1 = &min_sched_shares_ratelimit, | ||
314 | .extra2 = &max_sched_shares_ratelimit, | ||
315 | }, | ||
316 | { | ||
317 | .procname = "sched_tunable_scaling", | 306 | .procname = "sched_tunable_scaling", |
318 | .data = &sysctl_sched_tunable_scaling, | 307 | .data = &sysctl_sched_tunable_scaling, |
319 | .maxlen = sizeof(enum sched_tunable_scaling), | 308 | .maxlen = sizeof(enum sched_tunable_scaling), |
@@ -323,14 +312,6 @@ static struct ctl_table kern_table[] = { | |||
323 | .extra2 = &max_sched_tunable_scaling, | 312 | .extra2 = &max_sched_tunable_scaling, |
324 | }, | 313 | }, |
325 | { | 314 | { |
326 | .procname = "sched_shares_thresh", | ||
327 | .data = &sysctl_sched_shares_thresh, | ||
328 | .maxlen = sizeof(unsigned int), | ||
329 | .mode = 0644, | ||
330 | .proc_handler = proc_dointvec_minmax, | ||
331 | .extra1 = &zero, | ||
332 | }, | ||
333 | { | ||
334 | .procname = "sched_migration_cost", | 315 | .procname = "sched_migration_cost", |
335 | .data = &sysctl_sched_migration_cost, | 316 | .data = &sysctl_sched_migration_cost, |
336 | .maxlen = sizeof(unsigned int), | 317 | .maxlen = sizeof(unsigned int), |
@@ -352,6 +333,13 @@ static struct ctl_table kern_table[] = { | |||
352 | .proc_handler = proc_dointvec, | 333 | .proc_handler = proc_dointvec, |
353 | }, | 334 | }, |
354 | { | 335 | { |
336 | .procname = "sched_shares_window", | ||
337 | .data = &sysctl_sched_shares_window, | ||
338 | .maxlen = sizeof(unsigned int), | ||
339 | .mode = 0644, | ||
340 | .proc_handler = proc_dointvec, | ||
341 | }, | ||
342 | { | ||
355 | .procname = "timer_migration", | 343 | .procname = "timer_migration", |
356 | .data = &sysctl_timer_migration, | 344 | .data = &sysctl_timer_migration, |
357 | .maxlen = sizeof(unsigned int), | 345 | .maxlen = sizeof(unsigned int), |
@@ -382,6 +370,17 @@ static struct ctl_table kern_table[] = { | |||
382 | .mode = 0644, | 370 | .mode = 0644, |
383 | .proc_handler = proc_dointvec, | 371 | .proc_handler = proc_dointvec, |
384 | }, | 372 | }, |
373 | #ifdef CONFIG_SCHED_AUTOGROUP | ||
374 | { | ||
375 | .procname = "sched_autogroup_enabled", | ||
376 | .data = &sysctl_sched_autogroup_enabled, | ||
377 | .maxlen = sizeof(unsigned int), | ||
378 | .mode = 0644, | ||
379 | .proc_handler = proc_dointvec, | ||
380 | .extra1 = &zero, | ||
381 | .extra2 = &one, | ||
382 | }, | ||
383 | #endif | ||
385 | #ifdef CONFIG_PROVE_LOCKING | 384 | #ifdef CONFIG_PROVE_LOCKING |
386 | { | 385 | { |
387 | .procname = "prove_locking", | 386 | .procname = "prove_locking", |
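The new entry is exposed as /proc/sys/kernel/sched_autogroup_enabled and complements the noautogroup boot option that clears the default. A small sketch of flipping it at run time from user space (requires sufficient privileges; error handling kept minimal):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/sched_autogroup_enabled", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "0\n");	/* disable autogroup placement for new sessions */
	fclose(f);
	return 0;
}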
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 155a415b3209..562c56e048fd 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c | |||
@@ -558,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr) | |||
558 | static int trace_wakeup_test_thread(void *data) | 558 | static int trace_wakeup_test_thread(void *data) |
559 | { | 559 | { |
560 | /* Make this a RT thread, doesn't need to be too high */ | 560 | /* Make this a RT thread, doesn't need to be too high */ |
561 | struct sched_param param = { .sched_priority = 5 }; | 561 | static struct sched_param param = { .sched_priority = 5 }; |
562 | struct completion *x = data; | 562 | struct completion *x = data; |
563 | 563 | ||
564 | sched_setscheduler(current, SCHED_FIFO, ¶m); | 564 | sched_setscheduler(current, SCHED_FIFO, ¶m); |
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 6e3c41a4024c..14b8120d5232 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -307,7 +307,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) | |||
307 | */ | 307 | */ |
308 | static int watchdog(void *unused) | 308 | static int watchdog(void *unused) |
309 | { | 309 | { |
310 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | 310 | static struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; |
311 | struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); | 311 | struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); |
312 | 312 | ||
313 | sched_setscheduler(current, SCHED_FIFO, ¶m); | 313 | sched_setscheduler(current, SCHED_FIFO, ¶m); |
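The sched_param conversions in softirq.c, trace_selftest.c and watchdog.c make the priority structures static, so these kthreads no longer carry a struct on the stack just to hand a single priority value to sched_setscheduler(). The same call shape in user space, for reference (POSIX API; SCHED_FIFO needs appropriate privileges):

#include <sched.h>
#include <stdio.h>

int main(void)
{
	static const struct sched_param param = { .sched_priority = 5 };

	/* pid 0 means "the calling process" for the POSIX call */
	if (sched_setscheduler(0, SCHED_FIFO, &param) != 0) {
		perror("sched_setscheduler");
		return 1;
	}
	printf("now running SCHED_FIFO at priority %d\n", param.sched_priority);
	return 0;
}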