Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	231
1 files changed, 187 insertions, 44 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 73513f4e19df..26efa475bdc1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -231,13 +231,20 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 
 	spin_lock(&rt_b->rt_runtime_lock);
 	for (;;) {
+		unsigned long delta;
+		ktime_t soft, hard;
+
 		if (hrtimer_active(&rt_b->rt_period_timer))
 			break;
 
 		now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
 		hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
-		hrtimer_start_expires(&rt_b->rt_period_timer,
-				HRTIMER_MODE_ABS);
+
+		soft = hrtimer_get_softexpires(&rt_b->rt_period_timer);
+		hard = hrtimer_get_expires(&rt_b->rt_period_timer);
+		delta = ktime_to_ns(ktime_sub(hard, soft));
+		__hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
+				HRTIMER_MODE_ABS, 0);
 	}
 	spin_unlock(&rt_b->rt_runtime_lock);
 }
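
Editor's note: the hunk above (and the hrtick_start() hunk further down) moves the scheduler's timers from hrtimer_start()/hrtimer_start_expires() to __hrtimer_start_range_ns(), which takes a soft expiry plus a slack in nanoseconds so the timer may fire anywhere in [soft, soft + delta]. The same range-timer machinery is what per-task timer slack for ordinary userspace sleeps rides on. The snippet below is only a hedged illustration of that userspace-visible side; the 50 us value is arbitrary and nothing in it comes from this patch.

/* Hedged sketch: userspace timer slack, the visible face of range hrtimers. */
#include <stdio.h>
#include <time.h>
#include <sys/prctl.h>

int main(void)
{
	/* Ask the kernel to coalesce this thread's wakeups within ~50 us. */
	if (prctl(PR_SET_TIMERSLACK, 50000UL, 0UL, 0UL, 0UL) != 0)
		perror("prctl(PR_SET_TIMERSLACK)");

	struct timespec req = { .tv_sec = 0, .tv_nsec = 1000000 }; /* 1 ms */
	nanosleep(&req, NULL); /* may now expire up to ~50 us late */

	printf("timer slack: %d ns\n", prctl(PR_GET_TIMERSLACK, 0UL, 0UL, 0UL, 0UL));
	return 0;
}
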
@@ -1110,7 +1117,7 @@ static void hrtick_start(struct rq *rq, u64 delay)
 	if (rq == this_rq()) {
 		hrtimer_restart(timer);
 	} else if (!rq->hrtick_csd_pending) {
-		__smp_call_function_single(cpu_of(rq), &rq->hrtick_csd);
+		__smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0);
 		rq->hrtick_csd_pending = 1;
 	}
 }
@@ -1146,7 +1153,8 @@ static __init void init_hrtick(void)
  */
 static void hrtick_start(struct rq *rq, u64 delay)
 {
-	hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
+	__hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
+			HRTIMER_MODE_REL, 0);
 }
 
 static inline void init_hrtick(void)
@@ -1410,10 +1418,22 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		   struct rq_iterator *iterator);
 #endif
 
+/* Time spent by the tasks of the cpu accounting group executing in ... */
+enum cpuacct_stat_index {
+	CPUACCT_STAT_USER,	/* ... user mode */
+	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */
+
+	CPUACCT_STAT_NSTATS,
+};
+
 #ifdef CONFIG_CGROUP_CPUACCT
 static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
+static void cpuacct_update_stats(struct task_struct *tsk,
+		enum cpuacct_stat_index idx, cputime_t val);
 #else
 static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
+static inline void cpuacct_update_stats(struct task_struct *tsk,
+		enum cpuacct_stat_index idx, cputime_t val) {}
 #endif
 
 static inline void inc_cpu_load(struct rq *rq, unsigned long load)
@@ -3818,19 +3838,23 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
  */
 #define MAX_PINNED_INTERVAL	512
 
+/* Working cpumask for load_balance and load_balance_newidle. */
+static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
+
 /*
  * Check this_cpu to ensure it is balanced within domain. Attempt to move
  * tasks if there is an imbalance.
  */
 static int load_balance(int this_cpu, struct rq *this_rq,
 			struct sched_domain *sd, enum cpu_idle_type idle,
-			int *balance, struct cpumask *cpus)
+			int *balance)
 {
 	int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
 	struct sched_group *group;
 	unsigned long imbalance;
 	struct rq *busiest;
 	unsigned long flags;
+	struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
 	cpumask_setall(cpus);
 
@@ -3985,8 +4009,7 @@ out:
 * this_rq is locked.
 */
 static int
-load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
-			struct cpumask *cpus)
+load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
 {
 	struct sched_group *group;
 	struct rq *busiest = NULL;
@@ -3994,6 +4017,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
 	int ld_moved = 0;
 	int sd_idle = 0;
 	int all_pinned = 0;
+	struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
 	cpumask_setall(cpus);
 
@@ -4134,10 +4158,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 	struct sched_domain *sd;
 	int pulled_task = 0;
 	unsigned long next_balance = jiffies + HZ;
-	cpumask_var_t tmpmask;
-
-	if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
-		return;
 
 	for_each_domain(this_cpu, sd) {
 		unsigned long interval;
@@ -4148,7 +4168,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 		if (sd->flags & SD_BALANCE_NEWIDLE)
 			/* If we've pulled tasks over stop searching: */
 			pulled_task = load_balance_newidle(this_cpu, this_rq,
-							   sd, tmpmask);
+							   sd);
 
 		interval = msecs_to_jiffies(sd->balance_interval);
 		if (time_after(next_balance, sd->last_balance + interval))
@@ -4163,7 +4183,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 		 */
 		this_rq->next_balance = next_balance;
 	}
-	free_cpumask_var(tmpmask);
 }
 
 /*
@@ -4313,11 +4332,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 	unsigned long next_balance = jiffies + 60*HZ;
 	int update_next_balance = 0;
 	int need_serialize;
-	cpumask_var_t tmp;
-
-	/* Fails alloc? Rebalancing probably not a priority right now. */
-	if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
-		return;
 
 	for_each_domain(cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
@@ -4342,7 +4356,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 		}
 
 		if (time_after_eq(jiffies, sd->last_balance + interval)) {
-			if (load_balance(cpu, rq, sd, idle, &balance, tmp)) {
+			if (load_balance(cpu, rq, sd, idle, &balance)) {
 				/*
 				 * We've pulled tasks over so either we're no
 				 * longer idle, or one of our SMT siblings is
@@ -4376,8 +4390,6 @@ out:
 	 */
 	if (likely(update_next_balance))
 		rq->next_balance = next_balance;
-
-	free_cpumask_var(tmp);
 }
 
 /*
@@ -4511,9 +4523,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
 EXPORT_PER_CPU_SYMBOL(kstat);
 
 /*
- * Return any ns on the sched_clock that have not yet been banked in
+ * Return any ns on the sched_clock that have not yet been accounted in
 * @p in case that task is currently running.
+ *
+ * Called with task_rq_lock() held on @rq.
 */
+static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
+{
+	u64 ns = 0;
+
+	if (task_current(rq, p)) {
+		update_rq_clock(rq);
+		ns = rq->clock - p->se.exec_start;
+		if ((s64)ns < 0)
+			ns = 0;
+	}
+
+	return ns;
+}
+
 unsigned long long task_delta_exec(struct task_struct *p)
 {
 	unsigned long flags;
@@ -4521,16 +4549,49 @@ unsigned long long task_delta_exec(struct task_struct *p)
 	u64 ns = 0;
 
 	rq = task_rq_lock(p, &flags);
+	ns = do_task_delta_exec(p, rq);
+	task_rq_unlock(rq, &flags);
 
-	if (task_current(rq, p)) {
-		u64 delta_exec;
-
-		update_rq_clock(rq);
-		delta_exec = rq->clock - p->se.exec_start;
-		if ((s64)delta_exec > 0)
-			ns = delta_exec;
-	}
+	return ns;
+}
+
+/*
+ * Return accounted runtime for the task.
+ * In case the task is currently running, return the runtime plus current's
+ * pending runtime that have not been accounted yet.
+ */
+unsigned long long task_sched_runtime(struct task_struct *p)
+{
+	unsigned long flags;
+	struct rq *rq;
+	u64 ns = 0;
+
+	rq = task_rq_lock(p, &flags);
+	ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
+	task_rq_unlock(rq, &flags);
+
+	return ns;
+}
 
+/*
+ * Return sum_exec_runtime for the thread group.
+ * In case the task is currently running, return the sum plus current's
+ * pending runtime that have not been accounted yet.
+ *
+ * Note that the thread group might have other running tasks as well,
+ * so the return value not includes other pending runtime that other
+ * running tasks might have.
+ */
+unsigned long long thread_group_sched_runtime(struct task_struct *p)
+{
+	struct task_cputime totals;
+	unsigned long flags;
+	struct rq *rq;
+	u64 ns;
+
+	rq = task_rq_lock(p, &flags);
+	thread_group_cputime(p, &totals);
+	ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
 	task_rq_unlock(rq, &flags);
 
 	return ns;
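
Editor's note: the two hunks above factor the "runtime not yet accounted to @p" calculation into do_task_delta_exec() and then build task_sched_runtime() and thread_group_sched_runtime() on top of it. In kernels of this era those helpers feed the POSIX per-thread and per-process CPU clocks, so a rough userspace counterpart is reading CLOCK_THREAD_CPUTIME_ID and CLOCK_PROCESS_CPUTIME_ID. Hedged sketch only; the busy loop and iteration count are arbitrary, and the exact kernel call path is an assumption based on the posix-cpu-timers code of this period, not something this patch states.

/* Hedged sketch: per-thread vs. whole-process CPU time as seen from userspace. */
/* Link with -lrt on older glibc. */
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec thread_ts, process_ts;
	volatile unsigned long sink = 0;
	unsigned long i;

	/* Burn a little CPU so the clocks have something to report. */
	for (i = 0; i < 50000000UL; i++)
		sink += i;

	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &thread_ts);   /* this thread only */
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &process_ts); /* whole thread group */

	printf("thread : %ld.%09ld s\n", (long)thread_ts.tv_sec, thread_ts.tv_nsec);
	printf("process: %ld.%09ld s\n", (long)process_ts.tv_sec, process_ts.tv_nsec);
	return 0;
}
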
@@ -4559,6 +4620,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
 		cpustat->nice = cputime64_add(cpustat->nice, tmp);
 	else
 		cpustat->user = cputime64_add(cpustat->user, tmp);
+
+	cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);
 	/* Account for user time used */
 	acct_update_integrals(p);
 }
@@ -4620,6 +4683,8 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 	else
 		cpustat->system = cputime64_add(cpustat->system, tmp);
 
+	cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
+
 	/* Account for system time used */
 	acct_update_integrals(p);
 }
@@ -4667,7 +4732,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
 
 	if (user_tick)
 		account_user_time(p, one_jiffy, one_jiffy_scaled);
-	else if (p != rq->idle)
+	else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
 		account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
 				    one_jiffy_scaled);
 	else
@@ -4781,10 +4846,7 @@ void scheduler_tick(void)
 #endif
 }
 
-#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
-				defined(CONFIG_PREEMPT_TRACER))
-
-static inline unsigned long get_parent_ip(unsigned long addr)
+notrace unsigned long get_parent_ip(unsigned long addr)
 {
 	if (in_lock_functions(addr)) {
 		addr = CALLER_ADDR2;
@@ -4794,6 +4856,9 @@ static inline unsigned long get_parent_ip(unsigned long addr)
 	return addr;
 }
 
+#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
+				defined(CONFIG_PREEMPT_TRACER))
+
 void __kprobes add_preempt_count(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
@@ -7302,7 +7367,12 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 		cpumask_or(groupmask, groupmask, sched_group_cpus(group));
 
 		cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
+
 		printk(KERN_CONT " %s", str);
+		if (group->__cpu_power != SCHED_LOAD_SCALE) {
+			printk(KERN_CONT " (__cpu_power = %d)",
+				group->__cpu_power);
+		}
 
 		group = group->next;
 	} while (group != sd->groups);
@@ -7728,7 +7798,7 @@ cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
 {
 	int group;
 
-	cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+	cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
 	group = cpumask_first(mask);
 	if (sg)
 		*sg = &per_cpu(sched_group_core, group).sg;
@@ -7757,7 +7827,7 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
 	cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
 	group = cpumask_first(mask);
 #elif defined(CONFIG_SCHED_SMT)
-	cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+	cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
 	group = cpumask_first(mask);
 #else
 	group = cpu;
@@ -8100,7 +8170,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
 		SD_INIT(sd, SIBLING);
 		set_domain_attribute(sd, attr);
 		cpumask_and(sched_domain_span(sd),
-			    &per_cpu(cpu_sibling_map, i), cpu_map);
+			    topology_thread_cpumask(i), cpu_map);
 		sd->parent = p;
 		p->child = sd;
 		cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
@@ -8111,7 +8181,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
 	/* Set up CPU (sibling) groups */
 	for_each_cpu(i, cpu_map) {
 		cpumask_and(this_sibling_map,
-			    &per_cpu(cpu_sibling_map, i), cpu_map);
+			    topology_thread_cpumask(i), cpu_map);
 		if (i != cpumask_first(this_sibling_map))
 			continue;
 
@@ -8787,6 +8857,9 @@ void __init sched_init(void)
 #ifdef CONFIG_USER_SCHED
 	alloc_size *= 2;
 #endif
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	alloc_size += num_possible_cpus() * cpumask_size();
+#endif
 	/*
 	 * As sched_init() is called before page_alloc is setup,
 	 * we use alloc_bootmem().
@@ -8824,6 +8897,12 @@ void __init sched_init(void)
 		ptr += nr_cpu_ids * sizeof(void **);
 #endif /* CONFIG_USER_SCHED */
 #endif /* CONFIG_RT_GROUP_SCHED */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+		for_each_possible_cpu(i) {
+			per_cpu(load_balance_tmpmask, i) = (void *)ptr;
+			ptr += cpumask_size();
+		}
+#endif /* CONFIG_CPUMASK_OFFSTACK */
 	}
 
 #ifdef CONFIG_SMP
@@ -9916,6 +9995,7 @@ struct cpuacct {
 	struct cgroup_subsys_state css;
 	/* cpuusage holds pointer to a u64-type object on every cpu */
 	u64 *cpuusage;
+	struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
 	struct cpuacct *parent;
 };
 
@@ -9940,20 +10020,32 @@ static struct cgroup_subsys_state *cpuacct_create(
 	struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
 	struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+	int i;
 
 	if (!ca)
-		return ERR_PTR(-ENOMEM);
+		goto out;
 
 	ca->cpuusage = alloc_percpu(u64);
-	if (!ca->cpuusage) {
-		kfree(ca);
-		return ERR_PTR(-ENOMEM);
-	}
+	if (!ca->cpuusage)
+		goto out_free_ca;
+
+	for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
+		if (percpu_counter_init(&ca->cpustat[i], 0))
+			goto out_free_counters;
 
 	if (cgrp->parent)
 		ca->parent = cgroup_ca(cgrp->parent);
 
 	return &ca->css;
+
+out_free_counters:
+	while (--i >= 0)
+		percpu_counter_destroy(&ca->cpustat[i]);
+	free_percpu(ca->cpuusage);
+out_free_ca:
+	kfree(ca);
+out:
+	return ERR_PTR(-ENOMEM);
 }
 
 /* destroy an existing cpu accounting group */
@@ -9961,7 +10053,10 @@ static void
 cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
 	struct cpuacct *ca = cgroup_ca(cgrp);
+	int i;
 
+	for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
+		percpu_counter_destroy(&ca->cpustat[i]);
 	free_percpu(ca->cpuusage);
 	kfree(ca);
 }
@@ -10048,6 +10143,25 @@ static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
 	return 0;
 }
 
+static const char *cpuacct_stat_desc[] = {
+	[CPUACCT_STAT_USER] = "user",
+	[CPUACCT_STAT_SYSTEM] = "system",
+};
+
+static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
+		struct cgroup_map_cb *cb)
+{
+	struct cpuacct *ca = cgroup_ca(cgrp);
+	int i;
+
+	for (i = 0; i < CPUACCT_STAT_NSTATS; i++) {
+		s64 val = percpu_counter_read(&ca->cpustat[i]);
+		val = cputime64_to_clock_t(val);
+		cb->fill(cb, cpuacct_stat_desc[i], val);
+	}
+	return 0;
+}
+
 static struct cftype files[] = {
 	{
 		.name = "usage",
@@ -10058,7 +10172,10 @@ static struct cftype files[] = {
 		.name = "usage_percpu",
 		.read_seq_string = cpuacct_percpu_seq_read,
 	},
-
+	{
+		.name = "stat",
+		.read_map = cpuacct_stats_show,
+	},
 };
 
 static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@ -10080,12 +10197,38 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 		return;
 
 	cpu = task_cpu(tsk);
+
+	rcu_read_lock();
+
 	ca = task_ca(tsk);
 
 	for (; ca; ca = ca->parent) {
 		u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
 		*cpuusage += cputime;
 	}
+
+	rcu_read_unlock();
+}
+
+/*
+ * Charge the system/user time to the task's accounting group.
+ */
+static void cpuacct_update_stats(struct task_struct *tsk,
+		enum cpuacct_stat_index idx, cputime_t val)
+{
+	struct cpuacct *ca;
+
+	if (unlikely(!cpuacct_subsys.active))
+		return;
+
+	rcu_read_lock();
+	ca = task_ca(tsk);
+
+	do {
+		percpu_counter_add(&ca->cpustat[idx], val);
+		ca = ca->parent;
+	} while (ca);
+	rcu_read_unlock();
 }
 
 struct cgroup_subsys cpuacct_subsys = {
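
Editor's note: taken together, the cpuacct hunks add per-group user/system counters (the percpu_counter cpustat[] array) charged from account_user_time()/account_system_time(), and expose them through a new cpuacct.stat cgroup file whose values are converted to clock ticks (USER_HZ) by cputime64_to_clock_t(), alongside the existing cpuacct.usage. Below is a hedged usage sketch from userspace; the mount point /sys/fs/cgroup/cpuacct and the group name "demo" are assumptions, not part of the patch.

/* Hedged sketch: reading the new cpuacct.stat file for one accounting group. */
#include <stdio.h>

int main(void)
{
	/* Mount point and group name are assumptions; adjust to your setup. */
	const char *path = "/sys/fs/cgroup/cpuacct/demo/cpuacct.stat";
	char key[32];
	long long ticks;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	/* Per this patch the file holds two lines: "user <ticks>" and "system <ticks>". */
	while (fscanf(f, "%31s %lld", key, &ticks) == 2)
		printf("%s = %lld ticks (USER_HZ units)\n", key, ticks);
	fclose(f);
	return 0;
}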