Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--  kernel/sched/core.c | 414
1 file changed, 109 insertions(+), 305 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7f12624a393c..58453b8272fd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -512,11 +512,6 @@ static inline void init_hrtick(void)
  * the target CPU.
  */
 #ifdef CONFIG_SMP
-
-#ifndef tsk_is_polling
-#define tsk_is_polling(t) 0
-#endif
-
 void resched_task(struct task_struct *p)
 {
 	int cpu;
@@ -549,7 +544,7 @@ void resched_cpu(int cpu)
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 /*
  * In the semi idle case, use the nearest busy cpu for migrating timers
  * from an idle cpu. This is good for power-savings.
@@ -587,7 +582,7 @@ unlock:
  * account when the CPU goes back to idle and evaluates the timer
  * wheel for the next timer event.
  */
-void wake_up_idle_cpu(int cpu)
+static void wake_up_idle_cpu(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
 
@@ -617,20 +612,56 @@ void wake_up_idle_cpu(int cpu)
 	smp_send_reschedule(cpu);
 }
 
+static bool wake_up_full_nohz_cpu(int cpu)
+{
+	if (tick_nohz_full_cpu(cpu)) {
+		if (cpu != smp_processor_id() ||
+		    tick_nohz_tick_stopped())
+			smp_send_reschedule(cpu);
+		return true;
+	}
+
+	return false;
+}
+
+void wake_up_nohz_cpu(int cpu)
+{
+	if (!wake_up_full_nohz_cpu(cpu))
+		wake_up_idle_cpu(cpu);
+}
+
 static inline bool got_nohz_idle_kick(void)
 {
 	int cpu = smp_processor_id();
 	return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu));
 }
 
-#else /* CONFIG_NO_HZ */
+#else /* CONFIG_NO_HZ_COMMON */
 
 static inline bool got_nohz_idle_kick(void)
 {
 	return false;
 }
 
-#endif /* CONFIG_NO_HZ */
+#endif /* CONFIG_NO_HZ_COMMON */
+
+#ifdef CONFIG_NO_HZ_FULL
+bool sched_can_stop_tick(void)
+{
+	struct rq *rq;
+
+	rq = this_rq();
+
+	/* Make sure rq->nr_running update is visible after the IPI */
+	smp_rmb();
+
+	/* More than one running task need preemption */
+	if (rq->nr_running > 1)
+		return false;
+
+	return true;
+}
+#endif /* CONFIG_NO_HZ_FULL */
 
 void sched_avg_update(struct rq *rq)
 {
@@ -1288,8 +1319,8 @@ static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
 static void
 ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 {
-	trace_sched_wakeup(p, true);
 	check_preempt_curr(rq, p, wake_flags);
+	trace_sched_wakeup(p, true);
 
 	p->state = TASK_RUNNING;
 #ifdef CONFIG_SMP
@@ -1362,7 +1393,8 @@ static void sched_ttwu_pending(void)
 
 void scheduler_ipi(void)
 {
-	if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
+	if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()
+			&& !tick_nohz_full_cpu(smp_processor_id()))
 		return;
 
 	/*
@@ -1379,6 +1411,7 @@ void scheduler_ipi(void)
 	 * somewhat pessimize the simple resched case.
 	 */
 	irq_enter();
+	tick_nohz_full_check();
 	sched_ttwu_pending();
 
 	/*
@@ -1498,8 +1531,10 @@ static void try_to_wake_up_local(struct task_struct *p)
 {
 	struct rq *rq = task_rq(p);
 
-	BUG_ON(rq != this_rq());
-	BUG_ON(p == current);
+	if (WARN_ON_ONCE(rq != this_rq()) ||
+	    WARN_ON_ONCE(p == current))
+		return;
+
 	lockdep_assert_held(&rq->lock);
 
 	if (!raw_spin_trylock(&p->pi_lock)) {
@@ -1858,6 +1893,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 		kprobe_flush_task(prev);
 		put_task_struct(prev);
 	}
+
+	tick_nohz_task_switch(current);
 }
 
 #ifdef CONFIG_SMP
@@ -2121,7 +2158,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
 	return load >> FSHIFT;
 }
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 /*
  * Handle NO_HZ for the global load-average.
  *
@@ -2347,12 +2384,12 @@ static void calc_global_nohz(void)
 	smp_wmb();
 	calc_load_idx++;
 }
-#else /* !CONFIG_NO_HZ */
+#else /* !CONFIG_NO_HZ_COMMON */
 
 static inline long calc_load_fold_idle(void) { return 0; }
 static inline void calc_global_nohz(void) { }
 
-#endif /* CONFIG_NO_HZ */
+#endif /* CONFIG_NO_HZ_COMMON */
 
 /*
  * calc_load - update the avenrun load estimates 10 ticks after the
@@ -2512,7 +2549,7 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
 	sched_avg_update(this_rq);
 }
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 /*
  * There is no sane way to deal with nohz on smp when using jiffies because the
  * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
@@ -2572,7 +2609,7 @@ void update_cpu_load_nohz(void)
 	}
 	raw_spin_unlock(&this_rq->lock);
 }
-#endif /* CONFIG_NO_HZ */
+#endif /* CONFIG_NO_HZ_COMMON */
 
 /*
  * Called from scheduler_tick()
@@ -2699,8 +2736,35 @@ void scheduler_tick(void)
 	rq->idle_balance = idle_cpu(cpu);
 	trigger_load_balance(rq, cpu);
 #endif
+	rq_last_tick_reset(rq);
 }
 
+#ifdef CONFIG_NO_HZ_FULL
+/**
+ * scheduler_tick_max_deferment
+ *
+ * Keep at least one tick per second when a single
+ * active task is running because the scheduler doesn't
+ * yet completely support full dynticks environment.
+ *
+ * This makes sure that uptime, CFS vruntime, load
+ * balancing, etc... continue to move forward, even
+ * with a very low granularity.
+ */
+u64 scheduler_tick_max_deferment(void)
+{
+	struct rq *rq = this_rq();
+	unsigned long next, now = ACCESS_ONCE(jiffies);
+
+	next = rq->last_sched_tick + HZ;
+
+	if (time_before_eq(next, now))
+		return 0;
+
+	return jiffies_to_usecs(next - now) * NSEC_PER_USEC;
+}
+#endif
+
 notrace unsigned long get_parent_ip(unsigned long addr)
 {
 	if (in_lock_functions(addr)) {
@@ -2997,51 +3061,6 @@ void __sched schedule_preempt_disabled(void)
 	preempt_disable();
 }
 
-#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-
-static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
-{
-	if (lock->owner != owner)
-		return false;
-
-	/*
-	 * Ensure we emit the owner->on_cpu, dereference _after_ checking
-	 * lock->owner still matches owner, if that fails, owner might
-	 * point to free()d memory, if it still matches, the rcu_read_lock()
-	 * ensures the memory stays valid.
-	 */
-	barrier();
-
-	return owner->on_cpu;
-}
-
-/*
- * Look out! "owner" is an entirely speculative pointer
- * access and not reliable.
- */
-int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
-{
-	if (!sched_feat(OWNER_SPIN))
-		return 0;
-
-	rcu_read_lock();
-	while (owner_running(lock, owner)) {
-		if (need_resched())
-			break;
-
-		arch_mutex_cpu_relax();
-	}
-	rcu_read_unlock();
-
-	/*
-	 * We break out the loop above on need_resched() and when the
-	 * owner changed, which is a sign for heavy contention. Return
-	 * success only when lock->owner is NULL.
-	 */
-	return lock->owner == NULL;
-}
-#endif
-
 #ifdef CONFIG_PREEMPT
 /*
  * this is the entry point to schedule() from in-kernel preemption
@@ -3082,11 +3101,13 @@ EXPORT_SYMBOL(preempt_schedule);
 asmlinkage void __sched preempt_schedule_irq(void)
 {
 	struct thread_info *ti = current_thread_info();
+	enum ctx_state prev_state;
 
 	/* Catch callers which need to be fixed */
 	BUG_ON(ti->preempt_count || !irqs_disabled());
 
-	user_exit();
+	prev_state = exception_enter();
+
 	do {
 		add_preempt_count(PREEMPT_ACTIVE);
 		local_irq_enable();
@@ -3100,6 +3121,8 @@ asmlinkage void __sched preempt_schedule_irq(void)
 		 */
 		barrier();
 	} while (need_resched());
+
+	exception_exit(prev_state);
 }
 
 #endif /* CONFIG_PREEMPT */
@@ -4126,6 +4149,10 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 	get_task_struct(p);
 	rcu_read_unlock();
 
+	if (p->flags & PF_NO_SETAFFINITY) {
+		retval = -EINVAL;
+		goto out_put_task;
+	}
 	if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
 		retval = -ENOMEM;
 		goto out_put_task;
@@ -4626,6 +4653,7 @@ void sched_show_task(struct task_struct *p)
 		task_pid_nr(p), ppid,
 		(unsigned long)task_thread_info(p)->flags);
 
+	print_worker_info(KERN_INFO, p);
 	show_stack(p, NULL);
 }
 
@@ -4773,11 +4801,6 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 		goto out;
 	}
 
-	if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) {
-		ret = -EINVAL;
-		goto out;
-	}
-
 	do_set_cpus_allowed(p, new_mask);
 
 	/* Can the task run on the task's current CPU? If so, we're done */
@@ -4999,7 +5022,7 @@ static void sd_free_ctl_entry(struct ctl_table **tablep)
 }
 
 static int min_load_idx = 0;
-static int max_load_idx = CPU_LOAD_IDX_MAX;
+static int max_load_idx = CPU_LOAD_IDX_MAX-1;
 
 static void
 set_table_entry(struct ctl_table *entry,
@@ -6248,7 +6271,7 @@ static void sched_init_numa(void)
 	 * 'level' contains the number of unique distances, excluding the
 	 * identity distance node_distance(i,i).
 	 *
-	 * The sched_domains_nume_distance[] array includes the actual distance
+	 * The sched_domains_numa_distance[] array includes the actual distance
 	 * numbers.
 	 */
 
@@ -6861,11 +6884,15 @@ int in_sched_functions(unsigned long addr)
 }
 
 #ifdef CONFIG_CGROUP_SCHED
+/*
+ * Default task group.
+ * Every task in system belongs to this group at bootup.
+ */
 struct task_group root_task_group;
 LIST_HEAD(task_groups);
 #endif
 
-DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
+DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
 
 void __init sched_init(void)
 {
@@ -6902,7 +6929,7 @@ void __init sched_init(void)
 #endif /* CONFIG_RT_GROUP_SCHED */
 #ifdef CONFIG_CPUMASK_OFFSTACK
 	for_each_possible_cpu(i) {
-		per_cpu(load_balance_tmpmask, i) = (void *)ptr;
+		per_cpu(load_balance_mask, i) = (void *)ptr;
 		ptr += cpumask_size();
 	}
 #endif /* CONFIG_CPUMASK_OFFSTACK */
@@ -6928,12 +6955,6 @@ void __init sched_init(void)
 
 #endif /* CONFIG_CGROUP_SCHED */
 
-#ifdef CONFIG_CGROUP_CPUACCT
-	root_cpuacct.cpustat = &kernel_cpustat;
-	root_cpuacct.cpuusage = alloc_percpu(u64);
-	/* Too early, not expected to fail */
-	BUG_ON(!root_cpuacct.cpuusage);
-#endif
 	for_each_possible_cpu(i) {
 		struct rq *rq;
 
@@ -6997,9 +7018,12 @@ void __init sched_init(void)
 		INIT_LIST_HEAD(&rq->cfs_tasks);
 
 		rq_attach_root(rq, &def_root_domain);
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 		rq->nohz_flags = 0;
 #endif
+#ifdef CONFIG_NO_HZ_FULL
+		rq->last_sched_tick = 0;
+#endif
 #endif
 		init_rq_hrtick(rq);
 		atomic_set(&rq->nr_iowait, 0);
@@ -7455,7 +7479,7 @@ unlock:
 	return err;
 }
 
-int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
+static int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
 {
 	u64 rt_runtime, rt_period;
 
@@ -7467,7 +7491,7 @@ int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
 	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
 }
 
-long sched_group_rt_runtime(struct task_group *tg)
+static long sched_group_rt_runtime(struct task_group *tg)
 {
 	u64 rt_runtime_us;
 
@@ -7479,7 +7503,7 @@ long sched_group_rt_runtime(struct task_group *tg)
 	return rt_runtime_us;
 }
 
-int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
+static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
 {
 	u64 rt_runtime, rt_period;
 
@@ -7492,7 +7516,7 @@ int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
 	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
 }
 
-long sched_group_rt_period(struct task_group *tg)
+static long sched_group_rt_period(struct task_group *tg)
 {
 	u64 rt_period_us;
 
@@ -7527,7 +7551,7 @@ static int sched_rt_global_constraints(void)
 	return ret;
 }
 
-int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
+static int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
 {
 	/* Don't accept realtime tasks when there is no way for them to run */
 	if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
@@ -8035,226 +8059,6 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 
 #endif /* CONFIG_CGROUP_SCHED */
 
-#ifdef CONFIG_CGROUP_CPUACCT
-
-/*
- * CPU accounting code for task groups.
- *
- * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
- * (balbir@in.ibm.com).
- */
-
-struct cpuacct root_cpuacct;
-
-/* create a new cpu accounting group */
-static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp)
-{
-	struct cpuacct *ca;
-
-	if (!cgrp->parent)
-		return &root_cpuacct.css;
-
-	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
-	if (!ca)
-		goto out;
-
-	ca->cpuusage = alloc_percpu(u64);
-	if (!ca->cpuusage)
-		goto out_free_ca;
-
-	ca->cpustat = alloc_percpu(struct kernel_cpustat);
-	if (!ca->cpustat)
-		goto out_free_cpuusage;
-
-	return &ca->css;
-
-out_free_cpuusage:
-	free_percpu(ca->cpuusage);
-out_free_ca:
-	kfree(ca);
-out:
-	return ERR_PTR(-ENOMEM);
-}
-
-/* destroy an existing cpu accounting group */
-static void cpuacct_css_free(struct cgroup *cgrp)
-{
-	struct cpuacct *ca = cgroup_ca(cgrp);
-
-	free_percpu(ca->cpustat);
-	free_percpu(ca->cpuusage);
-	kfree(ca);
-}
-
-static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
-{
-	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
-	u64 data;
-
-#ifndef CONFIG_64BIT
-	/*
-	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
-	 */
-	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
-	data = *cpuusage;
-	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
-#else
-	data = *cpuusage;
-#endif
-
-	return data;
-}
-
-static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
-{
-	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
-
-#ifndef CONFIG_64BIT
-	/*
-	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
-	 */
-	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
-	*cpuusage = val;
-	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
-#else
-	*cpuusage = val;
-#endif
-}
-
-/* return total cpu usage (in nanoseconds) of a group */
-static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
-{
-	struct cpuacct *ca = cgroup_ca(cgrp);
-	u64 totalcpuusage = 0;
-	int i;
-
-	for_each_present_cpu(i)
-		totalcpuusage += cpuacct_cpuusage_read(ca, i);
-
-	return totalcpuusage;
-}
-
-static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype,
-			  u64 reset)
-{
-	struct cpuacct *ca = cgroup_ca(cgrp);
-	int err = 0;
-	int i;
-
-	if (reset) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	for_each_present_cpu(i)
-		cpuacct_cpuusage_write(ca, i, 0);
-
-out:
-	return err;
-}
-
-static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
-				   struct seq_file *m)
-{
-	struct cpuacct *ca = cgroup_ca(cgroup);
-	u64 percpu;
-	int i;
-
-	for_each_present_cpu(i) {
-		percpu = cpuacct_cpuusage_read(ca, i);
-		seq_printf(m, "%llu ", (unsigned long long) percpu);
-	}
-	seq_printf(m, "\n");
-	return 0;
-}
-
-static const char *cpuacct_stat_desc[] = {
-	[CPUACCT_STAT_USER] = "user",
-	[CPUACCT_STAT_SYSTEM] = "system",
-};
-
-static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
-			      struct cgroup_map_cb *cb)
-{
-	struct cpuacct *ca = cgroup_ca(cgrp);
-	int cpu;
-	s64 val = 0;
-
-	for_each_online_cpu(cpu) {
-		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
-		val += kcpustat->cpustat[CPUTIME_USER];
-		val += kcpustat->cpustat[CPUTIME_NICE];
-	}
-	val = cputime64_to_clock_t(val);
-	cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_USER], val);
-
-	val = 0;
-	for_each_online_cpu(cpu) {
-		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
-		val += kcpustat->cpustat[CPUTIME_SYSTEM];
-		val += kcpustat->cpustat[CPUTIME_IRQ];
-		val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
-	}
-
-	val = cputime64_to_clock_t(val);
-	cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
-
-	return 0;
-}
-
-static struct cftype files[] = {
-	{
-		.name = "usage",
-		.read_u64 = cpuusage_read,
-		.write_u64 = cpuusage_write,
-	},
-	{
-		.name = "usage_percpu",
-		.read_seq_string = cpuacct_percpu_seq_read,
-	},
-	{
-		.name = "stat",
-		.read_map = cpuacct_stats_show,
-	},
-	{ }	/* terminate */
-};
-
-/*
- * charge this task's execution time to its accounting group.
- *
- * called with rq->lock held.
- */
-void cpuacct_charge(struct task_struct *tsk, u64 cputime)
-{
-	struct cpuacct *ca;
-	int cpu;
-
-	if (unlikely(!cpuacct_subsys.active))
-		return;
-
-	cpu = task_cpu(tsk);
-
-	rcu_read_lock();
-
-	ca = task_ca(tsk);
-
-	for (; ca; ca = parent_ca(ca)) {
-		u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
-		*cpuusage += cputime;
-	}
-
-	rcu_read_unlock();
-}
-
-struct cgroup_subsys cpuacct_subsys = {
-	.name = "cpuacct",
-	.css_alloc = cpuacct_css_alloc,
-	.css_free = cpuacct_css_free,
-	.subsys_id = cpuacct_subsys_id,
-	.base_cftypes = files,
-};
-#endif /* CONFIG_CGROUP_CPUACCT */
-
 void dump_cpu_task(int cpu)
 {
 	pr_info("Task dump for CPU %d:\n", cpu);