Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r-- | kernel/sched/core.c | 414
1 file changed, 109 insertions, 305 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7f12624a393c..58453b8272fd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -512,11 +512,6 @@ static inline void init_hrtick(void) | |||
512 | * the target CPU. | 512 | * the target CPU. |
513 | */ | 513 | */ |
514 | #ifdef CONFIG_SMP | 514 | #ifdef CONFIG_SMP |
515 | |||
516 | #ifndef tsk_is_polling | ||
517 | #define tsk_is_polling(t) 0 | ||
518 | #endif | ||
519 | |||
520 | void resched_task(struct task_struct *p) | 515 | void resched_task(struct task_struct *p) |
521 | { | 516 | { |
522 | int cpu; | 517 | int cpu; |
@@ -549,7 +544,7 @@ void resched_cpu(int cpu) | |||
549 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 544 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
550 | } | 545 | } |
551 | 546 | ||
552 | #ifdef CONFIG_NO_HZ | 547 | #ifdef CONFIG_NO_HZ_COMMON |
553 | /* | 548 | /* |
554 | * In the semi idle case, use the nearest busy cpu for migrating timers | 549 | * In the semi idle case, use the nearest busy cpu for migrating timers |
555 | * from an idle cpu. This is good for power-savings. | 550 | * from an idle cpu. This is good for power-savings. |
@@ -587,7 +582,7 @@ unlock: | |||
587 | * account when the CPU goes back to idle and evaluates the timer | 582 | * account when the CPU goes back to idle and evaluates the timer |
588 | * wheel for the next timer event. | 583 | * wheel for the next timer event. |
589 | */ | 584 | */ |
590 | void wake_up_idle_cpu(int cpu) | 585 | static void wake_up_idle_cpu(int cpu) |
591 | { | 586 | { |
592 | struct rq *rq = cpu_rq(cpu); | 587 | struct rq *rq = cpu_rq(cpu); |
593 | 588 | ||
@@ -617,20 +612,56 @@ void wake_up_idle_cpu(int cpu) | |||
617 | smp_send_reschedule(cpu); | 612 | smp_send_reschedule(cpu); |
618 | } | 613 | } |
619 | 614 | ||
615 | static bool wake_up_full_nohz_cpu(int cpu) | ||
616 | { | ||
617 | if (tick_nohz_full_cpu(cpu)) { | ||
618 | if (cpu != smp_processor_id() || | ||
619 | tick_nohz_tick_stopped()) | ||
620 | smp_send_reschedule(cpu); | ||
621 | return true; | ||
622 | } | ||
623 | |||
624 | return false; | ||
625 | } | ||
626 | |||
627 | void wake_up_nohz_cpu(int cpu) | ||
628 | { | ||
629 | if (!wake_up_full_nohz_cpu(cpu)) | ||
630 | wake_up_idle_cpu(cpu); | ||
631 | } | ||
632 | |||
620 | static inline bool got_nohz_idle_kick(void) | 633 | static inline bool got_nohz_idle_kick(void) |
621 | { | 634 | { |
622 | int cpu = smp_processor_id(); | 635 | int cpu = smp_processor_id(); |
623 | return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)); | 636 | return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)); |
624 | } | 637 | } |
625 | 638 | ||
626 | #else /* CONFIG_NO_HZ */ | 639 | #else /* CONFIG_NO_HZ_COMMON */ |
627 | 640 | ||
628 | static inline bool got_nohz_idle_kick(void) | 641 | static inline bool got_nohz_idle_kick(void) |
629 | { | 642 | { |
630 | return false; | 643 | return false; |
631 | } | 644 | } |
632 | 645 | ||
633 | #endif /* CONFIG_NO_HZ */ | 646 | #endif /* CONFIG_NO_HZ_COMMON */ |
647 | |||
648 | #ifdef CONFIG_NO_HZ_FULL | ||
649 | bool sched_can_stop_tick(void) | ||
650 | { | ||
651 | struct rq *rq; | ||
652 | |||
653 | rq = this_rq(); | ||
654 | |||
655 | /* Make sure rq->nr_running update is visible after the IPI */ | ||
656 | smp_rmb(); | ||
657 | |||
658 | /* More than one running task need preemption */ | ||
659 | if (rq->nr_running > 1) | ||
660 | return false; | ||
661 | |||
662 | return true; | ||
663 | } | ||
664 | #endif /* CONFIG_NO_HZ_FULL */ | ||
634 | 665 | ||
635 | void sched_avg_update(struct rq *rq) | 666 | void sched_avg_update(struct rq *rq) |
636 | { | 667 | { |
@@ -1288,8 +1319,8 @@ static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags) | |||
1288 | static void | 1319 | static void |
1289 | ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) | 1320 | ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) |
1290 | { | 1321 | { |
1291 | trace_sched_wakeup(p, true); | ||
1292 | check_preempt_curr(rq, p, wake_flags); | 1322 | check_preempt_curr(rq, p, wake_flags); |
1323 | trace_sched_wakeup(p, true); | ||
1293 | 1324 | ||
1294 | p->state = TASK_RUNNING; | 1325 | p->state = TASK_RUNNING; |
1295 | #ifdef CONFIG_SMP | 1326 | #ifdef CONFIG_SMP |
@@ -1362,7 +1393,8 @@ static void sched_ttwu_pending(void) | |||
1362 | 1393 | ||
1363 | void scheduler_ipi(void) | 1394 | void scheduler_ipi(void) |
1364 | { | 1395 | { |
1365 | if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) | 1396 | if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick() |
1397 | && !tick_nohz_full_cpu(smp_processor_id())) | ||
1366 | return; | 1398 | return; |
1367 | 1399 | ||
1368 | /* | 1400 | /* |
@@ -1379,6 +1411,7 @@ void scheduler_ipi(void) | |||
1379 | * somewhat pessimize the simple resched case. | 1411 | * somewhat pessimize the simple resched case. |
1380 | */ | 1412 | */ |
1381 | irq_enter(); | 1413 | irq_enter(); |
1414 | tick_nohz_full_check(); | ||
1382 | sched_ttwu_pending(); | 1415 | sched_ttwu_pending(); |
1383 | 1416 | ||
1384 | /* | 1417 | /* |
@@ -1498,8 +1531,10 @@ static void try_to_wake_up_local(struct task_struct *p) | |||
1498 | { | 1531 | { |
1499 | struct rq *rq = task_rq(p); | 1532 | struct rq *rq = task_rq(p); |
1500 | 1533 | ||
1501 | BUG_ON(rq != this_rq()); | 1534 | if (WARN_ON_ONCE(rq != this_rq()) || |
1502 | BUG_ON(p == current); | 1535 | WARN_ON_ONCE(p == current)) |
1536 | return; | ||
1537 | |||
1503 | lockdep_assert_held(&rq->lock); | 1538 | lockdep_assert_held(&rq->lock); |
1504 | 1539 | ||
1505 | if (!raw_spin_trylock(&p->pi_lock)) { | 1540 | if (!raw_spin_trylock(&p->pi_lock)) { |
@@ -1858,6 +1893,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
1858 | kprobe_flush_task(prev); | 1893 | kprobe_flush_task(prev); |
1859 | put_task_struct(prev); | 1894 | put_task_struct(prev); |
1860 | } | 1895 | } |
1896 | |||
1897 | tick_nohz_task_switch(current); | ||
1861 | } | 1898 | } |
1862 | 1899 | ||
1863 | #ifdef CONFIG_SMP | 1900 | #ifdef CONFIG_SMP |
@@ -2121,7 +2158,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) | |||
2121 | return load >> FSHIFT; | 2158 | return load >> FSHIFT; |
2122 | } | 2159 | } |
2123 | 2160 | ||
2124 | #ifdef CONFIG_NO_HZ | 2161 | #ifdef CONFIG_NO_HZ_COMMON |
2125 | /* | 2162 | /* |
2126 | * Handle NO_HZ for the global load-average. | 2163 | * Handle NO_HZ for the global load-average. |
2127 | * | 2164 | * |
@@ -2347,12 +2384,12 @@ static void calc_global_nohz(void) | |||
2347 | smp_wmb(); | 2384 | smp_wmb(); |
2348 | calc_load_idx++; | 2385 | calc_load_idx++; |
2349 | } | 2386 | } |
2350 | #else /* !CONFIG_NO_HZ */ | 2387 | #else /* !CONFIG_NO_HZ_COMMON */ |
2351 | 2388 | ||
2352 | static inline long calc_load_fold_idle(void) { return 0; } | 2389 | static inline long calc_load_fold_idle(void) { return 0; } |
2353 | static inline void calc_global_nohz(void) { } | 2390 | static inline void calc_global_nohz(void) { } |
2354 | 2391 | ||
2355 | #endif /* CONFIG_NO_HZ */ | 2392 | #endif /* CONFIG_NO_HZ_COMMON */ |
2356 | 2393 | ||
2357 | /* | 2394 | /* |
2358 | * calc_load - update the avenrun load estimates 10 ticks after the | 2395 | * calc_load - update the avenrun load estimates 10 ticks after the |
@@ -2512,7 +2549,7 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, | |||
2512 | sched_avg_update(this_rq); | 2549 | sched_avg_update(this_rq); |
2513 | } | 2550 | } |
2514 | 2551 | ||
2515 | #ifdef CONFIG_NO_HZ | 2552 | #ifdef CONFIG_NO_HZ_COMMON |
2516 | /* | 2553 | /* |
2517 | * There is no sane way to deal with nohz on smp when using jiffies because the | 2554 | * There is no sane way to deal with nohz on smp when using jiffies because the |
2518 | * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading | 2555 | * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading |
@@ -2572,7 +2609,7 @@ void update_cpu_load_nohz(void) | |||
2572 | } | 2609 | } |
2573 | raw_spin_unlock(&this_rq->lock); | 2610 | raw_spin_unlock(&this_rq->lock); |
2574 | } | 2611 | } |
2575 | #endif /* CONFIG_NO_HZ */ | 2612 | #endif /* CONFIG_NO_HZ_COMMON */ |
2576 | 2613 | ||
2577 | /* | 2614 | /* |
2578 | * Called from scheduler_tick() | 2615 | * Called from scheduler_tick() |
@@ -2699,8 +2736,35 @@ void scheduler_tick(void) | |||
2699 | rq->idle_balance = idle_cpu(cpu); | 2736 | rq->idle_balance = idle_cpu(cpu); |
2700 | trigger_load_balance(rq, cpu); | 2737 | trigger_load_balance(rq, cpu); |
2701 | #endif | 2738 | #endif |
2739 | rq_last_tick_reset(rq); | ||
2702 | } | 2740 | } |
2703 | 2741 | ||
2742 | #ifdef CONFIG_NO_HZ_FULL | ||
2743 | /** | ||
2744 | * scheduler_tick_max_deferment | ||
2745 | * | ||
2746 | * Keep at least one tick per second when a single | ||
2747 | * active task is running because the scheduler doesn't | ||
2748 | * yet completely support full dynticks environment. | ||
2749 | * | ||
2750 | * This makes sure that uptime, CFS vruntime, load | ||
2751 | * balancing, etc... continue to move forward, even | ||
2752 | * with a very low granularity. | ||
2753 | */ | ||
2754 | u64 scheduler_tick_max_deferment(void) | ||
2755 | { | ||
2756 | struct rq *rq = this_rq(); | ||
2757 | unsigned long next, now = ACCESS_ONCE(jiffies); | ||
2758 | |||
2759 | next = rq->last_sched_tick + HZ; | ||
2760 | |||
2761 | if (time_before_eq(next, now)) | ||
2762 | return 0; | ||
2763 | |||
2764 | return jiffies_to_usecs(next - now) * NSEC_PER_USEC; | ||
2765 | } | ||
2766 | #endif | ||
2767 | |||
2704 | notrace unsigned long get_parent_ip(unsigned long addr) | 2768 | notrace unsigned long get_parent_ip(unsigned long addr) |
2705 | { | 2769 | { |
2706 | if (in_lock_functions(addr)) { | 2770 | if (in_lock_functions(addr)) { |
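The scheduler_tick_max_deferment() helper added in the hunk above bounds how long a NO_HZ_FULL CPU may defer its tick: at most until one second (HZ jiffies) after the last scheduler tick recorded in rq->last_sched_tick. Below is a standalone sketch of the same arithmetic, assuming HZ=1000 and a simplified jiffies_to_usecs(); it is illustration only, not kernel code, and the kernel version additionally uses time_before_eq() to cope with jiffies wraparound:

#include <stdio.h>
#include <stdint.h>

#define HZ		1000ULL			/* assumed tick rate for this example */
#define NSEC_PER_USEC	1000ULL

/* Simplified stand-in for jiffies_to_usecs(): at HZ=1000, 1 jiffy == 1000 us. */
static uint64_t jiffies_to_usecs(uint64_t j)
{
	return j * (1000000ULL / HZ);
}

static uint64_t max_deferment(uint64_t last_sched_tick, uint64_t now)
{
	uint64_t next = last_sched_tick + HZ;	/* one second after the last tick */

	if (next <= now)			/* a tick is already overdue */
		return 0;

	return jiffies_to_usecs(next - now) * NSEC_PER_USEC;
}

int main(void)
{
	/* Last tick fired 250 jiffies ago: roughly 750 ms of deferment left. */
	printf("%llu ns\n", (unsigned long long)max_deferment(10000, 10250));
	return 0;
}

With these inputs the sketch prints 750000000 ns; the returned cap can never exceed one second's worth of nanoseconds.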
@@ -2997,51 +3061,6 @@ void __sched schedule_preempt_disabled(void) | |||
2997 | preempt_disable(); | 3061 | preempt_disable(); |
2998 | } | 3062 | } |
2999 | 3063 | ||
3000 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | ||
3001 | |||
3002 | static inline bool owner_running(struct mutex *lock, struct task_struct *owner) | ||
3003 | { | ||
3004 | if (lock->owner != owner) | ||
3005 | return false; | ||
3006 | |||
3007 | /* | ||
3008 | * Ensure we emit the owner->on_cpu, dereference _after_ checking | ||
3009 | * lock->owner still matches owner, if that fails, owner might | ||
3010 | * point to free()d memory, if it still matches, the rcu_read_lock() | ||
3011 | * ensures the memory stays valid. | ||
3012 | */ | ||
3013 | barrier(); | ||
3014 | |||
3015 | return owner->on_cpu; | ||
3016 | } | ||
3017 | |||
3018 | /* | ||
3019 | * Look out! "owner" is an entirely speculative pointer | ||
3020 | * access and not reliable. | ||
3021 | */ | ||
3022 | int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) | ||
3023 | { | ||
3024 | if (!sched_feat(OWNER_SPIN)) | ||
3025 | return 0; | ||
3026 | |||
3027 | rcu_read_lock(); | ||
3028 | while (owner_running(lock, owner)) { | ||
3029 | if (need_resched()) | ||
3030 | break; | ||
3031 | |||
3032 | arch_mutex_cpu_relax(); | ||
3033 | } | ||
3034 | rcu_read_unlock(); | ||
3035 | |||
3036 | /* | ||
3037 | * We break out the loop above on need_resched() and when the | ||
3038 | * owner changed, which is a sign for heavy contention. Return | ||
3039 | * success only when lock->owner is NULL. | ||
3040 | */ | ||
3041 | return lock->owner == NULL; | ||
3042 | } | ||
3043 | #endif | ||
3044 | |||
3045 | #ifdef CONFIG_PREEMPT | 3064 | #ifdef CONFIG_PREEMPT |
3046 | /* | 3065 | /* |
3047 | * this is the entry point to schedule() from in-kernel preemption | 3066 | * this is the entry point to schedule() from in-kernel preemption |
@@ -3082,11 +3101,13 @@ EXPORT_SYMBOL(preempt_schedule); | |||
3082 | asmlinkage void __sched preempt_schedule_irq(void) | 3101 | asmlinkage void __sched preempt_schedule_irq(void) |
3083 | { | 3102 | { |
3084 | struct thread_info *ti = current_thread_info(); | 3103 | struct thread_info *ti = current_thread_info(); |
3104 | enum ctx_state prev_state; | ||
3085 | 3105 | ||
3086 | /* Catch callers which need to be fixed */ | 3106 | /* Catch callers which need to be fixed */ |
3087 | BUG_ON(ti->preempt_count || !irqs_disabled()); | 3107 | BUG_ON(ti->preempt_count || !irqs_disabled()); |
3088 | 3108 | ||
3089 | user_exit(); | 3109 | prev_state = exception_enter(); |
3110 | |||
3090 | do { | 3111 | do { |
3091 | add_preempt_count(PREEMPT_ACTIVE); | 3112 | add_preempt_count(PREEMPT_ACTIVE); |
3092 | local_irq_enable(); | 3113 | local_irq_enable(); |
@@ -3100,6 +3121,8 @@ asmlinkage void __sched preempt_schedule_irq(void) | |||
3100 | */ | 3121 | */ |
3101 | barrier(); | 3122 | barrier(); |
3102 | } while (need_resched()); | 3123 | } while (need_resched()); |
3124 | |||
3125 | exception_exit(prev_state); | ||
3103 | } | 3126 | } |
3104 | 3127 | ||
3105 | #endif /* CONFIG_PREEMPT */ | 3128 | #endif /* CONFIG_PREEMPT */ |
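The preempt_schedule_irq() hunk above replaces the unconditional user_exit() with an exception_enter()/exception_exit() pair, so the context-tracking state that was preempted is saved and later restored instead of being assumed to be user mode. A minimal sketch of that save/restore idiom follows, using made-up names; this is only the pattern the pair follows, not the context-tracking API itself:

enum ctx_state { CONTEXT_KERNEL, CONTEXT_USER };

static enum ctx_state cur_state = CONTEXT_USER;

/* Save whatever state was active, switch to kernel context, and hand the
 * saved value back so the caller can restore it symmetrically. */
static enum ctx_state sketch_enter(void)
{
	enum ctx_state prev = cur_state;

	cur_state = CONTEXT_KERNEL;
	return prev;
}

static void sketch_exit(enum ctx_state prev)
{
	cur_state = prev;		/* restore the interrupted state */
}

Pairing enter and exit this way keeps nested invocations balanced: each exit undoes exactly the transition performed by its matching enter.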
@@ -4126,6 +4149,10 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
4126 | get_task_struct(p); | 4149 | get_task_struct(p); |
4127 | rcu_read_unlock(); | 4150 | rcu_read_unlock(); |
4128 | 4151 | ||
4152 | if (p->flags & PF_NO_SETAFFINITY) { | ||
4153 | retval = -EINVAL; | ||
4154 | goto out_put_task; | ||
4155 | } | ||
4129 | if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { | 4156 | if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { |
4130 | retval = -ENOMEM; | 4157 | retval = -ENOMEM; |
4131 | goto out_put_task; | 4158 | goto out_put_task; |
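The new PF_NO_SETAFFINITY check in the hunk above rejects affinity changes for tasks that must stay where they are (per-cpu kernel workers, for instance) before any cpumask is allocated, and the caller sees plain EINVAL. A small userspace illustration of what that failure looks like; the program is not part of the patch and the target pid is whatever is passed on the command line:

#define _GNU_SOURCE
#include <sched.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(int argc, char **argv)
{
	cpu_set_t set;
	pid_t pid;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <pid>\n", argv[0]);
		return 1;
	}
	pid = (pid_t)atoi(argv[1]);

	CPU_ZERO(&set);
	CPU_SET(0, &set);		/* try to pin the task to CPU 0 */

	if (sched_setaffinity(pid, sizeof(set), &set) == -1) {
		/* EINVAL if the target task carries PF_NO_SETAFFINITY */
		fprintf(stderr, "sched_setaffinity(%d): %s\n",
			(int)pid, strerror(errno));
		return 1;
	}
	printf("pid %d pinned to CPU 0\n", (int)pid);
	return 0;
}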
@@ -4626,6 +4653,7 @@ void sched_show_task(struct task_struct *p) | |||
4626 | task_pid_nr(p), ppid, | 4653 | task_pid_nr(p), ppid, |
4627 | (unsigned long)task_thread_info(p)->flags); | 4654 | (unsigned long)task_thread_info(p)->flags); |
4628 | 4655 | ||
4656 | print_worker_info(KERN_INFO, p); | ||
4629 | show_stack(p, NULL); | 4657 | show_stack(p, NULL); |
4630 | } | 4658 | } |
4631 | 4659 | ||
@@ -4773,11 +4801,6 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
4773 | goto out; | 4801 | goto out; |
4774 | } | 4802 | } |
4775 | 4803 | ||
4776 | if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) { | ||
4777 | ret = -EINVAL; | ||
4778 | goto out; | ||
4779 | } | ||
4780 | |||
4781 | do_set_cpus_allowed(p, new_mask); | 4804 | do_set_cpus_allowed(p, new_mask); |
4782 | 4805 | ||
4783 | /* Can the task run on the task's current CPU? If so, we're done */ | 4806 | /* Can the task run on the task's current CPU? If so, we're done */ |
@@ -4999,7 +5022,7 @@ static void sd_free_ctl_entry(struct ctl_table **tablep) | |||
4999 | } | 5022 | } |
5000 | 5023 | ||
5001 | static int min_load_idx = 0; | 5024 | static int min_load_idx = 0; |
5002 | static int max_load_idx = CPU_LOAD_IDX_MAX; | 5025 | static int max_load_idx = CPU_LOAD_IDX_MAX-1; |
5003 | 5026 | ||
5004 | static void | 5027 | static void |
5005 | set_table_entry(struct ctl_table *entry, | 5028 | set_table_entry(struct ctl_table *entry, |
@@ -6248,7 +6271,7 @@ static void sched_init_numa(void) | |||
6248 | * 'level' contains the number of unique distances, excluding the | 6271 | * 'level' contains the number of unique distances, excluding the |
6249 | * identity distance node_distance(i,i). | 6272 | * identity distance node_distance(i,i). |
6250 | * | 6273 | * |
6251 | * The sched_domains_nume_distance[] array includes the actual distance | 6274 | * The sched_domains_numa_distance[] array includes the actual distance |
6252 | * numbers. | 6275 | * numbers. |
6253 | */ | 6276 | */ |
6254 | 6277 | ||
@@ -6861,11 +6884,15 @@ int in_sched_functions(unsigned long addr) | |||
6861 | } | 6884 | } |
6862 | 6885 | ||
6863 | #ifdef CONFIG_CGROUP_SCHED | 6886 | #ifdef CONFIG_CGROUP_SCHED |
6887 | /* | ||
6888 | * Default task group. | ||
6889 | * Every task in system belongs to this group at bootup. | ||
6890 | */ | ||
6864 | struct task_group root_task_group; | 6891 | struct task_group root_task_group; |
6865 | LIST_HEAD(task_groups); | 6892 | LIST_HEAD(task_groups); |
6866 | #endif | 6893 | #endif |
6867 | 6894 | ||
6868 | DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask); | 6895 | DECLARE_PER_CPU(cpumask_var_t, load_balance_mask); |
6869 | 6896 | ||
6870 | void __init sched_init(void) | 6897 | void __init sched_init(void) |
6871 | { | 6898 | { |
@@ -6902,7 +6929,7 @@ void __init sched_init(void) | |||
6902 | #endif /* CONFIG_RT_GROUP_SCHED */ | 6929 | #endif /* CONFIG_RT_GROUP_SCHED */ |
6903 | #ifdef CONFIG_CPUMASK_OFFSTACK | 6930 | #ifdef CONFIG_CPUMASK_OFFSTACK |
6904 | for_each_possible_cpu(i) { | 6931 | for_each_possible_cpu(i) { |
6905 | per_cpu(load_balance_tmpmask, i) = (void *)ptr; | 6932 | per_cpu(load_balance_mask, i) = (void *)ptr; |
6906 | ptr += cpumask_size(); | 6933 | ptr += cpumask_size(); |
6907 | } | 6934 | } |
6908 | #endif /* CONFIG_CPUMASK_OFFSTACK */ | 6935 | #endif /* CONFIG_CPUMASK_OFFSTACK */ |
@@ -6928,12 +6955,6 @@ void __init sched_init(void) | |||
6928 | 6955 | ||
6929 | #endif /* CONFIG_CGROUP_SCHED */ | 6956 | #endif /* CONFIG_CGROUP_SCHED */ |
6930 | 6957 | ||
6931 | #ifdef CONFIG_CGROUP_CPUACCT | ||
6932 | root_cpuacct.cpustat = &kernel_cpustat; | ||
6933 | root_cpuacct.cpuusage = alloc_percpu(u64); | ||
6934 | /* Too early, not expected to fail */ | ||
6935 | BUG_ON(!root_cpuacct.cpuusage); | ||
6936 | #endif | ||
6937 | for_each_possible_cpu(i) { | 6958 | for_each_possible_cpu(i) { |
6938 | struct rq *rq; | 6959 | struct rq *rq; |
6939 | 6960 | ||
@@ -6997,9 +7018,12 @@ void __init sched_init(void) | |||
6997 | INIT_LIST_HEAD(&rq->cfs_tasks); | 7018 | INIT_LIST_HEAD(&rq->cfs_tasks); |
6998 | 7019 | ||
6999 | rq_attach_root(rq, &def_root_domain); | 7020 | rq_attach_root(rq, &def_root_domain); |
7000 | #ifdef CONFIG_NO_HZ | 7021 | #ifdef CONFIG_NO_HZ_COMMON |
7001 | rq->nohz_flags = 0; | 7022 | rq->nohz_flags = 0; |
7002 | #endif | 7023 | #endif |
7024 | #ifdef CONFIG_NO_HZ_FULL | ||
7025 | rq->last_sched_tick = 0; | ||
7026 | #endif | ||
7003 | #endif | 7027 | #endif |
7004 | init_rq_hrtick(rq); | 7028 | init_rq_hrtick(rq); |
7005 | atomic_set(&rq->nr_iowait, 0); | 7029 | atomic_set(&rq->nr_iowait, 0); |
@@ -7455,7 +7479,7 @@ unlock: | |||
7455 | return err; | 7479 | return err; |
7456 | } | 7480 | } |
7457 | 7481 | ||
7458 | int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) | 7482 | static int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) |
7459 | { | 7483 | { |
7460 | u64 rt_runtime, rt_period; | 7484 | u64 rt_runtime, rt_period; |
7461 | 7485 | ||
@@ -7467,7 +7491,7 @@ int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) | |||
7467 | return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); | 7491 | return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); |
7468 | } | 7492 | } |
7469 | 7493 | ||
7470 | long sched_group_rt_runtime(struct task_group *tg) | 7494 | static long sched_group_rt_runtime(struct task_group *tg) |
7471 | { | 7495 | { |
7472 | u64 rt_runtime_us; | 7496 | u64 rt_runtime_us; |
7473 | 7497 | ||
@@ -7479,7 +7503,7 @@ long sched_group_rt_runtime(struct task_group *tg) | |||
7479 | return rt_runtime_us; | 7503 | return rt_runtime_us; |
7480 | } | 7504 | } |
7481 | 7505 | ||
7482 | int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) | 7506 | static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) |
7483 | { | 7507 | { |
7484 | u64 rt_runtime, rt_period; | 7508 | u64 rt_runtime, rt_period; |
7485 | 7509 | ||
@@ -7492,7 +7516,7 @@ int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) | |||
7492 | return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); | 7516 | return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); |
7493 | } | 7517 | } |
7494 | 7518 | ||
7495 | long sched_group_rt_period(struct task_group *tg) | 7519 | static long sched_group_rt_period(struct task_group *tg) |
7496 | { | 7520 | { |
7497 | u64 rt_period_us; | 7521 | u64 rt_period_us; |
7498 | 7522 | ||
@@ -7527,7 +7551,7 @@ static int sched_rt_global_constraints(void) | |||
7527 | return ret; | 7551 | return ret; |
7528 | } | 7552 | } |
7529 | 7553 | ||
7530 | int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk) | 7554 | static int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk) |
7531 | { | 7555 | { |
7532 | /* Don't accept realtime tasks when there is no way for them to run */ | 7556 | /* Don't accept realtime tasks when there is no way for them to run */ |
7533 | if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0) | 7557 | if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0) |
@@ -8035,226 +8059,6 @@ struct cgroup_subsys cpu_cgroup_subsys = { | |||
8035 | 8059 | ||
8036 | #endif /* CONFIG_CGROUP_SCHED */ | 8060 | #endif /* CONFIG_CGROUP_SCHED */ |
8037 | 8061 | ||
8038 | #ifdef CONFIG_CGROUP_CPUACCT | ||
8039 | |||
8040 | /* | ||
8041 | * CPU accounting code for task groups. | ||
8042 | * | ||
8043 | * Based on the work by Paul Menage (menage@google.com) and Balbir Singh | ||
8044 | * (balbir@in.ibm.com). | ||
8045 | */ | ||
8046 | |||
8047 | struct cpuacct root_cpuacct; | ||
8048 | |||
8049 | /* create a new cpu accounting group */ | ||
8050 | static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp) | ||
8051 | { | ||
8052 | struct cpuacct *ca; | ||
8053 | |||
8054 | if (!cgrp->parent) | ||
8055 | return &root_cpuacct.css; | ||
8056 | |||
8057 | ca = kzalloc(sizeof(*ca), GFP_KERNEL); | ||
8058 | if (!ca) | ||
8059 | goto out; | ||
8060 | |||
8061 | ca->cpuusage = alloc_percpu(u64); | ||
8062 | if (!ca->cpuusage) | ||
8063 | goto out_free_ca; | ||
8064 | |||
8065 | ca->cpustat = alloc_percpu(struct kernel_cpustat); | ||
8066 | if (!ca->cpustat) | ||
8067 | goto out_free_cpuusage; | ||
8068 | |||
8069 | return &ca->css; | ||
8070 | |||
8071 | out_free_cpuusage: | ||
8072 | free_percpu(ca->cpuusage); | ||
8073 | out_free_ca: | ||
8074 | kfree(ca); | ||
8075 | out: | ||
8076 | return ERR_PTR(-ENOMEM); | ||
8077 | } | ||
8078 | |||
8079 | /* destroy an existing cpu accounting group */ | ||
8080 | static void cpuacct_css_free(struct cgroup *cgrp) | ||
8081 | { | ||
8082 | struct cpuacct *ca = cgroup_ca(cgrp); | ||
8083 | |||
8084 | free_percpu(ca->cpustat); | ||
8085 | free_percpu(ca->cpuusage); | ||
8086 | kfree(ca); | ||
8087 | } | ||
8088 | |||
8089 | static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) | ||
8090 | { | ||
8091 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); | ||
8092 | u64 data; | ||
8093 | |||
8094 | #ifndef CONFIG_64BIT | ||
8095 | /* | ||
8096 | * Take rq->lock to make 64-bit read safe on 32-bit platforms. | ||
8097 | */ | ||
8098 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); | ||
8099 | data = *cpuusage; | ||
8100 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); | ||
8101 | #else | ||
8102 | data = *cpuusage; | ||
8103 | #endif | ||
8104 | |||
8105 | return data; | ||
8106 | } | ||
8107 | |||
8108 | static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) | ||
8109 | { | ||
8110 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); | ||
8111 | |||
8112 | #ifndef CONFIG_64BIT | ||
8113 | /* | ||
8114 | * Take rq->lock to make 64-bit write safe on 32-bit platforms. | ||
8115 | */ | ||
8116 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); | ||
8117 | *cpuusage = val; | ||
8118 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); | ||
8119 | #else | ||
8120 | *cpuusage = val; | ||
8121 | #endif | ||
8122 | } | ||
8123 | |||
8124 | /* return total cpu usage (in nanoseconds) of a group */ | ||
8125 | static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft) | ||
8126 | { | ||
8127 | struct cpuacct *ca = cgroup_ca(cgrp); | ||
8128 | u64 totalcpuusage = 0; | ||
8129 | int i; | ||
8130 | |||
8131 | for_each_present_cpu(i) | ||
8132 | totalcpuusage += cpuacct_cpuusage_read(ca, i); | ||
8133 | |||
8134 | return totalcpuusage; | ||
8135 | } | ||
8136 | |||
8137 | static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype, | ||
8138 | u64 reset) | ||
8139 | { | ||
8140 | struct cpuacct *ca = cgroup_ca(cgrp); | ||
8141 | int err = 0; | ||
8142 | int i; | ||
8143 | |||
8144 | if (reset) { | ||
8145 | err = -EINVAL; | ||
8146 | goto out; | ||
8147 | } | ||
8148 | |||
8149 | for_each_present_cpu(i) | ||
8150 | cpuacct_cpuusage_write(ca, i, 0); | ||
8151 | |||
8152 | out: | ||
8153 | return err; | ||
8154 | } | ||
8155 | |||
8156 | static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft, | ||
8157 | struct seq_file *m) | ||
8158 | { | ||
8159 | struct cpuacct *ca = cgroup_ca(cgroup); | ||
8160 | u64 percpu; | ||
8161 | int i; | ||
8162 | |||
8163 | for_each_present_cpu(i) { | ||
8164 | percpu = cpuacct_cpuusage_read(ca, i); | ||
8165 | seq_printf(m, "%llu ", (unsigned long long) percpu); | ||
8166 | } | ||
8167 | seq_printf(m, "\n"); | ||
8168 | return 0; | ||
8169 | } | ||
8170 | |||
8171 | static const char *cpuacct_stat_desc[] = { | ||
8172 | [CPUACCT_STAT_USER] = "user", | ||
8173 | [CPUACCT_STAT_SYSTEM] = "system", | ||
8174 | }; | ||
8175 | |||
8176 | static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft, | ||
8177 | struct cgroup_map_cb *cb) | ||
8178 | { | ||
8179 | struct cpuacct *ca = cgroup_ca(cgrp); | ||
8180 | int cpu; | ||
8181 | s64 val = 0; | ||
8182 | |||
8183 | for_each_online_cpu(cpu) { | ||
8184 | struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); | ||
8185 | val += kcpustat->cpustat[CPUTIME_USER]; | ||
8186 | val += kcpustat->cpustat[CPUTIME_NICE]; | ||
8187 | } | ||
8188 | val = cputime64_to_clock_t(val); | ||
8189 | cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_USER], val); | ||
8190 | |||
8191 | val = 0; | ||
8192 | for_each_online_cpu(cpu) { | ||
8193 | struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); | ||
8194 | val += kcpustat->cpustat[CPUTIME_SYSTEM]; | ||
8195 | val += kcpustat->cpustat[CPUTIME_IRQ]; | ||
8196 | val += kcpustat->cpustat[CPUTIME_SOFTIRQ]; | ||
8197 | } | ||
8198 | |||
8199 | val = cputime64_to_clock_t(val); | ||
8200 | cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val); | ||
8201 | |||
8202 | return 0; | ||
8203 | } | ||
8204 | |||
8205 | static struct cftype files[] = { | ||
8206 | { | ||
8207 | .name = "usage", | ||
8208 | .read_u64 = cpuusage_read, | ||
8209 | .write_u64 = cpuusage_write, | ||
8210 | }, | ||
8211 | { | ||
8212 | .name = "usage_percpu", | ||
8213 | .read_seq_string = cpuacct_percpu_seq_read, | ||
8214 | }, | ||
8215 | { | ||
8216 | .name = "stat", | ||
8217 | .read_map = cpuacct_stats_show, | ||
8218 | }, | ||
8219 | { } /* terminate */ | ||
8220 | }; | ||
8221 | |||
8222 | /* | ||
8223 | * charge this task's execution time to its accounting group. | ||
8224 | * | ||
8225 | * called with rq->lock held. | ||
8226 | */ | ||
8227 | void cpuacct_charge(struct task_struct *tsk, u64 cputime) | ||
8228 | { | ||
8229 | struct cpuacct *ca; | ||
8230 | int cpu; | ||
8231 | |||
8232 | if (unlikely(!cpuacct_subsys.active)) | ||
8233 | return; | ||
8234 | |||
8235 | cpu = task_cpu(tsk); | ||
8236 | |||
8237 | rcu_read_lock(); | ||
8238 | |||
8239 | ca = task_ca(tsk); | ||
8240 | |||
8241 | for (; ca; ca = parent_ca(ca)) { | ||
8242 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); | ||
8243 | *cpuusage += cputime; | ||
8244 | } | ||
8245 | |||
8246 | rcu_read_unlock(); | ||
8247 | } | ||
8248 | |||
8249 | struct cgroup_subsys cpuacct_subsys = { | ||
8250 | .name = "cpuacct", | ||
8251 | .css_alloc = cpuacct_css_alloc, | ||
8252 | .css_free = cpuacct_css_free, | ||
8253 | .subsys_id = cpuacct_subsys_id, | ||
8254 | .base_cftypes = files, | ||
8255 | }; | ||
8256 | #endif /* CONFIG_CGROUP_CPUACCT */ | ||
8257 | |||
8258 | void dump_cpu_task(int cpu) | 8062 | void dump_cpu_task(int cpu) |
8259 | { | 8063 | { |
8260 | pr_info("Task dump for CPU %d:\n", cpu); | 8064 | pr_info("Task dump for CPU %d:\n", cpu); |