author | Frederic Weisbecker <fweisbec@gmail.com> | 2012-06-16 09:57:37 -0400 |
---|---|---|
committer | Frederic Weisbecker <fweisbec@gmail.com> | 2012-08-20 07:05:17 -0400 |
commit | 73fbec604432e1fbfeb1bc59a110dac1f98160f6 (patch) | |
tree | 1bcdf943945b61aa1b2d2193ebd72197bf788a33 /kernel/sched | |
parent | b952741c80790d2dc9f17fac6f15d87d58dea2a1 (diff) |
sched: Move cputime code to its own file
Extract the cputime code from the giant sched/core.c and
put it in its own file. This makes it easier to deal with
this particular area and de-bloats core.c a bit more.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/Makefile | 2
-rw-r--r-- | kernel/sched/core.c | 557
-rw-r--r-- | kernel/sched/cputime.c | 504
-rw-r--r-- | kernel/sched/sched.h | 63
4 files changed, 570 insertions(+), 556 deletions(-)
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 173ea52f3af0..f06d249e103b 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -11,7 +11,7 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
11 | CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer | 11 | CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer |
12 | endif | 12 | endif |
13 | 13 | ||
14 | obj-y += core.o clock.o idle_task.o fair.o rt.o stop_task.o | 14 | obj-y += core.o clock.o cputime.o idle_task.o fair.o rt.o stop_task.o |
15 | obj-$(CONFIG_SMP) += cpupri.o | 15 | obj-$(CONFIG_SMP) += cpupri.o |
16 | obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o | 16 | obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o |
17 | obj-$(CONFIG_SCHEDSTATS) += stats.o | 17 | obj-$(CONFIG_SCHEDSTATS) += stats.o |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4376c9f34790..ae3bcaa3afbf 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -740,126 +740,6 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
740 | dequeue_task(rq, p, flags); | 740 | dequeue_task(rq, p, flags); |
741 | } | 741 | } |
742 | 742 | ||
743 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
744 | |||
745 | /* | ||
746 | * There are no locks covering percpu hardirq/softirq time. | ||
747 | * They are only modified in account_system_vtime, on corresponding CPU | ||
748 | * with interrupts disabled. So, writes are safe. | ||
749 | * They are read and saved off onto struct rq in update_rq_clock(). | ||
750 | * This may result in other CPU reading this CPU's irq time and can | ||
751 | * race with irq/account_system_vtime on this CPU. We would either get old | ||
752 | * or new value with a side effect of accounting a slice of irq time to wrong | ||
753 | * task when irq is in progress while we read rq->clock. That is a worthy | ||
754 | * compromise in place of having locks on each irq in account_system_time. | ||
755 | */ | ||
756 | static DEFINE_PER_CPU(u64, cpu_hardirq_time); | ||
757 | static DEFINE_PER_CPU(u64, cpu_softirq_time); | ||
758 | |||
759 | static DEFINE_PER_CPU(u64, irq_start_time); | ||
760 | static int sched_clock_irqtime; | ||
761 | |||
762 | void enable_sched_clock_irqtime(void) | ||
763 | { | ||
764 | sched_clock_irqtime = 1; | ||
765 | } | ||
766 | |||
767 | void disable_sched_clock_irqtime(void) | ||
768 | { | ||
769 | sched_clock_irqtime = 0; | ||
770 | } | ||
771 | |||
772 | #ifndef CONFIG_64BIT | ||
773 | static DEFINE_PER_CPU(seqcount_t, irq_time_seq); | ||
774 | |||
775 | static inline void irq_time_write_begin(void) | ||
776 | { | ||
777 | __this_cpu_inc(irq_time_seq.sequence); | ||
778 | smp_wmb(); | ||
779 | } | ||
780 | |||
781 | static inline void irq_time_write_end(void) | ||
782 | { | ||
783 | smp_wmb(); | ||
784 | __this_cpu_inc(irq_time_seq.sequence); | ||
785 | } | ||
786 | |||
787 | static inline u64 irq_time_read(int cpu) | ||
788 | { | ||
789 | u64 irq_time; | ||
790 | unsigned seq; | ||
791 | |||
792 | do { | ||
793 | seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu)); | ||
794 | irq_time = per_cpu(cpu_softirq_time, cpu) + | ||
795 | per_cpu(cpu_hardirq_time, cpu); | ||
796 | } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq)); | ||
797 | |||
798 | return irq_time; | ||
799 | } | ||
800 | #else /* CONFIG_64BIT */ | ||
801 | static inline void irq_time_write_begin(void) | ||
802 | { | ||
803 | } | ||
804 | |||
805 | static inline void irq_time_write_end(void) | ||
806 | { | ||
807 | } | ||
808 | |||
809 | static inline u64 irq_time_read(int cpu) | ||
810 | { | ||
811 | return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); | ||
812 | } | ||
813 | #endif /* CONFIG_64BIT */ | ||
814 | |||
815 | /* | ||
816 | * Called before incrementing preempt_count on {soft,}irq_enter | ||
817 | * and before decrementing preempt_count on {soft,}irq_exit. | ||
818 | */ | ||
819 | void account_system_vtime(struct task_struct *curr) | ||
820 | { | ||
821 | unsigned long flags; | ||
822 | s64 delta; | ||
823 | int cpu; | ||
824 | |||
825 | if (!sched_clock_irqtime) | ||
826 | return; | ||
827 | |||
828 | local_irq_save(flags); | ||
829 | |||
830 | cpu = smp_processor_id(); | ||
831 | delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); | ||
832 | __this_cpu_add(irq_start_time, delta); | ||
833 | |||
834 | irq_time_write_begin(); | ||
835 | /* | ||
836 | * We do not account for softirq time from ksoftirqd here. | ||
837 | * We want to continue accounting softirq time to ksoftirqd thread | ||
838 | * in that case, so as not to confuse scheduler with a special task | ||
839 | * that do not consume any time, but still wants to run. | ||
840 | */ | ||
841 | if (hardirq_count()) | ||
842 | __this_cpu_add(cpu_hardirq_time, delta); | ||
843 | else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) | ||
844 | __this_cpu_add(cpu_softirq_time, delta); | ||
845 | |||
846 | irq_time_write_end(); | ||
847 | local_irq_restore(flags); | ||
848 | } | ||
849 | EXPORT_SYMBOL_GPL(account_system_vtime); | ||
850 | |||
851 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
852 | |||
853 | #ifdef CONFIG_PARAVIRT | ||
854 | static inline u64 steal_ticks(u64 steal) | ||
855 | { | ||
856 | if (unlikely(steal > NSEC_PER_SEC)) | ||
857 | return div_u64(steal, TICK_NSEC); | ||
858 | |||
859 | return __iter_div_u64_rem(steal, TICK_NSEC, &steal); | ||
860 | } | ||
861 | #endif | ||
862 | |||
863 | static void update_rq_clock_task(struct rq *rq, s64 delta) | 743 | static void update_rq_clock_task(struct rq *rq, s64 delta) |
864 | { | 744 | { |
865 | /* | 745 | /* |
@@ -920,43 +800,6 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
920 | #endif | 800 | #endif |
921 | } | 801 | } |
922 | 802 | ||
923 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
924 | static int irqtime_account_hi_update(void) | ||
925 | { | ||
926 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
927 | unsigned long flags; | ||
928 | u64 latest_ns; | ||
929 | int ret = 0; | ||
930 | |||
931 | local_irq_save(flags); | ||
932 | latest_ns = this_cpu_read(cpu_hardirq_time); | ||
933 | if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ]) | ||
934 | ret = 1; | ||
935 | local_irq_restore(flags); | ||
936 | return ret; | ||
937 | } | ||
938 | |||
939 | static int irqtime_account_si_update(void) | ||
940 | { | ||
941 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
942 | unsigned long flags; | ||
943 | u64 latest_ns; | ||
944 | int ret = 0; | ||
945 | |||
946 | local_irq_save(flags); | ||
947 | latest_ns = this_cpu_read(cpu_softirq_time); | ||
948 | if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ]) | ||
949 | ret = 1; | ||
950 | local_irq_restore(flags); | ||
951 | return ret; | ||
952 | } | ||
953 | |||
954 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
955 | |||
956 | #define sched_clock_irqtime (0) | ||
957 | |||
958 | #endif | ||
959 | |||
960 | void sched_set_stop_task(int cpu, struct task_struct *stop) | 803 | void sched_set_stop_task(int cpu, struct task_struct *stop) |
961 | { | 804 | { |
962 | struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; | 805 | struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; |
@@ -2809,404 +2652,6 @@ unsigned long long task_sched_runtime(struct task_struct *p)
2809 | return ns; | 2652 | return ns; |
2810 | } | 2653 | } |
2811 | 2654 | ||
2812 | #ifdef CONFIG_CGROUP_CPUACCT | ||
2813 | struct cgroup_subsys cpuacct_subsys; | ||
2814 | struct cpuacct root_cpuacct; | ||
2815 | #endif | ||
2816 | |||
2817 | static inline void task_group_account_field(struct task_struct *p, int index, | ||
2818 | u64 tmp) | ||
2819 | { | ||
2820 | #ifdef CONFIG_CGROUP_CPUACCT | ||
2821 | struct kernel_cpustat *kcpustat; | ||
2822 | struct cpuacct *ca; | ||
2823 | #endif | ||
2824 | /* | ||
2825 | * Since all updates are sure to touch the root cgroup, we | ||
2826 | * get ourselves ahead and touch it first. If the root cgroup | ||
2827 | * is the only cgroup, then nothing else should be necessary. | ||
2828 | * | ||
2829 | */ | ||
2830 | __get_cpu_var(kernel_cpustat).cpustat[index] += tmp; | ||
2831 | |||
2832 | #ifdef CONFIG_CGROUP_CPUACCT | ||
2833 | if (unlikely(!cpuacct_subsys.active)) | ||
2834 | return; | ||
2835 | |||
2836 | rcu_read_lock(); | ||
2837 | ca = task_ca(p); | ||
2838 | while (ca && (ca != &root_cpuacct)) { | ||
2839 | kcpustat = this_cpu_ptr(ca->cpustat); | ||
2840 | kcpustat->cpustat[index] += tmp; | ||
2841 | ca = parent_ca(ca); | ||
2842 | } | ||
2843 | rcu_read_unlock(); | ||
2844 | #endif | ||
2845 | } | ||
2846 | |||
2847 | |||
2848 | /* | ||
2849 | * Account user cpu time to a process. | ||
2850 | * @p: the process that the cpu time gets accounted to | ||
2851 | * @cputime: the cpu time spent in user space since the last update | ||
2852 | * @cputime_scaled: cputime scaled by cpu frequency | ||
2853 | */ | ||
2854 | void account_user_time(struct task_struct *p, cputime_t cputime, | ||
2855 | cputime_t cputime_scaled) | ||
2856 | { | ||
2857 | int index; | ||
2858 | |||
2859 | /* Add user time to process. */ | ||
2860 | p->utime += cputime; | ||
2861 | p->utimescaled += cputime_scaled; | ||
2862 | account_group_user_time(p, cputime); | ||
2863 | |||
2864 | index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; | ||
2865 | |||
2866 | /* Add user time to cpustat. */ | ||
2867 | task_group_account_field(p, index, (__force u64) cputime); | ||
2868 | |||
2869 | /* Account for user time used */ | ||
2870 | acct_update_integrals(p); | ||
2871 | } | ||
2872 | |||
2873 | /* | ||
2874 | * Account guest cpu time to a process. | ||
2875 | * @p: the process that the cpu time gets accounted to | ||
2876 | * @cputime: the cpu time spent in virtual machine since the last update | ||
2877 | * @cputime_scaled: cputime scaled by cpu frequency | ||
2878 | */ | ||
2879 | static void account_guest_time(struct task_struct *p, cputime_t cputime, | ||
2880 | cputime_t cputime_scaled) | ||
2881 | { | ||
2882 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
2883 | |||
2884 | /* Add guest time to process. */ | ||
2885 | p->utime += cputime; | ||
2886 | p->utimescaled += cputime_scaled; | ||
2887 | account_group_user_time(p, cputime); | ||
2888 | p->gtime += cputime; | ||
2889 | |||
2890 | /* Add guest time to cpustat. */ | ||
2891 | if (TASK_NICE(p) > 0) { | ||
2892 | cpustat[CPUTIME_NICE] += (__force u64) cputime; | ||
2893 | cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime; | ||
2894 | } else { | ||
2895 | cpustat[CPUTIME_USER] += (__force u64) cputime; | ||
2896 | cpustat[CPUTIME_GUEST] += (__force u64) cputime; | ||
2897 | } | ||
2898 | } | ||
2899 | |||
2900 | /* | ||
2901 | * Account system cpu time to a process and desired cpustat field | ||
2902 | * @p: the process that the cpu time gets accounted to | ||
2903 | * @cputime: the cpu time spent in kernel space since the last update | ||
2904 | * @cputime_scaled: cputime scaled by cpu frequency | ||
2905 | * @target_cputime64: pointer to cpustat field that has to be updated | ||
2906 | */ | ||
2907 | static inline | ||
2908 | void __account_system_time(struct task_struct *p, cputime_t cputime, | ||
2909 | cputime_t cputime_scaled, int index) | ||
2910 | { | ||
2911 | /* Add system time to process. */ | ||
2912 | p->stime += cputime; | ||
2913 | p->stimescaled += cputime_scaled; | ||
2914 | account_group_system_time(p, cputime); | ||
2915 | |||
2916 | /* Add system time to cpustat. */ | ||
2917 | task_group_account_field(p, index, (__force u64) cputime); | ||
2918 | |||
2919 | /* Account for system time used */ | ||
2920 | acct_update_integrals(p); | ||
2921 | } | ||
2922 | |||
2923 | /* | ||
2924 | * Account system cpu time to a process. | ||
2925 | * @p: the process that the cpu time gets accounted to | ||
2926 | * @hardirq_offset: the offset to subtract from hardirq_count() | ||
2927 | * @cputime: the cpu time spent in kernel space since the last update | ||
2928 | * @cputime_scaled: cputime scaled by cpu frequency | ||
2929 | */ | ||
2930 | void account_system_time(struct task_struct *p, int hardirq_offset, | ||
2931 | cputime_t cputime, cputime_t cputime_scaled) | ||
2932 | { | ||
2933 | int index; | ||
2934 | |||
2935 | if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { | ||
2936 | account_guest_time(p, cputime, cputime_scaled); | ||
2937 | return; | ||
2938 | } | ||
2939 | |||
2940 | if (hardirq_count() - hardirq_offset) | ||
2941 | index = CPUTIME_IRQ; | ||
2942 | else if (in_serving_softirq()) | ||
2943 | index = CPUTIME_SOFTIRQ; | ||
2944 | else | ||
2945 | index = CPUTIME_SYSTEM; | ||
2946 | |||
2947 | __account_system_time(p, cputime, cputime_scaled, index); | ||
2948 | } | ||
2949 | |||
2950 | /* | ||
2951 | * Account for involuntary wait time. | ||
2952 | * @cputime: the cpu time spent in involuntary wait | ||
2953 | */ | ||
2954 | void account_steal_time(cputime_t cputime) | ||
2955 | { | ||
2956 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
2957 | |||
2958 | cpustat[CPUTIME_STEAL] += (__force u64) cputime; | ||
2959 | } | ||
2960 | |||
2961 | /* | ||
2962 | * Account for idle time. | ||
2963 | * @cputime: the cpu time spent in idle wait | ||
2964 | */ | ||
2965 | void account_idle_time(cputime_t cputime) | ||
2966 | { | ||
2967 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
2968 | struct rq *rq = this_rq(); | ||
2969 | |||
2970 | if (atomic_read(&rq->nr_iowait) > 0) | ||
2971 | cpustat[CPUTIME_IOWAIT] += (__force u64) cputime; | ||
2972 | else | ||
2973 | cpustat[CPUTIME_IDLE] += (__force u64) cputime; | ||
2974 | } | ||
2975 | |||
2976 | static __always_inline bool steal_account_process_tick(void) | ||
2977 | { | ||
2978 | #ifdef CONFIG_PARAVIRT | ||
2979 | if (static_key_false(¶virt_steal_enabled)) { | ||
2980 | u64 steal, st = 0; | ||
2981 | |||
2982 | steal = paravirt_steal_clock(smp_processor_id()); | ||
2983 | steal -= this_rq()->prev_steal_time; | ||
2984 | |||
2985 | st = steal_ticks(steal); | ||
2986 | this_rq()->prev_steal_time += st * TICK_NSEC; | ||
2987 | |||
2988 | account_steal_time(st); | ||
2989 | return st; | ||
2990 | } | ||
2991 | #endif | ||
2992 | return false; | ||
2993 | } | ||
2994 | |||
2995 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | ||
2996 | |||
2997 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
2998 | /* | ||
2999 | * Account a tick to a process and cpustat | ||
3000 | * @p: the process that the cpu time gets accounted to | ||
3001 | * @user_tick: is the tick from userspace | ||
3002 | * @rq: the pointer to rq | ||
3003 | * | ||
3004 | * Tick demultiplexing follows the order | ||
3005 | * - pending hardirq update | ||
3006 | * - pending softirq update | ||
3007 | * - user_time | ||
3008 | * - idle_time | ||
3009 | * - system time | ||
3010 | * - check for guest_time | ||
3011 | * - else account as system_time | ||
3012 | * | ||
3013 | * Check for hardirq is done both for system and user time as there is | ||
3014 | * no timer going off while we are on hardirq and hence we may never get an | ||
3015 | * opportunity to update it solely in system time. | ||
3016 | * p->stime and friends are only updated on system time and not on irq | ||
3017 | * softirq as those do not count in task exec_runtime any more. | ||
3018 | */ | ||
3019 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | ||
3020 | struct rq *rq) | ||
3021 | { | ||
3022 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | ||
3023 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
3024 | |||
3025 | if (steal_account_process_tick()) | ||
3026 | return; | ||
3027 | |||
3028 | if (irqtime_account_hi_update()) { | ||
3029 | cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy; | ||
3030 | } else if (irqtime_account_si_update()) { | ||
3031 | cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy; | ||
3032 | } else if (this_cpu_ksoftirqd() == p) { | ||
3033 | /* | ||
3034 | * ksoftirqd time do not get accounted in cpu_softirq_time. | ||
3035 | * So, we have to handle it separately here. | ||
3036 | * Also, p->stime needs to be updated for ksoftirqd. | ||
3037 | */ | ||
3038 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | ||
3039 | CPUTIME_SOFTIRQ); | ||
3040 | } else if (user_tick) { | ||
3041 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | ||
3042 | } else if (p == rq->idle) { | ||
3043 | account_idle_time(cputime_one_jiffy); | ||
3044 | } else if (p->flags & PF_VCPU) { /* System time or guest time */ | ||
3045 | account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); | ||
3046 | } else { | ||
3047 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | ||
3048 | CPUTIME_SYSTEM); | ||
3049 | } | ||
3050 | } | ||
3051 | |||
3052 | static void irqtime_account_idle_ticks(int ticks) | ||
3053 | { | ||
3054 | int i; | ||
3055 | struct rq *rq = this_rq(); | ||
3056 | |||
3057 | for (i = 0; i < ticks; i++) | ||
3058 | irqtime_account_process_tick(current, 0, rq); | ||
3059 | } | ||
3060 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
3061 | static void irqtime_account_idle_ticks(int ticks) {} | ||
3062 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | ||
3063 | struct rq *rq) {} | ||
3064 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
3065 | |||
3066 | /* | ||
3067 | * Account a single tick of cpu time. | ||
3068 | * @p: the process that the cpu time gets accounted to | ||
3069 | * @user_tick: indicates if the tick is a user or a system tick | ||
3070 | */ | ||
3071 | void account_process_tick(struct task_struct *p, int user_tick) | ||
3072 | { | ||
3073 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | ||
3074 | struct rq *rq = this_rq(); | ||
3075 | |||
3076 | if (sched_clock_irqtime) { | ||
3077 | irqtime_account_process_tick(p, user_tick, rq); | ||
3078 | return; | ||
3079 | } | ||
3080 | |||
3081 | if (steal_account_process_tick()) | ||
3082 | return; | ||
3083 | |||
3084 | if (user_tick) | ||
3085 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | ||
3086 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) | ||
3087 | account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy, | ||
3088 | one_jiffy_scaled); | ||
3089 | else | ||
3090 | account_idle_time(cputime_one_jiffy); | ||
3091 | } | ||
3092 | |||
3093 | /* | ||
3094 | * Account multiple ticks of steal time. | ||
3095 | * @p: the process from which the cpu time has been stolen | ||
3096 | * @ticks: number of stolen ticks | ||
3097 | */ | ||
3098 | void account_steal_ticks(unsigned long ticks) | ||
3099 | { | ||
3100 | account_steal_time(jiffies_to_cputime(ticks)); | ||
3101 | } | ||
3102 | |||
3103 | /* | ||
3104 | * Account multiple ticks of idle time. | ||
3105 | * @ticks: number of stolen ticks | ||
3106 | */ | ||
3107 | void account_idle_ticks(unsigned long ticks) | ||
3108 | { | ||
3109 | |||
3110 | if (sched_clock_irqtime) { | ||
3111 | irqtime_account_idle_ticks(ticks); | ||
3112 | return; | ||
3113 | } | ||
3114 | |||
3115 | account_idle_time(jiffies_to_cputime(ticks)); | ||
3116 | } | ||
3117 | |||
3118 | #endif | ||
3119 | |||
3120 | /* | ||
3121 | * Use precise platform statistics if available: | ||
3122 | */ | ||
3123 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | ||
3124 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
3125 | { | ||
3126 | *ut = p->utime; | ||
3127 | *st = p->stime; | ||
3128 | } | ||
3129 | |||
3130 | void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
3131 | { | ||
3132 | struct task_cputime cputime; | ||
3133 | |||
3134 | thread_group_cputime(p, &cputime); | ||
3135 | |||
3136 | *ut = cputime.utime; | ||
3137 | *st = cputime.stime; | ||
3138 | } | ||
3139 | #else | ||
3140 | |||
3141 | #ifndef nsecs_to_cputime | ||
3142 | # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) | ||
3143 | #endif | ||
3144 | |||
3145 | static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) | ||
3146 | { | ||
3147 | u64 temp = (__force u64) rtime; | ||
3148 | |||
3149 | temp *= (__force u64) utime; | ||
3150 | |||
3151 | if (sizeof(cputime_t) == 4) | ||
3152 | temp = div_u64(temp, (__force u32) total); | ||
3153 | else | ||
3154 | temp = div64_u64(temp, (__force u64) total); | ||
3155 | |||
3156 | return (__force cputime_t) temp; | ||
3157 | } | ||
3158 | |||
3159 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
3160 | { | ||
3161 | cputime_t rtime, utime = p->utime, total = utime + p->stime; | ||
3162 | |||
3163 | /* | ||
3164 | * Use CFS's precise accounting: | ||
3165 | */ | ||
3166 | rtime = nsecs_to_cputime(p->se.sum_exec_runtime); | ||
3167 | |||
3168 | if (total) | ||
3169 | utime = scale_utime(utime, rtime, total); | ||
3170 | else | ||
3171 | utime = rtime; | ||
3172 | |||
3173 | /* | ||
3174 | * Compare with previous values, to keep monotonicity: | ||
3175 | */ | ||
3176 | p->prev_utime = max(p->prev_utime, utime); | ||
3177 | p->prev_stime = max(p->prev_stime, rtime - p->prev_utime); | ||
3178 | |||
3179 | *ut = p->prev_utime; | ||
3180 | *st = p->prev_stime; | ||
3181 | } | ||
3182 | |||
3183 | /* | ||
3184 | * Must be called with siglock held. | ||
3185 | */ | ||
3186 | void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
3187 | { | ||
3188 | struct signal_struct *sig = p->signal; | ||
3189 | struct task_cputime cputime; | ||
3190 | cputime_t rtime, utime, total; | ||
3191 | |||
3192 | thread_group_cputime(p, &cputime); | ||
3193 | |||
3194 | total = cputime.utime + cputime.stime; | ||
3195 | rtime = nsecs_to_cputime(cputime.sum_exec_runtime); | ||
3196 | |||
3197 | if (total) | ||
3198 | utime = scale_utime(cputime.utime, rtime, total); | ||
3199 | else | ||
3200 | utime = rtime; | ||
3201 | |||
3202 | sig->prev_utime = max(sig->prev_utime, utime); | ||
3203 | sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime); | ||
3204 | |||
3205 | *ut = sig->prev_utime; | ||
3206 | *st = sig->prev_stime; | ||
3207 | } | ||
3208 | #endif | ||
3209 | |||
3210 | /* | 2655 | /* |
3211 | * This function gets called by the timer code, with HZ frequency. | 2656 | * This function gets called by the timer code, with HZ frequency. |
3212 | * We call it with interrupts disabled. | 2657 | * We call it with interrupts disabled. |
@@ -8419,6 +7864,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
8419 | * (balbir@in.ibm.com). | 7864 | * (balbir@in.ibm.com). |
8420 | */ | 7865 | */ |
8421 | 7866 | ||
7867 | struct cpuacct root_cpuacct; | ||
7868 | |||
8422 | /* create a new cpu accounting group */ | 7869 | /* create a new cpu accounting group */ |
8423 | static struct cgroup_subsys_state *cpuacct_create(struct cgroup *cgrp) | 7870 | static struct cgroup_subsys_state *cpuacct_create(struct cgroup *cgrp) |
8424 | { | 7871 | { |
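The block removed from core.c above (and re-added verbatim in the new cputime.c below) implements the lockless per-CPU irq time accounting described in its leading comment: at each hard/soft irq transition, the sched_clock delta since the last accounting point is charged to either the hardirq or the softirq bucket of that CPU. The following is only a rough, hedged single-CPU userspace sketch of that bookkeeping; `account_irq_delta`, the `now_ns` parameter and the `in_hardirq` flag are made-up stand-ins for account_system_vtime(), sched_clock_cpu() and hardirq_count(), not kernel APIs.

```c
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Toy single-CPU model of the per-CPU counters used by the kernel code. */
static uint64_t hardirq_time;   /* models cpu_hardirq_time */
static uint64_t softirq_time;   /* models cpu_softirq_time */
static uint64_t irq_start_time; /* models irq_start_time   */

static void account_irq_delta(uint64_t now_ns, bool in_hardirq)
{
	/* Time elapsed since the last accounting point on this "CPU". */
	uint64_t delta = now_ns - irq_start_time;

	irq_start_time += delta;

	/* Charge the slice to the context that consumed it. */
	if (in_hardirq)
		hardirq_time += delta;
	else
		softirq_time += delta;
}

int main(void)
{
	/* Two accounting points: 300us in hardirq, then 200us in softirq. */
	account_irq_delta(300000, true);
	account_irq_delta(500000, false);

	printf("hardirq: %llu ns, softirq: %llu ns\n",
	       (unsigned long long)hardirq_time,
	       (unsigned long long)softirq_time);
	return 0;
}
```

The sketch deliberately leaves out the interrupt disabling, the ksoftirqd special case and the 32-bit seqcount protection that the real account_system_vtime() needs; it is only meant to show the delta bookkeeping.
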
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
new file mode 100644
index 000000000000..372692bd5376
--- /dev/null
+++ b/kernel/sched/cputime.c
@@ -0,0 +1,504 @@
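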
1 | #include <linux/export.h> | ||
2 | #include <linux/sched.h> | ||
3 | #include <linux/tsacct_kern.h> | ||
4 | #include <linux/kernel_stat.h> | ||
5 | #include <linux/static_key.h> | ||
6 | #include "sched.h" | ||
7 | |||
8 | |||
9 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
10 | |||
11 | /* | ||
12 | * There are no locks covering percpu hardirq/softirq time. | ||
13 | * They are only modified in account_system_vtime, on corresponding CPU | ||
14 | * with interrupts disabled. So, writes are safe. | ||
15 | * They are read and saved off onto struct rq in update_rq_clock(). | ||
16 | * This may result in other CPU reading this CPU's irq time and can | ||
17 | * race with irq/account_system_vtime on this CPU. We would either get old | ||
18 | * or new value with a side effect of accounting a slice of irq time to wrong | ||
19 | * task when irq is in progress while we read rq->clock. That is a worthy | ||
20 | * compromise in place of having locks on each irq in account_system_time. | ||
21 | */ | ||
22 | DEFINE_PER_CPU(u64, cpu_hardirq_time); | ||
23 | DEFINE_PER_CPU(u64, cpu_softirq_time); | ||
24 | |||
25 | static DEFINE_PER_CPU(u64, irq_start_time); | ||
26 | static int sched_clock_irqtime; | ||
27 | |||
28 | void enable_sched_clock_irqtime(void) | ||
29 | { | ||
30 | sched_clock_irqtime = 1; | ||
31 | } | ||
32 | |||
33 | void disable_sched_clock_irqtime(void) | ||
34 | { | ||
35 | sched_clock_irqtime = 0; | ||
36 | } | ||
37 | |||
38 | #ifndef CONFIG_64BIT | ||
39 | DEFINE_PER_CPU(seqcount_t, irq_time_seq); | ||
40 | #endif /* CONFIG_64BIT */ | ||
41 | |||
42 | /* | ||
43 | * Called before incrementing preempt_count on {soft,}irq_enter | ||
44 | * and before decrementing preempt_count on {soft,}irq_exit. | ||
45 | */ | ||
46 | void account_system_vtime(struct task_struct *curr) | ||
47 | { | ||
48 | unsigned long flags; | ||
49 | s64 delta; | ||
50 | int cpu; | ||
51 | |||
52 | if (!sched_clock_irqtime) | ||
53 | return; | ||
54 | |||
55 | local_irq_save(flags); | ||
56 | |||
57 | cpu = smp_processor_id(); | ||
58 | delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); | ||
59 | __this_cpu_add(irq_start_time, delta); | ||
60 | |||
61 | irq_time_write_begin(); | ||
62 | /* | ||
63 | * We do not account for softirq time from ksoftirqd here. | ||
64 | * We want to continue accounting softirq time to ksoftirqd thread | ||
65 | * in that case, so as not to confuse scheduler with a special task | ||
66 | * that do not consume any time, but still wants to run. | ||
67 | */ | ||
68 | if (hardirq_count()) | ||
69 | __this_cpu_add(cpu_hardirq_time, delta); | ||
70 | else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) | ||
71 | __this_cpu_add(cpu_softirq_time, delta); | ||
72 | |||
73 | irq_time_write_end(); | ||
74 | local_irq_restore(flags); | ||
75 | } | ||
76 | EXPORT_SYMBOL_GPL(account_system_vtime); | ||
77 | |||
78 | static int irqtime_account_hi_update(void) | ||
79 | { | ||
80 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
81 | unsigned long flags; | ||
82 | u64 latest_ns; | ||
83 | int ret = 0; | ||
84 | |||
85 | local_irq_save(flags); | ||
86 | latest_ns = this_cpu_read(cpu_hardirq_time); | ||
87 | if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ]) | ||
88 | ret = 1; | ||
89 | local_irq_restore(flags); | ||
90 | return ret; | ||
91 | } | ||
92 | |||
93 | static int irqtime_account_si_update(void) | ||
94 | { | ||
95 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
96 | unsigned long flags; | ||
97 | u64 latest_ns; | ||
98 | int ret = 0; | ||
99 | |||
100 | local_irq_save(flags); | ||
101 | latest_ns = this_cpu_read(cpu_softirq_time); | ||
102 | if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ]) | ||
103 | ret = 1; | ||
104 | local_irq_restore(flags); | ||
105 | return ret; | ||
106 | } | ||
107 | |||
108 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
109 | |||
110 | #define sched_clock_irqtime (0) | ||
111 | |||
112 | #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */ | ||
113 | |||
114 | static inline void task_group_account_field(struct task_struct *p, int index, | ||
115 | u64 tmp) | ||
116 | { | ||
117 | #ifdef CONFIG_CGROUP_CPUACCT | ||
118 | struct kernel_cpustat *kcpustat; | ||
119 | struct cpuacct *ca; | ||
120 | #endif | ||
121 | /* | ||
122 | * Since all updates are sure to touch the root cgroup, we | ||
123 | * get ourselves ahead and touch it first. If the root cgroup | ||
124 | * is the only cgroup, then nothing else should be necessary. | ||
125 | * | ||
126 | */ | ||
127 | __get_cpu_var(kernel_cpustat).cpustat[index] += tmp; | ||
128 | |||
129 | #ifdef CONFIG_CGROUP_CPUACCT | ||
130 | if (unlikely(!cpuacct_subsys.active)) | ||
131 | return; | ||
132 | |||
133 | rcu_read_lock(); | ||
134 | ca = task_ca(p); | ||
135 | while (ca && (ca != &root_cpuacct)) { | ||
136 | kcpustat = this_cpu_ptr(ca->cpustat); | ||
137 | kcpustat->cpustat[index] += tmp; | ||
138 | ca = parent_ca(ca); | ||
139 | } | ||
140 | rcu_read_unlock(); | ||
141 | #endif | ||
142 | } | ||
143 | |||
144 | /* | ||
145 | * Account user cpu time to a process. | ||
146 | * @p: the process that the cpu time gets accounted to | ||
147 | * @cputime: the cpu time spent in user space since the last update | ||
148 | * @cputime_scaled: cputime scaled by cpu frequency | ||
149 | */ | ||
150 | void account_user_time(struct task_struct *p, cputime_t cputime, | ||
151 | cputime_t cputime_scaled) | ||
152 | { | ||
153 | int index; | ||
154 | |||
155 | /* Add user time to process. */ | ||
156 | p->utime += cputime; | ||
157 | p->utimescaled += cputime_scaled; | ||
158 | account_group_user_time(p, cputime); | ||
159 | |||
160 | index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; | ||
161 | |||
162 | /* Add user time to cpustat. */ | ||
163 | task_group_account_field(p, index, (__force u64) cputime); | ||
164 | |||
165 | /* Account for user time used */ | ||
166 | acct_update_integrals(p); | ||
167 | } | ||
168 | |||
169 | /* | ||
170 | * Account guest cpu time to a process. | ||
171 | * @p: the process that the cpu time gets accounted to | ||
172 | * @cputime: the cpu time spent in virtual machine since the last update | ||
173 | * @cputime_scaled: cputime scaled by cpu frequency | ||
174 | */ | ||
175 | static void account_guest_time(struct task_struct *p, cputime_t cputime, | ||
176 | cputime_t cputime_scaled) | ||
177 | { | ||
178 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
179 | |||
180 | /* Add guest time to process. */ | ||
181 | p->utime += cputime; | ||
182 | p->utimescaled += cputime_scaled; | ||
183 | account_group_user_time(p, cputime); | ||
184 | p->gtime += cputime; | ||
185 | |||
186 | /* Add guest time to cpustat. */ | ||
187 | if (TASK_NICE(p) > 0) { | ||
188 | cpustat[CPUTIME_NICE] += (__force u64) cputime; | ||
189 | cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime; | ||
190 | } else { | ||
191 | cpustat[CPUTIME_USER] += (__force u64) cputime; | ||
192 | cpustat[CPUTIME_GUEST] += (__force u64) cputime; | ||
193 | } | ||
194 | } | ||
195 | |||
196 | /* | ||
197 | * Account system cpu time to a process and desired cpustat field | ||
198 | * @p: the process that the cpu time gets accounted to | ||
199 | * @cputime: the cpu time spent in kernel space since the last update | ||
200 | * @cputime_scaled: cputime scaled by cpu frequency | ||
201 | * @target_cputime64: pointer to cpustat field that has to be updated | ||
202 | */ | ||
203 | static inline | ||
204 | void __account_system_time(struct task_struct *p, cputime_t cputime, | ||
205 | cputime_t cputime_scaled, int index) | ||
206 | { | ||
207 | /* Add system time to process. */ | ||
208 | p->stime += cputime; | ||
209 | p->stimescaled += cputime_scaled; | ||
210 | account_group_system_time(p, cputime); | ||
211 | |||
212 | /* Add system time to cpustat. */ | ||
213 | task_group_account_field(p, index, (__force u64) cputime); | ||
214 | |||
215 | /* Account for system time used */ | ||
216 | acct_update_integrals(p); | ||
217 | } | ||
218 | |||
219 | /* | ||
220 | * Account system cpu time to a process. | ||
221 | * @p: the process that the cpu time gets accounted to | ||
222 | * @hardirq_offset: the offset to subtract from hardirq_count() | ||
223 | * @cputime: the cpu time spent in kernel space since the last update | ||
224 | * @cputime_scaled: cputime scaled by cpu frequency | ||
225 | */ | ||
226 | void account_system_time(struct task_struct *p, int hardirq_offset, | ||
227 | cputime_t cputime, cputime_t cputime_scaled) | ||
228 | { | ||
229 | int index; | ||
230 | |||
231 | if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { | ||
232 | account_guest_time(p, cputime, cputime_scaled); | ||
233 | return; | ||
234 | } | ||
235 | |||
236 | if (hardirq_count() - hardirq_offset) | ||
237 | index = CPUTIME_IRQ; | ||
238 | else if (in_serving_softirq()) | ||
239 | index = CPUTIME_SOFTIRQ; | ||
240 | else | ||
241 | index = CPUTIME_SYSTEM; | ||
242 | |||
243 | __account_system_time(p, cputime, cputime_scaled, index); | ||
244 | } | ||
245 | |||
246 | /* | ||
247 | * Account for involuntary wait time. | ||
248 | * @cputime: the cpu time spent in involuntary wait | ||
249 | */ | ||
250 | void account_steal_time(cputime_t cputime) | ||
251 | { | ||
252 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
253 | |||
254 | cpustat[CPUTIME_STEAL] += (__force u64) cputime; | ||
255 | } | ||
256 | |||
257 | /* | ||
258 | * Account for idle time. | ||
259 | * @cputime: the cpu time spent in idle wait | ||
260 | */ | ||
261 | void account_idle_time(cputime_t cputime) | ||
262 | { | ||
263 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
264 | struct rq *rq = this_rq(); | ||
265 | |||
266 | if (atomic_read(&rq->nr_iowait) > 0) | ||
267 | cpustat[CPUTIME_IOWAIT] += (__force u64) cputime; | ||
268 | else | ||
269 | cpustat[CPUTIME_IDLE] += (__force u64) cputime; | ||
270 | } | ||
271 | |||
272 | static __always_inline bool steal_account_process_tick(void) | ||
273 | { | ||
274 | #ifdef CONFIG_PARAVIRT | ||
275 | if (static_key_false(¶virt_steal_enabled)) { | ||
276 | u64 steal, st = 0; | ||
277 | |||
278 | steal = paravirt_steal_clock(smp_processor_id()); | ||
279 | steal -= this_rq()->prev_steal_time; | ||
280 | |||
281 | st = steal_ticks(steal); | ||
282 | this_rq()->prev_steal_time += st * TICK_NSEC; | ||
283 | |||
284 | account_steal_time(st); | ||
285 | return st; | ||
286 | } | ||
287 | #endif | ||
288 | return false; | ||
289 | } | ||
290 | |||
291 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | ||
292 | |||
293 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
294 | /* | ||
295 | * Account a tick to a process and cpustat | ||
296 | * @p: the process that the cpu time gets accounted to | ||
297 | * @user_tick: is the tick from userspace | ||
298 | * @rq: the pointer to rq | ||
299 | * | ||
300 | * Tick demultiplexing follows the order | ||
301 | * - pending hardirq update | ||
302 | * - pending softirq update | ||
303 | * - user_time | ||
304 | * - idle_time | ||
305 | * - system time | ||
306 | * - check for guest_time | ||
307 | * - else account as system_time | ||
308 | * | ||
309 | * Check for hardirq is done both for system and user time as there is | ||
310 | * no timer going off while we are on hardirq and hence we may never get an | ||
311 | * opportunity to update it solely in system time. | ||
312 | * p->stime and friends are only updated on system time and not on irq | ||
313 | * softirq as those do not count in task exec_runtime any more. | ||
314 | */ | ||
315 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | ||
316 | struct rq *rq) | ||
317 | { | ||
318 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | ||
319 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
320 | |||
321 | if (steal_account_process_tick()) | ||
322 | return; | ||
323 | |||
324 | if (irqtime_account_hi_update()) { | ||
325 | cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy; | ||
326 | } else if (irqtime_account_si_update()) { | ||
327 | cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy; | ||
328 | } else if (this_cpu_ksoftirqd() == p) { | ||
329 | /* | ||
330 | * ksoftirqd time do not get accounted in cpu_softirq_time. | ||
331 | * So, we have to handle it separately here. | ||
332 | * Also, p->stime needs to be updated for ksoftirqd. | ||
333 | */ | ||
334 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | ||
335 | CPUTIME_SOFTIRQ); | ||
336 | } else if (user_tick) { | ||
337 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | ||
338 | } else if (p == rq->idle) { | ||
339 | account_idle_time(cputime_one_jiffy); | ||
340 | } else if (p->flags & PF_VCPU) { /* System time or guest time */ | ||
341 | account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); | ||
342 | } else { | ||
343 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | ||
344 | CPUTIME_SYSTEM); | ||
345 | } | ||
346 | } | ||
347 | |||
348 | static void irqtime_account_idle_ticks(int ticks) | ||
349 | { | ||
350 | int i; | ||
351 | struct rq *rq = this_rq(); | ||
352 | |||
353 | for (i = 0; i < ticks; i++) | ||
354 | irqtime_account_process_tick(current, 0, rq); | ||
355 | } | ||
356 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
357 | static void irqtime_account_idle_ticks(int ticks) {} | ||
358 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | ||
359 | struct rq *rq) {} | ||
360 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
361 | |||
362 | /* | ||
363 | * Account a single tick of cpu time. | ||
364 | * @p: the process that the cpu time gets accounted to | ||
365 | * @user_tick: indicates if the tick is a user or a system tick | ||
366 | */ | ||
367 | void account_process_tick(struct task_struct *p, int user_tick) | ||
368 | { | ||
369 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | ||
370 | struct rq *rq = this_rq(); | ||
371 | |||
372 | if (sched_clock_irqtime) { | ||
373 | irqtime_account_process_tick(p, user_tick, rq); | ||
374 | return; | ||
375 | } | ||
376 | |||
377 | if (steal_account_process_tick()) | ||
378 | return; | ||
379 | |||
380 | if (user_tick) | ||
381 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | ||
382 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) | ||
383 | account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy, | ||
384 | one_jiffy_scaled); | ||
385 | else | ||
386 | account_idle_time(cputime_one_jiffy); | ||
387 | } | ||
388 | |||
389 | /* | ||
390 | * Account multiple ticks of steal time. | ||
391 | * @p: the process from which the cpu time has been stolen | ||
392 | * @ticks: number of stolen ticks | ||
393 | */ | ||
394 | void account_steal_ticks(unsigned long ticks) | ||
395 | { | ||
396 | account_steal_time(jiffies_to_cputime(ticks)); | ||
397 | } | ||
398 | |||
399 | /* | ||
400 | * Account multiple ticks of idle time. | ||
401 | * @ticks: number of stolen ticks | ||
402 | */ | ||
403 | void account_idle_ticks(unsigned long ticks) | ||
404 | { | ||
405 | |||
406 | if (sched_clock_irqtime) { | ||
407 | irqtime_account_idle_ticks(ticks); | ||
408 | return; | ||
409 | } | ||
410 | |||
411 | account_idle_time(jiffies_to_cputime(ticks)); | ||
412 | } | ||
413 | |||
414 | #endif | ||
415 | |||
416 | /* | ||
417 | * Use precise platform statistics if available: | ||
418 | */ | ||
419 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | ||
420 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
421 | { | ||
422 | *ut = p->utime; | ||
423 | *st = p->stime; | ||
424 | } | ||
425 | |||
426 | void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
427 | { | ||
428 | struct task_cputime cputime; | ||
429 | |||
430 | thread_group_cputime(p, &cputime); | ||
431 | |||
432 | *ut = cputime.utime; | ||
433 | *st = cputime.stime; | ||
434 | } | ||
435 | #else | ||
436 | |||
437 | #ifndef nsecs_to_cputime | ||
438 | # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) | ||
439 | #endif | ||
440 | |||
441 | static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) | ||
442 | { | ||
443 | u64 temp = (__force u64) rtime; | ||
444 | |||
445 | temp *= (__force u64) utime; | ||
446 | |||
447 | if (sizeof(cputime_t) == 4) | ||
448 | temp = div_u64(temp, (__force u32) total); | ||
449 | else | ||
450 | temp = div64_u64(temp, (__force u64) total); | ||
451 | |||
452 | return (__force cputime_t) temp; | ||
453 | } | ||
454 | |||
455 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
456 | { | ||
457 | cputime_t rtime, utime = p->utime, total = utime + p->stime; | ||
458 | |||
459 | /* | ||
460 | * Use CFS's precise accounting: | ||
461 | */ | ||
462 | rtime = nsecs_to_cputime(p->se.sum_exec_runtime); | ||
463 | |||
464 | if (total) | ||
465 | utime = scale_utime(utime, rtime, total); | ||
466 | else | ||
467 | utime = rtime; | ||
468 | |||
469 | /* | ||
470 | * Compare with previous values, to keep monotonicity: | ||
471 | */ | ||
472 | p->prev_utime = max(p->prev_utime, utime); | ||
473 | p->prev_stime = max(p->prev_stime, rtime - p->prev_utime); | ||
474 | |||
475 | *ut = p->prev_utime; | ||
476 | *st = p->prev_stime; | ||
477 | } | ||
478 | |||
479 | /* | ||
480 | * Must be called with siglock held. | ||
481 | */ | ||
482 | void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
483 | { | ||
484 | struct signal_struct *sig = p->signal; | ||
485 | struct task_cputime cputime; | ||
486 | cputime_t rtime, utime, total; | ||
487 | |||
488 | thread_group_cputime(p, &cputime); | ||
489 | |||
490 | total = cputime.utime + cputime.stime; | ||
491 | rtime = nsecs_to_cputime(cputime.sum_exec_runtime); | ||
492 | |||
493 | if (total) | ||
494 | utime = scale_utime(cputime.utime, rtime, total); | ||
495 | else | ||
496 | utime = rtime; | ||
497 | |||
498 | sig->prev_utime = max(sig->prev_utime, utime); | ||
499 | sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime); | ||
500 | |||
501 | *ut = sig->prev_utime; | ||
502 | *st = sig->prev_stime; | ||
503 | } | ||
504 | #endif | ||
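In the !CONFIG_VIRT_CPU_ACCOUNTING branch above, task_times() rescales the sampled utime/stime split so that it sums to the precise rtime taken from CFS, then clamps the result against prev_utime/prev_stime so the values reported to userspace never go backwards. Below is a minimal userspace sketch of that arithmetic under stated assumptions: plain 64-bit integers instead of cputime_t (so only the div64_u64 path of scale_utime() is modeled) and a hypothetical `split_runtime()` helper that is not a kernel API.

```c
#include <stdio.h>
#include <stdint.h>

static uint64_t prev_utime, prev_stime;   /* model p->prev_utime / p->prev_stime */

/* rtime * utime / total, as in scale_utime() above (64-bit only). */
static uint64_t scale(uint64_t utime, uint64_t rtime, uint64_t total)
{
	return total ? rtime * utime / total : rtime;
}

static void split_runtime(uint64_t utime, uint64_t stime, uint64_t rtime,
			  uint64_t *ut, uint64_t *st)
{
	uint64_t total = utime + stime;
	uint64_t scaled = scale(utime, rtime, total);

	/* Keep the reported values monotonic across calls. */
	prev_utime = prev_utime > scaled ? prev_utime : scaled;
	if (rtime - prev_utime > prev_stime)
		prev_stime = rtime - prev_utime;

	*ut = prev_utime;
	*st = prev_stime;
}

int main(void)
{
	uint64_t ut, st;

	/* 60/40 user/system tick split, 1000 units of precise runtime. */
	split_runtime(60, 40, 1000, &ut, &st);
	printf("utime=%llu stime=%llu\n", (unsigned long long)ut, (unsigned long long)st);

	/* Later sample with a smaller user share: output still never decreases. */
	split_runtime(55, 45, 1100, &ut, &st);
	printf("utime=%llu stime=%llu\n", (unsigned long long)ut, (unsigned long long)st);
	return 0;
}
```
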
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f6714d009e77..804c2e5e7872 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -891,6 +891,9 @@ struct cpuacct {
891 | struct kernel_cpustat __percpu *cpustat; | 891 | struct kernel_cpustat __percpu *cpustat; |
892 | }; | 892 | }; |
893 | 893 | ||
894 | extern struct cgroup_subsys cpuacct_subsys; | ||
895 | extern struct cpuacct root_cpuacct; | ||
896 | |||
894 | /* return cpu accounting group corresponding to this container */ | 897 | /* return cpu accounting group corresponding to this container */ |
895 | static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp) | 898 | static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp) |
896 | { | 899 | { |
@@ -917,6 +920,16 @@ extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
917 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} | 920 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} |
918 | #endif | 921 | #endif |
919 | 922 | ||
923 | #ifdef CONFIG_PARAVIRT | ||
924 | static inline u64 steal_ticks(u64 steal) | ||
925 | { | ||
926 | if (unlikely(steal > NSEC_PER_SEC)) | ||
927 | return div_u64(steal, TICK_NSEC); | ||
928 | |||
929 | return __iter_div_u64_rem(steal, TICK_NSEC, &steal); | ||
930 | } | ||
931 | #endif | ||
932 | |||
920 | static inline void inc_nr_running(struct rq *rq) | 933 | static inline void inc_nr_running(struct rq *rq) |
921 | { | 934 | { |
922 | rq->nr_running++; | 935 | rq->nr_running++; |
@@ -1157,3 +1170,53 @@ enum rq_nohz_flag_bits {
1157 | 1170 | ||
1158 | #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) | 1171 | #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) |
1159 | #endif | 1172 | #endif |
1173 | |||
1174 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
1175 | |||
1176 | DECLARE_PER_CPU(u64, cpu_hardirq_time); | ||
1177 | DECLARE_PER_CPU(u64, cpu_softirq_time); | ||
1178 | |||
1179 | #ifndef CONFIG_64BIT | ||
1180 | DECLARE_PER_CPU(seqcount_t, irq_time_seq); | ||
1181 | |||
1182 | static inline void irq_time_write_begin(void) | ||
1183 | { | ||
1184 | __this_cpu_inc(irq_time_seq.sequence); | ||
1185 | smp_wmb(); | ||
1186 | } | ||
1187 | |||
1188 | static inline void irq_time_write_end(void) | ||
1189 | { | ||
1190 | smp_wmb(); | ||
1191 | __this_cpu_inc(irq_time_seq.sequence); | ||
1192 | } | ||
1193 | |||
1194 | static inline u64 irq_time_read(int cpu) | ||
1195 | { | ||
1196 | u64 irq_time; | ||
1197 | unsigned seq; | ||
1198 | |||
1199 | do { | ||
1200 | seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu)); | ||
1201 | irq_time = per_cpu(cpu_softirq_time, cpu) + | ||
1202 | per_cpu(cpu_hardirq_time, cpu); | ||
1203 | } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq)); | ||
1204 | |||
1205 | return irq_time; | ||
1206 | } | ||
1207 | #else /* CONFIG_64BIT */ | ||
1208 | static inline void irq_time_write_begin(void) | ||
1209 | { | ||
1210 | } | ||
1211 | |||
1212 | static inline void irq_time_write_end(void) | ||
1213 | { | ||
1214 | } | ||
1215 | |||
1216 | static inline u64 irq_time_read(int cpu) | ||
1217 | { | ||
1218 | return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); | ||
1219 | } | ||
1220 | #endif /* CONFIG_64BIT */ | ||
1221 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
1222 | |||
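On 32-bit kernels the per-CPU irq time counters cannot be read atomically, so the sched.h hunk above wraps them in the irq_time_seq seqcount: the writer makes the sequence odd while it updates, and readers retry until they observe a stable even value. The sketch below is a loose userspace analogue of that retry shape, under stated assumptions: it uses C11 atomics with default seq_cst ordering, which is stronger and simpler than the smp_wmb() discipline the kernel relies on, and all names are hypothetical.

```c
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic unsigned int seq;                /* models irq_time_seq */
static uint64_t hardirq_time, softirq_time;     /* models the per-CPU counters */

static void irq_time_write(uint64_t hard_delta, uint64_t soft_delta)
{
	atomic_fetch_add(&seq, 1);      /* odd: a write is in progress */
	hardirq_time += hard_delta;
	softirq_time += soft_delta;
	atomic_fetch_add(&seq, 1);      /* even: the write is complete */
}

static uint64_t irq_time_read(void)
{
	unsigned int start;
	uint64_t sum;

	/* Retry until the sequence was even and unchanged across the read. */
	do {
		start = atomic_load(&seq);
		sum = hardirq_time + softirq_time;
	} while ((start & 1) || start != atomic_load(&seq));

	return sum;
}

int main(void)
{
	irq_time_write(1000, 500);
	printf("irq time: %llu ns\n", (unsigned long long)irq_time_read());
	return 0;
}
```

On 64-bit kernels this machinery is unnecessary, which is why the CONFIG_64BIT variant of irq_time_read() above is a plain sum of the two per-CPU values.
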