Diffstat (limited to 'kernel')
 -rw-r--r--  kernel/fork.c              |   4
 -rw-r--r--  kernel/sched/Makefile      |   2
 -rw-r--r--  kernel/sched/core.c        | 675
 -rw-r--r--  kernel/sched/cputime.c     | 530
 -rw-r--r--  kernel/sched/fair.c        |  81
 -rw-r--r--  kernel/sched/features.h    |  10
 -rw-r--r--  kernel/sched/rt.c          |   5
 -rw-r--r--  kernel/sched/sched.h       |  69
 -rw-r--r--  kernel/softirq.c           |   6
 -rw-r--r--  kernel/sysctl.c            |   6
 -rw-r--r--  kernel/time/tick-sched.c   |   3
11 files changed, 686 insertions, 705 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 2343c9eaaaf4..5a0e74d89a5a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
| @@ -1276,11 +1276,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1276 | #endif | 1276 | #endif |
| 1277 | #ifdef CONFIG_TRACE_IRQFLAGS | 1277 | #ifdef CONFIG_TRACE_IRQFLAGS |
| 1278 | p->irq_events = 0; | 1278 | p->irq_events = 0; |
| 1279 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | ||
| 1280 | p->hardirqs_enabled = 1; | ||
| 1281 | #else | ||
| 1282 | p->hardirqs_enabled = 0; | 1279 | p->hardirqs_enabled = 0; |
| 1283 | #endif | ||
| 1284 | p->hardirq_enable_ip = 0; | 1280 | p->hardirq_enable_ip = 0; |
| 1285 | p->hardirq_enable_event = 0; | 1281 | p->hardirq_enable_event = 0; |
| 1286 | p->hardirq_disable_ip = _THIS_IP_; | 1282 | p->hardirq_disable_ip = _THIS_IP_; |
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 173ea52f3af0..f06d249e103b 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
| @@ -11,7 +11,7 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) | |||
| 11 | CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer | 11 | CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer |
| 12 | endif | 12 | endif |
| 13 | 13 | ||
| 14 | obj-y += core.o clock.o idle_task.o fair.o rt.o stop_task.o | 14 | obj-y += core.o clock.o cputime.o idle_task.o fair.o rt.o stop_task.o |
| 15 | obj-$(CONFIG_SMP) += cpupri.o | 15 | obj-$(CONFIG_SMP) += cpupri.o |
| 16 | obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o | 16 | obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o |
| 17 | obj-$(CONFIG_SCHEDSTATS) += stats.o | 17 | obj-$(CONFIG_SCHEDSTATS) += stats.o |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3c4dec0594d6..c17747236438 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
| @@ -740,126 +740,6 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags) | |||
| 740 | dequeue_task(rq, p, flags); | 740 | dequeue_task(rq, p, flags); |
| 741 | } | 741 | } |
| 742 | 742 | ||
| 743 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
| 744 | |||
| 745 | /* | ||
| 746 | * There are no locks covering percpu hardirq/softirq time. | ||
| 747 | * They are only modified in account_system_vtime, on corresponding CPU | ||
| 748 | * with interrupts disabled. So, writes are safe. | ||
| 749 | * They are read and saved off onto struct rq in update_rq_clock(). | ||
| 750 | * This may result in other CPU reading this CPU's irq time and can | ||
| 751 | * race with irq/account_system_vtime on this CPU. We would either get old | ||
| 752 | * or new value with a side effect of accounting a slice of irq time to wrong | ||
| 753 | * task when irq is in progress while we read rq->clock. That is a worthy | ||
| 754 | * compromise in place of having locks on each irq in account_system_time. | ||
| 755 | */ | ||
| 756 | static DEFINE_PER_CPU(u64, cpu_hardirq_time); | ||
| 757 | static DEFINE_PER_CPU(u64, cpu_softirq_time); | ||
| 758 | |||
| 759 | static DEFINE_PER_CPU(u64, irq_start_time); | ||
| 760 | static int sched_clock_irqtime; | ||
| 761 | |||
| 762 | void enable_sched_clock_irqtime(void) | ||
| 763 | { | ||
| 764 | sched_clock_irqtime = 1; | ||
| 765 | } | ||
| 766 | |||
| 767 | void disable_sched_clock_irqtime(void) | ||
| 768 | { | ||
| 769 | sched_clock_irqtime = 0; | ||
| 770 | } | ||
| 771 | |||
| 772 | #ifndef CONFIG_64BIT | ||
| 773 | static DEFINE_PER_CPU(seqcount_t, irq_time_seq); | ||
| 774 | |||
| 775 | static inline void irq_time_write_begin(void) | ||
| 776 | { | ||
| 777 | __this_cpu_inc(irq_time_seq.sequence); | ||
| 778 | smp_wmb(); | ||
| 779 | } | ||
| 780 | |||
| 781 | static inline void irq_time_write_end(void) | ||
| 782 | { | ||
| 783 | smp_wmb(); | ||
| 784 | __this_cpu_inc(irq_time_seq.sequence); | ||
| 785 | } | ||
| 786 | |||
| 787 | static inline u64 irq_time_read(int cpu) | ||
| 788 | { | ||
| 789 | u64 irq_time; | ||
| 790 | unsigned seq; | ||
| 791 | |||
| 792 | do { | ||
| 793 | seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu)); | ||
| 794 | irq_time = per_cpu(cpu_softirq_time, cpu) + | ||
| 795 | per_cpu(cpu_hardirq_time, cpu); | ||
| 796 | } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq)); | ||
| 797 | |||
| 798 | return irq_time; | ||
| 799 | } | ||
| 800 | #else /* CONFIG_64BIT */ | ||
| 801 | static inline void irq_time_write_begin(void) | ||
| 802 | { | ||
| 803 | } | ||
| 804 | |||
| 805 | static inline void irq_time_write_end(void) | ||
| 806 | { | ||
| 807 | } | ||
| 808 | |||
| 809 | static inline u64 irq_time_read(int cpu) | ||
| 810 | { | ||
| 811 | return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); | ||
| 812 | } | ||
| 813 | #endif /* CONFIG_64BIT */ | ||
| 814 | |||
| 815 | /* | ||
| 816 | * Called before incrementing preempt_count on {soft,}irq_enter | ||
| 817 | * and before decrementing preempt_count on {soft,}irq_exit. | ||
| 818 | */ | ||
| 819 | void account_system_vtime(struct task_struct *curr) | ||
| 820 | { | ||
| 821 | unsigned long flags; | ||
| 822 | s64 delta; | ||
| 823 | int cpu; | ||
| 824 | |||
| 825 | if (!sched_clock_irqtime) | ||
| 826 | return; | ||
| 827 | |||
| 828 | local_irq_save(flags); | ||
| 829 | |||
| 830 | cpu = smp_processor_id(); | ||
| 831 | delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); | ||
| 832 | __this_cpu_add(irq_start_time, delta); | ||
| 833 | |||
| 834 | irq_time_write_begin(); | ||
| 835 | /* | ||
| 836 | * We do not account for softirq time from ksoftirqd here. | ||
| 837 | * We want to continue accounting softirq time to ksoftirqd thread | ||
| 838 | * in that case, so as not to confuse scheduler with a special task | ||
| 839 | * that do not consume any time, but still wants to run. | ||
| 840 | */ | ||
| 841 | if (hardirq_count()) | ||
| 842 | __this_cpu_add(cpu_hardirq_time, delta); | ||
| 843 | else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) | ||
| 844 | __this_cpu_add(cpu_softirq_time, delta); | ||
| 845 | |||
| 846 | irq_time_write_end(); | ||
| 847 | local_irq_restore(flags); | ||
| 848 | } | ||
| 849 | EXPORT_SYMBOL_GPL(account_system_vtime); | ||
| 850 | |||
| 851 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
| 852 | |||
| 853 | #ifdef CONFIG_PARAVIRT | ||
| 854 | static inline u64 steal_ticks(u64 steal) | ||
| 855 | { | ||
| 856 | if (unlikely(steal > NSEC_PER_SEC)) | ||
| 857 | return div_u64(steal, TICK_NSEC); | ||
| 858 | |||
| 859 | return __iter_div_u64_rem(steal, TICK_NSEC, &steal); | ||
| 860 | } | ||
| 861 | #endif | ||
| 862 | |||
| 863 | static void update_rq_clock_task(struct rq *rq, s64 delta) | 743 | static void update_rq_clock_task(struct rq *rq, s64 delta) |
| 864 | { | 744 | { |
| 865 | /* | 745 | /* |
| @@ -920,43 +800,6 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) | |||
| 920 | #endif | 800 | #endif |
| 921 | } | 801 | } |
| 922 | 802 | ||
| 923 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
| 924 | static int irqtime_account_hi_update(void) | ||
| 925 | { | ||
| 926 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
| 927 | unsigned long flags; | ||
| 928 | u64 latest_ns; | ||
| 929 | int ret = 0; | ||
| 930 | |||
| 931 | local_irq_save(flags); | ||
| 932 | latest_ns = this_cpu_read(cpu_hardirq_time); | ||
| 933 | if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ]) | ||
| 934 | ret = 1; | ||
| 935 | local_irq_restore(flags); | ||
| 936 | return ret; | ||
| 937 | } | ||
| 938 | |||
| 939 | static int irqtime_account_si_update(void) | ||
| 940 | { | ||
| 941 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
| 942 | unsigned long flags; | ||
| 943 | u64 latest_ns; | ||
| 944 | int ret = 0; | ||
| 945 | |||
| 946 | local_irq_save(flags); | ||
| 947 | latest_ns = this_cpu_read(cpu_softirq_time); | ||
| 948 | if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ]) | ||
| 949 | ret = 1; | ||
| 950 | local_irq_restore(flags); | ||
| 951 | return ret; | ||
| 952 | } | ||
| 953 | |||
| 954 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
| 955 | |||
| 956 | #define sched_clock_irqtime (0) | ||
| 957 | |||
| 958 | #endif | ||
| 959 | |||
| 960 | void sched_set_stop_task(int cpu, struct task_struct *stop) | 803 | void sched_set_stop_task(int cpu, struct task_struct *stop) |
| 961 | { | 804 | { |
| 962 | struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; | 805 | struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; |
| @@ -1518,25 +1361,6 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu) | |||
| 1518 | smp_send_reschedule(cpu); | 1361 | smp_send_reschedule(cpu); |
| 1519 | } | 1362 | } |
| 1520 | 1363 | ||
| 1521 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | ||
| 1522 | static int ttwu_activate_remote(struct task_struct *p, int wake_flags) | ||
| 1523 | { | ||
| 1524 | struct rq *rq; | ||
| 1525 | int ret = 0; | ||
| 1526 | |||
| 1527 | rq = __task_rq_lock(p); | ||
| 1528 | if (p->on_cpu) { | ||
| 1529 | ttwu_activate(rq, p, ENQUEUE_WAKEUP); | ||
| 1530 | ttwu_do_wakeup(rq, p, wake_flags); | ||
| 1531 | ret = 1; | ||
| 1532 | } | ||
| 1533 | __task_rq_unlock(rq); | ||
| 1534 | |||
| 1535 | return ret; | ||
| 1536 | |||
| 1537 | } | ||
| 1538 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | ||
| 1539 | |||
| 1540 | bool cpus_share_cache(int this_cpu, int that_cpu) | 1364 | bool cpus_share_cache(int this_cpu, int that_cpu) |
| 1541 | { | 1365 | { |
| 1542 | return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); | 1366 | return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); |
| @@ -1597,21 +1421,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | |||
| 1597 | * If the owning (remote) cpu is still in the middle of schedule() with | 1421 | * If the owning (remote) cpu is still in the middle of schedule() with |
| 1598 | * this task as prev, wait until its done referencing the task. | 1422 | * this task as prev, wait until its done referencing the task. |
| 1599 | */ | 1423 | */ |
| 1600 | while (p->on_cpu) { | 1424 | while (p->on_cpu) |
| 1601 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | ||
| 1602 | /* | ||
| 1603 | * In case the architecture enables interrupts in | ||
| 1604 | * context_switch(), we cannot busy wait, since that | ||
| 1605 | * would lead to deadlocks when an interrupt hits and | ||
| 1606 | * tries to wake up @prev. So bail and do a complete | ||
| 1607 | * remote wakeup. | ||
| 1608 | */ | ||
| 1609 | if (ttwu_activate_remote(p, wake_flags)) | ||
| 1610 | goto stat; | ||
| 1611 | #else | ||
| 1612 | cpu_relax(); | 1425 | cpu_relax(); |
| 1613 | #endif | ||
| 1614 | } | ||
| 1615 | /* | 1426 | /* |
| 1616 | * Pairs with the smp_wmb() in finish_lock_switch(). | 1427 | * Pairs with the smp_wmb() in finish_lock_switch(). |
| 1617 | */ | 1428 | */ |
| @@ -1953,14 +1764,9 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
| 1953 | * Manfred Spraul <manfred@colorfullife.com> | 1764 | * Manfred Spraul <manfred@colorfullife.com> |
| 1954 | */ | 1765 | */ |
| 1955 | prev_state = prev->state; | 1766 | prev_state = prev->state; |
| 1767 | vtime_task_switch(prev); | ||
| 1956 | finish_arch_switch(prev); | 1768 | finish_arch_switch(prev); |
| 1957 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | ||
| 1958 | local_irq_disable(); | ||
| 1959 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | ||
| 1960 | perf_event_task_sched_in(prev, current); | 1769 | perf_event_task_sched_in(prev, current); |
| 1961 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | ||
| 1962 | local_irq_enable(); | ||
| 1963 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | ||
| 1964 | finish_lock_switch(rq, prev); | 1770 | finish_lock_switch(rq, prev); |
| 1965 | finish_arch_post_lock_switch(); | 1771 | finish_arch_post_lock_switch(); |
| 1966 | 1772 | ||
| @@ -2810,404 +2616,6 @@ unsigned long long task_sched_runtime(struct task_struct *p) | |||
| 2810 | return ns; | 2616 | return ns; |
| 2811 | } | 2617 | } |
| 2812 | 2618 | ||
| 2813 | #ifdef CONFIG_CGROUP_CPUACCT | ||
| 2814 | struct cgroup_subsys cpuacct_subsys; | ||
| 2815 | struct cpuacct root_cpuacct; | ||
| 2816 | #endif | ||
| 2817 | |||
| 2818 | static inline void task_group_account_field(struct task_struct *p, int index, | ||
| 2819 | u64 tmp) | ||
| 2820 | { | ||
| 2821 | #ifdef CONFIG_CGROUP_CPUACCT | ||
| 2822 | struct kernel_cpustat *kcpustat; | ||
| 2823 | struct cpuacct *ca; | ||
| 2824 | #endif | ||
| 2825 | /* | ||
| 2826 | * Since all updates are sure to touch the root cgroup, we | ||
| 2827 | * get ourselves ahead and touch it first. If the root cgroup | ||
| 2828 | * is the only cgroup, then nothing else should be necessary. | ||
| 2829 | * | ||
| 2830 | */ | ||
| 2831 | __get_cpu_var(kernel_cpustat).cpustat[index] += tmp; | ||
| 2832 | |||
| 2833 | #ifdef CONFIG_CGROUP_CPUACCT | ||
| 2834 | if (unlikely(!cpuacct_subsys.active)) | ||
| 2835 | return; | ||
| 2836 | |||
| 2837 | rcu_read_lock(); | ||
| 2838 | ca = task_ca(p); | ||
| 2839 | while (ca && (ca != &root_cpuacct)) { | ||
| 2840 | kcpustat = this_cpu_ptr(ca->cpustat); | ||
| 2841 | kcpustat->cpustat[index] += tmp; | ||
| 2842 | ca = parent_ca(ca); | ||
| 2843 | } | ||
| 2844 | rcu_read_unlock(); | ||
| 2845 | #endif | ||
| 2846 | } | ||
| 2847 | |||
| 2848 | |||
| 2849 | /* | ||
| 2850 | * Account user cpu time to a process. | ||
| 2851 | * @p: the process that the cpu time gets accounted to | ||
| 2852 | * @cputime: the cpu time spent in user space since the last update | ||
| 2853 | * @cputime_scaled: cputime scaled by cpu frequency | ||
| 2854 | */ | ||
| 2855 | void account_user_time(struct task_struct *p, cputime_t cputime, | ||
| 2856 | cputime_t cputime_scaled) | ||
| 2857 | { | ||
| 2858 | int index; | ||
| 2859 | |||
| 2860 | /* Add user time to process. */ | ||
| 2861 | p->utime += cputime; | ||
| 2862 | p->utimescaled += cputime_scaled; | ||
| 2863 | account_group_user_time(p, cputime); | ||
| 2864 | |||
| 2865 | index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; | ||
| 2866 | |||
| 2867 | /* Add user time to cpustat. */ | ||
| 2868 | task_group_account_field(p, index, (__force u64) cputime); | ||
| 2869 | |||
| 2870 | /* Account for user time used */ | ||
| 2871 | acct_update_integrals(p); | ||
| 2872 | } | ||
| 2873 | |||
| 2874 | /* | ||
| 2875 | * Account guest cpu time to a process. | ||
| 2876 | * @p: the process that the cpu time gets accounted to | ||
| 2877 | * @cputime: the cpu time spent in virtual machine since the last update | ||
| 2878 | * @cputime_scaled: cputime scaled by cpu frequency | ||
| 2879 | */ | ||
| 2880 | static void account_guest_time(struct task_struct *p, cputime_t cputime, | ||
| 2881 | cputime_t cputime_scaled) | ||
| 2882 | { | ||
| 2883 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
| 2884 | |||
| 2885 | /* Add guest time to process. */ | ||
| 2886 | p->utime += cputime; | ||
| 2887 | p->utimescaled += cputime_scaled; | ||
| 2888 | account_group_user_time(p, cputime); | ||
| 2889 | p->gtime += cputime; | ||
| 2890 | |||
| 2891 | /* Add guest time to cpustat. */ | ||
| 2892 | if (TASK_NICE(p) > 0) { | ||
| 2893 | cpustat[CPUTIME_NICE] += (__force u64) cputime; | ||
| 2894 | cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime; | ||
| 2895 | } else { | ||
| 2896 | cpustat[CPUTIME_USER] += (__force u64) cputime; | ||
| 2897 | cpustat[CPUTIME_GUEST] += (__force u64) cputime; | ||
| 2898 | } | ||
| 2899 | } | ||
| 2900 | |||
| 2901 | /* | ||
| 2902 | * Account system cpu time to a process and desired cpustat field | ||
| 2903 | * @p: the process that the cpu time gets accounted to | ||
| 2904 | * @cputime: the cpu time spent in kernel space since the last update | ||
| 2905 | * @cputime_scaled: cputime scaled by cpu frequency | ||
| 2906 | * @target_cputime64: pointer to cpustat field that has to be updated | ||
| 2907 | */ | ||
| 2908 | static inline | ||
| 2909 | void __account_system_time(struct task_struct *p, cputime_t cputime, | ||
| 2910 | cputime_t cputime_scaled, int index) | ||
| 2911 | { | ||
| 2912 | /* Add system time to process. */ | ||
| 2913 | p->stime += cputime; | ||
| 2914 | p->stimescaled += cputime_scaled; | ||
| 2915 | account_group_system_time(p, cputime); | ||
| 2916 | |||
| 2917 | /* Add system time to cpustat. */ | ||
| 2918 | task_group_account_field(p, index, (__force u64) cputime); | ||
| 2919 | |||
| 2920 | /* Account for system time used */ | ||
| 2921 | acct_update_integrals(p); | ||
| 2922 | } | ||
| 2923 | |||
| 2924 | /* | ||
| 2925 | * Account system cpu time to a process. | ||
| 2926 | * @p: the process that the cpu time gets accounted to | ||
| 2927 | * @hardirq_offset: the offset to subtract from hardirq_count() | ||
| 2928 | * @cputime: the cpu time spent in kernel space since the last update | ||
| 2929 | * @cputime_scaled: cputime scaled by cpu frequency | ||
| 2930 | */ | ||
| 2931 | void account_system_time(struct task_struct *p, int hardirq_offset, | ||
| 2932 | cputime_t cputime, cputime_t cputime_scaled) | ||
| 2933 | { | ||
| 2934 | int index; | ||
| 2935 | |||
| 2936 | if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { | ||
| 2937 | account_guest_time(p, cputime, cputime_scaled); | ||
| 2938 | return; | ||
| 2939 | } | ||
| 2940 | |||
| 2941 | if (hardirq_count() - hardirq_offset) | ||
| 2942 | index = CPUTIME_IRQ; | ||
| 2943 | else if (in_serving_softirq()) | ||
| 2944 | index = CPUTIME_SOFTIRQ; | ||
| 2945 | else | ||
| 2946 | index = CPUTIME_SYSTEM; | ||
| 2947 | |||
| 2948 | __account_system_time(p, cputime, cputime_scaled, index); | ||
| 2949 | } | ||
| 2950 | |||
| 2951 | /* | ||
| 2952 | * Account for involuntary wait time. | ||
| 2953 | * @cputime: the cpu time spent in involuntary wait | ||
| 2954 | */ | ||
| 2955 | void account_steal_time(cputime_t cputime) | ||
| 2956 | { | ||
| 2957 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
| 2958 | |||
| 2959 | cpustat[CPUTIME_STEAL] += (__force u64) cputime; | ||
| 2960 | } | ||
| 2961 | |||
| 2962 | /* | ||
| 2963 | * Account for idle time. | ||
| 2964 | * @cputime: the cpu time spent in idle wait | ||
| 2965 | */ | ||
| 2966 | void account_idle_time(cputime_t cputime) | ||
| 2967 | { | ||
| 2968 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
| 2969 | struct rq *rq = this_rq(); | ||
| 2970 | |||
| 2971 | if (atomic_read(&rq->nr_iowait) > 0) | ||
| 2972 | cpustat[CPUTIME_IOWAIT] += (__force u64) cputime; | ||
| 2973 | else | ||
| 2974 | cpustat[CPUTIME_IDLE] += (__force u64) cputime; | ||
| 2975 | } | ||
| 2976 | |||
| 2977 | static __always_inline bool steal_account_process_tick(void) | ||
| 2978 | { | ||
| 2979 | #ifdef CONFIG_PARAVIRT | ||
| 2980 | if (static_key_false(¶virt_steal_enabled)) { | ||
| 2981 | u64 steal, st = 0; | ||
| 2982 | |||
| 2983 | steal = paravirt_steal_clock(smp_processor_id()); | ||
| 2984 | steal -= this_rq()->prev_steal_time; | ||
| 2985 | |||
| 2986 | st = steal_ticks(steal); | ||
| 2987 | this_rq()->prev_steal_time += st * TICK_NSEC; | ||
| 2988 | |||
| 2989 | account_steal_time(st); | ||
| 2990 | return st; | ||
| 2991 | } | ||
| 2992 | #endif | ||
| 2993 | return false; | ||
| 2994 | } | ||
| 2995 | |||
| 2996 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | ||
| 2997 | |||
| 2998 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
| 2999 | /* | ||
| 3000 | * Account a tick to a process and cpustat | ||
| 3001 | * @p: the process that the cpu time gets accounted to | ||
| 3002 | * @user_tick: is the tick from userspace | ||
| 3003 | * @rq: the pointer to rq | ||
| 3004 | * | ||
| 3005 | * Tick demultiplexing follows the order | ||
| 3006 | * - pending hardirq update | ||
| 3007 | * - pending softirq update | ||
| 3008 | * - user_time | ||
| 3009 | * - idle_time | ||
| 3010 | * - system time | ||
| 3011 | * - check for guest_time | ||
| 3012 | * - else account as system_time | ||
| 3013 | * | ||
| 3014 | * Check for hardirq is done both for system and user time as there is | ||
| 3015 | * no timer going off while we are on hardirq and hence we may never get an | ||
| 3016 | * opportunity to update it solely in system time. | ||
| 3017 | * p->stime and friends are only updated on system time and not on irq | ||
| 3018 | * softirq as those do not count in task exec_runtime any more. | ||
| 3019 | */ | ||
| 3020 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | ||
| 3021 | struct rq *rq) | ||
| 3022 | { | ||
| 3023 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | ||
| 3024 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
| 3025 | |||
| 3026 | if (steal_account_process_tick()) | ||
| 3027 | return; | ||
| 3028 | |||
| 3029 | if (irqtime_account_hi_update()) { | ||
| 3030 | cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy; | ||
| 3031 | } else if (irqtime_account_si_update()) { | ||
| 3032 | cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy; | ||
| 3033 | } else if (this_cpu_ksoftirqd() == p) { | ||
| 3034 | /* | ||
| 3035 | * ksoftirqd time do not get accounted in cpu_softirq_time. | ||
| 3036 | * So, we have to handle it separately here. | ||
| 3037 | * Also, p->stime needs to be updated for ksoftirqd. | ||
| 3038 | */ | ||
| 3039 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | ||
| 3040 | CPUTIME_SOFTIRQ); | ||
| 3041 | } else if (user_tick) { | ||
| 3042 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | ||
| 3043 | } else if (p == rq->idle) { | ||
| 3044 | account_idle_time(cputime_one_jiffy); | ||
| 3045 | } else if (p->flags & PF_VCPU) { /* System time or guest time */ | ||
| 3046 | account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); | ||
| 3047 | } else { | ||
| 3048 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | ||
| 3049 | CPUTIME_SYSTEM); | ||
| 3050 | } | ||
| 3051 | } | ||
| 3052 | |||
| 3053 | static void irqtime_account_idle_ticks(int ticks) | ||
| 3054 | { | ||
| 3055 | int i; | ||
| 3056 | struct rq *rq = this_rq(); | ||
| 3057 | |||
| 3058 | for (i = 0; i < ticks; i++) | ||
| 3059 | irqtime_account_process_tick(current, 0, rq); | ||
| 3060 | } | ||
| 3061 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
| 3062 | static void irqtime_account_idle_ticks(int ticks) {} | ||
| 3063 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | ||
| 3064 | struct rq *rq) {} | ||
| 3065 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
| 3066 | |||
| 3067 | /* | ||
| 3068 | * Account a single tick of cpu time. | ||
| 3069 | * @p: the process that the cpu time gets accounted to | ||
| 3070 | * @user_tick: indicates if the tick is a user or a system tick | ||
| 3071 | */ | ||
| 3072 | void account_process_tick(struct task_struct *p, int user_tick) | ||
| 3073 | { | ||
| 3074 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | ||
| 3075 | struct rq *rq = this_rq(); | ||
| 3076 | |||
| 3077 | if (sched_clock_irqtime) { | ||
| 3078 | irqtime_account_process_tick(p, user_tick, rq); | ||
| 3079 | return; | ||
| 3080 | } | ||
| 3081 | |||
| 3082 | if (steal_account_process_tick()) | ||
| 3083 | return; | ||
| 3084 | |||
| 3085 | if (user_tick) | ||
| 3086 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | ||
| 3087 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) | ||
| 3088 | account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy, | ||
| 3089 | one_jiffy_scaled); | ||
| 3090 | else | ||
| 3091 | account_idle_time(cputime_one_jiffy); | ||
| 3092 | } | ||
| 3093 | |||
| 3094 | /* | ||
| 3095 | * Account multiple ticks of steal time. | ||
| 3096 | * @p: the process from which the cpu time has been stolen | ||
| 3097 | * @ticks: number of stolen ticks | ||
| 3098 | */ | ||
| 3099 | void account_steal_ticks(unsigned long ticks) | ||
| 3100 | { | ||
| 3101 | account_steal_time(jiffies_to_cputime(ticks)); | ||
| 3102 | } | ||
| 3103 | |||
| 3104 | /* | ||
| 3105 | * Account multiple ticks of idle time. | ||
| 3106 | * @ticks: number of stolen ticks | ||
| 3107 | */ | ||
| 3108 | void account_idle_ticks(unsigned long ticks) | ||
| 3109 | { | ||
| 3110 | |||
| 3111 | if (sched_clock_irqtime) { | ||
| 3112 | irqtime_account_idle_ticks(ticks); | ||
| 3113 | return; | ||
| 3114 | } | ||
| 3115 | |||
| 3116 | account_idle_time(jiffies_to_cputime(ticks)); | ||
| 3117 | } | ||
| 3118 | |||
| 3119 | #endif | ||
| 3120 | |||
| 3121 | /* | ||
| 3122 | * Use precise platform statistics if available: | ||
| 3123 | */ | ||
| 3124 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | ||
| 3125 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
| 3126 | { | ||
| 3127 | *ut = p->utime; | ||
| 3128 | *st = p->stime; | ||
| 3129 | } | ||
| 3130 | |||
| 3131 | void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
| 3132 | { | ||
| 3133 | struct task_cputime cputime; | ||
| 3134 | |||
| 3135 | thread_group_cputime(p, &cputime); | ||
| 3136 | |||
| 3137 | *ut = cputime.utime; | ||
| 3138 | *st = cputime.stime; | ||
| 3139 | } | ||
| 3140 | #else | ||
| 3141 | |||
| 3142 | #ifndef nsecs_to_cputime | ||
| 3143 | # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) | ||
| 3144 | #endif | ||
| 3145 | |||
| 3146 | static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) | ||
| 3147 | { | ||
| 3148 | u64 temp = (__force u64) rtime; | ||
| 3149 | |||
| 3150 | temp *= (__force u64) utime; | ||
| 3151 | |||
| 3152 | if (sizeof(cputime_t) == 4) | ||
| 3153 | temp = div_u64(temp, (__force u32) total); | ||
| 3154 | else | ||
| 3155 | temp = div64_u64(temp, (__force u64) total); | ||
| 3156 | |||
| 3157 | return (__force cputime_t) temp; | ||
| 3158 | } | ||
| 3159 | |||
| 3160 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
| 3161 | { | ||
| 3162 | cputime_t rtime, utime = p->utime, total = utime + p->stime; | ||
| 3163 | |||
| 3164 | /* | ||
| 3165 | * Use CFS's precise accounting: | ||
| 3166 | */ | ||
| 3167 | rtime = nsecs_to_cputime(p->se.sum_exec_runtime); | ||
| 3168 | |||
| 3169 | if (total) | ||
| 3170 | utime = scale_utime(utime, rtime, total); | ||
| 3171 | else | ||
| 3172 | utime = rtime; | ||
| 3173 | |||
| 3174 | /* | ||
| 3175 | * Compare with previous values, to keep monotonicity: | ||
| 3176 | */ | ||
| 3177 | p->prev_utime = max(p->prev_utime, utime); | ||
| 3178 | p->prev_stime = max(p->prev_stime, rtime - p->prev_utime); | ||
| 3179 | |||
| 3180 | *ut = p->prev_utime; | ||
| 3181 | *st = p->prev_stime; | ||
| 3182 | } | ||
| 3183 | |||
| 3184 | /* | ||
| 3185 | * Must be called with siglock held. | ||
| 3186 | */ | ||
| 3187 | void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
| 3188 | { | ||
| 3189 | struct signal_struct *sig = p->signal; | ||
| 3190 | struct task_cputime cputime; | ||
| 3191 | cputime_t rtime, utime, total; | ||
| 3192 | |||
| 3193 | thread_group_cputime(p, &cputime); | ||
| 3194 | |||
| 3195 | total = cputime.utime + cputime.stime; | ||
| 3196 | rtime = nsecs_to_cputime(cputime.sum_exec_runtime); | ||
| 3197 | |||
| 3198 | if (total) | ||
| 3199 | utime = scale_utime(cputime.utime, rtime, total); | ||
| 3200 | else | ||
| 3201 | utime = rtime; | ||
| 3202 | |||
| 3203 | sig->prev_utime = max(sig->prev_utime, utime); | ||
| 3204 | sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime); | ||
| 3205 | |||
| 3206 | *ut = sig->prev_utime; | ||
| 3207 | *st = sig->prev_stime; | ||
| 3208 | } | ||
| 3209 | #endif | ||
| 3210 | |||
| 3211 | /* | 2619 | /* |
| 3212 | * This function gets called by the timer code, with HZ frequency. | 2620 | * This function gets called by the timer code, with HZ frequency. |
| 3213 | * We call it with interrupts disabled. | 2621 | * We call it with interrupts disabled. |
| @@ -3368,6 +2776,40 @@ pick_next_task(struct rq *rq) | |||
| 3368 | 2776 | ||
| 3369 | /* | 2777 | /* |
| 3370 | * __schedule() is the main scheduler function. | 2778 | * __schedule() is the main scheduler function. |
| 2779 | * | ||
| 2780 | * The main means of driving the scheduler and thus entering this function are: | ||
| 2781 | * | ||
| 2782 | * 1. Explicit blocking: mutex, semaphore, waitqueue, etc. | ||
| 2783 | * | ||
| 2784 | * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return | ||
| 2785 | * paths. For example, see arch/x86/entry_64.S. | ||
| 2786 | * | ||
| 2787 | * To drive preemption between tasks, the scheduler sets the flag in timer | ||
| 2788 | * interrupt handler scheduler_tick(). | ||
| 2789 | * | ||
| 2790 | * 3. Wakeups don't really cause entry into schedule(). They add a | ||
| 2791 | * task to the run-queue and that's it. | ||
| 2792 | * | ||
| 2793 | * Now, if the new task added to the run-queue preempts the current | ||
| 2794 | * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets | ||
| 2795 | * called on the nearest possible occasion: | ||
| 2796 | * | ||
| 2797 | * - If the kernel is preemptible (CONFIG_PREEMPT=y): | ||
| 2798 | * | ||
| 2799 | * - in syscall or exception context, at the next outmost | ||
| 2800 | * preempt_enable(). (this might be as soon as the wake_up()'s | ||
| 2801 | * spin_unlock()!) | ||
| 2802 | * | ||
| 2803 | * - in IRQ context, return from interrupt-handler to | ||
| 2804 | * preemptible context | ||
| 2805 | * | ||
| 2806 | * - If the kernel is not preemptible (CONFIG_PREEMPT is not set) | ||
| 2807 | * then at the next: | ||
| 2808 | * | ||
| 2809 | * - cond_resched() call | ||
| 2810 | * - explicit schedule() call | ||
| 2811 | * - return from syscall or exception to user-space | ||
| 2812 | * - return from interrupt-handler to user-space | ||
| 3371 | */ | 2813 | */ |
| 3372 | static void __sched __schedule(void) | 2814 | static void __sched __schedule(void) |
| 3373 | { | 2815 | { |
| @@ -4885,13 +4327,6 @@ again: | |||
| 4885 | */ | 4327 | */ |
| 4886 | if (preempt && rq != p_rq) | 4328 | if (preempt && rq != p_rq) |
| 4887 | resched_task(p_rq->curr); | 4329 | resched_task(p_rq->curr); |
| 4888 | } else { | ||
| 4889 | /* | ||
| 4890 | * We might have set it in task_yield_fair(), but are | ||
| 4891 | * not going to schedule(), so don't want to skip | ||
| 4892 | * the next update. | ||
| 4893 | */ | ||
| 4894 | rq->skip_clock_update = 0; | ||
| 4895 | } | 4330 | } |
| 4896 | 4331 | ||
| 4897 | out: | 4332 | out: |
| @@ -5433,16 +4868,25 @@ static void sd_free_ctl_entry(struct ctl_table **tablep) | |||
| 5433 | *tablep = NULL; | 4868 | *tablep = NULL; |
| 5434 | } | 4869 | } |
| 5435 | 4870 | ||
| 4871 | static int min_load_idx = 0; | ||
| 4872 | static int max_load_idx = CPU_LOAD_IDX_MAX; | ||
| 4873 | |||
| 5436 | static void | 4874 | static void |
| 5437 | set_table_entry(struct ctl_table *entry, | 4875 | set_table_entry(struct ctl_table *entry, |
| 5438 | const char *procname, void *data, int maxlen, | 4876 | const char *procname, void *data, int maxlen, |
| 5439 | umode_t mode, proc_handler *proc_handler) | 4877 | umode_t mode, proc_handler *proc_handler, |
| 4878 | bool load_idx) | ||
| 5440 | { | 4879 | { |
| 5441 | entry->procname = procname; | 4880 | entry->procname = procname; |
| 5442 | entry->data = data; | 4881 | entry->data = data; |
| 5443 | entry->maxlen = maxlen; | 4882 | entry->maxlen = maxlen; |
| 5444 | entry->mode = mode; | 4883 | entry->mode = mode; |
| 5445 | entry->proc_handler = proc_handler; | 4884 | entry->proc_handler = proc_handler; |
| 4885 | |||
| 4886 | if (load_idx) { | ||
| 4887 | entry->extra1 = &min_load_idx; | ||
| 4888 | entry->extra2 = &max_load_idx; | ||
| 4889 | } | ||
| 5446 | } | 4890 | } |
| 5447 | 4891 | ||
| 5448 | static struct ctl_table * | 4892 | static struct ctl_table * |
| @@ -5454,30 +4898,30 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd) | |||
| 5454 | return NULL; | 4898 | return NULL; |
| 5455 | 4899 | ||
| 5456 | set_table_entry(&table[0], "min_interval", &sd->min_interval, | 4900 | set_table_entry(&table[0], "min_interval", &sd->min_interval, |
| 5457 | sizeof(long), 0644, proc_doulongvec_minmax); | 4901 | sizeof(long), 0644, proc_doulongvec_minmax, false); |
| 5458 | set_table_entry(&table[1], "max_interval", &sd->max_interval, | 4902 | set_table_entry(&table[1], "max_interval", &sd->max_interval, |
| 5459 | sizeof(long), 0644, proc_doulongvec_minmax); | 4903 | sizeof(long), 0644, proc_doulongvec_minmax, false); |
| 5460 | set_table_entry(&table[2], "busy_idx", &sd->busy_idx, | 4904 | set_table_entry(&table[2], "busy_idx", &sd->busy_idx, |
| 5461 | sizeof(int), 0644, proc_dointvec_minmax); | 4905 | sizeof(int), 0644, proc_dointvec_minmax, true); |
| 5462 | set_table_entry(&table[3], "idle_idx", &sd->idle_idx, | 4906 | set_table_entry(&table[3], "idle_idx", &sd->idle_idx, |
| 5463 | sizeof(int), 0644, proc_dointvec_minmax); | 4907 | sizeof(int), 0644, proc_dointvec_minmax, true); |
| 5464 | set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx, | 4908 | set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx, |
| 5465 | sizeof(int), 0644, proc_dointvec_minmax); | 4909 | sizeof(int), 0644, proc_dointvec_minmax, true); |
| 5466 | set_table_entry(&table[5], "wake_idx", &sd->wake_idx, | 4910 | set_table_entry(&table[5], "wake_idx", &sd->wake_idx, |
| 5467 | sizeof(int), 0644, proc_dointvec_minmax); | 4911 | sizeof(int), 0644, proc_dointvec_minmax, true); |
| 5468 | set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx, | 4912 | set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx, |
| 5469 | sizeof(int), 0644, proc_dointvec_minmax); | 4913 | sizeof(int), 0644, proc_dointvec_minmax, true); |
| 5470 | set_table_entry(&table[7], "busy_factor", &sd->busy_factor, | 4914 | set_table_entry(&table[7], "busy_factor", &sd->busy_factor, |
| 5471 | sizeof(int), 0644, proc_dointvec_minmax); | 4915 | sizeof(int), 0644, proc_dointvec_minmax, false); |
| 5472 | set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct, | 4916 | set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct, |
| 5473 | sizeof(int), 0644, proc_dointvec_minmax); | 4917 | sizeof(int), 0644, proc_dointvec_minmax, false); |
| 5474 | set_table_entry(&table[9], "cache_nice_tries", | 4918 | set_table_entry(&table[9], "cache_nice_tries", |
| 5475 | &sd->cache_nice_tries, | 4919 | &sd->cache_nice_tries, |
| 5476 | sizeof(int), 0644, proc_dointvec_minmax); | 4920 | sizeof(int), 0644, proc_dointvec_minmax, false); |
| 5477 | set_table_entry(&table[10], "flags", &sd->flags, | 4921 | set_table_entry(&table[10], "flags", &sd->flags, |
| 5478 | sizeof(int), 0644, proc_dointvec_minmax); | 4922 | sizeof(int), 0644, proc_dointvec_minmax, false); |
| 5479 | set_table_entry(&table[11], "name", sd->name, | 4923 | set_table_entry(&table[11], "name", sd->name, |
| 5480 | CORENAME_MAX_SIZE, 0444, proc_dostring); | 4924 | CORENAME_MAX_SIZE, 0444, proc_dostring, false); |
| 5481 | /* &table[12] is terminator */ | 4925 | /* &table[12] is terminator */ |
| 5482 | 4926 | ||
| 5483 | return table; | 4927 | return table; |
| @@ -6556,7 +6000,6 @@ sd_numa_init(struct sched_domain_topology_level *tl, int cpu) | |||
| 6556 | | 0*SD_BALANCE_FORK | 6000 | | 0*SD_BALANCE_FORK |
| 6557 | | 0*SD_BALANCE_WAKE | 6001 | | 0*SD_BALANCE_WAKE |
| 6558 | | 0*SD_WAKE_AFFINE | 6002 | | 0*SD_WAKE_AFFINE |
| 6559 | | 0*SD_PREFER_LOCAL | ||
| 6560 | | 0*SD_SHARE_CPUPOWER | 6003 | | 0*SD_SHARE_CPUPOWER |
| 6561 | | 0*SD_SHARE_PKG_RESOURCES | 6004 | | 0*SD_SHARE_PKG_RESOURCES |
| 6562 | | 1*SD_SERIALIZE | 6005 | | 1*SD_SERIALIZE |
| @@ -8354,6 +7797,8 @@ struct cgroup_subsys cpu_cgroup_subsys = { | |||
| 8354 | * (balbir@in.ibm.com). | 7797 | * (balbir@in.ibm.com). |
| 8355 | */ | 7798 | */ |
| 8356 | 7799 | ||
| 7800 | struct cpuacct root_cpuacct; | ||
| 7801 | |||
| 8357 | /* create a new cpu accounting group */ | 7802 | /* create a new cpu accounting group */ |
| 8358 | static struct cgroup_subsys_state *cpuacct_create(struct cgroup *cgrp) | 7803 | static struct cgroup_subsys_state *cpuacct_create(struct cgroup *cgrp) |
| 8359 | { | 7804 | { |
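The comment at the top of the irqtime block removed from core.c above (and re-added in cputime.c below) describes lockless per-CPU hardirq/softirq counters: only the owning CPU writes them, with interrupts disabled, and remote readers on 32-bit retry under a seqcount. As a rough user-space analogue of that retry protocol (an illustrative sketch using C11 atomics, not the kernel's seqcount_t/smp_wmb() implementation):

#include <stdatomic.h>
#include <stdint.h>

/* Sketch only: seq_cst C11 atomics stand in for the kernel's per-CPU
 * seqcount_t and write barriers. */
struct irqtime {
	atomic_uint seq;                 /* odd while an update is in flight */
	_Atomic uint64_t hardirq_time;
	_Atomic uint64_t softirq_time;
};

/* Writer side: in the kernel this runs only on the owning CPU with
 * interrupts off, bracketed by irq_time_write_begin()/_end(). */
static void irqtime_add(struct irqtime *it, uint64_t hi, uint64_t si)
{
	atomic_fetch_add(&it->seq, 1);          /* begin: sequence goes odd  */
	atomic_fetch_add(&it->hardirq_time, hi);
	atomic_fetch_add(&it->softirq_time, si);
	atomic_fetch_add(&it->seq, 1);          /* end: sequence goes even   */
}

/* Reader side: any CPU; retry until an even, unchanged sequence is seen,
 * like the 32-bit irq_time_read(). */
static uint64_t irqtime_read(struct irqtime *it)
{
	unsigned int seq;
	uint64_t sum;

	do {
		seq = atomic_load(&it->seq);
		sum = atomic_load(&it->hardirq_time) +
		      atomic_load(&it->softirq_time);
	} while ((seq & 1) || seq != atomic_load(&it->seq));

	return sum;
}

On 64-bit the kernel skips the sequence counter entirely (the CONFIG_64BIT branch above), since a single u64 load is already atomic there.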
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
new file mode 100644
index 000000000000..81b763ba58a6
--- /dev/null
+++ b/kernel/sched/cputime.c
| @@ -0,0 +1,530 @@ | |||
| 1 | #include <linux/export.h> | ||
| 2 | #include <linux/sched.h> | ||
| 3 | #include <linux/tsacct_kern.h> | ||
| 4 | #include <linux/kernel_stat.h> | ||
| 5 | #include <linux/static_key.h> | ||
| 6 | #include "sched.h" | ||
| 7 | |||
| 8 | |||
| 9 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
| 10 | |||
| 11 | /* | ||
| 12 | * There are no locks covering percpu hardirq/softirq time. | ||
| 13 | * They are only modified in vtime_account, on corresponding CPU | ||
| 14 | * with interrupts disabled. So, writes are safe. | ||
| 15 | * They are read and saved off onto struct rq in update_rq_clock(). | ||
| 16 | * This may result in other CPU reading this CPU's irq time and can | ||
| 17 | * race with irq/vtime_account on this CPU. We would either get old | ||
| 18 | * or new value with a side effect of accounting a slice of irq time to wrong | ||
| 19 | * task when irq is in progress while we read rq->clock. That is a worthy | ||
| 20 | * compromise in place of having locks on each irq in account_system_time. | ||
| 21 | */ | ||
| 22 | DEFINE_PER_CPU(u64, cpu_hardirq_time); | ||
| 23 | DEFINE_PER_CPU(u64, cpu_softirq_time); | ||
| 24 | |||
| 25 | static DEFINE_PER_CPU(u64, irq_start_time); | ||
| 26 | static int sched_clock_irqtime; | ||
| 27 | |||
| 28 | void enable_sched_clock_irqtime(void) | ||
| 29 | { | ||
| 30 | sched_clock_irqtime = 1; | ||
| 31 | } | ||
| 32 | |||
| 33 | void disable_sched_clock_irqtime(void) | ||
| 34 | { | ||
| 35 | sched_clock_irqtime = 0; | ||
| 36 | } | ||
| 37 | |||
| 38 | #ifndef CONFIG_64BIT | ||
| 39 | DEFINE_PER_CPU(seqcount_t, irq_time_seq); | ||
| 40 | #endif /* CONFIG_64BIT */ | ||
| 41 | |||
| 42 | /* | ||
| 43 | * Called before incrementing preempt_count on {soft,}irq_enter | ||
| 44 | * and before decrementing preempt_count on {soft,}irq_exit. | ||
| 45 | */ | ||
| 46 | void vtime_account(struct task_struct *curr) | ||
| 47 | { | ||
| 48 | unsigned long flags; | ||
| 49 | s64 delta; | ||
| 50 | int cpu; | ||
| 51 | |||
| 52 | if (!sched_clock_irqtime) | ||
| 53 | return; | ||
| 54 | |||
| 55 | local_irq_save(flags); | ||
| 56 | |||
| 57 | cpu = smp_processor_id(); | ||
| 58 | delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); | ||
| 59 | __this_cpu_add(irq_start_time, delta); | ||
| 60 | |||
| 61 | irq_time_write_begin(); | ||
| 62 | /* | ||
| 63 | * We do not account for softirq time from ksoftirqd here. | ||
| 64 | * We want to continue accounting softirq time to ksoftirqd thread | ||
| 65 | * in that case, so as not to confuse scheduler with a special task | ||
| 66 | * that do not consume any time, but still wants to run. | ||
| 67 | */ | ||
| 68 | if (hardirq_count()) | ||
| 69 | __this_cpu_add(cpu_hardirq_time, delta); | ||
| 70 | else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) | ||
| 71 | __this_cpu_add(cpu_softirq_time, delta); | ||
| 72 | |||
| 73 | irq_time_write_end(); | ||
| 74 | local_irq_restore(flags); | ||
| 75 | } | ||
| 76 | EXPORT_SYMBOL_GPL(vtime_account); | ||
| 77 | |||
| 78 | static int irqtime_account_hi_update(void) | ||
| 79 | { | ||
| 80 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
| 81 | unsigned long flags; | ||
| 82 | u64 latest_ns; | ||
| 83 | int ret = 0; | ||
| 84 | |||
| 85 | local_irq_save(flags); | ||
| 86 | latest_ns = this_cpu_read(cpu_hardirq_time); | ||
| 87 | if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ]) | ||
| 88 | ret = 1; | ||
| 89 | local_irq_restore(flags); | ||
| 90 | return ret; | ||
| 91 | } | ||
| 92 | |||
| 93 | static int irqtime_account_si_update(void) | ||
| 94 | { | ||
| 95 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
| 96 | unsigned long flags; | ||
| 97 | u64 latest_ns; | ||
| 98 | int ret = 0; | ||
| 99 | |||
| 100 | local_irq_save(flags); | ||
| 101 | latest_ns = this_cpu_read(cpu_softirq_time); | ||
| 102 | if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ]) | ||
| 103 | ret = 1; | ||
| 104 | local_irq_restore(flags); | ||
| 105 | return ret; | ||
| 106 | } | ||
| 107 | |||
| 108 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
| 109 | |||
| 110 | #define sched_clock_irqtime (0) | ||
| 111 | |||
| 112 | #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */ | ||
| 113 | |||
| 114 | static inline void task_group_account_field(struct task_struct *p, int index, | ||
| 115 | u64 tmp) | ||
| 116 | { | ||
| 117 | #ifdef CONFIG_CGROUP_CPUACCT | ||
| 118 | struct kernel_cpustat *kcpustat; | ||
| 119 | struct cpuacct *ca; | ||
| 120 | #endif | ||
| 121 | /* | ||
| 122 | * Since all updates are sure to touch the root cgroup, we | ||
| 123 | * get ourselves ahead and touch it first. If the root cgroup | ||
| 124 | * is the only cgroup, then nothing else should be necessary. | ||
| 125 | * | ||
| 126 | */ | ||
| 127 | __get_cpu_var(kernel_cpustat).cpustat[index] += tmp; | ||
| 128 | |||
| 129 | #ifdef CONFIG_CGROUP_CPUACCT | ||
| 130 | if (unlikely(!cpuacct_subsys.active)) | ||
| 131 | return; | ||
| 132 | |||
| 133 | rcu_read_lock(); | ||
| 134 | ca = task_ca(p); | ||
| 135 | while (ca && (ca != &root_cpuacct)) { | ||
| 136 | kcpustat = this_cpu_ptr(ca->cpustat); | ||
| 137 | kcpustat->cpustat[index] += tmp; | ||
| 138 | ca = parent_ca(ca); | ||
| 139 | } | ||
| 140 | rcu_read_unlock(); | ||
| 141 | #endif | ||
| 142 | } | ||
| 143 | |||
| 144 | /* | ||
| 145 | * Account user cpu time to a process. | ||
| 146 | * @p: the process that the cpu time gets accounted to | ||
| 147 | * @cputime: the cpu time spent in user space since the last update | ||
| 148 | * @cputime_scaled: cputime scaled by cpu frequency | ||
| 149 | */ | ||
| 150 | void account_user_time(struct task_struct *p, cputime_t cputime, | ||
| 151 | cputime_t cputime_scaled) | ||
| 152 | { | ||
| 153 | int index; | ||
| 154 | |||
| 155 | /* Add user time to process. */ | ||
| 156 | p->utime += cputime; | ||
| 157 | p->utimescaled += cputime_scaled; | ||
| 158 | account_group_user_time(p, cputime); | ||
| 159 | |||
| 160 | index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; | ||
| 161 | |||
| 162 | /* Add user time to cpustat. */ | ||
| 163 | task_group_account_field(p, index, (__force u64) cputime); | ||
| 164 | |||
| 165 | /* Account for user time used */ | ||
| 166 | acct_update_integrals(p); | ||
| 167 | } | ||
| 168 | |||
| 169 | /* | ||
| 170 | * Account guest cpu time to a process. | ||
| 171 | * @p: the process that the cpu time gets accounted to | ||
| 172 | * @cputime: the cpu time spent in virtual machine since the last update | ||
| 173 | * @cputime_scaled: cputime scaled by cpu frequency | ||
| 174 | */ | ||
| 175 | static void account_guest_time(struct task_struct *p, cputime_t cputime, | ||
| 176 | cputime_t cputime_scaled) | ||
| 177 | { | ||
| 178 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
| 179 | |||
| 180 | /* Add guest time to process. */ | ||
| 181 | p->utime += cputime; | ||
| 182 | p->utimescaled += cputime_scaled; | ||
| 183 | account_group_user_time(p, cputime); | ||
| 184 | p->gtime += cputime; | ||
| 185 | |||
| 186 | /* Add guest time to cpustat. */ | ||
| 187 | if (TASK_NICE(p) > 0) { | ||
| 188 | cpustat[CPUTIME_NICE] += (__force u64) cputime; | ||
| 189 | cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime; | ||
| 190 | } else { | ||
| 191 | cpustat[CPUTIME_USER] += (__force u64) cputime; | ||
| 192 | cpustat[CPUTIME_GUEST] += (__force u64) cputime; | ||
| 193 | } | ||
| 194 | } | ||
| 195 | |||
| 196 | /* | ||
| 197 | * Account system cpu time to a process and desired cpustat field | ||
| 198 | * @p: the process that the cpu time gets accounted to | ||
| 199 | * @cputime: the cpu time spent in kernel space since the last update | ||
| 200 | * @cputime_scaled: cputime scaled by cpu frequency | ||
| 201 | * @target_cputime64: pointer to cpustat field that has to be updated | ||
| 202 | */ | ||
| 203 | static inline | ||
| 204 | void __account_system_time(struct task_struct *p, cputime_t cputime, | ||
| 205 | cputime_t cputime_scaled, int index) | ||
| 206 | { | ||
| 207 | /* Add system time to process. */ | ||
| 208 | p->stime += cputime; | ||
| 209 | p->stimescaled += cputime_scaled; | ||
| 210 | account_group_system_time(p, cputime); | ||
| 211 | |||
| 212 | /* Add system time to cpustat. */ | ||
| 213 | task_group_account_field(p, index, (__force u64) cputime); | ||
| 214 | |||
| 215 | /* Account for system time used */ | ||
| 216 | acct_update_integrals(p); | ||
| 217 | } | ||
| 218 | |||
| 219 | /* | ||
| 220 | * Account system cpu time to a process. | ||
| 221 | * @p: the process that the cpu time gets accounted to | ||
| 222 | * @hardirq_offset: the offset to subtract from hardirq_count() | ||
| 223 | * @cputime: the cpu time spent in kernel space since the last update | ||
| 224 | * @cputime_scaled: cputime scaled by cpu frequency | ||
| 225 | */ | ||
| 226 | void account_system_time(struct task_struct *p, int hardirq_offset, | ||
| 227 | cputime_t cputime, cputime_t cputime_scaled) | ||
| 228 | { | ||
| 229 | int index; | ||
| 230 | |||
| 231 | if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { | ||
| 232 | account_guest_time(p, cputime, cputime_scaled); | ||
| 233 | return; | ||
| 234 | } | ||
| 235 | |||
| 236 | if (hardirq_count() - hardirq_offset) | ||
| 237 | index = CPUTIME_IRQ; | ||
| 238 | else if (in_serving_softirq()) | ||
| 239 | index = CPUTIME_SOFTIRQ; | ||
| 240 | else | ||
| 241 | index = CPUTIME_SYSTEM; | ||
| 242 | |||
| 243 | __account_system_time(p, cputime, cputime_scaled, index); | ||
| 244 | } | ||
| 245 | |||
| 246 | /* | ||
| 247 | * Account for involuntary wait time. | ||
| 248 | * @cputime: the cpu time spent in involuntary wait | ||
| 249 | */ | ||
| 250 | void account_steal_time(cputime_t cputime) | ||
| 251 | { | ||
| 252 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
| 253 | |||
| 254 | cpustat[CPUTIME_STEAL] += (__force u64) cputime; | ||
| 255 | } | ||
| 256 | |||
| 257 | /* | ||
| 258 | * Account for idle time. | ||
| 259 | * @cputime: the cpu time spent in idle wait | ||
| 260 | */ | ||
| 261 | void account_idle_time(cputime_t cputime) | ||
| 262 | { | ||
| 263 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
| 264 | struct rq *rq = this_rq(); | ||
| 265 | |||
| 266 | if (atomic_read(&rq->nr_iowait) > 0) | ||
| 267 | cpustat[CPUTIME_IOWAIT] += (__force u64) cputime; | ||
| 268 | else | ||
| 269 | cpustat[CPUTIME_IDLE] += (__force u64) cputime; | ||
| 270 | } | ||
| 271 | |||
| 272 | static __always_inline bool steal_account_process_tick(void) | ||
| 273 | { | ||
| 274 | #ifdef CONFIG_PARAVIRT | ||
| 275 | if (static_key_false(¶virt_steal_enabled)) { | ||
| 276 | u64 steal, st = 0; | ||
| 277 | |||
| 278 | steal = paravirt_steal_clock(smp_processor_id()); | ||
| 279 | steal -= this_rq()->prev_steal_time; | ||
| 280 | |||
| 281 | st = steal_ticks(steal); | ||
| 282 | this_rq()->prev_steal_time += st * TICK_NSEC; | ||
| 283 | |||
| 284 | account_steal_time(st); | ||
| 285 | return st; | ||
| 286 | } | ||
| 287 | #endif | ||
| 288 | return false; | ||
| 289 | } | ||
| 290 | |||
| 291 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | ||
| 292 | |||
| 293 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
| 294 | /* | ||
| 295 | * Account a tick to a process and cpustat | ||
| 296 | * @p: the process that the cpu time gets accounted to | ||
| 297 | * @user_tick: is the tick from userspace | ||
| 298 | * @rq: the pointer to rq | ||
| 299 | * | ||
| 300 | * Tick demultiplexing follows the order | ||
| 301 | * - pending hardirq update | ||
| 302 | * - pending softirq update | ||
| 303 | * - user_time | ||
| 304 | * - idle_time | ||
| 305 | * - system time | ||
| 306 | * - check for guest_time | ||
| 307 | * - else account as system_time | ||
| 308 | * | ||
| 309 | * Check for hardirq is done both for system and user time as there is | ||
| 310 | * no timer going off while we are on hardirq and hence we may never get an | ||
| 311 | * opportunity to update it solely in system time. | ||
| 312 | * p->stime and friends are only updated on system time and not on irq | ||
| 313 | * softirq as those do not count in task exec_runtime any more. | ||
| 314 | */ | ||
| 315 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | ||
| 316 | struct rq *rq) | ||
| 317 | { | ||
| 318 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | ||
| 319 | u64 *cpustat = kcpustat_this_cpu->cpustat; | ||
| 320 | |||
| 321 | if (steal_account_process_tick()) | ||
| 322 | return; | ||
| 323 | |||
| 324 | if (irqtime_account_hi_update()) { | ||
| 325 | cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy; | ||
| 326 | } else if (irqtime_account_si_update()) { | ||
| 327 | cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy; | ||
| 328 | } else if (this_cpu_ksoftirqd() == p) { | ||
| 329 | /* | ||
| 330 | * ksoftirqd time do not get accounted in cpu_softirq_time. | ||
| 331 | * So, we have to handle it separately here. | ||
| 332 | * Also, p->stime needs to be updated for ksoftirqd. | ||
| 333 | */ | ||
| 334 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | ||
| 335 | CPUTIME_SOFTIRQ); | ||
| 336 | } else if (user_tick) { | ||
| 337 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | ||
| 338 | } else if (p == rq->idle) { | ||
| 339 | account_idle_time(cputime_one_jiffy); | ||
| 340 | } else if (p->flags & PF_VCPU) { /* System time or guest time */ | ||
| 341 | account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); | ||
| 342 | } else { | ||
| 343 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | ||
| 344 | CPUTIME_SYSTEM); | ||
| 345 | } | ||
| 346 | } | ||
| 347 | |||
| 348 | static void irqtime_account_idle_ticks(int ticks) | ||
| 349 | { | ||
| 350 | int i; | ||
| 351 | struct rq *rq = this_rq(); | ||
| 352 | |||
| 353 | for (i = 0; i < ticks; i++) | ||
| 354 | irqtime_account_process_tick(current, 0, rq); | ||
| 355 | } | ||
| 356 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
| 357 | static void irqtime_account_idle_ticks(int ticks) {} | ||
| 358 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | ||
| 359 | struct rq *rq) {} | ||
| 360 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
| 361 | |||
| 362 | /* | ||
| 363 | * Account a single tick of cpu time. | ||
| 364 | * @p: the process that the cpu time gets accounted to | ||
| 365 | * @user_tick: indicates if the tick is a user or a system tick | ||
| 366 | */ | ||
| 367 | void account_process_tick(struct task_struct *p, int user_tick) | ||
| 368 | { | ||
| 369 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | ||
| 370 | struct rq *rq = this_rq(); | ||
| 371 | |||
| 372 | if (sched_clock_irqtime) { | ||
| 373 | irqtime_account_process_tick(p, user_tick, rq); | ||
| 374 | return; | ||
| 375 | } | ||
| 376 | |||
| 377 | if (steal_account_process_tick()) | ||
| 378 | return; | ||
| 379 | |||
| 380 | if (user_tick) | ||
| 381 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | ||
| 382 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) | ||
| 383 | account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy, | ||
| 384 | one_jiffy_scaled); | ||
| 385 | else | ||
| 386 | account_idle_time(cputime_one_jiffy); | ||
| 387 | } | ||
| 388 | |||
| 389 | /* | ||
| 390 | * Account multiple ticks of steal time. | ||
| 391 | * @p: the process from which the cpu time has been stolen | ||
| 392 | * @ticks: number of stolen ticks | ||
| 393 | */ | ||
| 394 | void account_steal_ticks(unsigned long ticks) | ||
| 395 | { | ||
| 396 | account_steal_time(jiffies_to_cputime(ticks)); | ||
| 397 | } | ||
| 398 | |||
| 399 | /* | ||
| 400 | * Account multiple ticks of idle time. | ||
| 401 | * @ticks: number of stolen ticks | ||
| 402 | */ | ||
| 403 | void account_idle_ticks(unsigned long ticks) | ||
| 404 | { | ||
| 405 | |||
| 406 | if (sched_clock_irqtime) { | ||
| 407 | irqtime_account_idle_ticks(ticks); | ||
| 408 | return; | ||
| 409 | } | ||
| 410 | |||
| 411 | account_idle_time(jiffies_to_cputime(ticks)); | ||
| 412 | } | ||
| 413 | |||
| 414 | #endif | ||
| 415 | |||
| 416 | /* | ||
| 417 | * Use precise platform statistics if available: | ||
| 418 | */ | ||
| 419 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | ||
| 420 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
| 421 | { | ||
| 422 | *ut = p->utime; | ||
| 423 | *st = p->stime; | ||
| 424 | } | ||
| 425 | |||
| 426 | void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
| 427 | { | ||
| 428 | struct task_cputime cputime; | ||
| 429 | |||
| 430 | thread_group_cputime(p, &cputime); | ||
| 431 | |||
| 432 | *ut = cputime.utime; | ||
| 433 | *st = cputime.stime; | ||
| 434 | } | ||
| 435 | |||
| 436 | /* | ||
| 437 | * Archs that account the whole time spent in the idle task | ||
| 438 | * (outside irq) as idle time can rely on this and just implement | ||
| 439 | * vtime_account_system() and vtime_account_idle(). Archs that | ||
| 440 | * have other meaning of the idle time (s390 only includes the | ||
| 441 | * time spent by the CPU when it's in low power mode) must override | ||
| 442 | * vtime_account(). | ||
| 443 | */ | ||
| 444 | #ifndef __ARCH_HAS_VTIME_ACCOUNT | ||
| 445 | void vtime_account(struct task_struct *tsk) | ||
| 446 | { | ||
| 447 | unsigned long flags; | ||
| 448 | |||
| 449 | local_irq_save(flags); | ||
| 450 | |||
| 451 | if (in_interrupt() || !is_idle_task(tsk)) | ||
| 452 | vtime_account_system(tsk); | ||
| 453 | else | ||
| 454 | vtime_account_idle(tsk); | ||
| 455 | |||
| 456 | local_irq_restore(flags); | ||
| 457 | } | ||
| 458 | EXPORT_SYMBOL_GPL(vtime_account); | ||
| 459 | #endif /* __ARCH_HAS_VTIME_ACCOUNT */ | ||
| 460 | |||
| 461 | #else | ||
| 462 | |||
| 463 | #ifndef nsecs_to_cputime | ||
| 464 | # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) | ||
| 465 | #endif | ||
| 466 | |||
| 467 | static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) | ||
| 468 | { | ||
| 469 | u64 temp = (__force u64) rtime; | ||
| 470 | |||
| 471 | temp *= (__force u64) utime; | ||
| 472 | |||
| 473 | if (sizeof(cputime_t) == 4) | ||
| 474 | temp = div_u64(temp, (__force u32) total); | ||
| 475 | else | ||
| 476 | temp = div64_u64(temp, (__force u64) total); | ||
| 477 | |||
| 478 | return (__force cputime_t) temp; | ||
| 479 | } | ||
| 480 | |||
| 481 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
| 482 | { | ||
| 483 | cputime_t rtime, utime = p->utime, total = utime + p->stime; | ||
| 484 | |||
| 485 | /* | ||
| 486 | * Use CFS's precise accounting: | ||
| 487 | */ | ||
| 488 | rtime = nsecs_to_cputime(p->se.sum_exec_runtime); | ||
| 489 | |||
| 490 | if (total) | ||
| 491 | utime = scale_utime(utime, rtime, total); | ||
| 492 | else | ||
| 493 | utime = rtime; | ||
| 494 | |||
| 495 | /* | ||
| 496 | * Compare with previous values, to keep monotonicity: | ||
| 497 | */ | ||
| 498 | p->prev_utime = max(p->prev_utime, utime); | ||
| 499 | p->prev_stime = max(p->prev_stime, rtime - p->prev_utime); | ||
| 500 | |||
| 501 | *ut = p->prev_utime; | ||
| 502 | *st = p->prev_stime; | ||
| 503 | } | ||
| 504 | |||
| 505 | /* | ||
| 506 | * Must be called with siglock held. | ||
| 507 | */ | ||
| 508 | void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
| 509 | { | ||
| 510 | struct signal_struct *sig = p->signal; | ||
| 511 | struct task_cputime cputime; | ||
| 512 | cputime_t rtime, utime, total; | ||
| 513 | |||
| 514 | thread_group_cputime(p, &cputime); | ||
| 515 | |||
| 516 | total = cputime.utime + cputime.stime; | ||
| 517 | rtime = nsecs_to_cputime(cputime.sum_exec_runtime); | ||
| 518 | |||
| 519 | if (total) | ||
| 520 | utime = scale_utime(cputime.utime, rtime, total); | ||
| 521 | else | ||
| 522 | utime = rtime; | ||
| 523 | |||
| 524 | sig->prev_utime = max(sig->prev_utime, utime); | ||
| 525 | sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime); | ||
| 526 | |||
| 527 | *ut = sig->prev_utime; | ||
| 528 | *st = sig->prev_stime; | ||
| 529 | } | ||
| 530 | #endif | ||
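task_times() and thread_group_times() in the new file rescale the tick-sampled utime so that utime + stime matches the scheduler's precise sum_exec_runtime, then clamp against prev_utime/prev_stime for monotonicity. A small stand-alone sketch of that arithmetic, with hypothetical numbers and plain integers in place of cputime_t:

#include <stdint.h>
#include <stdio.h>

/* Same proportional split as scale_utime(): rtime * utime / total. */
static uint64_t scale_utime(uint64_t utime, uint64_t rtime, uint64_t total)
{
	return (rtime * utime) / total;
}

int main(void)
{
	uint64_t utime = 2, stime = 6;          /* tick-based samples        */
	uint64_t rtime = 12;                    /* precise runtime, in ticks */
	uint64_t total = utime + stime;
	uint64_t ut = total ? scale_utime(utime, rtime, total) : rtime;

	/* 12 * 2 / 8 = 3 user ticks; the remaining 9 are reported as system */
	printf("ut=%llu st=%llu\n",
	       (unsigned long long)ut, (unsigned long long)(rtime - ut));
	return 0;
}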
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 96e2b18b6283..6b800a14b990 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
| @@ -597,7 +597,7 @@ calc_delta_fair(unsigned long delta, struct sched_entity *se) | |||
| 597 | /* | 597 | /* |
| 598 | * The idea is to set a period in which each task runs once. | 598 | * The idea is to set a period in which each task runs once. |
| 599 | * | 599 | * |
| 600 | * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch | 600 | * When there are too many tasks (sched_nr_latency) we have to stretch |
| 601 | * this period because otherwise the slices get too small. | 601 | * this period because otherwise the slices get too small. |
| 602 | * | 602 | * |
| 603 | * p = (nr <= nl) ? l : l*nr/nl | 603 | * p = (nr <= nl) ? l : l*nr/nl |
| @@ -2700,7 +2700,6 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) | |||
| 2700 | int prev_cpu = task_cpu(p); | 2700 | int prev_cpu = task_cpu(p); |
| 2701 | int new_cpu = cpu; | 2701 | int new_cpu = cpu; |
| 2702 | int want_affine = 0; | 2702 | int want_affine = 0; |
| 2703 | int want_sd = 1; | ||
| 2704 | int sync = wake_flags & WF_SYNC; | 2703 | int sync = wake_flags & WF_SYNC; |
| 2705 | 2704 | ||
| 2706 | if (p->nr_cpus_allowed == 1) | 2705 | if (p->nr_cpus_allowed == 1) |
| @@ -2718,48 +2717,21 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) | |||
| 2718 | continue; | 2717 | continue; |
| 2719 | 2718 | ||
| 2720 | /* | 2719 | /* |
| 2721 | * If power savings logic is enabled for a domain, see if we | ||
| 2722 | * are not overloaded, if so, don't balance wider. | ||
| 2723 | */ | ||
| 2724 | if (tmp->flags & (SD_PREFER_LOCAL)) { | ||
| 2725 | unsigned long power = 0; | ||
| 2726 | unsigned long nr_running = 0; | ||
| 2727 | unsigned long capacity; | ||
| 2728 | int i; | ||
| 2729 | |||
| 2730 | for_each_cpu(i, sched_domain_span(tmp)) { | ||
| 2731 | power += power_of(i); | ||
| 2732 | nr_running += cpu_rq(i)->cfs.nr_running; | ||
| 2733 | } | ||
| 2734 | |||
| 2735 | capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE); | ||
| 2736 | |||
| 2737 | if (nr_running < capacity) | ||
| 2738 | want_sd = 0; | ||
| 2739 | } | ||
| 2740 | |||
| 2741 | /* | ||
| 2742 | * If both cpu and prev_cpu are part of this domain, | 2720 | * If both cpu and prev_cpu are part of this domain, |
| 2743 | * cpu is a valid SD_WAKE_AFFINE target. | 2721 | * cpu is a valid SD_WAKE_AFFINE target. |
| 2744 | */ | 2722 | */ |
| 2745 | if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && | 2723 | if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && |
| 2746 | cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { | 2724 | cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { |
| 2747 | affine_sd = tmp; | 2725 | affine_sd = tmp; |
| 2748 | want_affine = 0; | ||
| 2749 | } | ||
| 2750 | |||
| 2751 | if (!want_sd && !want_affine) | ||
| 2752 | break; | 2726 | break; |
| 2727 | } | ||
| 2753 | 2728 | ||
| 2754 | if (!(tmp->flags & sd_flag)) | 2729 | if (tmp->flags & sd_flag) |
| 2755 | continue; | ||
| 2756 | |||
| 2757 | if (want_sd) | ||
| 2758 | sd = tmp; | 2730 | sd = tmp; |
| 2759 | } | 2731 | } |
| 2760 | 2732 | ||
| 2761 | if (affine_sd) { | 2733 | if (affine_sd) { |
| 2762 | if (cpu == prev_cpu || wake_affine(affine_sd, p, sync)) | 2734 | if (cpu != prev_cpu && wake_affine(affine_sd, p, sync)) |
| 2763 | prev_cpu = cpu; | 2735 | prev_cpu = cpu; |
| 2764 | 2736 | ||
| 2765 | new_cpu = select_idle_sibling(p, prev_cpu); | 2737 | new_cpu = select_idle_sibling(p, prev_cpu); |
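
For reference, the domain walk that remains after deleting the SD_PREFER_LOCAL/want_sd heuristic is small enough to restate in isolation. A stand-alone sketch follows; the struct, flag values and spans_prev_cpu field are stand-ins, not the kernel's types.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Stand-in flags and domain type; not the kernel's definitions. */
#define SD_WAKE_AFFINE	0x01
#define SD_BALANCE_WAKE	0x02

struct dom {
	int flags;
	bool spans_prev_cpu;	/* stand-in for cpumask_test_cpu(prev_cpu, span) */
	struct dom *parent;	/* next wider domain, NULL at the top */
};

/*
 * Walk from the smallest to the widest domain: the first SD_WAKE_AFFINE
 * domain that also spans prev_cpu ends the walk; otherwise remember the
 * widest domain carrying the requested wakeup flag.
 */
static void walk(struct dom *d, bool want_affine, int sd_flag,
		 struct dom **affine_sd, struct dom **sd)
{
	for (; d; d = d->parent) {
		if (want_affine && (d->flags & SD_WAKE_AFFINE) &&
		    d->spans_prev_cpu) {
			*affine_sd = d;
			break;
		}
		if (d->flags & sd_flag)
			*sd = d;
	}
}

int main(void)
{
	struct dom node = { .flags = SD_BALANCE_WAKE };
	struct dom llc  = { .flags = SD_WAKE_AFFINE | SD_BALANCE_WAKE,
			    .spans_prev_cpu = true, .parent = &node };
	struct dom *affine_sd = NULL, *sd = NULL;

	walk(&llc, true, SD_BALANCE_WAKE, &affine_sd, &sd);
	printf("affine wakeup at LLC level: %s\n",
	       affine_sd == &llc ? "yes" : "no");
	return 0;
}

The wake_affine() test in the hunk is likewise rewritten in the equivalent, more direct "keep prev_cpu unless the waking cpu differs and wake_affine() says to pull" form.
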
| @@ -4295,7 +4267,7 @@ redo: | |||
| 4295 | goto out_balanced; | 4267 | goto out_balanced; |
| 4296 | } | 4268 | } |
| 4297 | 4269 | ||
| 4298 | BUG_ON(busiest == this_rq); | 4270 | BUG_ON(busiest == env.dst_rq); |
| 4299 | 4271 | ||
| 4300 | schedstat_add(sd, lb_imbalance[idle], env.imbalance); | 4272 | schedstat_add(sd, lb_imbalance[idle], env.imbalance); |
| 4301 | 4273 | ||
| @@ -4316,7 +4288,7 @@ redo: | |||
| 4316 | update_h_load(env.src_cpu); | 4288 | update_h_load(env.src_cpu); |
| 4317 | more_balance: | 4289 | more_balance: |
| 4318 | local_irq_save(flags); | 4290 | local_irq_save(flags); |
| 4319 | double_rq_lock(this_rq, busiest); | 4291 | double_rq_lock(env.dst_rq, busiest); |
| 4320 | 4292 | ||
| 4321 | /* | 4293 | /* |
| 4322 | * cur_ld_moved - load moved in current iteration | 4294 | * cur_ld_moved - load moved in current iteration |
| @@ -4324,7 +4296,7 @@ more_balance: | |||
| 4324 | */ | 4296 | */ |
| 4325 | cur_ld_moved = move_tasks(&env); | 4297 | cur_ld_moved = move_tasks(&env); |
| 4326 | ld_moved += cur_ld_moved; | 4298 | ld_moved += cur_ld_moved; |
| 4327 | double_rq_unlock(this_rq, busiest); | 4299 | double_rq_unlock(env.dst_rq, busiest); |
| 4328 | local_irq_restore(flags); | 4300 | local_irq_restore(flags); |
| 4329 | 4301 | ||
| 4330 | if (env.flags & LBF_NEED_BREAK) { | 4302 | if (env.flags & LBF_NEED_BREAK) { |
| @@ -4360,8 +4332,7 @@ more_balance: | |||
| 4360 | if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0 && | 4332 | if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0 && |
| 4361 | lb_iterations++ < max_lb_iterations) { | 4333 | lb_iterations++ < max_lb_iterations) { |
| 4362 | 4334 | ||
| 4363 | this_rq = cpu_rq(env.new_dst_cpu); | 4335 | env.dst_rq = cpu_rq(env.new_dst_cpu); |
| 4364 | env.dst_rq = this_rq; | ||
| 4365 | env.dst_cpu = env.new_dst_cpu; | 4336 | env.dst_cpu = env.new_dst_cpu; |
| 4366 | env.flags &= ~LBF_SOME_PINNED; | 4337 | env.flags &= ~LBF_SOME_PINNED; |
| 4367 | env.loop = 0; | 4338 | env.loop = 0; |
| @@ -4646,7 +4617,7 @@ static void nohz_balancer_kick(int cpu) | |||
| 4646 | return; | 4617 | return; |
| 4647 | } | 4618 | } |
| 4648 | 4619 | ||
| 4649 | static inline void clear_nohz_tick_stopped(int cpu) | 4620 | static inline void nohz_balance_exit_idle(int cpu) |
| 4650 | { | 4621 | { |
| 4651 | if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) { | 4622 | if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) { |
| 4652 | cpumask_clear_cpu(cpu, nohz.idle_cpus_mask); | 4623 | cpumask_clear_cpu(cpu, nohz.idle_cpus_mask); |
| @@ -4686,28 +4657,23 @@ void set_cpu_sd_state_idle(void) | |||
| 4686 | } | 4657 | } |
| 4687 | 4658 | ||
| 4688 | /* | 4659 | /* |
| 4689 | * This routine will record that this cpu is going idle with tick stopped. | 4660 | * This routine will record that the cpu is going idle with tick stopped. |
| 4690 | * This info will be used in performing idle load balancing in the future. | 4661 | * This info will be used in performing idle load balancing in the future. |
| 4691 | */ | 4662 | */ |
| 4692 | void select_nohz_load_balancer(int stop_tick) | 4663 | void nohz_balance_enter_idle(int cpu) |
| 4693 | { | 4664 | { |
| 4694 | int cpu = smp_processor_id(); | ||
| 4695 | |||
| 4696 | /* | 4665 | /* |
| 4697 | * If this cpu is going down, then nothing needs to be done. | 4666 | * If this cpu is going down, then nothing needs to be done. |
| 4698 | */ | 4667 | */ |
| 4699 | if (!cpu_active(cpu)) | 4668 | if (!cpu_active(cpu)) |
| 4700 | return; | 4669 | return; |
| 4701 | 4670 | ||
| 4702 | if (stop_tick) { | 4671 | if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))) |
| 4703 | if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))) | 4672 | return; |
| 4704 | return; | ||
| 4705 | 4673 | ||
| 4706 | cpumask_set_cpu(cpu, nohz.idle_cpus_mask); | 4674 | cpumask_set_cpu(cpu, nohz.idle_cpus_mask); |
| 4707 | atomic_inc(&nohz.nr_cpus); | 4675 | atomic_inc(&nohz.nr_cpus); |
| 4708 | set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)); | 4676 | set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)); |
| 4709 | } | ||
| 4710 | return; | ||
| 4711 | } | 4677 | } |
| 4712 | 4678 | ||
| 4713 | static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb, | 4679 | static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb, |
| @@ -4715,7 +4681,7 @@ static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb, | |||
| 4715 | { | 4681 | { |
| 4716 | switch (action & ~CPU_TASKS_FROZEN) { | 4682 | switch (action & ~CPU_TASKS_FROZEN) { |
| 4717 | case CPU_DYING: | 4683 | case CPU_DYING: |
| 4718 | clear_nohz_tick_stopped(smp_processor_id()); | 4684 | nohz_balance_exit_idle(smp_processor_id()); |
| 4719 | return NOTIFY_OK; | 4685 | return NOTIFY_OK; |
| 4720 | default: | 4686 | default: |
| 4721 | return NOTIFY_DONE; | 4687 | return NOTIFY_DONE; |
| @@ -4837,14 +4803,15 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) | |||
| 4837 | if (need_resched()) | 4803 | if (need_resched()) |
| 4838 | break; | 4804 | break; |
| 4839 | 4805 | ||
| 4840 | raw_spin_lock_irq(&this_rq->lock); | 4806 | rq = cpu_rq(balance_cpu); |
| 4841 | update_rq_clock(this_rq); | 4807 | |
| 4842 | update_idle_cpu_load(this_rq); | 4808 | raw_spin_lock_irq(&rq->lock); |
| 4843 | raw_spin_unlock_irq(&this_rq->lock); | 4809 | update_rq_clock(rq); |
| 4810 | update_idle_cpu_load(rq); | ||
| 4811 | raw_spin_unlock_irq(&rq->lock); | ||
| 4844 | 4812 | ||
| 4845 | rebalance_domains(balance_cpu, CPU_IDLE); | 4813 | rebalance_domains(balance_cpu, CPU_IDLE); |
| 4846 | 4814 | ||
| 4847 | rq = cpu_rq(balance_cpu); | ||
| 4848 | if (time_after(this_rq->next_balance, rq->next_balance)) | 4815 | if (time_after(this_rq->next_balance, rq->next_balance)) |
| 4849 | this_rq->next_balance = rq->next_balance; | 4816 | this_rq->next_balance = rq->next_balance; |
| 4850 | } | 4817 | } |
| @@ -4875,7 +4842,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu) | |||
| 4875 | * busy tick after returning from idle, we will update the busy stats. | 4842 | * busy tick after returning from idle, we will update the busy stats. |
| 4876 | */ | 4843 | */ |
| 4877 | set_cpu_sd_state_busy(); | 4844 | set_cpu_sd_state_busy(); |
| 4878 | clear_nohz_tick_stopped(cpu); | 4845 | nohz_balance_exit_idle(cpu); |
| 4879 | 4846 | ||
| 4880 | /* | 4847 | /* |
| 4881 | * None are in tickless mode and hence no need for NOHZ idle load | 4848 | * None are in tickless mode and hence no need for NOHZ idle load |
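
The select_nohz_load_balancer(stop_tick) interface is split here into nohz_balance_enter_idle()/nohz_balance_exit_idle(): entry is recorded when the tick is stopped, exit happens from the first busy tick (or the CPU_DYING notifier) rather than from the tick-restart path. A user-space sketch of the bookkeeping; the flag array, mask and counter stand in for NOHZ_TICK_STOPPED, nohz.idle_cpus_mask and nohz.nr_cpus, and the cpu_active() check is omitted.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

/*
 * User-space sketch of the nohz idle-balance bookkeeping: a per-cpu
 * "tick stopped" flag plus a shared mask/count of idle tickless cpus.
 * Names follow the kernel helpers, the data structures are stand-ins.
 */
static bool tick_stopped[NR_CPUS];
static bool idle_cpus_mask[NR_CPUS];
static atomic_int nr_idle_cpus;

static void nohz_balance_enter_idle(int cpu)
{
	if (tick_stopped[cpu])		/* already accounted as idle */
		return;

	idle_cpus_mask[cpu] = true;
	atomic_fetch_add(&nr_idle_cpus, 1);
	tick_stopped[cpu] = true;
}

static void nohz_balance_exit_idle(int cpu)
{
	if (!tick_stopped[cpu])
		return;

	idle_cpus_mask[cpu] = false;
	atomic_fetch_sub(&nr_idle_cpus, 1);
	tick_stopped[cpu] = false;
}

int main(void)
{
	nohz_balance_enter_idle(1);	/* tick stopped on cpu 1 */
	nohz_balance_enter_idle(1);	/* second call is a no-op */
	printf("idle cpus: %d\n", atomic_load(&nr_idle_cpus));	/* 1 */

	nohz_balance_exit_idle(1);	/* first busy tick after idle */
	printf("idle cpus: %d\n", atomic_load(&nr_idle_cpus));	/* 0 */
	return 0;
}

Usage in the patch: nohz_balance_enter_idle(cpu) from tick_nohz_stop_sched_tick(), nohz_balance_exit_idle(cpu) from nohz_kick_needed() and the hotplug notifier; both are idempotent thanks to the flag test.
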
diff --git a/kernel/sched/features.h b/kernel/sched/features.h index de00a486c5c6..eebefcad7027 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h | |||
| @@ -12,14 +12,6 @@ SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true) | |||
| 12 | SCHED_FEAT(START_DEBIT, true) | 12 | SCHED_FEAT(START_DEBIT, true) |
| 13 | 13 | ||
| 14 | /* | 14 | /* |
| 15 | * Based on load and program behaviour, see if it makes sense to place | ||
| 16 | * a newly woken task on the same cpu as the task that woke it -- | ||
| 17 | * improve cache locality. Typically used with SYNC wakeups as | ||
| 18 | * generated by pipes and the like, see also SYNC_WAKEUPS. | ||
| 19 | */ | ||
| 20 | SCHED_FEAT(AFFINE_WAKEUPS, true) | ||
| 21 | |||
| 22 | /* | ||
| 23 | * Prefer to schedule the task we woke last (assuming it failed | 15 | * Prefer to schedule the task we woke last (assuming it failed |
| 24 | * wakeup-preemption), since its likely going to consume data we | 16 | * wakeup-preemption), since its likely going to consume data we |
| 25 | * touched, increases cache locality. | 17 | * touched, increases cache locality. |
| @@ -42,7 +34,7 @@ SCHED_FEAT(CACHE_HOT_BUDDY, true) | |||
| 42 | /* | 34 | /* |
| 43 | * Use arch dependent cpu power functions | 35 | * Use arch dependent cpu power functions |
| 44 | */ | 36 | */ |
| 45 | SCHED_FEAT(ARCH_POWER, false) | 37 | SCHED_FEAT(ARCH_POWER, true) |
| 46 | 38 | ||
| 47 | SCHED_FEAT(HRTICK, false) | 39 | SCHED_FEAT(HRTICK, false) |
| 48 | SCHED_FEAT(DOUBLE_TICK, false) | 40 | SCHED_FEAT(DOUBLE_TICK, false) |
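
Context for the SCHED_FEAT() lines above: features.h is pulled in several times with different SCHED_FEAT() definitions to generate the feature-bit enum, the default feature mask and (with CONFIG_SCHED_DEBUG) the name table behind the sched_features debugfs file. The sketch below compresses that X-macro pattern into a local stand-in list instead of the real features.h.

#include <stdio.h>

/* Stand-in feature list; the kernel re-includes features.h instead. */
#define FEATURES(F)			\
	F(GENTLE_FAIR_SLEEPERS, 1)	\
	F(ARCH_POWER, 1)		\
	F(HRTICK, 0)

#define F_ENUM(name, enabled)	__FEAT_##name,
enum { FEATURES(F_ENUM) __FEAT_NR };

#define F_DEFAULT(name, enabled) ((unsigned long)(enabled) << __FEAT_##name) |
static unsigned long feat_mask = FEATURES(F_DEFAULT) 0UL;

#define F_NAME(name, enabled)	#name,
static const char *feat_names[] = { FEATURES(F_NAME) };

int main(void)
{
	for (int i = 0; i < __FEAT_NR; i++)
		printf("%-22s %s\n", feat_names[i],
		       (feat_mask >> i) & 1 ? "on" : "off");
	return 0;
}

With this commit's default flip, ARCH_POWER starts out "on" in the mask rather than needing a debugfs write.
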
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index e0b7ba9c040f..418feb01344e 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c | |||
| @@ -1632,11 +1632,6 @@ static int push_rt_task(struct rq *rq) | |||
| 1632 | if (!next_task) | 1632 | if (!next_task) |
| 1633 | return 0; | 1633 | return 0; |
| 1634 | 1634 | ||
| 1635 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | ||
| 1636 | if (unlikely(task_running(rq, next_task))) | ||
| 1637 | return 0; | ||
| 1638 | #endif | ||
| 1639 | |||
| 1640 | retry: | 1635 | retry: |
| 1641 | if (unlikely(next_task == rq->curr)) { | 1636 | if (unlikely(next_task == rq->curr)) { |
| 1642 | WARN_ON(1); | 1637 | WARN_ON(1); |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0848fa36c383..7a7db09cfabc 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
| @@ -737,11 +737,7 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) | |||
| 737 | */ | 737 | */ |
| 738 | next->on_cpu = 1; | 738 | next->on_cpu = 1; |
| 739 | #endif | 739 | #endif |
| 740 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | ||
| 741 | raw_spin_unlock_irq(&rq->lock); | ||
| 742 | #else | ||
| 743 | raw_spin_unlock(&rq->lock); | 740 | raw_spin_unlock(&rq->lock); |
| 744 | #endif | ||
| 745 | } | 741 | } |
| 746 | 742 | ||
| 747 | static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | 743 | static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) |
| @@ -755,9 +751,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | |||
| 755 | smp_wmb(); | 751 | smp_wmb(); |
| 756 | prev->on_cpu = 0; | 752 | prev->on_cpu = 0; |
| 757 | #endif | 753 | #endif |
| 758 | #ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW | ||
| 759 | local_irq_enable(); | 754 | local_irq_enable(); |
| 760 | #endif | ||
| 761 | } | 755 | } |
| 762 | #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ | 756 | #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ |
| 763 | 757 | ||
| @@ -891,6 +885,9 @@ struct cpuacct { | |||
| 891 | struct kernel_cpustat __percpu *cpustat; | 885 | struct kernel_cpustat __percpu *cpustat; |
| 892 | }; | 886 | }; |
| 893 | 887 | ||
| 888 | extern struct cgroup_subsys cpuacct_subsys; | ||
| 889 | extern struct cpuacct root_cpuacct; | ||
| 890 | |||
| 894 | /* return cpu accounting group corresponding to this container */ | 891 | /* return cpu accounting group corresponding to this container */ |
| 895 | static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp) | 892 | static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp) |
| 896 | { | 893 | { |
| @@ -917,6 +914,16 @@ extern void cpuacct_charge(struct task_struct *tsk, u64 cputime); | |||
| 917 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} | 914 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} |
| 918 | #endif | 915 | #endif |
| 919 | 916 | ||
| 917 | #ifdef CONFIG_PARAVIRT | ||
| 918 | static inline u64 steal_ticks(u64 steal) | ||
| 919 | { | ||
| 920 | if (unlikely(steal > NSEC_PER_SEC)) | ||
| 921 | return div_u64(steal, TICK_NSEC); | ||
| 922 | |||
| 923 | return __iter_div_u64_rem(steal, TICK_NSEC, &steal); | ||
| 924 | } | ||
| 925 | #endif | ||
| 926 | |||
| 920 | static inline void inc_nr_running(struct rq *rq) | 927 | static inline void inc_nr_running(struct rq *rq) |
| 921 | { | 928 | { |
| 922 | rq->nr_running++; | 929 | rq->nr_running++; |
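
The new steal_ticks() helper converts nanoseconds of paravirt steal time into ticks, using iterative division for the common sub-second case where the quotient is tiny. A user-space sketch of the same arithmetic; HZ=250 is an assumption here, and the kernel's __iter_div_u64_rem() also hands back the sub-tick remainder, which this sketch drops.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC	1000000000ULL
#define TICK_NSEC	(NSEC_PER_SEC / 250)	/* assumes HZ=250 */

/*
 * Sketch of steal_ticks(): backlogs over one second take a full 64-bit
 * divide, smaller ones are divided by repeated subtraction, which is
 * cheaper than a 64-bit division on 32-bit machines.
 */
static uint64_t steal_ticks(uint64_t steal)
{
	uint64_t ticks = 0;

	if (steal > NSEC_PER_SEC)
		return steal / TICK_NSEC;

	while (steal >= TICK_NSEC) {	/* at most HZ iterations */
		steal -= TICK_NSEC;
		ticks++;
	}
	return ticks;
}

int main(void)
{
	/* 10 ms of steal at 4 ms per tick -> 2 ticks */
	printf("%llu\n", (unsigned long long)steal_ticks(10000000ULL));
	return 0;
}
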
| @@ -1156,3 +1163,53 @@ enum rq_nohz_flag_bits { | |||
| 1156 | 1163 | ||
| 1157 | #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) | 1164 | #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) |
| 1158 | #endif | 1165 | #endif |
| 1166 | |||
| 1167 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
| 1168 | |||
| 1169 | DECLARE_PER_CPU(u64, cpu_hardirq_time); | ||
| 1170 | DECLARE_PER_CPU(u64, cpu_softirq_time); | ||
| 1171 | |||
| 1172 | #ifndef CONFIG_64BIT | ||
| 1173 | DECLARE_PER_CPU(seqcount_t, irq_time_seq); | ||
| 1174 | |||
| 1175 | static inline void irq_time_write_begin(void) | ||
| 1176 | { | ||
| 1177 | __this_cpu_inc(irq_time_seq.sequence); | ||
| 1178 | smp_wmb(); | ||
| 1179 | } | ||
| 1180 | |||
| 1181 | static inline void irq_time_write_end(void) | ||
| 1182 | { | ||
| 1183 | smp_wmb(); | ||
| 1184 | __this_cpu_inc(irq_time_seq.sequence); | ||
| 1185 | } | ||
| 1186 | |||
| 1187 | static inline u64 irq_time_read(int cpu) | ||
| 1188 | { | ||
| 1189 | u64 irq_time; | ||
| 1190 | unsigned seq; | ||
| 1191 | |||
| 1192 | do { | ||
| 1193 | seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu)); | ||
| 1194 | irq_time = per_cpu(cpu_softirq_time, cpu) + | ||
| 1195 | per_cpu(cpu_hardirq_time, cpu); | ||
| 1196 | } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq)); | ||
| 1197 | |||
| 1198 | return irq_time; | ||
| 1199 | } | ||
| 1200 | #else /* CONFIG_64BIT */ | ||
| 1201 | static inline void irq_time_write_begin(void) | ||
| 1202 | { | ||
| 1203 | } | ||
| 1204 | |||
| 1205 | static inline void irq_time_write_end(void) | ||
| 1206 | { | ||
| 1207 | } | ||
| 1208 | |||
| 1209 | static inline u64 irq_time_read(int cpu) | ||
| 1210 | { | ||
| 1211 | return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); | ||
| 1212 | } | ||
| 1213 | #endif /* CONFIG_64BIT */ | ||
| 1214 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
| 1215 | |||
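
The 32-bit irq_time_read()/irq_time_write_begin()/irq_time_write_end() trio above is a hand-rolled seqcount: the writer (each cpu updates only its own counters, with interrupts off) bumps the sequence around the update, and readers retry if the sequence was odd or changed, so the 64-bit sums are never seen torn. A user-space sketch of the protocol; the C11 fences stand in loosely for the kernel's smp_wmb()/seqcount primitives.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Single writer per instance, as with the per-cpu irq time counters. */
struct irq_time {
	atomic_uint seq;
	uint64_t hardirq_time;
	uint64_t softirq_time;
};

static void irq_time_write(struct irq_time *t, uint64_t hardirq_delta)
{
	/* irq_time_write_begin(): mark a write in progress (seq goes odd) */
	atomic_fetch_add_explicit(&t->seq, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);

	t->hardirq_time += hardirq_delta;

	/* irq_time_write_end(): publish (seq goes even again) */
	atomic_thread_fence(memory_order_release);
	atomic_fetch_add_explicit(&t->seq, 1, memory_order_relaxed);
}

static uint64_t irq_time_read(struct irq_time *t)
{
	uint64_t sum;
	unsigned int seq;

	do {
		seq = atomic_load_explicit(&t->seq, memory_order_acquire);
		sum = t->hardirq_time + t->softirq_time;
		atomic_thread_fence(memory_order_acquire);
		/* retry if a write was in flight or completed meanwhile */
	} while ((seq & 1) ||
		 seq != atomic_load_explicit(&t->seq, memory_order_relaxed));

	return sum;
}

int main(void)
{
	struct irq_time t = { 0 };

	irq_time_write(&t, 1500);
	printf("irq time: %llu ns\n", (unsigned long long)irq_time_read(&t));
	return 0;
}

On 64-bit the sums can be read atomically, which is why the CONFIG_64BIT branch leaves write_begin()/write_end() empty and reads with a plain addition.
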
diff --git a/kernel/softirq.c b/kernel/softirq.c index 5c6a5bd8462f..cc96bdc0c2c9 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -221,7 +221,7 @@ asmlinkage void __do_softirq(void) | |||
| 221 | current->flags &= ~PF_MEMALLOC; | 221 | current->flags &= ~PF_MEMALLOC; |
| 222 | 222 | ||
| 223 | pending = local_softirq_pending(); | 223 | pending = local_softirq_pending(); |
| 224 | account_system_vtime(current); | 224 | vtime_account(current); |
| 225 | 225 | ||
| 226 | __local_bh_disable((unsigned long)__builtin_return_address(0), | 226 | __local_bh_disable((unsigned long)__builtin_return_address(0), |
| 227 | SOFTIRQ_OFFSET); | 227 | SOFTIRQ_OFFSET); |
| @@ -272,7 +272,7 @@ restart: | |||
| 272 | 272 | ||
| 273 | lockdep_softirq_exit(); | 273 | lockdep_softirq_exit(); |
| 274 | 274 | ||
| 275 | account_system_vtime(current); | 275 | vtime_account(current); |
| 276 | __local_bh_enable(SOFTIRQ_OFFSET); | 276 | __local_bh_enable(SOFTIRQ_OFFSET); |
| 277 | tsk_restore_flags(current, old_flags, PF_MEMALLOC); | 277 | tsk_restore_flags(current, old_flags, PF_MEMALLOC); |
| 278 | } | 278 | } |
| @@ -341,7 +341,7 @@ static inline void invoke_softirq(void) | |||
| 341 | */ | 341 | */ |
| 342 | void irq_exit(void) | 342 | void irq_exit(void) |
| 343 | { | 343 | { |
| 344 | account_system_vtime(current); | 344 | vtime_account(current); |
| 345 | trace_hardirq_exit(); | 345 | trace_hardirq_exit(); |
| 346 | sub_preempt_count(IRQ_EXIT_OFFSET); | 346 | sub_preempt_count(IRQ_EXIT_OFFSET); |
| 347 | if (!in_interrupt() && local_softirq_pending()) | 347 | if (!in_interrupt() && local_softirq_pending()) |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 87174ef59161..81c7b1a1a307 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -307,7 +307,7 @@ static struct ctl_table kern_table[] = { | |||
| 307 | .extra2 = &max_sched_tunable_scaling, | 307 | .extra2 = &max_sched_tunable_scaling, |
| 308 | }, | 308 | }, |
| 309 | { | 309 | { |
| 310 | .procname = "sched_migration_cost", | 310 | .procname = "sched_migration_cost_ns", |
| 311 | .data = &sysctl_sched_migration_cost, | 311 | .data = &sysctl_sched_migration_cost, |
| 312 | .maxlen = sizeof(unsigned int), | 312 | .maxlen = sizeof(unsigned int), |
| 313 | .mode = 0644, | 313 | .mode = 0644, |
| @@ -321,14 +321,14 @@ static struct ctl_table kern_table[] = { | |||
| 321 | .proc_handler = proc_dointvec, | 321 | .proc_handler = proc_dointvec, |
| 322 | }, | 322 | }, |
| 323 | { | 323 | { |
| 324 | .procname = "sched_time_avg", | 324 | .procname = "sched_time_avg_ms", |
| 325 | .data = &sysctl_sched_time_avg, | 325 | .data = &sysctl_sched_time_avg, |
| 326 | .maxlen = sizeof(unsigned int), | 326 | .maxlen = sizeof(unsigned int), |
| 327 | .mode = 0644, | 327 | .mode = 0644, |
| 328 | .proc_handler = proc_dointvec, | 328 | .proc_handler = proc_dointvec, |
| 329 | }, | 329 | }, |
| 330 | { | 330 | { |
| 331 | .procname = "sched_shares_window", | 331 | .procname = "sched_shares_window_ns", |
| 332 | .data = &sysctl_sched_shares_window, | 332 | .data = &sysctl_sched_shares_window, |
| 333 | .maxlen = sizeof(unsigned int), | 333 | .maxlen = sizeof(unsigned int), |
| 334 | .mode = 0644, | 334 | .mode = 0644, |
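
The three renames above only append the unit to the public name (_ns, _ms); the .data pointers and proc handlers are unchanged, so the knobs still live under /proc/sys/kernel/. A trivial read of one of them, assuming a kernel that already carries this rename:

#include <stdio.h>

int main(void)
{
	char buf[64];
	FILE *f = fopen("/proc/sys/kernel/sched_migration_cost_ns", "r");

	if (!f) {
		perror("sched_migration_cost_ns");
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("sched_migration_cost_ns = %s", buf);
	fclose(f);
	return 0;
}
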
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index cf5f6b262673..f423bdd035c2 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -372,7 +372,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |||
| 372 | * the scheduler tick in nohz_restart_sched_tick. | 372 | * the scheduler tick in nohz_restart_sched_tick. |
| 373 | */ | 373 | */ |
| 374 | if (!ts->tick_stopped) { | 374 | if (!ts->tick_stopped) { |
| 375 | select_nohz_load_balancer(1); | 375 | nohz_balance_enter_idle(cpu); |
| 376 | calc_load_enter_idle(); | 376 | calc_load_enter_idle(); |
| 377 | 377 | ||
| 378 | ts->last_tick = hrtimer_get_expires(&ts->sched_timer); | 378 | ts->last_tick = hrtimer_get_expires(&ts->sched_timer); |
| @@ -570,7 +570,6 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) | |||
| 570 | static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) | 570 | static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) |
| 571 | { | 571 | { |
| 572 | /* Update jiffies first */ | 572 | /* Update jiffies first */ |
| 573 | select_nohz_load_balancer(0); | ||
| 574 | tick_do_update_jiffies64(now); | 573 | tick_do_update_jiffies64(now); |
| 575 | update_cpu_load_nohz(); | 574 | update_cpu_load_nohz(); |
| 576 | 575 | ||
