-rw-r--r--  arch/ia64/include/asm/Kbuild              |   1
-rw-r--r--  arch/m68k/include/asm/irqflags.h          |   2
-rw-r--r--  arch/powerpc/include/asm/Kbuild           |   1
-rw-r--r--  arch/s390/include/asm/cputime.h           |   3
-rw-r--r--  arch/s390/include/asm/vtime.h             |   7
-rw-r--r--  arch/s390/kernel/vtime.c                  |   1
-rw-r--r--  include/asm-generic/vtime.h               |   0
-rw-r--r--  include/linux/context_tracking.h          | 128
-rw-r--r--  include/linux/context_tracking_state.h    |  39
-rw-r--r--  include/linux/hardirq.h                   | 117
-rw-r--r--  include/linux/preempt_mask.h              | 122
-rw-r--r--  include/linux/tick.h                      |  45
-rw-r--r--  include/linux/vtime.h                     |  74
-rw-r--r--  include/trace/events/context_tracking.h   |  58
-rw-r--r--  init/Kconfig                              |  28
-rw-r--r--  init/main.c                               |   2
-rw-r--r--  kernel/context_tracking.c                 | 125
-rw-r--r--  kernel/sched/core.c                       |   4
-rw-r--r--  kernel/sched/cputime.c                    |  53
-rw-r--r--  kernel/time/Kconfig                       |   1
-rw-r--r--  kernel/time/tick-sched.c                  |  61
21 files changed, 545 insertions, 327 deletions
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 05b03ecd7933..a3456f34f672 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += clkdev.h | |||
3 | generic-y += exec.h | 3 | generic-y += exec.h |
4 | generic-y += kvm_para.h | 4 | generic-y += kvm_para.h |
5 | generic-y += trace_clock.h | 5 | generic-y += trace_clock.h |
6 | generic-y += vtime.h \ No newline at end of file | ||
diff --git a/arch/m68k/include/asm/irqflags.h b/arch/m68k/include/asm/irqflags.h
index 5053092b369f..a823cd73dc09 100644
--- a/arch/m68k/include/asm/irqflags.h
+++ b/arch/m68k/include/asm/irqflags.h
@@ -3,7 +3,7 @@ | |||
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | #ifdef CONFIG_MMU | 5 | #ifdef CONFIG_MMU |
6 | #include <linux/hardirq.h> | 6 | #include <linux/preempt_mask.h> |
7 | #endif | 7 | #endif |
8 | #include <linux/preempt.h> | 8 | #include <linux/preempt.h> |
9 | #include <asm/thread_info.h> | 9 | #include <asm/thread_info.h> |
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 650757c300db..704e6f10ae80 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -2,3 +2,4 @@ | |||
2 | generic-y += clkdev.h | 2 | generic-y += clkdev.h |
3 | generic-y += rwsem.h | 3 | generic-y += rwsem.h |
4 | generic-y += trace_clock.h | 4 | generic-y += trace_clock.h |
5 | generic-y += vtime.h \ No newline at end of file | ||
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index d2ff41370c0c..f65bd3634519 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -13,9 +13,6 @@ | |||
13 | #include <asm/div64.h> | 13 | #include <asm/div64.h> |
14 | 14 | ||
15 | 15 | ||
16 | #define __ARCH_HAS_VTIME_ACCOUNT | ||
17 | #define __ARCH_HAS_VTIME_TASK_SWITCH | ||
18 | |||
19 | /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ | 16 | /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ |
20 | 17 | ||
21 | typedef unsigned long long __nocast cputime_t; | 18 | typedef unsigned long long __nocast cputime_t; |
diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h
new file mode 100644
index 000000000000..af9896c53eb3
--- /dev/null
+++ b/arch/s390/include/asm/vtime.h
@@ -0,0 +1,7 @@ | |||
1 | #ifndef _S390_VTIME_H | ||
2 | #define _S390_VTIME_H | ||
3 | |||
4 | #define __ARCH_HAS_VTIME_ACCOUNT | ||
5 | #define __ARCH_HAS_VTIME_TASK_SWITCH | ||
6 | |||
7 | #endif /* _S390_VTIME_H */ | ||
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 9b9c1b78ec67..abcfab55f99b 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -19,6 +19,7 @@ | |||
19 | #include <asm/irq_regs.h> | 19 | #include <asm/irq_regs.h> |
20 | #include <asm/cputime.h> | 20 | #include <asm/cputime.h> |
21 | #include <asm/vtimer.h> | 21 | #include <asm/vtimer.h> |
22 | #include <asm/vtime.h> | ||
22 | #include <asm/irq.h> | 23 | #include <asm/irq.h> |
23 | #include "entry.h" | 24 | #include "entry.h" |
24 | 25 | ||
diff --git a/include/asm-generic/vtime.h b/include/asm-generic/vtime.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/include/asm-generic/vtime.h
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index fc09d7b0dacf..158158704c30 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -2,100 +2,110 @@ | |||
2 | #define _LINUX_CONTEXT_TRACKING_H | 2 | #define _LINUX_CONTEXT_TRACKING_H |
3 | 3 | ||
4 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
5 | #include <linux/percpu.h> | ||
6 | #include <linux/vtime.h> | 5 | #include <linux/vtime.h> |
6 | #include <linux/context_tracking_state.h> | ||
7 | #include <asm/ptrace.h> | 7 | #include <asm/ptrace.h> |
8 | 8 | ||
9 | struct context_tracking { | ||
10 | /* | ||
11 | * When active is false, probes are unset in order | ||
12 | * to minimize overhead: TIF flags are cleared | ||
13 | * and calls to user_enter/exit are ignored. This | ||
14 | * may be further optimized using static keys. | ||
15 | */ | ||
16 | bool active; | ||
17 | enum ctx_state { | ||
18 | IN_KERNEL = 0, | ||
19 | IN_USER, | ||
20 | } state; | ||
21 | }; | ||
22 | |||
23 | static inline void __guest_enter(void) | ||
24 | { | ||
25 | /* | ||
26 | * This is running in ioctl context so we can avoid | ||
27 | * the call to vtime_account() with its unnecessary idle check. | ||
28 | */ | ||
29 | vtime_account_system(current); | ||
30 | current->flags |= PF_VCPU; | ||
31 | } | ||
32 | |||
33 | static inline void __guest_exit(void) | ||
34 | { | ||
35 | /* | ||
36 | * This is running in ioctl context so we can avoid | ||
37 | * the call to vtime_account() with its unnecessary idle check. | ||
38 | */ | ||
39 | vtime_account_system(current); | ||
40 | current->flags &= ~PF_VCPU; | ||
41 | } | ||
42 | 9 | ||
43 | #ifdef CONFIG_CONTEXT_TRACKING | 10 | #ifdef CONFIG_CONTEXT_TRACKING |
44 | DECLARE_PER_CPU(struct context_tracking, context_tracking); | 11 | extern void context_tracking_cpu_set(int cpu); |
45 | 12 | ||
46 | static inline bool context_tracking_in_user(void) | 13 | extern void context_tracking_user_enter(void); |
14 | extern void context_tracking_user_exit(void); | ||
15 | extern void __context_tracking_task_switch(struct task_struct *prev, | ||
16 | struct task_struct *next); | ||
17 | |||
18 | static inline void user_enter(void) | ||
47 | { | 19 | { |
48 | return __this_cpu_read(context_tracking.state) == IN_USER; | 20 | if (static_key_false(&context_tracking_enabled)) |
49 | } | 21 | context_tracking_user_enter(); |
50 | 22 | ||
51 | static inline bool context_tracking_active(void) | 23 | } |
24 | static inline void user_exit(void) | ||
52 | { | 25 | { |
53 | return __this_cpu_read(context_tracking.active); | 26 | if (static_key_false(&context_tracking_enabled)) |
27 | context_tracking_user_exit(); | ||
54 | } | 28 | } |
55 | 29 | ||
56 | extern void user_enter(void); | ||
57 | extern void user_exit(void); | ||
58 | |||
59 | extern void guest_enter(void); | ||
60 | extern void guest_exit(void); | ||
61 | |||
62 | static inline enum ctx_state exception_enter(void) | 30 | static inline enum ctx_state exception_enter(void) |
63 | { | 31 | { |
64 | enum ctx_state prev_ctx; | 32 | enum ctx_state prev_ctx; |
65 | 33 | ||
34 | if (!static_key_false(&context_tracking_enabled)) | ||
35 | return 0; | ||
36 | |||
66 | prev_ctx = this_cpu_read(context_tracking.state); | 37 | prev_ctx = this_cpu_read(context_tracking.state); |
67 | user_exit(); | 38 | context_tracking_user_exit(); |
68 | 39 | ||
69 | return prev_ctx; | 40 | return prev_ctx; |
70 | } | 41 | } |
71 | 42 | ||
72 | static inline void exception_exit(enum ctx_state prev_ctx) | 43 | static inline void exception_exit(enum ctx_state prev_ctx) |
73 | { | 44 | { |
74 | if (prev_ctx == IN_USER) | 45 | if (static_key_false(&context_tracking_enabled)) { |
75 | user_enter(); | 46 | if (prev_ctx == IN_USER) |
47 | context_tracking_user_enter(); | ||
48 | } | ||
76 | } | 49 | } |
77 | 50 | ||
78 | extern void context_tracking_task_switch(struct task_struct *prev, | 51 | static inline void context_tracking_task_switch(struct task_struct *prev, |
79 | struct task_struct *next); | 52 | struct task_struct *next) |
53 | { | ||
54 | if (static_key_false(&context_tracking_enabled)) | ||
55 | __context_tracking_task_switch(prev, next); | ||
56 | } | ||
80 | #else | 57 | #else |
81 | static inline bool context_tracking_in_user(void) { return false; } | ||
82 | static inline void user_enter(void) { } | 58 | static inline void user_enter(void) { } |
83 | static inline void user_exit(void) { } | 59 | static inline void user_exit(void) { } |
60 | static inline enum ctx_state exception_enter(void) { return 0; } | ||
61 | static inline void exception_exit(enum ctx_state prev_ctx) { } | ||
62 | static inline void context_tracking_task_switch(struct task_struct *prev, | ||
63 | struct task_struct *next) { } | ||
64 | #endif /* !CONFIG_CONTEXT_TRACKING */ | ||
65 | |||
66 | |||
67 | #ifdef CONFIG_CONTEXT_TRACKING_FORCE | ||
68 | extern void context_tracking_init(void); | ||
69 | #else | ||
70 | static inline void context_tracking_init(void) { } | ||
71 | #endif /* CONFIG_CONTEXT_TRACKING_FORCE */ | ||
72 | |||
84 | 73 | ||
74 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN | ||
85 | static inline void guest_enter(void) | 75 | static inline void guest_enter(void) |
86 | { | 76 | { |
87 | __guest_enter(); | 77 | if (vtime_accounting_enabled()) |
78 | vtime_guest_enter(current); | ||
79 | else | ||
80 | current->flags |= PF_VCPU; | ||
88 | } | 81 | } |
89 | 82 | ||
90 | static inline void guest_exit(void) | 83 | static inline void guest_exit(void) |
91 | { | 84 | { |
92 | __guest_exit(); | 85 | if (vtime_accounting_enabled()) |
86 | vtime_guest_exit(current); | ||
87 | else | ||
88 | current->flags &= ~PF_VCPU; | ||
93 | } | 89 | } |
94 | 90 | ||
95 | static inline enum ctx_state exception_enter(void) { return 0; } | 91 | #else |
96 | static inline void exception_exit(enum ctx_state prev_ctx) { } | 92 | static inline void guest_enter(void) |
97 | static inline void context_tracking_task_switch(struct task_struct *prev, | 93 | { |
98 | struct task_struct *next) { } | 94 | /* |
99 | #endif /* !CONFIG_CONTEXT_TRACKING */ | 95 | * This is running in ioctl context so it's safe |
96 | * to assume that it's the stime pending cputime | ||
97 | * to flush. | ||
98 | */ | ||
99 | vtime_account_system(current); | ||
100 | current->flags |= PF_VCPU; | ||
101 | } | ||
102 | |||
103 | static inline void guest_exit(void) | ||
104 | { | ||
105 | /* Flush the guest cputime we spent on the guest */ | ||
106 | vtime_account_system(current); | ||
107 | current->flags &= ~PF_VCPU; | ||
108 | } | ||
109 | #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ | ||
100 | 110 | ||
101 | #endif | 111 | #endif |
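Note: the net effect of the context_tracking.h rework above is that user_enter()/user_exit() become static inlines guarded by the context_tracking_enabled static key, with the real work moved out of line into context_tracking_user_enter()/exit(). The caller below is an illustrative sketch (not part of this patch) of what an arch syscall slow path relies on after the change:

#include <linux/context_tracking.h>

/* Illustrative sketch only, not from this diff. */
void example_syscall_slowpath(void)
{
	/*
	 * With the static key off (no full dynticks CPU registered),
	 * user_exit()/user_enter() compile down to a patched-out jump
	 * label, so ordinary kernels pay almost nothing on this path.
	 */
	user_exit();

	/* ... perform the actual syscall work here ... */

	user_enter();
}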
diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
new file mode 100644
index 000000000000..0f1979d0674f
--- /dev/null
+++ b/include/linux/context_tracking_state.h
@@ -0,0 +1,39 @@ | |||
1 | #ifndef _LINUX_CONTEXT_TRACKING_STATE_H | ||
2 | #define _LINUX_CONTEXT_TRACKING_STATE_H | ||
3 | |||
4 | #include <linux/percpu.h> | ||
5 | #include <linux/static_key.h> | ||
6 | |||
7 | struct context_tracking { | ||
8 | /* | ||
9 | * When active is false, probes are unset in order | ||
10 | * to minimize overhead: TIF flags are cleared | ||
11 | * and calls to user_enter/exit are ignored. This | ||
12 | * may be further optimized using static keys. | ||
13 | */ | ||
14 | bool active; | ||
15 | enum ctx_state { | ||
16 | IN_KERNEL = 0, | ||
17 | IN_USER, | ||
18 | } state; | ||
19 | }; | ||
20 | |||
21 | #ifdef CONFIG_CONTEXT_TRACKING | ||
22 | extern struct static_key context_tracking_enabled; | ||
23 | DECLARE_PER_CPU(struct context_tracking, context_tracking); | ||
24 | |||
25 | static inline bool context_tracking_in_user(void) | ||
26 | { | ||
27 | return __this_cpu_read(context_tracking.state) == IN_USER; | ||
28 | } | ||
29 | |||
30 | static inline bool context_tracking_active(void) | ||
31 | { | ||
32 | return __this_cpu_read(context_tracking.active); | ||
33 | } | ||
34 | #else | ||
35 | static inline bool context_tracking_in_user(void) { return false; } | ||
36 | static inline bool context_tracking_active(void) { return false; } | ||
37 | #endif /* CONFIG_CONTEXT_TRACKING */ | ||
38 | |||
39 | #endif | ||
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 05bcc0903766..ccfe17c5c8da 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -1,126 +1,11 @@ | |||
1 | #ifndef LINUX_HARDIRQ_H | 1 | #ifndef LINUX_HARDIRQ_H |
2 | #define LINUX_HARDIRQ_H | 2 | #define LINUX_HARDIRQ_H |
3 | 3 | ||
4 | #include <linux/preempt.h> | 4 | #include <linux/preempt_mask.h> |
5 | #include <linux/lockdep.h> | 5 | #include <linux/lockdep.h> |
6 | #include <linux/ftrace_irq.h> | 6 | #include <linux/ftrace_irq.h> |
7 | #include <linux/vtime.h> | 7 | #include <linux/vtime.h> |
8 | #include <asm/hardirq.h> | ||
9 | 8 | ||
10 | /* | ||
11 | * We put the hardirq and softirq counter into the preemption | ||
12 | * counter. The bitmask has the following meaning: | ||
13 | * | ||
14 | * - bits 0-7 are the preemption count (max preemption depth: 256) | ||
15 | * - bits 8-15 are the softirq count (max # of softirqs: 256) | ||
16 | * | ||
17 | * The hardirq count can in theory reach the same as NR_IRQS. | ||
18 | * In reality, the number of nested IRQS is limited to the stack | ||
19 | * size as well. For archs with over 1000 IRQS it is not practical | ||
20 | * to expect that they will all nest. We give a max of 10 bits for | ||
21 | * hardirq nesting. An arch may choose to give less than 10 bits. | ||
22 | * m68k expects it to be 8. | ||
23 | * | ||
24 | * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024) | ||
25 | * - bit 26 is the NMI_MASK | ||
26 | * - bit 27 is the PREEMPT_ACTIVE flag | ||
27 | * | ||
28 | * PREEMPT_MASK: 0x000000ff | ||
29 | * SOFTIRQ_MASK: 0x0000ff00 | ||
30 | * HARDIRQ_MASK: 0x03ff0000 | ||
31 | * NMI_MASK: 0x04000000 | ||
32 | */ | ||
33 | #define PREEMPT_BITS 8 | ||
34 | #define SOFTIRQ_BITS 8 | ||
35 | #define NMI_BITS 1 | ||
36 | |||
37 | #define MAX_HARDIRQ_BITS 10 | ||
38 | |||
39 | #ifndef HARDIRQ_BITS | ||
40 | # define HARDIRQ_BITS MAX_HARDIRQ_BITS | ||
41 | #endif | ||
42 | |||
43 | #if HARDIRQ_BITS > MAX_HARDIRQ_BITS | ||
44 | #error HARDIRQ_BITS too high! | ||
45 | #endif | ||
46 | |||
47 | #define PREEMPT_SHIFT 0 | ||
48 | #define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) | ||
49 | #define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) | ||
50 | #define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) | ||
51 | |||
52 | #define __IRQ_MASK(x) ((1UL << (x))-1) | ||
53 | |||
54 | #define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) | ||
55 | #define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) | ||
56 | #define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) | ||
57 | #define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) | ||
58 | |||
59 | #define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) | ||
60 | #define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) | ||
61 | #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) | ||
62 | #define NMI_OFFSET (1UL << NMI_SHIFT) | ||
63 | |||
64 | #define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) | ||
65 | |||
66 | #ifndef PREEMPT_ACTIVE | ||
67 | #define PREEMPT_ACTIVE_BITS 1 | ||
68 | #define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) | ||
69 | #define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT) | ||
70 | #endif | ||
71 | |||
72 | #if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS)) | ||
73 | #error PREEMPT_ACTIVE is too low! | ||
74 | #endif | ||
75 | |||
76 | #define hardirq_count() (preempt_count() & HARDIRQ_MASK) | ||
77 | #define softirq_count() (preempt_count() & SOFTIRQ_MASK) | ||
78 | #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ | ||
79 | | NMI_MASK)) | ||
80 | |||
81 | /* | ||
82 | * Are we doing bottom half or hardware interrupt processing? | ||
83 | * Are we in a softirq context? Interrupt context? | ||
84 | * in_softirq - Are we currently processing softirq or have bh disabled? | ||
85 | * in_serving_softirq - Are we currently processing softirq? | ||
86 | */ | ||
87 | #define in_irq() (hardirq_count()) | ||
88 | #define in_softirq() (softirq_count()) | ||
89 | #define in_interrupt() (irq_count()) | ||
90 | #define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) | ||
91 | |||
92 | /* | ||
93 | * Are we in NMI context? | ||
94 | */ | ||
95 | #define in_nmi() (preempt_count() & NMI_MASK) | ||
96 | |||
97 | #if defined(CONFIG_PREEMPT_COUNT) | ||
98 | # define PREEMPT_CHECK_OFFSET 1 | ||
99 | #else | ||
100 | # define PREEMPT_CHECK_OFFSET 0 | ||
101 | #endif | ||
102 | |||
103 | /* | ||
104 | * Are we running in atomic context? WARNING: this macro cannot | ||
105 | * always detect atomic context; in particular, it cannot know about | ||
106 | * held spinlocks in non-preemptible kernels. Thus it should not be | ||
107 | * used in the general case to determine whether sleeping is possible. | ||
108 | * Do not use in_atomic() in driver code. | ||
109 | */ | ||
110 | #define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0) | ||
111 | |||
112 | /* | ||
113 | * Check whether we were atomic before we did preempt_disable(): | ||
114 | * (used by the scheduler, *after* releasing the kernel lock) | ||
115 | */ | ||
116 | #define in_atomic_preempt_off() \ | ||
117 | ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET) | ||
118 | |||
119 | #ifdef CONFIG_PREEMPT_COUNT | ||
120 | # define preemptible() (preempt_count() == 0 && !irqs_disabled()) | ||
121 | #else | ||
122 | # define preemptible() 0 | ||
123 | #endif | ||
124 | 9 | ||
125 | #if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS) | 10 | #if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS) |
126 | extern void synchronize_irq(unsigned int irq); | 11 | extern void synchronize_irq(unsigned int irq); |
diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h
new file mode 100644
index 000000000000..931bc616219f
--- /dev/null
+++ b/include/linux/preempt_mask.h
@@ -0,0 +1,122 @@ | |||
1 | #ifndef LINUX_PREEMPT_MASK_H | ||
2 | #define LINUX_PREEMPT_MASK_H | ||
3 | |||
4 | #include <linux/preempt.h> | ||
5 | #include <asm/hardirq.h> | ||
6 | |||
7 | /* | ||
8 | * We put the hardirq and softirq counter into the preemption | ||
9 | * counter. The bitmask has the following meaning: | ||
10 | * | ||
11 | * - bits 0-7 are the preemption count (max preemption depth: 256) | ||
12 | * - bits 8-15 are the softirq count (max # of softirqs: 256) | ||
13 | * | ||
14 | * The hardirq count can in theory reach the same as NR_IRQS. | ||
15 | * In reality, the number of nested IRQS is limited to the stack | ||
16 | * size as well. For archs with over 1000 IRQS it is not practical | ||
17 | * to expect that they will all nest. We give a max of 10 bits for | ||
18 | * hardirq nesting. An arch may choose to give less than 10 bits. | ||
19 | * m68k expects it to be 8. | ||
20 | * | ||
21 | * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024) | ||
22 | * - bit 26 is the NMI_MASK | ||
23 | * - bit 27 is the PREEMPT_ACTIVE flag | ||
24 | * | ||
25 | * PREEMPT_MASK: 0x000000ff | ||
26 | * SOFTIRQ_MASK: 0x0000ff00 | ||
27 | * HARDIRQ_MASK: 0x03ff0000 | ||
28 | * NMI_MASK: 0x04000000 | ||
29 | */ | ||
30 | #define PREEMPT_BITS 8 | ||
31 | #define SOFTIRQ_BITS 8 | ||
32 | #define NMI_BITS 1 | ||
33 | |||
34 | #define MAX_HARDIRQ_BITS 10 | ||
35 | |||
36 | #ifndef HARDIRQ_BITS | ||
37 | # define HARDIRQ_BITS MAX_HARDIRQ_BITS | ||
38 | #endif | ||
39 | |||
40 | #if HARDIRQ_BITS > MAX_HARDIRQ_BITS | ||
41 | #error HARDIRQ_BITS too high! | ||
42 | #endif | ||
43 | |||
44 | #define PREEMPT_SHIFT 0 | ||
45 | #define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) | ||
46 | #define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) | ||
47 | #define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) | ||
48 | |||
49 | #define __IRQ_MASK(x) ((1UL << (x))-1) | ||
50 | |||
51 | #define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) | ||
52 | #define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) | ||
53 | #define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) | ||
54 | #define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) | ||
55 | |||
56 | #define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) | ||
57 | #define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) | ||
58 | #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) | ||
59 | #define NMI_OFFSET (1UL << NMI_SHIFT) | ||
60 | |||
61 | #define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) | ||
62 | |||
63 | #ifndef PREEMPT_ACTIVE | ||
64 | #define PREEMPT_ACTIVE_BITS 1 | ||
65 | #define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) | ||
66 | #define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT) | ||
67 | #endif | ||
68 | |||
69 | #if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS)) | ||
70 | #error PREEMPT_ACTIVE is too low! | ||
71 | #endif | ||
72 | |||
73 | #define hardirq_count() (preempt_count() & HARDIRQ_MASK) | ||
74 | #define softirq_count() (preempt_count() & SOFTIRQ_MASK) | ||
75 | #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ | ||
76 | | NMI_MASK)) | ||
77 | |||
78 | /* | ||
79 | * Are we doing bottom half or hardware interrupt processing? | ||
80 | * Are we in a softirq context? Interrupt context? | ||
81 | * in_softirq - Are we currently processing softirq or have bh disabled? | ||
82 | * in_serving_softirq - Are we currently processing softirq? | ||
83 | */ | ||
84 | #define in_irq() (hardirq_count()) | ||
85 | #define in_softirq() (softirq_count()) | ||
86 | #define in_interrupt() (irq_count()) | ||
87 | #define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) | ||
88 | |||
89 | /* | ||
90 | * Are we in NMI context? | ||
91 | */ | ||
92 | #define in_nmi() (preempt_count() & NMI_MASK) | ||
93 | |||
94 | #if defined(CONFIG_PREEMPT_COUNT) | ||
95 | # define PREEMPT_CHECK_OFFSET 1 | ||
96 | #else | ||
97 | # define PREEMPT_CHECK_OFFSET 0 | ||
98 | #endif | ||
99 | |||
100 | /* | ||
101 | * Are we running in atomic context? WARNING: this macro cannot | ||
102 | * always detect atomic context; in particular, it cannot know about | ||
103 | * held spinlocks in non-preemptible kernels. Thus it should not be | ||
104 | * used in the general case to determine whether sleeping is possible. | ||
105 | * Do not use in_atomic() in driver code. | ||
106 | */ | ||
107 | #define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0) | ||
108 | |||
109 | /* | ||
110 | * Check whether we were atomic before we did preempt_disable(): | ||
111 | * (used by the scheduler, *after* releasing the kernel lock) | ||
112 | */ | ||
113 | #define in_atomic_preempt_off() \ | ||
114 | ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET) | ||
115 | |||
116 | #ifdef CONFIG_PREEMPT_COUNT | ||
117 | # define preemptible() (preempt_count() == 0 && !irqs_disabled()) | ||
118 | #else | ||
119 | # define preemptible() 0 | ||
120 | #endif | ||
121 | |||
122 | #endif /* LINUX_PREEMPT_MASK_H */ | ||
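Note: to make the bit layout that moved into preempt_mask.h concrete, here is a small worked sketch (not part of the patch) assuming the default HARDIRQ_BITS of 10:

#include <linux/preempt_mask.h>
#include <linux/bug.h>

/* Hypothetical snapshot of preempt_count(): preemption disabled twice,
 * one softirq-disable section, one hardirq in progress.
 */
static void example_decode_preempt_count(void)
{
	unsigned long count = 2 * PREEMPT_OFFSET +
			      SOFTIRQ_DISABLE_OFFSET +
			      HARDIRQ_OFFSET;	/* == 0x00010202 */

	WARN_ON(!(count & HARDIRQ_MASK));	/* the in_irq() test would be true for this value */
	WARN_ON(!(count & SOFTIRQ_MASK));	/* the in_softirq() test would be true as well */
	WARN_ON((count & PREEMPT_MASK) != 2);	/* preemption depth is 2 */
}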
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 62bd8b72873c..5128d33bbb39 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -10,6 +10,8 @@ | |||
10 | #include <linux/irqflags.h> | 10 | #include <linux/irqflags.h> |
11 | #include <linux/percpu.h> | 11 | #include <linux/percpu.h> |
12 | #include <linux/hrtimer.h> | 12 | #include <linux/hrtimer.h> |
13 | #include <linux/context_tracking_state.h> | ||
14 | #include <linux/cpumask.h> | ||
13 | 15 | ||
14 | #ifdef CONFIG_GENERIC_CLOCKEVENTS | 16 | #ifdef CONFIG_GENERIC_CLOCKEVENTS |
15 | 17 | ||
@@ -158,20 +160,51 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } | |||
158 | # endif /* !CONFIG_NO_HZ_COMMON */ | 160 | # endif /* !CONFIG_NO_HZ_COMMON */ |
159 | 161 | ||
160 | #ifdef CONFIG_NO_HZ_FULL | 162 | #ifdef CONFIG_NO_HZ_FULL |
163 | extern bool tick_nohz_full_running; | ||
164 | extern cpumask_var_t tick_nohz_full_mask; | ||
165 | |||
166 | static inline bool tick_nohz_full_enabled(void) | ||
167 | { | ||
168 | if (!static_key_false(&context_tracking_enabled)) | ||
169 | return false; | ||
170 | |||
171 | return tick_nohz_full_running; | ||
172 | } | ||
173 | |||
174 | static inline bool tick_nohz_full_cpu(int cpu) | ||
175 | { | ||
176 | if (!tick_nohz_full_enabled()) | ||
177 | return false; | ||
178 | |||
179 | return cpumask_test_cpu(cpu, tick_nohz_full_mask); | ||
180 | } | ||
181 | |||
161 | extern void tick_nohz_init(void); | 182 | extern void tick_nohz_init(void); |
162 | extern int tick_nohz_full_cpu(int cpu); | 183 | extern void __tick_nohz_full_check(void); |
163 | extern void tick_nohz_full_check(void); | ||
164 | extern void tick_nohz_full_kick(void); | 184 | extern void tick_nohz_full_kick(void); |
165 | extern void tick_nohz_full_kick_all(void); | 185 | extern void tick_nohz_full_kick_all(void); |
166 | extern void tick_nohz_task_switch(struct task_struct *tsk); | 186 | extern void __tick_nohz_task_switch(struct task_struct *tsk); |
167 | #else | 187 | #else |
168 | static inline void tick_nohz_init(void) { } | 188 | static inline void tick_nohz_init(void) { } |
169 | static inline int tick_nohz_full_cpu(int cpu) { return 0; } | 189 | static inline bool tick_nohz_full_enabled(void) { return false; } |
170 | static inline void tick_nohz_full_check(void) { } | 190 | static inline bool tick_nohz_full_cpu(int cpu) { return false; } |
191 | static inline void __tick_nohz_full_check(void) { } | ||
171 | static inline void tick_nohz_full_kick(void) { } | 192 | static inline void tick_nohz_full_kick(void) { } |
172 | static inline void tick_nohz_full_kick_all(void) { } | 193 | static inline void tick_nohz_full_kick_all(void) { } |
173 | static inline void tick_nohz_task_switch(struct task_struct *tsk) { } | 194 | static inline void __tick_nohz_task_switch(struct task_struct *tsk) { } |
174 | #endif | 195 | #endif |
175 | 196 | ||
197 | static inline void tick_nohz_full_check(void) | ||
198 | { | ||
199 | if (tick_nohz_full_enabled()) | ||
200 | __tick_nohz_full_check(); | ||
201 | } | ||
202 | |||
203 | static inline void tick_nohz_task_switch(struct task_struct *tsk) | ||
204 | { | ||
205 | if (tick_nohz_full_enabled()) | ||
206 | __tick_nohz_task_switch(tsk); | ||
207 | } | ||
208 | |||
176 | 209 | ||
177 | #endif | 210 | #endif |
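Note: the new tick_nohz_full_enabled()/tick_nohz_full_cpu() helpers stack three cheap checks: the context tracking static key, the tick_nohz_full_running flag, and only then the cpumask test. The caller below is a hedged example of the kind of user that benefits; it is not taken from this diff:

#include <linux/tick.h>

/* Illustrative only: skip periodic housekeeping on full dynticks CPUs. */
static bool example_cpu_wants_periodic_work(int cpu)
{
	/*
	 * On !NO_HZ_FULL kernels, or before any nohz_full= CPU is
	 * registered, this bails out on the static key / bool check
	 * without ever touching tick_nohz_full_mask.
	 */
	return !tick_nohz_full_cpu(cpu);
}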
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index b1dd2db80076..f5b72b364bda 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -1,18 +1,68 @@ | |||
1 | #ifndef _LINUX_KERNEL_VTIME_H | 1 | #ifndef _LINUX_KERNEL_VTIME_H |
2 | #define _LINUX_KERNEL_VTIME_H | 2 | #define _LINUX_KERNEL_VTIME_H |
3 | 3 | ||
4 | #include <linux/context_tracking_state.h> | ||
5 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE | ||
6 | #include <asm/vtime.h> | ||
7 | #endif | ||
8 | |||
9 | |||
4 | struct task_struct; | 10 | struct task_struct; |
5 | 11 | ||
12 | /* | ||
13 | * vtime_accounting_enabled() definitions/declarations | ||
14 | */ | ||
15 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE | ||
16 | static inline bool vtime_accounting_enabled(void) { return true; } | ||
17 | #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ | ||
18 | |||
19 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN | ||
20 | static inline bool vtime_accounting_enabled(void) | ||
21 | { | ||
22 | if (static_key_false(&context_tracking_enabled)) { | ||
23 | if (context_tracking_active()) | ||
24 | return true; | ||
25 | } | ||
26 | |||
27 | return false; | ||
28 | } | ||
29 | #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ | ||
30 | |||
31 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | ||
32 | static inline bool vtime_accounting_enabled(void) { return false; } | ||
33 | #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ | ||
34 | |||
35 | |||
36 | /* | ||
37 | * Common vtime APIs | ||
38 | */ | ||
6 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | 39 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING |
40 | |||
41 | #ifdef __ARCH_HAS_VTIME_TASK_SWITCH | ||
7 | extern void vtime_task_switch(struct task_struct *prev); | 42 | extern void vtime_task_switch(struct task_struct *prev); |
43 | #else | ||
44 | extern void vtime_common_task_switch(struct task_struct *prev); | ||
45 | static inline void vtime_task_switch(struct task_struct *prev) | ||
46 | { | ||
47 | if (vtime_accounting_enabled()) | ||
48 | vtime_common_task_switch(prev); | ||
49 | } | ||
50 | #endif /* __ARCH_HAS_VTIME_TASK_SWITCH */ | ||
51 | |||
8 | extern void vtime_account_system(struct task_struct *tsk); | 52 | extern void vtime_account_system(struct task_struct *tsk); |
9 | extern void vtime_account_idle(struct task_struct *tsk); | 53 | extern void vtime_account_idle(struct task_struct *tsk); |
10 | extern void vtime_account_user(struct task_struct *tsk); | 54 | extern void vtime_account_user(struct task_struct *tsk); |
11 | extern void vtime_account_irq_enter(struct task_struct *tsk); | ||
12 | 55 | ||
13 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE | 56 | #ifdef __ARCH_HAS_VTIME_ACCOUNT |
14 | static inline bool vtime_accounting_enabled(void) { return true; } | 57 | extern void vtime_account_irq_enter(struct task_struct *tsk); |
15 | #endif | 58 | #else |
59 | extern void vtime_common_account_irq_enter(struct task_struct *tsk); | ||
60 | static inline void vtime_account_irq_enter(struct task_struct *tsk) | ||
61 | { | ||
62 | if (vtime_accounting_enabled()) | ||
63 | vtime_common_account_irq_enter(tsk); | ||
64 | } | ||
65 | #endif /* __ARCH_HAS_VTIME_ACCOUNT */ | ||
16 | 66 | ||
17 | #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ | 67 | #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ |
18 | 68 | ||
@@ -20,14 +70,20 @@ static inline void vtime_task_switch(struct task_struct *prev) { } | |||
20 | static inline void vtime_account_system(struct task_struct *tsk) { } | 70 | static inline void vtime_account_system(struct task_struct *tsk) { } |
21 | static inline void vtime_account_user(struct task_struct *tsk) { } | 71 | static inline void vtime_account_user(struct task_struct *tsk) { } |
22 | static inline void vtime_account_irq_enter(struct task_struct *tsk) { } | 72 | static inline void vtime_account_irq_enter(struct task_struct *tsk) { } |
23 | static inline bool vtime_accounting_enabled(void) { return false; } | 73 | #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ |
24 | #endif | ||
25 | 74 | ||
26 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN | 75 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN |
27 | extern void arch_vtime_task_switch(struct task_struct *tsk); | 76 | extern void arch_vtime_task_switch(struct task_struct *tsk); |
28 | extern void vtime_account_irq_exit(struct task_struct *tsk); | 77 | extern void vtime_gen_account_irq_exit(struct task_struct *tsk); |
29 | extern bool vtime_accounting_enabled(void); | 78 | |
79 | static inline void vtime_account_irq_exit(struct task_struct *tsk) | ||
80 | { | ||
81 | if (vtime_accounting_enabled()) | ||
82 | vtime_gen_account_irq_exit(tsk); | ||
83 | } | ||
84 | |||
30 | extern void vtime_user_enter(struct task_struct *tsk); | 85 | extern void vtime_user_enter(struct task_struct *tsk); |
86 | |||
31 | static inline void vtime_user_exit(struct task_struct *tsk) | 87 | static inline void vtime_user_exit(struct task_struct *tsk) |
32 | { | 88 | { |
33 | vtime_account_user(tsk); | 89 | vtime_account_user(tsk); |
@@ -35,7 +91,7 @@ static inline void vtime_user_exit(struct task_struct *tsk) | |||
35 | extern void vtime_guest_enter(struct task_struct *tsk); | 91 | extern void vtime_guest_enter(struct task_struct *tsk); |
36 | extern void vtime_guest_exit(struct task_struct *tsk); | 92 | extern void vtime_guest_exit(struct task_struct *tsk); |
37 | extern void vtime_init_idle(struct task_struct *tsk, int cpu); | 93 | extern void vtime_init_idle(struct task_struct *tsk, int cpu); |
38 | #else | 94 | #else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */ |
39 | static inline void vtime_account_irq_exit(struct task_struct *tsk) | 95 | static inline void vtime_account_irq_exit(struct task_struct *tsk) |
40 | { | 96 | { |
41 | /* On hard|softirq exit we always account to hard|softirq cputime */ | 97 | /* On hard|softirq exit we always account to hard|softirq cputime */ |
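Note: after this reshuffle, vtime_account_irq_enter() resolves differently per configuration: NATIVE accounting keeps it unconditional (with s390 overriding via the new asm/vtime.h), GEN accounting gates it on vtime_accounting_enabled(), and kernels without CONFIG_VIRT_CPU_ACCOUNTING get an empty stub. A hedged caller-side sketch, not part of the patch:

#include <linux/vtime.h>
#include <linux/sched.h>

/* Illustrative only: roughly what an irq entry path relies on. */
static void example_irq_entry_accounting(struct task_struct *tsk)
{
	/*
	 * NATIVE: always accounts (arch hook when __ARCH_HAS_VTIME_ACCOUNT).
	 * GEN:    the inline wrapper only calls
	 *         vtime_common_account_irq_enter() when the context
	 *         tracking static key and per-CPU active flag are set.
	 * none:   compiles to nothing.
	 */
	vtime_account_irq_enter(tsk);
}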
diff --git a/include/trace/events/context_tracking.h b/include/trace/events/context_tracking.h
new file mode 100644
index 000000000000..ce8007cf29cf
--- /dev/null
+++ b/include/trace/events/context_tracking.h
@@ -0,0 +1,58 @@ | |||
1 | #undef TRACE_SYSTEM | ||
2 | #define TRACE_SYSTEM context_tracking | ||
3 | |||
4 | #if !defined(_TRACE_CONTEXT_TRACKING_H) || defined(TRACE_HEADER_MULTI_READ) | ||
5 | #define _TRACE_CONTEXT_TRACKING_H | ||
6 | |||
7 | #include <linux/tracepoint.h> | ||
8 | |||
9 | DECLARE_EVENT_CLASS(context_tracking_user, | ||
10 | |||
11 | TP_PROTO(int dummy), | ||
12 | |||
13 | TP_ARGS(dummy), | ||
14 | |||
15 | TP_STRUCT__entry( | ||
16 | __field( int, dummy ) | ||
17 | ), | ||
18 | |||
19 | TP_fast_assign( | ||
20 | __entry->dummy = dummy; | ||
21 | ), | ||
22 | |||
23 | TP_printk("%s", "") | ||
24 | ); | ||
25 | |||
26 | /** | ||
27 | * user_enter - called when the kernel resumes to userspace | ||
28 | * @dummy: dummy arg to make trace event macro happy | ||
29 | * | ||
30 | * This event occurs when the kernel resumes to userspace after | ||
31 | * an exception or a syscall. | ||
32 | */ | ||
33 | DEFINE_EVENT(context_tracking_user, user_enter, | ||
34 | |||
35 | TP_PROTO(int dummy), | ||
36 | |||
37 | TP_ARGS(dummy) | ||
38 | ); | ||
39 | |||
40 | /** | ||
41 | * user_exit - called when userspace enters the kernel | ||
42 | * @dummy: dummy arg to make trace event macro happy | ||
43 | * | ||
44 | * This event occurs when userspace enters the kernel through | ||
45 | * an exception or a syscall. | ||
46 | */ | ||
47 | DEFINE_EVENT(context_tracking_user, user_exit, | ||
48 | |||
49 | TP_PROTO(int dummy), | ||
50 | |||
51 | TP_ARGS(dummy) | ||
52 | ); | ||
53 | |||
54 | |||
55 | #endif /* _TRACE_CONTEXT_TRACKING_H */ | ||
56 | |||
57 | /* This part must be outside protection */ | ||
58 | #include <trace/define_trace.h> | ||
diff --git a/init/Kconfig b/init/Kconfig
index cc917d3ec858..0a2c4bcf179e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -528,13 +528,29 @@ config RCU_USER_QS | |||
528 | config CONTEXT_TRACKING_FORCE | 528 | config CONTEXT_TRACKING_FORCE |
529 | bool "Force context tracking" | 529 | bool "Force context tracking" |
530 | depends on CONTEXT_TRACKING | 530 | depends on CONTEXT_TRACKING |
531 | default CONTEXT_TRACKING | 531 | default y if !NO_HZ_FULL |
532 | help | 532 | help |
533 | Probe on user/kernel boundaries by default in order to | 533 | The major prerequisite for full dynticks to work is to |
534 | test the features that rely on it such as userspace RCU extended | 534 | support the context tracking subsystem. But there are also |
535 | quiescent states. | 535 | other dependencies to provide in order to make the full |
536 | This test is there for debugging until we have a real user like the | 536 | dynticks work. |
537 | full dynticks mode. | 537 | |
538 | This option is meant for testing when an arch implements the | ||
539 | context tracking backend but doesn't yet fulfill all the | ||
540 | requirements to make the full dynticks feature work. | ||
541 | Without the full dynticks, there is no way to test the support | ||
542 | for context tracking and the subsystems that rely on it: RCU | ||
543 | userspace extended quiescent state and tickless cputime | ||
544 | accounting. This option copes with the absence of the full | ||
545 | dynticks subsystem by forcing the context tracking on all | ||
546 | CPUs in the system. | ||
547 | |||
548 | Say Y only if you're working on the development of an | ||
549 | architecture backend for the context tracking. | ||
550 | |||
551 | Say N otherwise, this option brings an overhead that you | ||
552 | don't want in production. | ||
553 | |||
538 | 554 | ||
539 | config RCU_FANOUT | 555 | config RCU_FANOUT |
540 | int "Tree-based hierarchical RCU fanout value" | 556 | int "Tree-based hierarchical RCU fanout value" |
diff --git a/init/main.c b/init/main.c
index d03d2ec2eacf..af310afbef28 100644
--- a/init/main.c
+++ b/init/main.c
@@ -75,6 +75,7 @@ | |||
75 | #include <linux/blkdev.h> | 75 | #include <linux/blkdev.h> |
76 | #include <linux/elevator.h> | 76 | #include <linux/elevator.h> |
77 | #include <linux/sched_clock.h> | 77 | #include <linux/sched_clock.h> |
78 | #include <linux/context_tracking.h> | ||
78 | 79 | ||
79 | #include <asm/io.h> | 80 | #include <asm/io.h> |
80 | #include <asm/bugs.h> | 81 | #include <asm/bugs.h> |
@@ -545,6 +546,7 @@ asmlinkage void __init start_kernel(void) | |||
545 | idr_init_cache(); | 546 | idr_init_cache(); |
546 | rcu_init(); | 547 | rcu_init(); |
547 | tick_nohz_init(); | 548 | tick_nohz_init(); |
549 | context_tracking_init(); | ||
548 | radix_tree_init(); | 550 | radix_tree_init(); |
549 | /* init some links before init_ISA_irqs() */ | 551 | /* init some links before init_ISA_irqs() */ |
550 | early_irq_init(); | 552 | early_irq_init(); |
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 383f8231e436..247091bf0587 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -20,22 +20,33 @@ | |||
20 | #include <linux/hardirq.h> | 20 | #include <linux/hardirq.h> |
21 | #include <linux/export.h> | 21 | #include <linux/export.h> |
22 | 22 | ||
23 | DEFINE_PER_CPU(struct context_tracking, context_tracking) = { | 23 | #define CREATE_TRACE_POINTS |
24 | #ifdef CONFIG_CONTEXT_TRACKING_FORCE | 24 | #include <trace/events/context_tracking.h> |
25 | .active = true, | 25 | |
26 | #endif | 26 | struct static_key context_tracking_enabled = STATIC_KEY_INIT_FALSE; |
27 | }; | 27 | EXPORT_SYMBOL_GPL(context_tracking_enabled); |
28 | |||
29 | DEFINE_PER_CPU(struct context_tracking, context_tracking); | ||
30 | EXPORT_SYMBOL_GPL(context_tracking); | ||
31 | |||
32 | void context_tracking_cpu_set(int cpu) | ||
33 | { | ||
34 | if (!per_cpu(context_tracking.active, cpu)) { | ||
35 | per_cpu(context_tracking.active, cpu) = true; | ||
36 | static_key_slow_inc(&context_tracking_enabled); | ||
37 | } | ||
38 | } | ||
28 | 39 | ||
29 | /** | 40 | /** |
30 | * user_enter - Inform the context tracking that the CPU is going to | 41 | * context_tracking_user_enter - Inform the context tracking that the CPU is going to |
31 | * enter userspace mode. | 42 | * enter userspace mode. |
32 | * | 43 | * |
33 | * This function must be called right before we switch from the kernel | 44 | * This function must be called right before we switch from the kernel |
34 | * to userspace, when it's guaranteed the remaining kernel instructions | 45 | * to userspace, when it's guaranteed the remaining kernel instructions |
35 | * to execute won't use any RCU read side critical section because this | 46 | * to execute won't use any RCU read side critical section because this |
36 | * function sets RCU in extended quiescent state. | 47 | * function sets RCU in extended quiescent state. |
37 | */ | 48 | */ |
38 | void user_enter(void) | 49 | void context_tracking_user_enter(void) |
39 | { | 50 | { |
40 | unsigned long flags; | 51 | unsigned long flags; |
41 | 52 | ||
@@ -54,17 +65,32 @@ void user_enter(void) | |||
54 | WARN_ON_ONCE(!current->mm); | 65 | WARN_ON_ONCE(!current->mm); |
55 | 66 | ||
56 | local_irq_save(flags); | 67 | local_irq_save(flags); |
57 | if (__this_cpu_read(context_tracking.active) && | 68 | if ( __this_cpu_read(context_tracking.state) != IN_USER) { |
58 | __this_cpu_read(context_tracking.state) != IN_USER) { | 69 | if (__this_cpu_read(context_tracking.active)) { |
70 | trace_user_enter(0); | ||
71 | /* | ||
72 | * At this stage, only low level arch entry code remains and | ||
73 | * then we'll run in userspace. We can assume there won't be | ||
74 | * any RCU read-side critical section until the next call to | ||
75 | * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency | ||
76 | * on the tick. | ||
77 | */ | ||
78 | vtime_user_enter(current); | ||
79 | rcu_user_enter(); | ||
80 | } | ||
59 | /* | 81 | /* |
60 | * At this stage, only low level arch entry code remains and | 82 | * Even if context tracking is disabled on this CPU, because it's outside |
61 | * then we'll run in userspace. We can assume there won't be | 83 | * the full dynticks mask for example, we still have to keep track of the |
62 | * any RCU read-side critical section until the next call to | 84 | * context transitions and states to prevent inconsistency on those of |
63 | * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency | 85 | * other CPUs. |
64 | * on the tick. | 86 | * If a task triggers an exception in userspace, sleep on the exception |
87 | * handler and then migrate to another CPU, that new CPU must know where | ||
88 | * the exception returns by the time we call exception_exit(). | ||
89 | * This information can only be provided by the previous CPU when it called | ||
90 | * exception_enter(). | ||
91 | * OTOH we can spare the calls to vtime and RCU when context_tracking.active | ||
92 | * is false because we know that CPU is not tickless. | ||
65 | */ | 93 | */ |
66 | vtime_user_enter(current); | ||
67 | rcu_user_enter(); | ||
68 | __this_cpu_write(context_tracking.state, IN_USER); | 94 | __this_cpu_write(context_tracking.state, IN_USER); |
69 | } | 95 | } |
70 | local_irq_restore(flags); | 96 | local_irq_restore(flags); |
@@ -87,10 +113,9 @@ void user_enter(void) | |||
87 | */ | 113 | */ |
88 | void __sched notrace preempt_schedule_context(void) | 114 | void __sched notrace preempt_schedule_context(void) |
89 | { | 115 | { |
90 | struct thread_info *ti = current_thread_info(); | ||
91 | enum ctx_state prev_ctx; | 116 | enum ctx_state prev_ctx; |
92 | 117 | ||
93 | if (likely(ti->preempt_count || irqs_disabled())) | 118 | if (likely(!preemptible())) |
94 | return; | 119 | return; |
95 | 120 | ||
96 | /* | 121 | /* |
@@ -112,8 +137,8 @@ EXPORT_SYMBOL_GPL(preempt_schedule_context); | |||
112 | #endif /* CONFIG_PREEMPT */ | 137 | #endif /* CONFIG_PREEMPT */ |
113 | 138 | ||
114 | /** | 139 | /** |
115 | * user_exit - Inform the context tracking that the CPU is | 140 | * context_tracking_user_exit - Inform the context tracking that the CPU is |
116 | * exiting userspace mode and entering the kernel. | 141 | * exiting userspace mode and entering the kernel. |
117 | * | 142 | * |
118 | * This function must be called after we entered the kernel from userspace | 143 | * This function must be called after we entered the kernel from userspace |
119 | * before any use of RCU read side critical section. This potentially include | 144 | * before any use of RCU read side critical section. This potentially include |
@@ -122,7 +147,7 @@ EXPORT_SYMBOL_GPL(preempt_schedule_context); | |||
122 | * This call supports re-entrancy. This way it can be called from any exception | 147 | * This call supports re-entrancy. This way it can be called from any exception |
123 | * handler without needing to know if we came from userspace or not. | 148 | * handler without needing to know if we came from userspace or not. |
124 | */ | 149 | */ |
125 | void user_exit(void) | 150 | void context_tracking_user_exit(void) |
126 | { | 151 | { |
127 | unsigned long flags; | 152 | unsigned long flags; |
128 | 153 | ||
@@ -131,38 +156,22 @@ void user_exit(void) | |||
131 | 156 | ||
132 | local_irq_save(flags); | 157 | local_irq_save(flags); |
133 | if (__this_cpu_read(context_tracking.state) == IN_USER) { | 158 | if (__this_cpu_read(context_tracking.state) == IN_USER) { |
134 | /* | 159 | if (__this_cpu_read(context_tracking.active)) { |
135 | * We are going to run code that may use RCU. Inform | 160 | /* |
136 | * RCU core about that (ie: we may need the tick again). | 161 | * We are going to run code that may use RCU. Inform |
137 | */ | 162 | * RCU core about that (ie: we may need the tick again). |
138 | rcu_user_exit(); | 163 | */ |
139 | vtime_user_exit(current); | 164 | rcu_user_exit(); |
165 | vtime_user_exit(current); | ||
166 | trace_user_exit(0); | ||
167 | } | ||
140 | __this_cpu_write(context_tracking.state, IN_KERNEL); | 168 | __this_cpu_write(context_tracking.state, IN_KERNEL); |
141 | } | 169 | } |
142 | local_irq_restore(flags); | 170 | local_irq_restore(flags); |
143 | } | 171 | } |
144 | 172 | ||
145 | void guest_enter(void) | ||
146 | { | ||
147 | if (vtime_accounting_enabled()) | ||
148 | vtime_guest_enter(current); | ||
149 | else | ||
150 | __guest_enter(); | ||
151 | } | ||
152 | EXPORT_SYMBOL_GPL(guest_enter); | ||
153 | |||
154 | void guest_exit(void) | ||
155 | { | ||
156 | if (vtime_accounting_enabled()) | ||
157 | vtime_guest_exit(current); | ||
158 | else | ||
159 | __guest_exit(); | ||
160 | } | ||
161 | EXPORT_SYMBOL_GPL(guest_exit); | ||
162 | |||
163 | |||
164 | /** | 173 | /** |
165 | * context_tracking_task_switch - context switch the syscall callbacks | 174 | * __context_tracking_task_switch - context switch the syscall callbacks |
166 | * @prev: the task that is being switched out | 175 | * @prev: the task that is being switched out |
167 | * @next: the task that is being switched in | 176 | * @next: the task that is being switched in |
168 | * | 177 | * |
@@ -174,11 +183,19 @@ EXPORT_SYMBOL_GPL(guest_exit); | |||
174 | * migrate to some CPU that doesn't do the context tracking. As such the TIF | 183 | * migrate to some CPU that doesn't do the context tracking. As such the TIF |
175 | * flag may not be desired there. | 184 | * flag may not be desired there. |
176 | */ | 185 | */ |
177 | void context_tracking_task_switch(struct task_struct *prev, | 186 | void __context_tracking_task_switch(struct task_struct *prev, |
178 | struct task_struct *next) | 187 | struct task_struct *next) |
179 | { | 188 | { |
180 | if (__this_cpu_read(context_tracking.active)) { | 189 | clear_tsk_thread_flag(prev, TIF_NOHZ); |
181 | clear_tsk_thread_flag(prev, TIF_NOHZ); | 190 | set_tsk_thread_flag(next, TIF_NOHZ); |
182 | set_tsk_thread_flag(next, TIF_NOHZ); | ||
183 | } | ||
184 | } | 191 | } |
192 | |||
193 | #ifdef CONFIG_CONTEXT_TRACKING_FORCE | ||
194 | void __init context_tracking_init(void) | ||
195 | { | ||
196 | int cpu; | ||
197 | |||
198 | for_each_possible_cpu(cpu) | ||
199 | context_tracking_cpu_set(cpu); | ||
200 | } | ||
201 | #endif | ||
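Note: context_tracking_cpu_set() is the new knob that both CONTEXT_TRACKING_FORCE (above) and the NO_HZ_FULL setup code are expected to use; the corresponding call from the tick-sched.c init path is not visible in the truncated hunk at the end of this diff. A hedged sketch of such a caller:

#include <linux/context_tracking.h>
#include <linux/cpumask.h>

/* Illustrative sketch, not from this patch. */
static void example_enable_context_tracking(const struct cpumask *mask)
{
	int cpu;

	for_each_cpu(cpu, mask)
		context_tracking_cpu_set(cpu);
	/*
	 * The first call flips the context_tracking_enabled static key,
	 * which activates the user_enter()/user_exit() fast paths
	 * everywhere; further calls only mark additional CPUs active.
	 */
}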
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b8e2162fc803..725aa067ad63 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2527,13 +2527,11 @@ void __sched schedule_preempt_disabled(void) | |||
2527 | */ | 2527 | */ |
2528 | asmlinkage void __sched notrace preempt_schedule(void) | 2528 | asmlinkage void __sched notrace preempt_schedule(void) |
2529 | { | 2529 | { |
2530 | struct thread_info *ti = current_thread_info(); | ||
2531 | |||
2532 | /* | 2530 | /* |
2533 | * If there is a non-zero preempt_count or interrupts are disabled, | 2531 | * If there is a non-zero preempt_count or interrupts are disabled, |
2534 | * we do not want to preempt the current task. Just return.. | 2532 | * we do not want to preempt the current task. Just return.. |
2535 | */ | 2533 | */ |
2536 | if (likely(ti->preempt_count || irqs_disabled())) | 2534 | if (likely(!preemptible())) |
2537 | return; | 2535 | return; |
2538 | 2536 | ||
2539 | do { | 2537 | do { |
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index e89ccefef278..ace34f95e200 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -378,11 +378,8 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_ | |||
378 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | 378 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING |
379 | 379 | ||
380 | #ifndef __ARCH_HAS_VTIME_TASK_SWITCH | 380 | #ifndef __ARCH_HAS_VTIME_TASK_SWITCH |
381 | void vtime_task_switch(struct task_struct *prev) | 381 | void vtime_common_task_switch(struct task_struct *prev) |
382 | { | 382 | { |
383 | if (!vtime_accounting_enabled()) | ||
384 | return; | ||
385 | |||
386 | if (is_idle_task(prev)) | 383 | if (is_idle_task(prev)) |
387 | vtime_account_idle(prev); | 384 | vtime_account_idle(prev); |
388 | else | 385 | else |
@@ -404,11 +401,8 @@ void vtime_task_switch(struct task_struct *prev) | |||
404 | * vtime_account(). | 401 | * vtime_account(). |
405 | */ | 402 | */ |
406 | #ifndef __ARCH_HAS_VTIME_ACCOUNT | 403 | #ifndef __ARCH_HAS_VTIME_ACCOUNT |
407 | void vtime_account_irq_enter(struct task_struct *tsk) | 404 | void vtime_common_account_irq_enter(struct task_struct *tsk) |
408 | { | 405 | { |
409 | if (!vtime_accounting_enabled()) | ||
410 | return; | ||
411 | |||
412 | if (!in_interrupt()) { | 406 | if (!in_interrupt()) { |
413 | /* | 407 | /* |
414 | * If we interrupted user, context_tracking_in_user() | 408 | * If we interrupted user, context_tracking_in_user() |
@@ -428,7 +422,7 @@ void vtime_account_irq_enter(struct task_struct *tsk) | |||
428 | } | 422 | } |
429 | vtime_account_system(tsk); | 423 | vtime_account_system(tsk); |
430 | } | 424 | } |
431 | EXPORT_SYMBOL_GPL(vtime_account_irq_enter); | 425 | EXPORT_SYMBOL_GPL(vtime_common_account_irq_enter); |
432 | #endif /* __ARCH_HAS_VTIME_ACCOUNT */ | 426 | #endif /* __ARCH_HAS_VTIME_ACCOUNT */ |
433 | #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ | 427 | #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ |
434 | 428 | ||
@@ -559,12 +553,6 @@ static void cputime_adjust(struct task_cputime *curr, | |||
559 | { | 553 | { |
560 | cputime_t rtime, stime, utime, total; | 554 | cputime_t rtime, stime, utime, total; |
561 | 555 | ||
562 | if (vtime_accounting_enabled()) { | ||
563 | *ut = curr->utime; | ||
564 | *st = curr->stime; | ||
565 | return; | ||
566 | } | ||
567 | |||
568 | stime = curr->stime; | 556 | stime = curr->stime; |
569 | total = stime + curr->utime; | 557 | total = stime + curr->utime; |
570 | 558 | ||
@@ -664,23 +652,17 @@ static void __vtime_account_system(struct task_struct *tsk) | |||
664 | 652 | ||
665 | void vtime_account_system(struct task_struct *tsk) | 653 | void vtime_account_system(struct task_struct *tsk) |
666 | { | 654 | { |
667 | if (!vtime_accounting_enabled()) | ||
668 | return; | ||
669 | |||
670 | write_seqlock(&tsk->vtime_seqlock); | 655 | write_seqlock(&tsk->vtime_seqlock); |
671 | __vtime_account_system(tsk); | 656 | __vtime_account_system(tsk); |
672 | write_sequnlock(&tsk->vtime_seqlock); | 657 | write_sequnlock(&tsk->vtime_seqlock); |
673 | } | 658 | } |
674 | 659 | ||
675 | void vtime_account_irq_exit(struct task_struct *tsk) | 660 | void vtime_gen_account_irq_exit(struct task_struct *tsk) |
676 | { | 661 | { |
677 | if (!vtime_accounting_enabled()) | ||
678 | return; | ||
679 | |||
680 | write_seqlock(&tsk->vtime_seqlock); | 662 | write_seqlock(&tsk->vtime_seqlock); |
663 | __vtime_account_system(tsk); | ||
681 | if (context_tracking_in_user()) | 664 | if (context_tracking_in_user()) |
682 | tsk->vtime_snap_whence = VTIME_USER; | 665 | tsk->vtime_snap_whence = VTIME_USER; |
683 | __vtime_account_system(tsk); | ||
684 | write_sequnlock(&tsk->vtime_seqlock); | 666 | write_sequnlock(&tsk->vtime_seqlock); |
685 | } | 667 | } |
686 | 668 | ||
@@ -688,12 +670,8 @@ void vtime_account_user(struct task_struct *tsk) | |||
688 | { | 670 | { |
689 | cputime_t delta_cpu; | 671 | cputime_t delta_cpu; |
690 | 672 | ||
691 | if (!vtime_accounting_enabled()) | ||
692 | return; | ||
693 | |||
694 | delta_cpu = get_vtime_delta(tsk); | ||
695 | |||
696 | write_seqlock(&tsk->vtime_seqlock); | 673 | write_seqlock(&tsk->vtime_seqlock); |
674 | delta_cpu = get_vtime_delta(tsk); | ||
697 | tsk->vtime_snap_whence = VTIME_SYS; | 675 | tsk->vtime_snap_whence = VTIME_SYS; |
698 | account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); | 676 | account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); |
699 | write_sequnlock(&tsk->vtime_seqlock); | 677 | write_sequnlock(&tsk->vtime_seqlock); |
@@ -701,22 +679,27 @@ void vtime_account_user(struct task_struct *tsk) | |||
701 | 679 | ||
702 | void vtime_user_enter(struct task_struct *tsk) | 680 | void vtime_user_enter(struct task_struct *tsk) |
703 | { | 681 | { |
704 | if (!vtime_accounting_enabled()) | ||
705 | return; | ||
706 | |||
707 | write_seqlock(&tsk->vtime_seqlock); | 682 | write_seqlock(&tsk->vtime_seqlock); |
708 | tsk->vtime_snap_whence = VTIME_USER; | ||
709 | __vtime_account_system(tsk); | 683 | __vtime_account_system(tsk); |
684 | tsk->vtime_snap_whence = VTIME_USER; | ||
710 | write_sequnlock(&tsk->vtime_seqlock); | 685 | write_sequnlock(&tsk->vtime_seqlock); |
711 | } | 686 | } |
712 | 687 | ||
713 | void vtime_guest_enter(struct task_struct *tsk) | 688 | void vtime_guest_enter(struct task_struct *tsk) |
714 | { | 689 | { |
690 | /* | ||
691 | * The flags must be updated under the lock with | ||
692 | * the vtime_snap flush and update. | ||
693 | * That enforces a right ordering and update sequence | ||
694 | * synchronization against the reader (task_gtime()) | ||
695 | * that can thus safely catch up with a tickless delta. | ||
696 | */ | ||
715 | write_seqlock(&tsk->vtime_seqlock); | 697 | write_seqlock(&tsk->vtime_seqlock); |
716 | __vtime_account_system(tsk); | 698 | __vtime_account_system(tsk); |
717 | current->flags |= PF_VCPU; | 699 | current->flags |= PF_VCPU; |
718 | write_sequnlock(&tsk->vtime_seqlock); | 700 | write_sequnlock(&tsk->vtime_seqlock); |
719 | } | 701 | } |
702 | EXPORT_SYMBOL_GPL(vtime_guest_enter); | ||
720 | 703 | ||
721 | void vtime_guest_exit(struct task_struct *tsk) | 704 | void vtime_guest_exit(struct task_struct *tsk) |
722 | { | 705 | { |
@@ -725,6 +708,7 @@ void vtime_guest_exit(struct task_struct *tsk) | |||
725 | current->flags &= ~PF_VCPU; | 708 | current->flags &= ~PF_VCPU; |
726 | write_sequnlock(&tsk->vtime_seqlock); | 709 | write_sequnlock(&tsk->vtime_seqlock); |
727 | } | 710 | } |
711 | EXPORT_SYMBOL_GPL(vtime_guest_exit); | ||
728 | 712 | ||
729 | void vtime_account_idle(struct task_struct *tsk) | 713 | void vtime_account_idle(struct task_struct *tsk) |
730 | { | 714 | { |
@@ -733,11 +717,6 @@ void vtime_account_idle(struct task_struct *tsk) | |||
733 | account_idle_time(delta_cpu); | 717 | account_idle_time(delta_cpu); |
734 | } | 718 | } |
735 | 719 | ||
736 | bool vtime_accounting_enabled(void) | ||
737 | { | ||
738 | return context_tracking_active(); | ||
739 | } | ||
740 | |||
741 | void arch_vtime_task_switch(struct task_struct *prev) | 720 | void arch_vtime_task_switch(struct task_struct *prev) |
742 | { | 721 | { |
743 | write_seqlock(&prev->vtime_seqlock); | 722 | write_seqlock(&prev->vtime_seqlock); |
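Note: the new comment in vtime_guest_enter() refers to the seqlock ordering against readers such as task_gtime(); that reader is not part of this hunk, but it follows the usual read_seqbegin()/read_seqretry() pattern. A hedged model of it, assuming CONFIG_VIRT_CPU_ACCOUNTING_GEN (where vtime_seqlock exists):

#include <linux/sched.h>
#include <linux/seqlock.h>

/* Illustrative reader, modeled on task_gtime(); not from this diff. */
static cputime_t example_read_guest_time(struct task_struct *t)
{
	unsigned int seq;
	cputime_t gtime;

	do {
		seq = read_seqbegin(&t->vtime_seqlock);
		/*
		 * PF_VCPU and the flushed vtime snapshot are published
		 * under the same write_seqlock() in vtime_guest_enter/exit(),
		 * so one retry loop observes a consistent pair.
		 */
		gtime = t->gtime;
	} while (read_seqretry(&t->vtime_seqlock, seq));

	return gtime;
}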
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 3381f098070f..2b62fe86f9ec 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -105,7 +105,6 @@ config NO_HZ_FULL | |||
105 | select RCU_USER_QS | 105 | select RCU_USER_QS |
106 | select RCU_NOCB_CPU | 106 | select RCU_NOCB_CPU |
107 | select VIRT_CPU_ACCOUNTING_GEN | 107 | select VIRT_CPU_ACCOUNTING_GEN |
108 | select CONTEXT_TRACKING_FORCE | ||
109 | select IRQ_WORK | 108 | select IRQ_WORK |
110 | help | 109 | help |
111 | Adaptively try to shutdown the tick whenever possible, even when | 110 | Adaptively try to shutdown the tick whenever possible, even when |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index e8a1516cc0a3..3612fc77f834 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/irq_work.h> | 23 | #include <linux/irq_work.h> |
24 | #include <linux/posix-timers.h> | 24 | #include <linux/posix-timers.h> |
25 | #include <linux/perf_event.h> | 25 | #include <linux/perf_event.h> |
26 | #include <linux/context_tracking.h> | ||
26 | 27 | ||
27 | #include <asm/irq_regs.h> | 28 | #include <asm/irq_regs.h> |
28 | 29 | ||
@@ -148,8 +149,8 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) | |||
148 | } | 149 | } |
149 | 150 | ||
150 | #ifdef CONFIG_NO_HZ_FULL | 151 | #ifdef CONFIG_NO_HZ_FULL |
151 | static cpumask_var_t nohz_full_mask; | 152 | cpumask_var_t tick_nohz_full_mask; |
152 | bool have_nohz_full_mask; | 153 | bool tick_nohz_full_running; |
153 | 154 | ||
154 | static bool can_stop_full_tick(void) | 155 | static bool can_stop_full_tick(void) |
155 | { | 156 | { |
@@ -182,7 +183,7 @@ static bool can_stop_full_tick(void) | |||
182 | * Don't allow the user to think they can get | 183 | * Don't allow the user to think they can get |
183 | * full NO_HZ with this machine. | 184 | * full NO_HZ with this machine. |
184 | */ | 185 | */ |
185 | WARN_ONCE(have_nohz_full_mask, | 186 | WARN_ONCE(tick_nohz_full_running, |
186 | "NO_HZ FULL will not work with unstable sched clock"); | 187 | "NO_HZ FULL will not work with unstable sched clock"); |
187 | return false; | 188 | return false; |
188 | } | 189 | } |
@@ -197,7 +198,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now); | |||
197 | * Re-evaluate the need for the tick on the current CPU | 198 | * Re-evaluate the need for the tick on the current CPU |
198 | * and restart it if necessary. | 199 | * and restart it if necessary. |
199 | */ | 200 | */ |
200 | void tick_nohz_full_check(void) | 201 | void __tick_nohz_full_check(void) |
201 | { | 202 | { |
202 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 203 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
203 | 204 | ||
@@ -211,7 +212,7 @@ void tick_nohz_full_check(void) | |||
211 | 212 | ||
212 | static void nohz_full_kick_work_func(struct irq_work *work) | 213 | static void nohz_full_kick_work_func(struct irq_work *work) |
213 | { | 214 | { |
214 | tick_nohz_full_check(); | 215 | __tick_nohz_full_check(); |
215 | } | 216 | } |
216 | 217 | ||
217 | static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { | 218 | static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { |
@@ -230,7 +231,7 @@ void tick_nohz_full_kick(void) | |||
230 | 231 | ||
231 | static void nohz_full_kick_ipi(void *info) | 232 | static void nohz_full_kick_ipi(void *info) |
232 | { | 233 | { |
233 | tick_nohz_full_check(); | 234 | __tick_nohz_full_check(); |
234 | } | 235 | } |
235 | 236 | ||
236 | /* | 237 | /* |
@@ -239,12 +240,13 @@ static void nohz_full_kick_ipi(void *info) | |||
239 | */ | 240 | */ |
240 | void tick_nohz_full_kick_all(void) | 241 | void tick_nohz_full_kick_all(void) |
241 | { | 242 | { |
242 | if (!have_nohz_full_mask) | 243 | if (!tick_nohz_full_running) |
243 | return; | 244 | return; |
244 | 245 | ||
245 | preempt_disable(); | 246 | preempt_disable(); |
246 | smp_call_function_many(nohz_full_mask, | 247 | smp_call_function_many(tick_nohz_full_mask, |
247 | nohz_full_kick_ipi, NULL, false); | 248 | nohz_full_kick_ipi, NULL, false); |
249 | tick_nohz_full_kick(); | ||
248 | preempt_enable(); | 250 | preempt_enable(); |
249 | } | 251 | } |
250 | 252 | ||
@@ -253,7 +255,7 @@ void tick_nohz_full_kick_all(void) | |||
253 | * It might need the tick due to per task/process properties: | 255 | * It might need the tick due to per task/process properties: |
254 | * perf events, posix cpu timers, ... | 256 | * perf events, posix cpu timers, ... |
255 | */ | 257 | */ |
256 | void tick_nohz_task_switch(struct task_struct *tsk) | 258 | void __tick_nohz_task_switch(struct task_struct *tsk) |
257 | { | 259 | { |
258 | unsigned long flags; | 260 | unsigned long flags; |
259 | 261 | ||
@@ -269,31 +271,23 @@ out: | |||
269 | local_irq_restore(flags); | 271 | local_irq_restore(flags); |
270 | } | 272 | } |
271 | 273 | ||
272 | int tick_nohz_full_cpu(int cpu) | ||
273 | { | ||
274 | if (!have_nohz_full_mask) | ||
275 | return 0; | ||
276 | |||
277 | return cpumask_test_cpu(cpu, nohz_full_mask); | ||
278 | } | ||
279 | |||
280 | /* Parse the boot-time nohz CPU list from the kernel parameters. */ | 274 | /* Parse the boot-time nohz CPU list from the kernel parameters. */ |
281 | static int __init tick_nohz_full_setup(char *str) | 275 | static int __init tick_nohz_full_setup(char *str) |
282 | { | 276 | { |
283 | int cpu; | 277 | int cpu; |
284 | 278 | ||
285 | alloc_bootmem_cpumask_var(&nohz_full_mask); | 279 | alloc_bootmem_cpumask_var(&tick_nohz_full_mask); |
286 | if (cpulist_parse(str, nohz_full_mask) < 0) { | 280 | if (cpulist_parse(str, tick_nohz_full_mask) < 0) { |
287 | pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); | 281 | pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); |
288 | return 1; | 282 | return 1; |
289 | } | 283 | } |
290 | 284 | ||
291 | cpu = smp_processor_id(); | 285 | cpu = smp_processor_id(); |
292 | if (cpumask_test_cpu(cpu, nohz_full_mask)) { | 286 | if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) { |
293 | pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); | 287 | pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); |
294 | cpumask_clear_cpu(cpu, nohz_full_mask); | 288 | cpumask_clear_cpu(cpu, tick_nohz_full_mask); |
295 | } | 289 | } |
296 | have_nohz_full_mask = true; | 290 | tick_nohz_full_running = true; |
297 | 291 | ||
298 | return 1; | 292 | return 1; |
299 | } | 293 | } |
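For reference, tick_nohz_full_setup() above is an early command-line parser; it is presumably registered elsewhere in this file (unchanged by the patch) and fed from the boot command line:

	__setup("nohz_full=", tick_nohz_full_setup);

	/* Illustrative usage: booting with nohz_full=1-7 keeps CPUs 1-7
	 * tickless while the boot CPU is cleared from the mask and kept
	 * for timekeeping, as the warning above indicates. */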
@@ -311,7 +305,7 @@ static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, | |||
311 | * If we handle the timekeeping duty for full dynticks CPUs, | 305 | * If we handle the timekeeping duty for full dynticks CPUs, |
312 | * we can't safely shutdown that CPU. | 306 | * we can't safely shutdown that CPU. |
313 | */ | 307 | */ |
314 | if (have_nohz_full_mask && tick_do_timer_cpu == cpu) | 308 | if (tick_nohz_full_running && tick_do_timer_cpu == cpu) |
315 | return NOTIFY_BAD; | 309 | return NOTIFY_BAD; |
316 | break; | 310 | break; |
317 | } | 311 | } |
@@ -330,31 +324,34 @@ static int tick_nohz_init_all(void) | |||
330 | int err = -1; | 324 | int err = -1; |
331 | 325 | ||
332 | #ifdef CONFIG_NO_HZ_FULL_ALL | 326 | #ifdef CONFIG_NO_HZ_FULL_ALL |
333 | if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) { | 327 | if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) { |
334 | pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); | 328 | pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); |
335 | return err; | 329 | return err; |
336 | } | 330 | } |
337 | err = 0; | 331 | err = 0; |
338 | cpumask_setall(nohz_full_mask); | 332 | cpumask_setall(tick_nohz_full_mask); |
339 | cpumask_clear_cpu(smp_processor_id(), nohz_full_mask); | 333 | cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask); |
340 | have_nohz_full_mask = true; | 334 | tick_nohz_full_running = true; |
341 | #endif | 335 | #endif |
342 | return err; | 336 | return err; |
343 | } | 337 | } |
344 | 338 | ||
345 | void __init tick_nohz_init(void) | 339 | void __init tick_nohz_init(void) |
346 | { | 340 | { |
347 | if (!have_nohz_full_mask) { | 341 | int cpu; |
342 | |||
343 | if (!tick_nohz_full_running) { | ||
348 | if (tick_nohz_init_all() < 0) | 344 | if (tick_nohz_init_all() < 0) |
349 | return; | 345 | return; |
350 | } | 346 | } |
351 | 347 | ||
348 | for_each_cpu(cpu, tick_nohz_full_mask) | ||
349 | context_tracking_cpu_set(cpu); | ||
350 | |||
352 | cpu_notifier(tick_nohz_cpu_down_callback, 0); | 351 | cpu_notifier(tick_nohz_cpu_down_callback, 0); |
353 | cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); | 352 | cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), tick_nohz_full_mask); |
354 | pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); | 353 | pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); |
355 | } | 354 | } |
356 | #else | ||
357 | #define have_nohz_full_mask (0) | ||
358 | #endif | 355 | #endif |
359 | 356 | ||
360 | /* | 357 | /* |
@@ -732,7 +729,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) | |||
732 | return false; | 729 | return false; |
733 | } | 730 | } |
734 | 731 | ||
735 | if (have_nohz_full_mask) { | 732 | if (tick_nohz_full_enabled()) { |
736 | /* | 733 | /* |
737 | * Keep the tick alive to guarantee timekeeping progression | 734 | * Keep the tick alive to guarantee timekeeping progression |
738 | * if there are full dynticks CPUs around | 735 | * if there are full dynticks CPUs around |
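The check above now goes through tick_nohz_full_enabled() rather than reading the raw flag, and the out-of-line tick_nohz_full_cpu() removed earlier is expected to reappear as an inline helper in include/linux/tick.h. A sketch of the assumed shape of those helpers, gated by the context tracking static key introduced in this series (the gate's exact name is an assumption):

	extern cpumask_var_t tick_nohz_full_mask;
	extern bool tick_nohz_full_running;

	/* Sketch: cheap checks usable from hot paths such as the idle
	 * tick-stop decision above. */
	static inline bool tick_nohz_full_enabled(void)
	{
		if (!context_tracking_is_enabled())
			return false;
		return tick_nohz_full_running;
	}

	static inline bool tick_nohz_full_cpu(int cpu)
	{
		if (!tick_nohz_full_enabled())
			return false;
		return cpumask_test_cpu(cpu, tick_nohz_full_mask);
	}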