Diffstat:
 -rw-r--r--  arch/ia64/include/asm/Kbuild              |   1
 -rw-r--r--  arch/m68k/include/asm/irqflags.h          |   2
 -rw-r--r--  arch/powerpc/include/asm/Kbuild           |   1
 -rw-r--r--  arch/s390/include/asm/cputime.h           |   3
 -rw-r--r--  arch/s390/include/asm/vtime.h             |   7
 -rw-r--r--  arch/s390/kernel/vtime.c                  |   1
 -rw-r--r--  include/asm-generic/vtime.h               |   0
 -rw-r--r--  include/linux/context_tracking.h          | 128
 -rw-r--r--  include/linux/context_tracking_state.h    |  39
 -rw-r--r--  include/linux/hardirq.h                   | 117
 -rw-r--r--  include/linux/preempt_mask.h              | 122
 -rw-r--r--  include/linux/tick.h                      |  45
 -rw-r--r--  include/linux/vtime.h                     |  74
 -rw-r--r--  include/trace/events/context_tracking.h   |  58
 -rw-r--r--  init/Kconfig                              |  28
 -rw-r--r--  init/main.c                               |   2
 -rw-r--r--  kernel/context_tracking.c                 | 125
 -rw-r--r--  kernel/sched/core.c                       |   4
 -rw-r--r--  kernel/sched/cputime.c                    |  53
 -rw-r--r--  kernel/time/Kconfig                       |   1
 -rw-r--r--  kernel/time/tick-sched.c                  |  61
 21 files changed, 545 insertions(+), 327 deletions(-)
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 05b03ecd7933..a3456f34f672 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += clkdev.h
3generic-y += exec.h 3generic-y += exec.h
4generic-y += kvm_para.h 4generic-y += kvm_para.h
5generic-y += trace_clock.h 5generic-y += trace_clock.h
6generic-y += vtime.h \ No newline at end of file
diff --git a/arch/m68k/include/asm/irqflags.h b/arch/m68k/include/asm/irqflags.h
index 5053092b369f..a823cd73dc09 100644
--- a/arch/m68k/include/asm/irqflags.h
+++ b/arch/m68k/include/asm/irqflags.h
@@ -3,7 +3,7 @@
3 3
4#include <linux/types.h> 4#include <linux/types.h>
5#ifdef CONFIG_MMU 5#ifdef CONFIG_MMU
6#include <linux/hardirq.h> 6#include <linux/preempt_mask.h>
7#endif 7#endif
8#include <linux/preempt.h> 8#include <linux/preempt.h>
9#include <asm/thread_info.h> 9#include <asm/thread_info.h>
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 650757c300db..704e6f10ae80 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -2,3 +2,4 @@
2generic-y += clkdev.h 2generic-y += clkdev.h
3generic-y += rwsem.h 3generic-y += rwsem.h
4generic-y += trace_clock.h 4generic-y += trace_clock.h
5generic-y += vtime.h \ No newline at end of file
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index d2ff41370c0c..f65bd3634519 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -13,9 +13,6 @@
13#include <asm/div64.h> 13#include <asm/div64.h>
14 14
15 15
16#define __ARCH_HAS_VTIME_ACCOUNT
17#define __ARCH_HAS_VTIME_TASK_SWITCH
18
19/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ 16/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
20 17
21typedef unsigned long long __nocast cputime_t; 18typedef unsigned long long __nocast cputime_t;
diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h
new file mode 100644
index 000000000000..af9896c53eb3
--- /dev/null
+++ b/arch/s390/include/asm/vtime.h
@@ -0,0 +1,7 @@
1#ifndef _S390_VTIME_H
2#define _S390_VTIME_H
3
4#define __ARCH_HAS_VTIME_ACCOUNT
5#define __ARCH_HAS_VTIME_TASK_SWITCH
6
7#endif /* _S390_VTIME_H */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 9b9c1b78ec67..abcfab55f99b 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -19,6 +19,7 @@
19#include <asm/irq_regs.h> 19#include <asm/irq_regs.h>
20#include <asm/cputime.h> 20#include <asm/cputime.h>
21#include <asm/vtimer.h> 21#include <asm/vtimer.h>
22#include <asm/vtime.h>
22#include <asm/irq.h> 23#include <asm/irq.h>
23#include "entry.h" 24#include "entry.h"
24 25
diff --git a/include/asm-generic/vtime.h b/include/asm-generic/vtime.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/include/asm-generic/vtime.h
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index fc09d7b0dacf..158158704c30 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -2,100 +2,110 @@
2#define _LINUX_CONTEXT_TRACKING_H 2#define _LINUX_CONTEXT_TRACKING_H
3 3
4#include <linux/sched.h> 4#include <linux/sched.h>
5#include <linux/percpu.h>
6#include <linux/vtime.h> 5#include <linux/vtime.h>
6#include <linux/context_tracking_state.h>
7#include <asm/ptrace.h> 7#include <asm/ptrace.h>
8 8
9struct context_tracking {
10 /*
11 * When active is false, probes are unset in order
12 * to minimize overhead: TIF flags are cleared
13 * and calls to user_enter/exit are ignored. This
14 * may be further optimized using static keys.
15 */
16 bool active;
17 enum ctx_state {
18 IN_KERNEL = 0,
19 IN_USER,
20 } state;
21};
22
23static inline void __guest_enter(void)
24{
25 /*
26 * This is running in ioctl context so we can avoid
27 * the call to vtime_account() with its unnecessary idle check.
28 */
29 vtime_account_system(current);
30 current->flags |= PF_VCPU;
31}
32
33static inline void __guest_exit(void)
34{
35 /*
36 * This is running in ioctl context so we can avoid
37 * the call to vtime_account() with its unnecessary idle check.
38 */
39 vtime_account_system(current);
40 current->flags &= ~PF_VCPU;
41}
42 9
43#ifdef CONFIG_CONTEXT_TRACKING 10#ifdef CONFIG_CONTEXT_TRACKING
44DECLARE_PER_CPU(struct context_tracking, context_tracking); 11extern void context_tracking_cpu_set(int cpu);
45 12
46static inline bool context_tracking_in_user(void) 13extern void context_tracking_user_enter(void);
14extern void context_tracking_user_exit(void);
15extern void __context_tracking_task_switch(struct task_struct *prev,
16 struct task_struct *next);
17
18static inline void user_enter(void)
47{ 19{
48 return __this_cpu_read(context_tracking.state) == IN_USER; 20 if (static_key_false(&context_tracking_enabled))
49} 21 context_tracking_user_enter();
50 22
51static inline bool context_tracking_active(void) 23}
24static inline void user_exit(void)
52{ 25{
53 return __this_cpu_read(context_tracking.active); 26 if (static_key_false(&context_tracking_enabled))
27 context_tracking_user_exit();
54} 28}
55 29
56extern void user_enter(void);
57extern void user_exit(void);
58
59extern void guest_enter(void);
60extern void guest_exit(void);
61
62static inline enum ctx_state exception_enter(void) 30static inline enum ctx_state exception_enter(void)
63{ 31{
64 enum ctx_state prev_ctx; 32 enum ctx_state prev_ctx;
65 33
34 if (!static_key_false(&context_tracking_enabled))
35 return 0;
36
66 prev_ctx = this_cpu_read(context_tracking.state); 37 prev_ctx = this_cpu_read(context_tracking.state);
67 user_exit(); 38 context_tracking_user_exit();
68 39
69 return prev_ctx; 40 return prev_ctx;
70} 41}
71 42
72static inline void exception_exit(enum ctx_state prev_ctx) 43static inline void exception_exit(enum ctx_state prev_ctx)
73{ 44{
74 if (prev_ctx == IN_USER) 45 if (static_key_false(&context_tracking_enabled)) {
75 user_enter(); 46 if (prev_ctx == IN_USER)
47 context_tracking_user_enter();
48 }
76} 49}
77 50
78extern void context_tracking_task_switch(struct task_struct *prev, 51static inline void context_tracking_task_switch(struct task_struct *prev,
79 struct task_struct *next); 52 struct task_struct *next)
53{
54 if (static_key_false(&context_tracking_enabled))
55 __context_tracking_task_switch(prev, next);
56}
80#else 57#else
81static inline bool context_tracking_in_user(void) { return false; }
82static inline void user_enter(void) { } 58static inline void user_enter(void) { }
83static inline void user_exit(void) { } 59static inline void user_exit(void) { }
60static inline enum ctx_state exception_enter(void) { return 0; }
61static inline void exception_exit(enum ctx_state prev_ctx) { }
62static inline void context_tracking_task_switch(struct task_struct *prev,
63 struct task_struct *next) { }
64#endif /* !CONFIG_CONTEXT_TRACKING */
65
66
67#ifdef CONFIG_CONTEXT_TRACKING_FORCE
68extern void context_tracking_init(void);
69#else
70static inline void context_tracking_init(void) { }
71#endif /* CONFIG_CONTEXT_TRACKING_FORCE */
72
84 73
74#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
85static inline void guest_enter(void) 75static inline void guest_enter(void)
86{ 76{
87 __guest_enter(); 77 if (vtime_accounting_enabled())
78 vtime_guest_enter(current);
79 else
80 current->flags |= PF_VCPU;
88} 81}
89 82
90static inline void guest_exit(void) 83static inline void guest_exit(void)
91{ 84{
92 __guest_exit(); 85 if (vtime_accounting_enabled())
86 vtime_guest_exit(current);
87 else
88 current->flags &= ~PF_VCPU;
93} 89}
94 90
95static inline enum ctx_state exception_enter(void) { return 0; } 91#else
96static inline void exception_exit(enum ctx_state prev_ctx) { } 92static inline void guest_enter(void)
97static inline void context_tracking_task_switch(struct task_struct *prev, 93{
98 struct task_struct *next) { } 94 /*
 99#endif /* !CONFIG_CONTEXT_TRACKING */ 95 * This is running in ioctl context so it's safe
96 * to assume that it's the stime pending cputime
97 * to flush.
98 */
99 vtime_account_system(current);
100 current->flags |= PF_VCPU;
101}
102
103static inline void guest_exit(void)
104{
105 /* Flush the guest cputime we spent on the guest */
106 vtime_account_system(current);
107 current->flags &= ~PF_VCPU;
108}
109#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
100 110
101#endif 111#endif
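
The exception_enter()/exception_exit() pair above is meant to bracket architecture exception handlers, so a handler remembers which context it interrupted even if it sleeps and the task migrates before returning. A minimal sketch of such a caller follows; do_example_fault() and handle_the_fault() are hypothetical names, not part of this patch.

	#include <linux/context_tracking.h>

	extern void handle_the_fault(struct pt_regs *regs);

	void do_example_fault(struct pt_regs *regs)
	{
		enum ctx_state prev_state;

		prev_state = exception_enter();	/* record that we may have interrupted userspace */
		handle_the_fault(regs);		/* regular kernel work; may sleep and migrate */
		exception_exit(prev_state);	/* restore the context we interrupted, if any */
	}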
diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
new file mode 100644
index 000000000000..0f1979d0674f
--- /dev/null
+++ b/include/linux/context_tracking_state.h
@@ -0,0 +1,39 @@
1#ifndef _LINUX_CONTEXT_TRACKING_STATE_H
2#define _LINUX_CONTEXT_TRACKING_STATE_H
3
4#include <linux/percpu.h>
5#include <linux/static_key.h>
6
7struct context_tracking {
8 /*
9 * When active is false, probes are unset in order
10 * to minimize overhead: TIF flags are cleared
11 * and calls to user_enter/exit are ignored. This
12 * may be further optimized using static keys.
13 */
14 bool active;
15 enum ctx_state {
16 IN_KERNEL = 0,
17 IN_USER,
18 } state;
19};
20
21#ifdef CONFIG_CONTEXT_TRACKING
22extern struct static_key context_tracking_enabled;
23DECLARE_PER_CPU(struct context_tracking, context_tracking);
24
25static inline bool context_tracking_in_user(void)
26{
27 return __this_cpu_read(context_tracking.state) == IN_USER;
28}
29
30static inline bool context_tracking_active(void)
31{
32 return __this_cpu_read(context_tracking.active);
33}
34#else
35static inline bool context_tracking_in_user(void) { return false; }
36static inline bool context_tracking_active(void) { return false; }
37#endif /* CONFIG_CONTEXT_TRACKING */
38
39#endif
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 05bcc0903766..ccfe17c5c8da 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -1,126 +1,11 @@
1#ifndef LINUX_HARDIRQ_H 1#ifndef LINUX_HARDIRQ_H
2#define LINUX_HARDIRQ_H 2#define LINUX_HARDIRQ_H
3 3
4#include <linux/preempt.h> 4#include <linux/preempt_mask.h>
5#include <linux/lockdep.h> 5#include <linux/lockdep.h>
6#include <linux/ftrace_irq.h> 6#include <linux/ftrace_irq.h>
7#include <linux/vtime.h> 7#include <linux/vtime.h>
8#include <asm/hardirq.h>
9 8
10/*
11 * We put the hardirq and softirq counter into the preemption
12 * counter. The bitmask has the following meaning:
13 *
14 * - bits 0-7 are the preemption count (max preemption depth: 256)
15 * - bits 8-15 are the softirq count (max # of softirqs: 256)
16 *
17 * The hardirq count can in theory reach the same as NR_IRQS.
18 * In reality, the number of nested IRQS is limited to the stack
19 * size as well. For archs with over 1000 IRQS it is not practical
20 * to expect that they will all nest. We give a max of 10 bits for
21 * hardirq nesting. An arch may choose to give less than 10 bits.
22 * m68k expects it to be 8.
23 *
24 * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
25 * - bit 26 is the NMI_MASK
26 * - bit 27 is the PREEMPT_ACTIVE flag
27 *
28 * PREEMPT_MASK: 0x000000ff
29 * SOFTIRQ_MASK: 0x0000ff00
30 * HARDIRQ_MASK: 0x03ff0000
31 * NMI_MASK: 0x04000000
32 */
33#define PREEMPT_BITS 8
34#define SOFTIRQ_BITS 8
35#define NMI_BITS 1
36
37#define MAX_HARDIRQ_BITS 10
38
39#ifndef HARDIRQ_BITS
40# define HARDIRQ_BITS MAX_HARDIRQ_BITS
41#endif
42
43#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
44#error HARDIRQ_BITS too high!
45#endif
46
47#define PREEMPT_SHIFT 0
48#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
49#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
50#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)
51
52#define __IRQ_MASK(x) ((1UL << (x))-1)
53
54#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
55#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
56#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
57#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
58
59#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
60#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
61#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
62#define NMI_OFFSET (1UL << NMI_SHIFT)
63
64#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
65
66#ifndef PREEMPT_ACTIVE
67#define PREEMPT_ACTIVE_BITS 1
68#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS)
69#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
70#endif
71
72#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
73#error PREEMPT_ACTIVE is too low!
74#endif
75
76#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
77#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
78#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
79 | NMI_MASK))
80
81/*
82 * Are we doing bottom half or hardware interrupt processing?
83 * Are we in a softirq context? Interrupt context?
84 * in_softirq - Are we currently processing softirq or have bh disabled?
85 * in_serving_softirq - Are we currently processing softirq?
86 */
87#define in_irq() (hardirq_count())
88#define in_softirq() (softirq_count())
89#define in_interrupt() (irq_count())
90#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
91
92/*
93 * Are we in NMI context?
94 */
95#define in_nmi() (preempt_count() & NMI_MASK)
96
97#if defined(CONFIG_PREEMPT_COUNT)
98# define PREEMPT_CHECK_OFFSET 1
99#else
100# define PREEMPT_CHECK_OFFSET 0
101#endif
102
103/*
104 * Are we running in atomic context? WARNING: this macro cannot
105 * always detect atomic context; in particular, it cannot know about
106 * held spinlocks in non-preemptible kernels. Thus it should not be
107 * used in the general case to determine whether sleeping is possible.
108 * Do not use in_atomic() in driver code.
109 */
110#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0)
111
112/*
113 * Check whether we were atomic before we did preempt_disable():
114 * (used by the scheduler, *after* releasing the kernel lock)
115 */
116#define in_atomic_preempt_off() \
117 ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
118
119#ifdef CONFIG_PREEMPT_COUNT
120# define preemptible() (preempt_count() == 0 && !irqs_disabled())
121#else
122# define preemptible() 0
123#endif
124 9
125#if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS) 10#if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS)
126extern void synchronize_irq(unsigned int irq); 11extern void synchronize_irq(unsigned int irq);
diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h
new file mode 100644
index 000000000000..931bc616219f
--- /dev/null
+++ b/include/linux/preempt_mask.h
@@ -0,0 +1,122 @@
1#ifndef LINUX_PREEMPT_MASK_H
2#define LINUX_PREEMPT_MASK_H
3
4#include <linux/preempt.h>
5#include <asm/hardirq.h>
6
7/*
8 * We put the hardirq and softirq counter into the preemption
9 * counter. The bitmask has the following meaning:
10 *
11 * - bits 0-7 are the preemption count (max preemption depth: 256)
12 * - bits 8-15 are the softirq count (max # of softirqs: 256)
13 *
14 * The hardirq count can in theory reach the same as NR_IRQS.
15 * In reality, the number of nested IRQS is limited to the stack
16 * size as well. For archs with over 1000 IRQS it is not practical
17 * to expect that they will all nest. We give a max of 10 bits for
18 * hardirq nesting. An arch may choose to give less than 10 bits.
19 * m68k expects it to be 8.
20 *
21 * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
22 * - bit 26 is the NMI_MASK
23 * - bit 27 is the PREEMPT_ACTIVE flag
24 *
25 * PREEMPT_MASK: 0x000000ff
26 * SOFTIRQ_MASK: 0x0000ff00
27 * HARDIRQ_MASK: 0x03ff0000
28 * NMI_MASK: 0x04000000
29 */
30#define PREEMPT_BITS 8
31#define SOFTIRQ_BITS 8
32#define NMI_BITS 1
33
34#define MAX_HARDIRQ_BITS 10
35
36#ifndef HARDIRQ_BITS
37# define HARDIRQ_BITS MAX_HARDIRQ_BITS
38#endif
39
40#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
41#error HARDIRQ_BITS too high!
42#endif
43
44#define PREEMPT_SHIFT 0
45#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
46#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
47#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)
48
49#define __IRQ_MASK(x) ((1UL << (x))-1)
50
51#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
52#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
53#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
54#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
55
56#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
57#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
58#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
59#define NMI_OFFSET (1UL << NMI_SHIFT)
60
61#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
62
63#ifndef PREEMPT_ACTIVE
64#define PREEMPT_ACTIVE_BITS 1
65#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS)
66#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
67#endif
68
69#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
70#error PREEMPT_ACTIVE is too low!
71#endif
72
73#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
74#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
75#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
76 | NMI_MASK))
77
78/*
79 * Are we doing bottom half or hardware interrupt processing?
80 * Are we in a softirq context? Interrupt context?
81 * in_softirq - Are we currently processing softirq or have bh disabled?
82 * in_serving_softirq - Are we currently processing softirq?
83 */
84#define in_irq() (hardirq_count())
85#define in_softirq() (softirq_count())
86#define in_interrupt() (irq_count())
87#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
88
89/*
90 * Are we in NMI context?
91 */
92#define in_nmi() (preempt_count() & NMI_MASK)
93
94#if defined(CONFIG_PREEMPT_COUNT)
95# define PREEMPT_CHECK_OFFSET 1
96#else
97# define PREEMPT_CHECK_OFFSET 0
98#endif
99
100/*
101 * Are we running in atomic context? WARNING: this macro cannot
102 * always detect atomic context; in particular, it cannot know about
103 * held spinlocks in non-preemptible kernels. Thus it should not be
104 * used in the general case to determine whether sleeping is possible.
105 * Do not use in_atomic() in driver code.
106 */
107#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0)
108
109/*
110 * Check whether we were atomic before we did preempt_disable():
111 * (used by the scheduler, *after* releasing the kernel lock)
112 */
113#define in_atomic_preempt_off() \
114 ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
115
116#ifdef CONFIG_PREEMPT_COUNT
117# define preemptible() (preempt_count() == 0 && !irqs_disabled())
118#else
119# define preemptible() 0
120#endif
121
122#endif /* LINUX_PREEMPT_MASK_H */
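
As a worked example of the bit layout this header carries over verbatim from hardirq.h (illustrative only, assuming the default HARDIRQ_BITS of 10):

	/*
	 * preempt_count() == 0x00010102 decodes as:
	 *   preempt depth   = 0x00010102 & PREEMPT_MASK = 2       (two preempt_disable() levels)
	 *   softirq_count() = 0x00010102 & SOFTIRQ_MASK = 0x100   (one softirq/bh-disable level)
	 *   hardirq_count() = 0x00010102 & HARDIRQ_MASK = 0x10000 (one nested hardirq)
	 * so in_irq(), in_softirq(), in_interrupt() and in_atomic() all evaluate true,
	 * while in_nmi() is false because the NMI bit (bit 26) is clear.
	 */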
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 62bd8b72873c..5128d33bbb39 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -10,6 +10,8 @@
10#include <linux/irqflags.h> 10#include <linux/irqflags.h>
11#include <linux/percpu.h> 11#include <linux/percpu.h>
12#include <linux/hrtimer.h> 12#include <linux/hrtimer.h>
13#include <linux/context_tracking_state.h>
14#include <linux/cpumask.h>
13 15
14#ifdef CONFIG_GENERIC_CLOCKEVENTS 16#ifdef CONFIG_GENERIC_CLOCKEVENTS
15 17
@@ -158,20 +160,51 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
158# endif /* !CONFIG_NO_HZ_COMMON */ 160# endif /* !CONFIG_NO_HZ_COMMON */
159 161
160#ifdef CONFIG_NO_HZ_FULL 162#ifdef CONFIG_NO_HZ_FULL
163extern bool tick_nohz_full_running;
164extern cpumask_var_t tick_nohz_full_mask;
165
166static inline bool tick_nohz_full_enabled(void)
167{
168 if (!static_key_false(&context_tracking_enabled))
169 return false;
170
171 return tick_nohz_full_running;
172}
173
174static inline bool tick_nohz_full_cpu(int cpu)
175{
176 if (!tick_nohz_full_enabled())
177 return false;
178
179 return cpumask_test_cpu(cpu, tick_nohz_full_mask);
180}
181
161extern void tick_nohz_init(void); 182extern void tick_nohz_init(void);
162extern int tick_nohz_full_cpu(int cpu); 183extern void __tick_nohz_full_check(void);
163extern void tick_nohz_full_check(void);
164extern void tick_nohz_full_kick(void); 184extern void tick_nohz_full_kick(void);
165extern void tick_nohz_full_kick_all(void); 185extern void tick_nohz_full_kick_all(void);
166extern void tick_nohz_task_switch(struct task_struct *tsk); 186extern void __tick_nohz_task_switch(struct task_struct *tsk);
167#else 187#else
168static inline void tick_nohz_init(void) { } 188static inline void tick_nohz_init(void) { }
169static inline int tick_nohz_full_cpu(int cpu) { return 0; } 189static inline bool tick_nohz_full_enabled(void) { return false; }
170static inline void tick_nohz_full_check(void) { } 190static inline bool tick_nohz_full_cpu(int cpu) { return false; }
191static inline void __tick_nohz_full_check(void) { }
171static inline void tick_nohz_full_kick(void) { } 192static inline void tick_nohz_full_kick(void) { }
172static inline void tick_nohz_full_kick_all(void) { } 193static inline void tick_nohz_full_kick_all(void) { }
173static inline void tick_nohz_task_switch(struct task_struct *tsk) { } 194static inline void __tick_nohz_task_switch(struct task_struct *tsk) { }
174#endif 195#endif
175 196
197static inline void tick_nohz_full_check(void)
198{
199 if (tick_nohz_full_enabled())
200 __tick_nohz_full_check();
201}
202
203static inline void tick_nohz_task_switch(struct task_struct *tsk)
204{
205 if (tick_nohz_full_enabled())
206 __tick_nohz_task_switch(tsk);
207}
208
176 209
177#endif 210#endif
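
A hedged sketch of how a hot path is expected to use the reworked helpers (example_resched_hook() is a hypothetical caller, not from this patch): tick_nohz_full_cpu() now bails out through the static-key-backed tick_nohz_full_enabled() check before it ever touches the cpumask, so kernels booted without nohz_full= only pay a patched-out branch.

	#include <linux/smp.h>
	#include <linux/tick.h>

	static void example_resched_hook(void)
	{
		/* Folded to a no-op branch while the context tracking key is disabled. */
		if (tick_nohz_full_cpu(smp_processor_id()))
			tick_nohz_full_kick();	/* ask this CPU to re-evaluate stopping its tick */
	}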
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index b1dd2db80076..f5b72b364bda 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -1,18 +1,68 @@
1#ifndef _LINUX_KERNEL_VTIME_H 1#ifndef _LINUX_KERNEL_VTIME_H
2#define _LINUX_KERNEL_VTIME_H 2#define _LINUX_KERNEL_VTIME_H
3 3
4#include <linux/context_tracking_state.h>
5#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
6#include <asm/vtime.h>
7#endif
8
9
4struct task_struct; 10struct task_struct;
5 11
12/*
13 * vtime_accounting_enabled() definitions/declarations
14 */
15#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
16static inline bool vtime_accounting_enabled(void) { return true; }
17#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
18
19#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
20static inline bool vtime_accounting_enabled(void)
21{
22 if (static_key_false(&context_tracking_enabled)) {
23 if (context_tracking_active())
24 return true;
25 }
26
27 return false;
28}
29#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
30
31#ifndef CONFIG_VIRT_CPU_ACCOUNTING
32static inline bool vtime_accounting_enabled(void) { return false; }
33#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
34
35
36/*
37 * Common vtime APIs
38 */
6#ifdef CONFIG_VIRT_CPU_ACCOUNTING 39#ifdef CONFIG_VIRT_CPU_ACCOUNTING
40
41#ifdef __ARCH_HAS_VTIME_TASK_SWITCH
7extern void vtime_task_switch(struct task_struct *prev); 42extern void vtime_task_switch(struct task_struct *prev);
43#else
44extern void vtime_common_task_switch(struct task_struct *prev);
45static inline void vtime_task_switch(struct task_struct *prev)
46{
47 if (vtime_accounting_enabled())
48 vtime_common_task_switch(prev);
49}
50#endif /* __ARCH_HAS_VTIME_TASK_SWITCH */
51
8extern void vtime_account_system(struct task_struct *tsk); 52extern void vtime_account_system(struct task_struct *tsk);
9extern void vtime_account_idle(struct task_struct *tsk); 53extern void vtime_account_idle(struct task_struct *tsk);
10extern void vtime_account_user(struct task_struct *tsk); 54extern void vtime_account_user(struct task_struct *tsk);
11extern void vtime_account_irq_enter(struct task_struct *tsk);
12 55
13#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 56#ifdef __ARCH_HAS_VTIME_ACCOUNT
14static inline bool vtime_accounting_enabled(void) { return true; } 57extern void vtime_account_irq_enter(struct task_struct *tsk);
15#endif 58#else
59extern void vtime_common_account_irq_enter(struct task_struct *tsk);
60static inline void vtime_account_irq_enter(struct task_struct *tsk)
61{
62 if (vtime_accounting_enabled())
63 vtime_common_account_irq_enter(tsk);
64}
65#endif /* __ARCH_HAS_VTIME_ACCOUNT */
16 66
17#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ 67#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
18 68
@@ -20,14 +70,20 @@ static inline void vtime_task_switch(struct task_struct *prev) { }
20static inline void vtime_account_system(struct task_struct *tsk) { } 70static inline void vtime_account_system(struct task_struct *tsk) { }
21static inline void vtime_account_user(struct task_struct *tsk) { } 71static inline void vtime_account_user(struct task_struct *tsk) { }
22static inline void vtime_account_irq_enter(struct task_struct *tsk) { } 72static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
23static inline bool vtime_accounting_enabled(void) { return false; } 73#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
24#endif
25 74
26#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN 75#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
27extern void arch_vtime_task_switch(struct task_struct *tsk); 76extern void arch_vtime_task_switch(struct task_struct *tsk);
28extern void vtime_account_irq_exit(struct task_struct *tsk); 77extern void vtime_gen_account_irq_exit(struct task_struct *tsk);
29extern bool vtime_accounting_enabled(void); 78
79static inline void vtime_account_irq_exit(struct task_struct *tsk)
80{
81 if (vtime_accounting_enabled())
82 vtime_gen_account_irq_exit(tsk);
83}
84
30extern void vtime_user_enter(struct task_struct *tsk); 85extern void vtime_user_enter(struct task_struct *tsk);
86
31static inline void vtime_user_exit(struct task_struct *tsk) 87static inline void vtime_user_exit(struct task_struct *tsk)
32{ 88{
33 vtime_account_user(tsk); 89 vtime_account_user(tsk);
@@ -35,7 +91,7 @@ static inline void vtime_user_exit(struct task_struct *tsk)
35extern void vtime_guest_enter(struct task_struct *tsk); 91extern void vtime_guest_enter(struct task_struct *tsk);
36extern void vtime_guest_exit(struct task_struct *tsk); 92extern void vtime_guest_exit(struct task_struct *tsk);
37extern void vtime_init_idle(struct task_struct *tsk, int cpu); 93extern void vtime_init_idle(struct task_struct *tsk, int cpu);
38#else 94#else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */
39static inline void vtime_account_irq_exit(struct task_struct *tsk) 95static inline void vtime_account_irq_exit(struct task_struct *tsk)
40{ 96{
41 /* On hard|softirq exit we always account to hard|softirq cputime */ 97 /* On hard|softirq exit we always account to hard|softirq cputime */
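
For orientation, a simplified sketch (not code from this patch; the wrapper name is hypothetical) of where the irq hooks rearranged above sit: the generic irq entry/exit paths call them around the handler, and the new static-key-gated inline wrappers are what keep those calls cheap when no CPU does tickless accounting.

	#include <linux/sched.h>
	#include <linux/vtime.h>

	static void example_irq_path(struct task_struct *tsk)
	{
		vtime_account_irq_enter(tsk);	/* flush the time accrued up to the interrupt */
		/* ... run the hardirq/softirq handler here ... */
		vtime_account_irq_exit(tsk);	/* charge the interrupt time that was just spent */
	}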
diff --git a/include/trace/events/context_tracking.h b/include/trace/events/context_tracking.h
new file mode 100644
index 000000000000..ce8007cf29cf
--- /dev/null
+++ b/include/trace/events/context_tracking.h
@@ -0,0 +1,58 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM context_tracking
3
4#if !defined(_TRACE_CONTEXT_TRACKING_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_CONTEXT_TRACKING_H
6
7#include <linux/tracepoint.h>
8
9DECLARE_EVENT_CLASS(context_tracking_user,
10
11 TP_PROTO(int dummy),
12
13 TP_ARGS(dummy),
14
15 TP_STRUCT__entry(
16 __field( int, dummy )
17 ),
18
19 TP_fast_assign(
20 __entry->dummy = dummy;
21 ),
22
23 TP_printk("%s", "")
24);
25
26/**
27 * user_enter - called when the kernel resumes to userspace
28 * @dummy: dummy arg to make trace event macro happy
29 *
30 * This event occurs when the kernel resumes to userspace after
31 * an exception or a syscall.
32 */
33DEFINE_EVENT(context_tracking_user, user_enter,
34
35 TP_PROTO(int dummy),
36
37 TP_ARGS(dummy)
38);
39
40/**
41 * user_exit - called when userspace enters the kernel
42 * @dummy: dummy arg to make trace event macro happy
43 *
44 * This event occurs when userspace enters the kernel through
45 * an exception or a syscall.
46 */
47DEFINE_EVENT(context_tracking_user, user_exit,
48
49 TP_PROTO(int dummy),
50
51 TP_ARGS(dummy)
52);
53
54
55#endif /* _TRACE_CONTEXT_TRACKING_H */
56
57/* This part must be outside protection */
58#include <trace/define_trace.h>
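
As with any tracepoint header, exactly one translation unit must instantiate these events; the kernel/context_tracking.c hunk further down does that, roughly as sketched here.

	/* In exactly one .c file: */
	#define CREATE_TRACE_POINTS
	#include <trace/events/context_tracking.h>

	/* Call sites then simply emit the events, e.g. trace_user_enter(0); */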
diff --git a/init/Kconfig b/init/Kconfig
index cc917d3ec858..0a2c4bcf179e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -528,13 +528,29 @@ config RCU_USER_QS
528config CONTEXT_TRACKING_FORCE 528config CONTEXT_TRACKING_FORCE
529 bool "Force context tracking" 529 bool "Force context tracking"
530 depends on CONTEXT_TRACKING 530 depends on CONTEXT_TRACKING
531 default CONTEXT_TRACKING 531 default y if !NO_HZ_FULL
532 help 532 help
 533 Probe on user/kernel boundaries by default in order to 533 The major prerequisite for full dynticks to work is to
534 test the features that rely on it such as userspace RCU extended 534 support the context tracking subsystem. But there are also
535 quiescent states. 535 other dependencies to provide in order to make the full
 536 This test is there for debugging until we have a real user like the 536 dynticks work.
537 full dynticks mode. 537
538 This option stands for testing when an arch implements the
 539 context tracking backend but doesn't yet fulfill all the
 540 requirements to make the full dynticks feature work.
541 Without the full dynticks, there is no way to test the support
542 for context tracking and the subsystems that rely on it: RCU
543 userspace extended quiescent state and tickless cputime
544 accounting. This option copes with the absence of the full
545 dynticks subsystem by forcing the context tracking on all
546 CPUs in the system.
547
 548 Say Y only if you're working on the development of an
549 architecture backend for the context tracking.
550
 551 Say N otherwise; this option brings an overhead that you
552 don't want in production.
553
538 554
539config RCU_FANOUT 555config RCU_FANOUT
540 int "Tree-based hierarchical RCU fanout value" 556 int "Tree-based hierarchical RCU fanout value"
diff --git a/init/main.c b/init/main.c
index d03d2ec2eacf..af310afbef28 100644
--- a/init/main.c
+++ b/init/main.c
@@ -75,6 +75,7 @@
75#include <linux/blkdev.h> 75#include <linux/blkdev.h>
76#include <linux/elevator.h> 76#include <linux/elevator.h>
77#include <linux/sched_clock.h> 77#include <linux/sched_clock.h>
78#include <linux/context_tracking.h>
78 79
79#include <asm/io.h> 80#include <asm/io.h>
80#include <asm/bugs.h> 81#include <asm/bugs.h>
@@ -545,6 +546,7 @@ asmlinkage void __init start_kernel(void)
545 idr_init_cache(); 546 idr_init_cache();
546 rcu_init(); 547 rcu_init();
547 tick_nohz_init(); 548 tick_nohz_init();
549 context_tracking_init();
548 radix_tree_init(); 550 radix_tree_init();
549 /* init some links before init_ISA_irqs() */ 551 /* init some links before init_ISA_irqs() */
550 early_irq_init(); 552 early_irq_init();
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 383f8231e436..247091bf0587 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -20,22 +20,33 @@
20#include <linux/hardirq.h> 20#include <linux/hardirq.h>
21#include <linux/export.h> 21#include <linux/export.h>
22 22
23DEFINE_PER_CPU(struct context_tracking, context_tracking) = { 23#define CREATE_TRACE_POINTS
24#ifdef CONFIG_CONTEXT_TRACKING_FORCE 24#include <trace/events/context_tracking.h>
25 .active = true, 25
26#endif 26struct static_key context_tracking_enabled = STATIC_KEY_INIT_FALSE;
27}; 27EXPORT_SYMBOL_GPL(context_tracking_enabled);
28
29DEFINE_PER_CPU(struct context_tracking, context_tracking);
30EXPORT_SYMBOL_GPL(context_tracking);
31
32void context_tracking_cpu_set(int cpu)
33{
34 if (!per_cpu(context_tracking.active, cpu)) {
35 per_cpu(context_tracking.active, cpu) = true;
36 static_key_slow_inc(&context_tracking_enabled);
37 }
38}
28 39
29/** 40/**
30 * user_enter - Inform the context tracking that the CPU is going to 41 * context_tracking_user_enter - Inform the context tracking that the CPU is going to
31 * enter userspace mode. 42 * enter userspace mode.
32 * 43 *
33 * This function must be called right before we switch from the kernel 44 * This function must be called right before we switch from the kernel
34 * to userspace, when it's guaranteed the remaining kernel instructions 45 * to userspace, when it's guaranteed the remaining kernel instructions
35 * to execute won't use any RCU read side critical section because this 46 * to execute won't use any RCU read side critical section because this
36 * function sets RCU in extended quiescent state. 47 * function sets RCU in extended quiescent state.
37 */ 48 */
38void user_enter(void) 49void context_tracking_user_enter(void)
39{ 50{
40 unsigned long flags; 51 unsigned long flags;
41 52
@@ -54,17 +65,32 @@ void user_enter(void)
54 WARN_ON_ONCE(!current->mm); 65 WARN_ON_ONCE(!current->mm);
55 66
56 local_irq_save(flags); 67 local_irq_save(flags);
 57 if (__this_cpu_read(context_tracking.active) && 68 if (__this_cpu_read(context_tracking.state) != IN_USER) {
58 __this_cpu_read(context_tracking.state) != IN_USER) { 69 if (__this_cpu_read(context_tracking.active)) {
70 trace_user_enter(0);
71 /*
72 * At this stage, only low level arch entry code remains and
73 * then we'll run in userspace. We can assume there won't be
74 * any RCU read-side critical section until the next call to
75 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
76 * on the tick.
77 */
78 vtime_user_enter(current);
79 rcu_user_enter();
80 }
59 /* 81 /*
60 * At this stage, only low level arch entry code remains and 82 * Even if context tracking is disabled on this CPU, because it's outside
61 * then we'll run in userspace. We can assume there won't be 83 * the full dynticks mask for example, we still have to keep track of the
62 * any RCU read-side critical section until the next call to 84 * context transitions and states to prevent inconsistency on those of
63 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency 85 * other CPUs.
 64 * on the tick. 86 * If a task triggers an exception in userspace, sleeps in the exception
 87 * handler and then migrates to another CPU, that new CPU must know where
88 * the exception returns by the time we call exception_exit().
89 * This information can only be provided by the previous CPU when it called
90 * exception_enter().
91 * OTOH we can spare the calls to vtime and RCU when context_tracking.active
92 * is false because we know that CPU is not tickless.
65 */ 93 */
66 vtime_user_enter(current);
67 rcu_user_enter();
68 __this_cpu_write(context_tracking.state, IN_USER); 94 __this_cpu_write(context_tracking.state, IN_USER);
69 } 95 }
70 local_irq_restore(flags); 96 local_irq_restore(flags);
@@ -87,10 +113,9 @@ void user_enter(void)
87 */ 113 */
88void __sched notrace preempt_schedule_context(void) 114void __sched notrace preempt_schedule_context(void)
89{ 115{
90 struct thread_info *ti = current_thread_info();
91 enum ctx_state prev_ctx; 116 enum ctx_state prev_ctx;
92 117
93 if (likely(ti->preempt_count || irqs_disabled())) 118 if (likely(!preemptible()))
94 return; 119 return;
95 120
96 /* 121 /*
@@ -112,8 +137,8 @@ EXPORT_SYMBOL_GPL(preempt_schedule_context);
112#endif /* CONFIG_PREEMPT */ 137#endif /* CONFIG_PREEMPT */
113 138
114/** 139/**
115 * user_exit - Inform the context tracking that the CPU is 140 * context_tracking_user_exit - Inform the context tracking that the CPU is
116 * exiting userspace mode and entering the kernel. 141 * exiting userspace mode and entering the kernel.
117 * 142 *
118 * This function must be called after we entered the kernel from userspace 143 * This function must be called after we entered the kernel from userspace
 119 * before any use of RCU read side critical section. This potentially includes 144 * before any use of RCU read side critical section. This potentially includes
@@ -122,7 +147,7 @@ EXPORT_SYMBOL_GPL(preempt_schedule_context);
122 * This call supports re-entrancy. This way it can be called from any exception 147 * This call supports re-entrancy. This way it can be called from any exception
123 * handler without needing to know if we came from userspace or not. 148 * handler without needing to know if we came from userspace or not.
124 */ 149 */
125void user_exit(void) 150void context_tracking_user_exit(void)
126{ 151{
127 unsigned long flags; 152 unsigned long flags;
128 153
@@ -131,38 +156,22 @@ void user_exit(void)
131 156
132 local_irq_save(flags); 157 local_irq_save(flags);
133 if (__this_cpu_read(context_tracking.state) == IN_USER) { 158 if (__this_cpu_read(context_tracking.state) == IN_USER) {
134 /* 159 if (__this_cpu_read(context_tracking.active)) {
135 * We are going to run code that may use RCU. Inform 160 /*
136 * RCU core about that (ie: we may need the tick again). 161 * We are going to run code that may use RCU. Inform
137 */ 162 * RCU core about that (ie: we may need the tick again).
138 rcu_user_exit(); 163 */
139 vtime_user_exit(current); 164 rcu_user_exit();
165 vtime_user_exit(current);
166 trace_user_exit(0);
167 }
140 __this_cpu_write(context_tracking.state, IN_KERNEL); 168 __this_cpu_write(context_tracking.state, IN_KERNEL);
141 } 169 }
142 local_irq_restore(flags); 170 local_irq_restore(flags);
143} 171}
144 172
145void guest_enter(void)
146{
147 if (vtime_accounting_enabled())
148 vtime_guest_enter(current);
149 else
150 __guest_enter();
151}
152EXPORT_SYMBOL_GPL(guest_enter);
153
154void guest_exit(void)
155{
156 if (vtime_accounting_enabled())
157 vtime_guest_exit(current);
158 else
159 __guest_exit();
160}
161EXPORT_SYMBOL_GPL(guest_exit);
162
163
164/** 173/**
165 * context_tracking_task_switch - context switch the syscall callbacks 174 * __context_tracking_task_switch - context switch the syscall callbacks
166 * @prev: the task that is being switched out 175 * @prev: the task that is being switched out
167 * @next: the task that is being switched in 176 * @next: the task that is being switched in
168 * 177 *
@@ -174,11 +183,19 @@ EXPORT_SYMBOL_GPL(guest_exit);
174 * migrate to some CPU that doesn't do the context tracking. As such the TIF 183 * migrate to some CPU that doesn't do the context tracking. As such the TIF
175 * flag may not be desired there. 184 * flag may not be desired there.
176 */ 185 */
177void context_tracking_task_switch(struct task_struct *prev, 186void __context_tracking_task_switch(struct task_struct *prev,
178 struct task_struct *next) 187 struct task_struct *next)
179{ 188{
180 if (__this_cpu_read(context_tracking.active)) { 189 clear_tsk_thread_flag(prev, TIF_NOHZ);
181 clear_tsk_thread_flag(prev, TIF_NOHZ); 190 set_tsk_thread_flag(next, TIF_NOHZ);
182 set_tsk_thread_flag(next, TIF_NOHZ);
183 }
184} 191}
192
193#ifdef CONFIG_CONTEXT_TRACKING_FORCE
194void __init context_tracking_init(void)
195{
196 int cpu;
197
198 for_each_possible_cpu(cpu)
199 context_tracking_cpu_set(cpu);
200}
201#endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b8e2162fc803..725aa067ad63 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2527,13 +2527,11 @@ void __sched schedule_preempt_disabled(void)
2527 */ 2527 */
2528asmlinkage void __sched notrace preempt_schedule(void) 2528asmlinkage void __sched notrace preempt_schedule(void)
2529{ 2529{
2530 struct thread_info *ti = current_thread_info();
2531
2532 /* 2530 /*
2533 * If there is a non-zero preempt_count or interrupts are disabled, 2531 * If there is a non-zero preempt_count or interrupts are disabled,
2534 * we do not want to preempt the current task. Just return.. 2532 * we do not want to preempt the current task. Just return..
2535 */ 2533 */
2536 if (likely(ti->preempt_count || irqs_disabled())) 2534 if (likely(!preemptible()))
2537 return; 2535 return;
2538 2536
2539 do { 2537 do {
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index e89ccefef278..ace34f95e200 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -378,11 +378,8 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_
378#ifdef CONFIG_VIRT_CPU_ACCOUNTING 378#ifdef CONFIG_VIRT_CPU_ACCOUNTING
379 379
380#ifndef __ARCH_HAS_VTIME_TASK_SWITCH 380#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
381void vtime_task_switch(struct task_struct *prev) 381void vtime_common_task_switch(struct task_struct *prev)
382{ 382{
383 if (!vtime_accounting_enabled())
384 return;
385
386 if (is_idle_task(prev)) 383 if (is_idle_task(prev))
387 vtime_account_idle(prev); 384 vtime_account_idle(prev);
388 else 385 else
@@ -404,11 +401,8 @@ void vtime_task_switch(struct task_struct *prev)
404 * vtime_account(). 401 * vtime_account().
405 */ 402 */
406#ifndef __ARCH_HAS_VTIME_ACCOUNT 403#ifndef __ARCH_HAS_VTIME_ACCOUNT
407void vtime_account_irq_enter(struct task_struct *tsk) 404void vtime_common_account_irq_enter(struct task_struct *tsk)
408{ 405{
409 if (!vtime_accounting_enabled())
410 return;
411
412 if (!in_interrupt()) { 406 if (!in_interrupt()) {
413 /* 407 /*
414 * If we interrupted user, context_tracking_in_user() 408 * If we interrupted user, context_tracking_in_user()
@@ -428,7 +422,7 @@ void vtime_account_irq_enter(struct task_struct *tsk)
428 } 422 }
429 vtime_account_system(tsk); 423 vtime_account_system(tsk);
430} 424}
431EXPORT_SYMBOL_GPL(vtime_account_irq_enter); 425EXPORT_SYMBOL_GPL(vtime_common_account_irq_enter);
432#endif /* __ARCH_HAS_VTIME_ACCOUNT */ 426#endif /* __ARCH_HAS_VTIME_ACCOUNT */
433#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ 427#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
434 428
@@ -559,12 +553,6 @@ static void cputime_adjust(struct task_cputime *curr,
559{ 553{
560 cputime_t rtime, stime, utime, total; 554 cputime_t rtime, stime, utime, total;
561 555
562 if (vtime_accounting_enabled()) {
563 *ut = curr->utime;
564 *st = curr->stime;
565 return;
566 }
567
568 stime = curr->stime; 556 stime = curr->stime;
569 total = stime + curr->utime; 557 total = stime + curr->utime;
570 558
@@ -664,23 +652,17 @@ static void __vtime_account_system(struct task_struct *tsk)
664 652
665void vtime_account_system(struct task_struct *tsk) 653void vtime_account_system(struct task_struct *tsk)
666{ 654{
667 if (!vtime_accounting_enabled())
668 return;
669
670 write_seqlock(&tsk->vtime_seqlock); 655 write_seqlock(&tsk->vtime_seqlock);
671 __vtime_account_system(tsk); 656 __vtime_account_system(tsk);
672 write_sequnlock(&tsk->vtime_seqlock); 657 write_sequnlock(&tsk->vtime_seqlock);
673} 658}
674 659
675void vtime_account_irq_exit(struct task_struct *tsk) 660void vtime_gen_account_irq_exit(struct task_struct *tsk)
676{ 661{
677 if (!vtime_accounting_enabled())
678 return;
679
680 write_seqlock(&tsk->vtime_seqlock); 662 write_seqlock(&tsk->vtime_seqlock);
663 __vtime_account_system(tsk);
681 if (context_tracking_in_user()) 664 if (context_tracking_in_user())
682 tsk->vtime_snap_whence = VTIME_USER; 665 tsk->vtime_snap_whence = VTIME_USER;
683 __vtime_account_system(tsk);
684 write_sequnlock(&tsk->vtime_seqlock); 666 write_sequnlock(&tsk->vtime_seqlock);
685} 667}
686 668
@@ -688,12 +670,8 @@ void vtime_account_user(struct task_struct *tsk)
688{ 670{
689 cputime_t delta_cpu; 671 cputime_t delta_cpu;
690 672
691 if (!vtime_accounting_enabled())
692 return;
693
694 delta_cpu = get_vtime_delta(tsk);
695
696 write_seqlock(&tsk->vtime_seqlock); 673 write_seqlock(&tsk->vtime_seqlock);
674 delta_cpu = get_vtime_delta(tsk);
697 tsk->vtime_snap_whence = VTIME_SYS; 675 tsk->vtime_snap_whence = VTIME_SYS;
698 account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); 676 account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
699 write_sequnlock(&tsk->vtime_seqlock); 677 write_sequnlock(&tsk->vtime_seqlock);
@@ -701,22 +679,27 @@ void vtime_account_user(struct task_struct *tsk)
701 679
702void vtime_user_enter(struct task_struct *tsk) 680void vtime_user_enter(struct task_struct *tsk)
703{ 681{
704 if (!vtime_accounting_enabled())
705 return;
706
707 write_seqlock(&tsk->vtime_seqlock); 682 write_seqlock(&tsk->vtime_seqlock);
708 tsk->vtime_snap_whence = VTIME_USER;
709 __vtime_account_system(tsk); 683 __vtime_account_system(tsk);
684 tsk->vtime_snap_whence = VTIME_USER;
710 write_sequnlock(&tsk->vtime_seqlock); 685 write_sequnlock(&tsk->vtime_seqlock);
711} 686}
712 687
713void vtime_guest_enter(struct task_struct *tsk) 688void vtime_guest_enter(struct task_struct *tsk)
714{ 689{
690 /*
691 * The flags must be updated under the lock with
692 * the vtime_snap flush and update.
693 * That enforces a right ordering and update sequence
694 * synchronization against the reader (task_gtime())
695 * that can thus safely catch up with a tickless delta.
696 */
715 write_seqlock(&tsk->vtime_seqlock); 697 write_seqlock(&tsk->vtime_seqlock);
716 __vtime_account_system(tsk); 698 __vtime_account_system(tsk);
717 current->flags |= PF_VCPU; 699 current->flags |= PF_VCPU;
718 write_sequnlock(&tsk->vtime_seqlock); 700 write_sequnlock(&tsk->vtime_seqlock);
719} 701}
702EXPORT_SYMBOL_GPL(vtime_guest_enter);
720 703
721void vtime_guest_exit(struct task_struct *tsk) 704void vtime_guest_exit(struct task_struct *tsk)
722{ 705{
@@ -725,6 +708,7 @@ void vtime_guest_exit(struct task_struct *tsk)
725 current->flags &= ~PF_VCPU; 708 current->flags &= ~PF_VCPU;
726 write_sequnlock(&tsk->vtime_seqlock); 709 write_sequnlock(&tsk->vtime_seqlock);
727} 710}
711EXPORT_SYMBOL_GPL(vtime_guest_exit);
728 712
729void vtime_account_idle(struct task_struct *tsk) 713void vtime_account_idle(struct task_struct *tsk)
730{ 714{
@@ -733,11 +717,6 @@ void vtime_account_idle(struct task_struct *tsk)
733 account_idle_time(delta_cpu); 717 account_idle_time(delta_cpu);
734} 718}
735 719
736bool vtime_accounting_enabled(void)
737{
738 return context_tracking_active();
739}
740
741void arch_vtime_task_switch(struct task_struct *prev) 720void arch_vtime_task_switch(struct task_struct *prev)
742{ 721{
743 write_seqlock(&prev->vtime_seqlock); 722 write_seqlock(&prev->vtime_seqlock);
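
The comment added to vtime_guest_enter() above refers to the seqlock reader that catches up with a tickless delta; a rough sketch of that read side (modeled on task_gtime(), simplified, with example_pending_vtime_delta() as a hypothetical helper for the not-yet-flushed time):

	#include <linux/sched.h>
	#include <linux/seqlock.h>

	extern cputime_t example_pending_vtime_delta(struct task_struct *t);

	static cputime_t example_task_gtime(struct task_struct *t)
	{
		unsigned int seq;
		cputime_t gtime;

		do {
			seq = read_seqbegin(&t->vtime_seqlock);
			gtime = t->gtime;
			if (t->flags & PF_VCPU)	/* still in guest: add the pending delta */
				gtime += example_pending_vtime_delta(t);
		} while (read_seqretry(&t->vtime_seqlock, seq));

		return gtime;
	}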
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 3381f098070f..2b62fe86f9ec 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -105,7 +105,6 @@ config NO_HZ_FULL
105 select RCU_USER_QS 105 select RCU_USER_QS
106 select RCU_NOCB_CPU 106 select RCU_NOCB_CPU
107 select VIRT_CPU_ACCOUNTING_GEN 107 select VIRT_CPU_ACCOUNTING_GEN
108 select CONTEXT_TRACKING_FORCE
109 select IRQ_WORK 108 select IRQ_WORK
110 help 109 help
111 Adaptively try to shutdown the tick whenever possible, even when 110 Adaptively try to shutdown the tick whenever possible, even when
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index e8a1516cc0a3..3612fc77f834 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -23,6 +23,7 @@
23#include <linux/irq_work.h> 23#include <linux/irq_work.h>
24#include <linux/posix-timers.h> 24#include <linux/posix-timers.h>
25#include <linux/perf_event.h> 25#include <linux/perf_event.h>
26#include <linux/context_tracking.h>
26 27
27#include <asm/irq_regs.h> 28#include <asm/irq_regs.h>
28 29
@@ -148,8 +149,8 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
148} 149}
149 150
150#ifdef CONFIG_NO_HZ_FULL 151#ifdef CONFIG_NO_HZ_FULL
151static cpumask_var_t nohz_full_mask; 152cpumask_var_t tick_nohz_full_mask;
152bool have_nohz_full_mask; 153bool tick_nohz_full_running;
153 154
154static bool can_stop_full_tick(void) 155static bool can_stop_full_tick(void)
155{ 156{
@@ -182,7 +183,7 @@ static bool can_stop_full_tick(void)
182 * Don't allow the user to think they can get 183 * Don't allow the user to think they can get
183 * full NO_HZ with this machine. 184 * full NO_HZ with this machine.
184 */ 185 */
185 WARN_ONCE(have_nohz_full_mask, 186 WARN_ONCE(tick_nohz_full_running,
186 "NO_HZ FULL will not work with unstable sched clock"); 187 "NO_HZ FULL will not work with unstable sched clock");
187 return false; 188 return false;
188 } 189 }
@@ -197,7 +198,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now);
197 * Re-evaluate the need for the tick on the current CPU 198 * Re-evaluate the need for the tick on the current CPU
198 * and restart it if necessary. 199 * and restart it if necessary.
199 */ 200 */
200void tick_nohz_full_check(void) 201void __tick_nohz_full_check(void)
201{ 202{
202 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 203 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
203 204
@@ -211,7 +212,7 @@ void tick_nohz_full_check(void)
211 212
212static void nohz_full_kick_work_func(struct irq_work *work) 213static void nohz_full_kick_work_func(struct irq_work *work)
213{ 214{
214 tick_nohz_full_check(); 215 __tick_nohz_full_check();
215} 216}
216 217
217static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { 218static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
@@ -230,7 +231,7 @@ void tick_nohz_full_kick(void)
230 231
231static void nohz_full_kick_ipi(void *info) 232static void nohz_full_kick_ipi(void *info)
232{ 233{
233 tick_nohz_full_check(); 234 __tick_nohz_full_check();
234} 235}
235 236
236/* 237/*
@@ -239,12 +240,13 @@ static void nohz_full_kick_ipi(void *info)
239 */ 240 */
240void tick_nohz_full_kick_all(void) 241void tick_nohz_full_kick_all(void)
241{ 242{
242 if (!have_nohz_full_mask) 243 if (!tick_nohz_full_running)
243 return; 244 return;
244 245
245 preempt_disable(); 246 preempt_disable();
246 smp_call_function_many(nohz_full_mask, 247 smp_call_function_many(tick_nohz_full_mask,
247 nohz_full_kick_ipi, NULL, false); 248 nohz_full_kick_ipi, NULL, false);
249 tick_nohz_full_kick();
248 preempt_enable(); 250 preempt_enable();
249} 251}
250 252
@@ -253,7 +255,7 @@ void tick_nohz_full_kick_all(void)
253 * It might need the tick due to per task/process properties: 255 * It might need the tick due to per task/process properties:
254 * perf events, posix cpu timers, ... 256 * perf events, posix cpu timers, ...
255 */ 257 */
256void tick_nohz_task_switch(struct task_struct *tsk) 258void __tick_nohz_task_switch(struct task_struct *tsk)
257{ 259{
258 unsigned long flags; 260 unsigned long flags;
259 261
@@ -269,31 +271,23 @@ out:
269 local_irq_restore(flags); 271 local_irq_restore(flags);
270} 272}
271 273
272int tick_nohz_full_cpu(int cpu)
273{
274 if (!have_nohz_full_mask)
275 return 0;
276
277 return cpumask_test_cpu(cpu, nohz_full_mask);
278}
279
280/* Parse the boot-time nohz CPU list from the kernel parameters. */ 274/* Parse the boot-time nohz CPU list from the kernel parameters. */
281static int __init tick_nohz_full_setup(char *str) 275static int __init tick_nohz_full_setup(char *str)
282{ 276{
283 int cpu; 277 int cpu;
284 278
285 alloc_bootmem_cpumask_var(&nohz_full_mask); 279 alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
286 if (cpulist_parse(str, nohz_full_mask) < 0) { 280 if (cpulist_parse(str, tick_nohz_full_mask) < 0) {
287 pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); 281 pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
288 return 1; 282 return 1;
289 } 283 }
290 284
291 cpu = smp_processor_id(); 285 cpu = smp_processor_id();
292 if (cpumask_test_cpu(cpu, nohz_full_mask)) { 286 if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
293 pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); 287 pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
294 cpumask_clear_cpu(cpu, nohz_full_mask); 288 cpumask_clear_cpu(cpu, tick_nohz_full_mask);
295 } 289 }
296 have_nohz_full_mask = true; 290 tick_nohz_full_running = true;
297 291
298 return 1; 292 return 1;
299} 293}
@@ -311,7 +305,7 @@ static int tick_nohz_cpu_down_callback(struct notifier_block *nfb,
311 * If we handle the timekeeping duty for full dynticks CPUs, 305 * If we handle the timekeeping duty for full dynticks CPUs,
312 * we can't safely shutdown that CPU. 306 * we can't safely shutdown that CPU.
313 */ 307 */
314 if (have_nohz_full_mask && tick_do_timer_cpu == cpu) 308 if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
315 return NOTIFY_BAD; 309 return NOTIFY_BAD;
316 break; 310 break;
317 } 311 }
@@ -330,31 +324,34 @@ static int tick_nohz_init_all(void)
330 int err = -1; 324 int err = -1;
331 325
332#ifdef CONFIG_NO_HZ_FULL_ALL 326#ifdef CONFIG_NO_HZ_FULL_ALL
333 if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) { 327 if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) {
334 pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); 328 pr_err("NO_HZ: Can't allocate full dynticks cpumask\n");
335 return err; 329 return err;
336 } 330 }
337 err = 0; 331 err = 0;
338 cpumask_setall(nohz_full_mask); 332 cpumask_setall(tick_nohz_full_mask);
339 cpumask_clear_cpu(smp_processor_id(), nohz_full_mask); 333 cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask);
340 have_nohz_full_mask = true; 334 tick_nohz_full_running = true;
341#endif 335#endif
342 return err; 336 return err;
343} 337}
344 338
345void __init tick_nohz_init(void) 339void __init tick_nohz_init(void)
346{ 340{
347 if (!have_nohz_full_mask) { 341 int cpu;
342
343 if (!tick_nohz_full_running) {
348 if (tick_nohz_init_all() < 0) 344 if (tick_nohz_init_all() < 0)
349 return; 345 return;
350 } 346 }
351 347
348 for_each_cpu(cpu, tick_nohz_full_mask)
349 context_tracking_cpu_set(cpu);
350
352 cpu_notifier(tick_nohz_cpu_down_callback, 0); 351 cpu_notifier(tick_nohz_cpu_down_callback, 0);
353 cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); 352 cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), tick_nohz_full_mask);
354 pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); 353 pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf);
355} 354}
356#else
357#define have_nohz_full_mask (0)
358#endif 355#endif
359 356
360/* 357/*
@@ -732,7 +729,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
732 return false; 729 return false;
733 } 730 }
734 731
735 if (have_nohz_full_mask) { 732 if (tick_nohz_full_enabled()) {
736 /* 733 /*
737 * Keep the tick alive to guarantee timekeeping progression 734 * Keep the tick alive to guarantee timekeeping progression
738 * if there are full dynticks CPUs around 735 * if there are full dynticks CPUs around