Diffstat:
 -rw-r--r--  arch/ia64/include/asm/Kbuild              |   1
 -rw-r--r--  arch/m68k/include/asm/irqflags.h          |   2
 -rw-r--r--  arch/powerpc/include/asm/Kbuild           |   1
 -rw-r--r--  arch/s390/include/asm/cputime.h           |   3
 -rw-r--r--  arch/s390/include/asm/vtime.h             |   7
 -rw-r--r--  arch/s390/kernel/vtime.c                  |   1
 -rw-r--r--  include/asm-generic/vtime.h               |   0
 -rw-r--r--  include/linux/context_tracking.h          | 128
 -rw-r--r--  include/linux/context_tracking_state.h    |  39
 -rw-r--r--  include/linux/hardirq.h                   | 117
 -rw-r--r--  include/linux/preempt_mask.h              | 122
 -rw-r--r--  include/linux/tick.h                      |  45
 -rw-r--r--  include/linux/vtime.h                     |  74
 -rw-r--r--  include/trace/events/context_tracking.h   |  58
 -rw-r--r--  init/Kconfig                              |  28
 -rw-r--r--  init/main.c                               |   2
 -rw-r--r--  kernel/context_tracking.c                 | 125
 -rw-r--r--  kernel/sched/core.c                       |   4
 -rw-r--r--  kernel/sched/cputime.c                    |  53
 -rw-r--r--  kernel/time/Kconfig                       |   1
 -rw-r--r--  kernel/time/tick-sched.c                  |  61
 21 files changed, 545 insertions(+), 327 deletions(-)
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 05b03ecd7933..a3456f34f672 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += clkdev.h
3generic-y += exec.h 3generic-y += exec.h
4generic-y += kvm_para.h 4generic-y += kvm_para.h
5generic-y += trace_clock.h 5generic-y += trace_clock.h
6generic-y += vtime.h \ No newline at end of file
diff --git a/arch/m68k/include/asm/irqflags.h b/arch/m68k/include/asm/irqflags.h
index 5053092b369f..a823cd73dc09 100644
--- a/arch/m68k/include/asm/irqflags.h
+++ b/arch/m68k/include/asm/irqflags.h
@@ -3,7 +3,7 @@
3 3
4#include <linux/types.h> 4#include <linux/types.h>
5#ifdef CONFIG_MMU 5#ifdef CONFIG_MMU
6#include <linux/hardirq.h> 6#include <linux/preempt_mask.h>
7#endif 7#endif
8#include <linux/preempt.h> 8#include <linux/preempt.h>
9#include <asm/thread_info.h> 9#include <asm/thread_info.h>
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 650757c300db..704e6f10ae80 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -2,3 +2,4 @@
2generic-y += clkdev.h 2generic-y += clkdev.h
3generic-y += rwsem.h 3generic-y += rwsem.h
4generic-y += trace_clock.h 4generic-y += trace_clock.h
5generic-y += vtime.h \ No newline at end of file
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index d2ff41370c0c..f65bd3634519 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -13,9 +13,6 @@
13#include <asm/div64.h> 13#include <asm/div64.h>
14 14
15 15
16#define __ARCH_HAS_VTIME_ACCOUNT
17#define __ARCH_HAS_VTIME_TASK_SWITCH
18
19/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ 16/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
20 17
21typedef unsigned long long __nocast cputime_t; 18typedef unsigned long long __nocast cputime_t;
diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h
new file mode 100644
index 000000000000..af9896c53eb3
--- /dev/null
+++ b/arch/s390/include/asm/vtime.h
@@ -0,0 +1,7 @@
1#ifndef _S390_VTIME_H
2#define _S390_VTIME_H
3
4#define __ARCH_HAS_VTIME_ACCOUNT
5#define __ARCH_HAS_VTIME_TASK_SWITCH
6
7#endif /* _S390_VTIME_H */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 9b9c1b78ec67..abcfab55f99b 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -19,6 +19,7 @@
19#include <asm/irq_regs.h> 19#include <asm/irq_regs.h>
20#include <asm/cputime.h> 20#include <asm/cputime.h>
21#include <asm/vtimer.h> 21#include <asm/vtimer.h>
22#include <asm/vtime.h>
22#include <asm/irq.h> 23#include <asm/irq.h>
23#include "entry.h" 24#include "entry.h"
24 25
diff --git a/include/asm-generic/vtime.h b/include/asm-generic/vtime.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/include/asm-generic/vtime.h
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index fc09d7b0dacf..158158704c30 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -2,100 +2,110 @@
2#define _LINUX_CONTEXT_TRACKING_H 2#define _LINUX_CONTEXT_TRACKING_H
3 3
4#include <linux/sched.h> 4#include <linux/sched.h>
5#include <linux/percpu.h>
6#include <linux/vtime.h> 5#include <linux/vtime.h>
6#include <linux/context_tracking_state.h>
7#include <asm/ptrace.h> 7#include <asm/ptrace.h>
8 8
9struct context_tracking {
10 /*
11 * When active is false, probes are unset in order
12 * to minimize overhead: TIF flags are cleared
13 * and calls to user_enter/exit are ignored. This
14 * may be further optimized using static keys.
15 */
16 bool active;
17 enum ctx_state {
18 IN_KERNEL = 0,
19 IN_USER,
20 } state;
21};
22
23static inline void __guest_enter(void)
24{
25 /*
26 * This is running in ioctl context so we can avoid
27 * the call to vtime_account() with its unnecessary idle check.
28 */
29 vtime_account_system(current);
30 current->flags |= PF_VCPU;
31}
32
33static inline void __guest_exit(void)
34{
35 /*
36 * This is running in ioctl context so we can avoid
37 * the call to vtime_account() with its unnecessary idle check.
38 */
39 vtime_account_system(current);
40 current->flags &= ~PF_VCPU;
41}
42 9
43#ifdef CONFIG_CONTEXT_TRACKING 10#ifdef CONFIG_CONTEXT_TRACKING
44DECLARE_PER_CPU(struct context_tracking, context_tracking); 11extern void context_tracking_cpu_set(int cpu);
45 12
46static inline bool context_tracking_in_user(void) 13extern void context_tracking_user_enter(void);
14extern void context_tracking_user_exit(void);
15extern void __context_tracking_task_switch(struct task_struct *prev,
16 struct task_struct *next);
17
18static inline void user_enter(void)
47{ 19{
48 return __this_cpu_read(context_tracking.state) == IN_USER; 20 if (static_key_false(&context_tracking_enabled))
49} 21 context_tracking_user_enter();
50 22
51static inline bool context_tracking_active(void) 23}
24static inline void user_exit(void)
52{ 25{
53 return __this_cpu_read(context_tracking.active); 26 if (static_key_false(&context_tracking_enabled))
27 context_tracking_user_exit();
54} 28}
55 29
56extern void user_enter(void);
57extern void user_exit(void);
58
59extern void guest_enter(void);
60extern void guest_exit(void);
61
62static inline enum ctx_state exception_enter(void) 30static inline enum ctx_state exception_enter(void)
63{ 31{
64 enum ctx_state prev_ctx; 32 enum ctx_state prev_ctx;
65 33
34 if (!static_key_false(&context_tracking_enabled))
35 return 0;
36
66 prev_ctx = this_cpu_read(context_tracking.state); 37 prev_ctx = this_cpu_read(context_tracking.state);
67 user_exit(); 38 context_tracking_user_exit();
68 39
69 return prev_ctx; 40 return prev_ctx;
70} 41}
71 42
72static inline void exception_exit(enum ctx_state prev_ctx) 43static inline void exception_exit(enum ctx_state prev_ctx)
73{ 44{
74 if (prev_ctx == IN_USER) 45 if (static_key_false(&context_tracking_enabled)) {
75 user_enter(); 46 if (prev_ctx == IN_USER)
47 context_tracking_user_enter();
48 }
76} 49}
77 50
78extern void context_tracking_task_switch(struct task_struct *prev, 51static inline void context_tracking_task_switch(struct task_struct *prev,
79 struct task_struct *next); 52 struct task_struct *next)
53{
54 if (static_key_false(&context_tracking_enabled))
55 __context_tracking_task_switch(prev, next);
56}
80#else 57#else
81static inline bool context_tracking_in_user(void) { return false; }
82static inline void user_enter(void) { } 58static inline void user_enter(void) { }
83static inline void user_exit(void) { } 59static inline void user_exit(void) { }
60static inline enum ctx_state exception_enter(void) { return 0; }
61static inline void exception_exit(enum ctx_state prev_ctx) { }
62static inline void context_tracking_task_switch(struct task_struct *prev,
63 struct task_struct *next) { }
64#endif /* !CONFIG_CONTEXT_TRACKING */
65
66
67#ifdef CONFIG_CONTEXT_TRACKING_FORCE
68extern void context_tracking_init(void);
69#else
70static inline void context_tracking_init(void) { }
71#endif /* CONFIG_CONTEXT_TRACKING_FORCE */
72
84 73
74#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
85static inline void guest_enter(void) 75static inline void guest_enter(void)
86{ 76{
87 __guest_enter(); 77 if (vtime_accounting_enabled())
78 vtime_guest_enter(current);
79 else
80 current->flags |= PF_VCPU;
88} 81}
89 82
90static inline void guest_exit(void) 83static inline void guest_exit(void)
91{ 84{
92 __guest_exit(); 85 if (vtime_accounting_enabled())
86 vtime_guest_exit(current);
87 else
88 current->flags &= ~PF_VCPU;
93} 89}
94 90
95static inline enum ctx_state exception_enter(void) { return 0; } 91#else
96static inline void exception_exit(enum ctx_state prev_ctx) { } 92static inline void guest_enter(void)
97static inline void context_tracking_task_switch(struct task_struct *prev, 93{
98 struct task_struct *next) { } 94 /*
 99#endif /* !CONFIG_CONTEXT_TRACKING */ 95 * This is running in ioctl context so it's safe
96 * to assume that it's the stime pending cputime
97 * to flush.
98 */
99 vtime_account_system(current);
100 current->flags |= PF_VCPU;
101}
102
103static inline void guest_exit(void)
104{
105 /* Flush the guest cputime we spent on the guest */
106 vtime_account_system(current);
107 current->flags &= ~PF_VCPU;
108}
109#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
100 110
101#endif 111#endif
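
The exception_enter()/exception_exit() pair above is meant to bracket architecture exception handlers, so a handler remembers which context it interrupted even if it sleeps and the task migrates before returning. A minimal sketch of such a caller follows; do_example_fault() and handle_the_fault() are hypothetical names, not part of this patch.

	#include <linux/context_tracking.h>

	extern void handle_the_fault(struct pt_regs *regs);

	void do_example_fault(struct pt_regs *regs)
	{
		enum ctx_state prev_state;

		prev_state = exception_enter();	/* record that we may have interrupted userspace */
		handle_the_fault(regs);		/* regular kernel work; may sleep and migrate */
		exception_exit(prev_state);	/* restore the context we interrupted, if any */
	}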
diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
new file mode 100644
index 000000000000..0f1979d0674f
--- /dev/null
+++ b/include/linux/context_tracking_state.h
@@ -0,0 +1,39 @@
1#ifndef _LINUX_CONTEXT_TRACKING_STATE_H
2#define _LINUX_CONTEXT_TRACKING_STATE_H
3
4#include <linux/percpu.h>
5#include <linux/static_key.h>
6
7struct context_tracking {
8 /*
9 * When active is false, probes are unset in order
10 * to minimize overhead: TIF flags are cleared
11 * and calls to user_enter/exit are ignored. This
12 * may be further optimized using static keys.
13 */
14 bool active;
15 enum ctx_state {
16 IN_KERNEL = 0,
17 IN_USER,
18 } state;
19};
20
21#ifdef CONFIG_CONTEXT_TRACKING
22extern struct static_key context_tracking_enabled;
23DECLARE_PER_CPU(struct context_tracking, context_tracking);
24
25static inline bool context_tracking_in_user(void)
26{
27 return __this_cpu_read(context_tracking.state) == IN_USER;
28}
29
30static inline bool context_tracking_active(void)
31{
32 return __this_cpu_read(context_tracking.active);
33}
34#else
35static inline bool context_tracking_in_user(void) { return false; }
36static inline bool context_tracking_active(void) { return false; }
37#endif /* CONFIG_CONTEXT_TRACKING */
38
39#endif
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 05bcc0903766..ccfe17c5c8da 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -1,126 +1,11 @@
1#ifndef LINUX_HARDIRQ_H 1#ifndef LINUX_HARDIRQ_H
2#define LINUX_HARDIRQ_H 2#define LINUX_HARDIRQ_H
3 3
4#include <linux/preempt.h> 4#include <linux/preempt_mask.h>
5#include <linux/lockdep.h> 5#include <linux/lockdep.h>
6#include <linux/ftrace_irq.h> 6#include <linux/ftrace_irq.h>
7#include <linux/vtime.h> 7#include <linux/vtime.h>
8#include <asm/hardirq.h>
9 8
10/*
11 * We put the hardirq and softirq counter into the preemption
12 * counter. The bitmask has the following meaning:
13 *
14 * - bits 0-7 are the preemption count (max preemption depth: 256)
15 * - bits 8-15 are the softirq count (max # of softirqs: 256)
16 *
17 * The hardirq count can in theory reach the same as NR_IRQS.
18 * In reality, the number of nested IRQS is limited to the stack
19 * size as well. For archs with over 1000 IRQS it is not practical
20 * to expect that they will all nest. We give a max of 10 bits for
21 * hardirq nesting. An arch may choose to give less than 10 bits.
22 * m68k expects it to be 8.
23 *
24 * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
25 * - bit 26 is the NMI_MASK
26 * - bit 27 is the PREEMPT_ACTIVE flag
27 *
28 * PREEMPT_MASK: 0x000000ff
29 * SOFTIRQ_MASK: 0x0000ff00
30 * HARDIRQ_MASK: 0x03ff0000
31 * NMI_MASK: 0x04000000
32 */
33#define PREEMPT_BITS 8
34#define SOFTIRQ_BITS 8
35#define NMI_BITS 1
36
37#define MAX_HARDIRQ_BITS 10
38
39#ifndef HARDIRQ_BITS
40# define HARDIRQ_BITS MAX_HARDIRQ_BITS
41#endif
42
43#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
44#error HARDIRQ_BITS too high!
45#endif
46
47#define PREEMPT_SHIFT 0
48#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
49#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
50#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)
51
52#define __IRQ_MASK(x) ((1UL << (x))-1)
53
54#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
55#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
56#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
57#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
58
59#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
60#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
61#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
62#define NMI_OFFSET (1UL << NMI_SHIFT)
63
64#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
65
66#ifndef PREEMPT_ACTIVE
67#define PREEMPT_ACTIVE_BITS 1
68#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS)
69#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
70#endif
71
72#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
73#error PREEMPT_ACTIVE is too low!
74#endif
75
76#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
77#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
78#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
79 | NMI_MASK))
80
81/*
82 * Are we doing bottom half or hardware interrupt processing?
83 * Are we in a softirq context? Interrupt context?
84 * in_softirq - Are we currently processing softirq or have bh disabled?
85 * in_serving_softirq - Are we currently processing softirq?
86 */
87#define in_irq() (hardirq_count())
88#define in_softirq() (softirq_count())
89#define in_interrupt() (irq_count())
90#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
91
92/*
93 * Are we in NMI context?
94 */
95#define in_nmi() (preempt_count() & NMI_MASK)
96
97#if defined(CONFIG_PREEMPT_COUNT)
98# define PREEMPT_CHECK_OFFSET 1
99#else
100# define PREEMPT_CHECK_OFFSET 0
101#endif
102
103/*
104 * Are we running in atomic context? WARNING: this macro cannot
105 * always detect atomic context; in particular, it cannot know about
106 * held spinlocks in non-preemptible kernels. Thus it should not be
107 * used in the general case to determine whether sleeping is possible.
108 * Do not use in_atomic() in driver code.
109 */
110#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0)
111
112/*
113 * Check whether we were atomic before we did preempt_disable():
114 * (used by the scheduler, *after* releasing the kernel lock)
115 */
116#define in_atomic_preempt_off() \
117 ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
118
119#ifdef CONFIG_PREEMPT_COUNT
120# define preemptible() (preempt_count() == 0 && !irqs_disabled())
121#else
122# define preemptible() 0
123#endif
124 9
125#if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS) 10#if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS)
126extern void synchronize_irq(unsigned int irq); 11extern void synchronize_irq(unsigned int irq);
diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h
new file mode 100644
index 000000000000..931bc616219f
--- /dev/null
+++ b/include/linux/preempt_mask.h
@@ -0,0 +1,122 @@
1#ifndef LINUX_PREEMPT_MASK_H
2#define LINUX_PREEMPT_MASK_H
3
4#include <linux/preempt.h>
5#include <asm/hardirq.h>
6
7/*
8 * We put the hardirq and softirq counter into the preemption
9 * counter. The bitmask has the following meaning:
10 *
11 * - bits 0-7 are the preemption count (max preemption depth: 256)
12 * - bits 8-15 are the softirq count (max # of softirqs: 256)
13 *
14 * The hardirq count can in theory reach the same as NR_IRQS.
15 * In reality, the number of nested IRQS is limited to the stack
16 * size as well. For archs with over 1000 IRQS it is not practical
17 * to expect that they will all nest. We give a max of 10 bits for
18 * hardirq nesting. An arch may choose to give less than 10 bits.
19 * m68k expects it to be 8.
20 *
21 * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
22 * - bit 26 is the NMI_MASK
23 * - bit 27 is the PREEMPT_ACTIVE flag
24 *
25 * PREEMPT_MASK: 0x000000ff
26 * SOFTIRQ_MASK: 0x0000ff00
27 * HARDIRQ_MASK: 0x03ff0000
28 * NMI_MASK: 0x04000000
29 */
30#define PREEMPT_BITS 8
31#define SOFTIRQ_BITS 8
32#define NMI_BITS 1
33
34#define MAX_HARDIRQ_BITS 10
35
36#ifndef HARDIRQ_BITS
37# define HARDIRQ_BITS MAX_HARDIRQ_BITS
38#endif
39
40#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
41#error HARDIRQ_BITS too high!
42#endif
43
44#define PREEMPT_SHIFT 0
45#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
46#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
47#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)
48
49#define __IRQ_MASK(x) ((1UL << (x))-1)
50
51#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
52#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
53#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
54#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
55
56#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
57#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
58#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
59#define NMI_OFFSET (1UL << NMI_SHIFT)
60
61#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
62
63#ifndef PREEMPT_ACTIVE
64#define PREEMPT_ACTIVE_BITS 1
65#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS)
66#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
67#endif
68
69#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
70#error PREEMPT_ACTIVE is too low!
71#endif
72
73#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
74#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
75#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
76 | NMI_MASK))
77
78/*
79 * Are we doing bottom half or hardware interrupt processing?
80 * Are we in a softirq context? Interrupt context?
81 * in_softirq - Are we currently processing softirq or have bh disabled?
82 * in_serving_softirq - Are we currently processing softirq?
83 */
84#define in_irq() (hardirq_count())
85#define in_softirq() (softirq_count())
86#define in_interrupt() (irq_count())
87#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
88
89/*
90 * Are we in NMI context?
91 */
92#define in_nmi() (preempt_count() & NMI_MASK)
93
94#if defined(CONFIG_PREEMPT_COUNT)
95# define PREEMPT_CHECK_OFFSET 1
96#else
97# define PREEMPT_CHECK_OFFSET 0
98#endif
99
100/*
101 * Are we running in atomic context? WARNING: this macro cannot
102 * always detect atomic context; in particular, it cannot know about
103 * held spinlocks in non-preemptible kernels. Thus it should not be
104 * used in the general case to determine whether sleeping is possible.
105 * Do not use in_atomic() in driver code.
106 */
107#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0)
108
109/*
110 * Check whether we were atomic before we did preempt_disable():
111 * (used by the scheduler, *after* releasing the kernel lock)
112 */
113#define in_atomic_preempt_off() \
114 ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
115
116#ifdef CONFIG_PREEMPT_COUNT
117# define preemptible() (preempt_count() == 0 && !irqs_disabled())
118#else
119# define preemptible() 0
120#endif
121
122#endif /* LINUX_PREEMPT_MASK_H */
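
As a worked example of the bit layout this header carries over verbatim from hardirq.h (illustrative only, assuming the default HARDIRQ_BITS of 10):

	/*
	 * preempt_count() == 0x00010102 decodes as:
	 *   preempt depth   = 0x00010102 & PREEMPT_MASK = 2       (two preempt_disable() levels)
	 *   softirq_count() = 0x00010102 & SOFTIRQ_MASK = 0x100   (one softirq/bh-disable level)
	 *   hardirq_count() = 0x00010102 & HARDIRQ_MASK = 0x10000 (one nested hardirq)
	 * so in_irq(), in_softirq(), in_interrupt() and in_atomic() all evaluate true,
	 * while in_nmi() is false because the NMI bit (bit 26) is clear.
	 */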
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 62bd8b72873c..5128d33bbb39 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -10,6 +10,8 @@
10#include <linux/irqflags.h> 10#include <linux/irqflags.h>
11#include <linux/percpu.h> 11#include <linux/percpu.h>
12#include <linux/hrtimer.h> 12#include <linux/hrtimer.h>
13#include <linux/context_tracking_state.h>
14#include <linux/cpumask.h>
13 15
14#ifdef CONFIG_GENERIC_CLOCKEVENTS 16#ifdef CONFIG_GENERIC_CLOCKEVENTS
15 17
@@ -158,20 +160,51 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
158# endif /* !CONFIG_NO_HZ_COMMON */ 160# endif /* !CONFIG_NO_HZ_COMMON */
159 161
160#ifdef CONFIG_NO_HZ_FULL 162#ifdef CONFIG_NO_HZ_FULL
163extern bool tick_nohz_full_running;
164extern cpumask_var_t tick_nohz_full_mask;
165
166static inline bool tick_nohz_full_enabled(void)
167{
168 if (!static_key_false(&context_tracking_enabled))
169 return false;
170
171 return tick_nohz_full_running;
172}
173
174static inline bool tick_nohz_full_cpu(int cpu)
175{
176 if (!tick_nohz_full_enabled())
177 return false;
178
179 return cpumask_test_cpu(cpu, tick_nohz_full_mask);
180}
181
161extern void tick_nohz_init(void); 182extern void tick_nohz_init(void);
162extern int tick_nohz_full_cpu(int cpu); 183extern void __tick_nohz_full_check(void);
163extern void tick_nohz_full_check(void);
164extern void tick_nohz_full_kick(void); 184extern void tick_nohz_full_kick(void);
165extern void tick_nohz_full_kick_all(void); 185extern void tick_nohz_full_kick_all(void);
166extern void tick_nohz_task_switch(struct task_struct *tsk); 186extern void __tick_nohz_task_switch(struct task_struct *tsk);
167#else 187#else
168static inline void tick_nohz_init(void) { } 188static inline void tick_nohz_init(void) { }
169static inline int tick_nohz_full_cpu(int cpu) { return 0; } 189static inline bool tick_nohz_full_enabled(void) { return false; }
170static inline void tick_nohz_full_check(void) { } 190static inline bool tick_nohz_full_cpu(int cpu) { return false; }
191static inline void __tick_nohz_full_check(void) { }
171static inline void tick_nohz_full_kick(void) { } 192static inline void tick_nohz_full_kick(void) { }
172static inline void tick_nohz_full_kick_all(void) { } 193static inline void tick_nohz_full_kick_all(void) { }
173static inline void tick_nohz_task_switch(struct task_struct *tsk) { } 194static inline void __tick_nohz_task_switch(struct task_struct *tsk) { }
174#endif 195#endif
175 196
197static inline void tick_nohz_full_check(void)
198{
199 if (tick_nohz_full_enabled())
200 __tick_nohz_full_check();
201}
202
203static inline void tick_nohz_task_switch(struct task_struct *tsk)
204{
205 if (tick_nohz_full_enabled())
206 __tick_nohz_task_switch(tsk);
207}
208
176 209
177#endif 210#endif
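
A hedged sketch of how a hot path is expected to use the reworked helpers (example_resched_hook() is a hypothetical caller, not from this patch): tick_nohz_full_cpu() now bails out through the static-key-backed tick_nohz_full_enabled() check before it ever touches the cpumask, so kernels booted without nohz_full= only pay a patched-out branch.

	#include <linux/smp.h>
	#include <linux/tick.h>

	static void example_resched_hook(void)
	{
		/* Folded to a no-op branch while the context tracking key is disabled. */
		if (tick_nohz_full_cpu(smp_processor_id()))
			tick_nohz_full_kick();	/* ask this CPU to re-evaluate stopping its tick */
	}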
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index b1dd2db80076..f5b72b364bda 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -1,18 +1,68 @@
1#ifndef _LINUX_KERNEL_VTIME_H 1#ifndef _LINUX_KERNEL_VTIME_H
2#define _LINUX_KERNEL_VTIME_H 2#define _LINUX_KERNEL_VTIME_H
3 3
4#include <linux/context_tracking_state.h>
5#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
6#include <asm/vtime.h>
7#endif
8
9
4struct task_struct; 10struct task_struct;
5 11
12/*
13 * vtime_accounting_enabled() definitions/declarations
14 */
15#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
16static inline bool vtime_accounting_enabled(void) { return true; }
17#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
18
19#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
20static inline bool vtime_accounting_enabled(void)
21{
22 if (static_key_false(&context_tracking_enabled)) {
23 if (context_tracking_active())
24 return true;
25 }
26
27 return false;
28}
29#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
30
31#ifndef CONFIG_VIRT_CPU_ACCOUNTING
32static inline bool vtime_accounting_enabled(void) { return false; }
33#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
34
35
36/*
37 * Common vtime APIs
38 */
6#ifdef CONFIG_VIRT_CPU_ACCOUNTING 39#ifdef CONFIG_VIRT_CPU_ACCOUNTING
40
41#ifdef __ARCH_HAS_VTIME_TASK_SWITCH
7extern void vtime_task_switch(struct task_struct *prev); 42extern void vtime_task_switch(struct task_struct *prev);
43#else
44extern void vtime_common_task_switch(struct task_struct *prev);
45static inline void vtime_task_switch(struct task_struct *prev)
46{
47 if (vtime_accounting_enabled())
48 vtime_common_task_switch(prev);
49}
50#endif /* __ARCH_HAS_VTIME_TASK_SWITCH */
51
8extern void vtime_account_system(struct task_struct *tsk); 52extern void vtime_account_system(struct task_struct *tsk);
9extern void vtime_account_idle(struct task_struct *tsk); 53extern void vtime_account_idle(struct task_struct *tsk);
10extern void vtime_account_user(struct task_struct *tsk); 54extern void vtime_account_user(struct task_struct *tsk);
11extern void vtime_account_irq_enter(struct task_struct *tsk);
12 55
13#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 56#ifdef __ARCH_HAS_VTIME_ACCOUNT
14static inline bool vtime_accounting_enabled(void) { return true; } 57extern void vtime_account_irq_enter(struct task_struct *tsk);
15#endif 58#else
59extern void vtime_common_account_irq_enter(struct task_struct *tsk);
60static inline void vtime_account_irq_enter(struct task_struct *tsk)
61{
62 if (vtime_accounting_enabled())
63 vtime_common_account_irq_enter(tsk);
64}
65#endif /* __ARCH_HAS_VTIME_ACCOUNT */
16 66
17#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ 67#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
18 68
@@ -20,14 +70,20 @@ static inline void vtime_task_switch(struct task_struct *prev) { }
20static inline void vtime_account_system(struct task_struct *tsk) { } 70static inline void vtime_account_system(struct task_struct *tsk) { }
21static inline void vtime_account_user(struct task_struct *tsk) { } 71static inline void vtime_account_user(struct task_struct *tsk) { }
22static inline void vtime_account_irq_enter(struct task_struct *tsk) { } 72static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
23static inline bool vtime_accounting_enabled(void) { return false; } 73#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
24#endif
25 74
26#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN 75#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
27extern void arch_vtime_task_switch(struct task_struct *tsk); 76extern void arch_vtime_task_switch(struct task_struct *tsk);
28extern void vtime_account_irq_exit(struct task_struct *tsk); 77extern void vtime_gen_account_irq_exit(struct task_struct *tsk);
29extern bool vtime_accounting_enabled(void); 78
79static inline void vtime_account_irq_exit(struct task_struct *tsk)
80{
81 if (vtime_accounting_enabled())
82 vtime_gen_account_irq_exit(tsk);
83}
84
30extern void vtime_user_enter(struct task_struct *tsk); 85extern void vtime_user_enter(struct task_struct *tsk);
86
31static inline void vtime_user_exit(struct task_struct *tsk) 87static inline void vtime_user_exit(struct task_struct *tsk)
32{ 88{
33 vtime_account_user(tsk); 89 vtime_account_user(tsk);
@@ -35,7 +91,7 @@ static inline void vtime_user_exit(struct task_struct *tsk)
35extern void vtime_guest_enter(struct task_struct *tsk); 91extern void vtime_guest_enter(struct task_struct *tsk);
36extern void vtime_guest_exit(struct task_struct *tsk); 92extern void vtime_guest_exit(struct task_struct *tsk);
37extern void vtime_init_idle(struct task_struct *tsk, int cpu); 93extern void vtime_init_idle(struct task_struct *tsk, int cpu);
38#else 94#else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */
39static inline void vtime_account_irq_exit(struct task_struct *tsk) 95static inline void vtime_account_irq_exit(struct task_struct *tsk)
40{ 96{
41 /* On hard|softirq exit we always account to hard|softirq cputime */ 97 /* On hard|softirq exit we always account to hard|softirq cputime */
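
For orientation, a simplified sketch (not code from this patch; the wrapper name is hypothetical) of where the irq hooks rearranged above sit: the generic irq entry/exit paths call them around the handler, and the new static-key-gated inline wrappers are what keep those calls cheap when no CPU does tickless accounting.

	#include <linux/sched.h>
	#include <linux/vtime.h>

	static void example_irq_path(struct task_struct *tsk)
	{
		vtime_account_irq_enter(tsk);	/* flush the time accrued up to the interrupt */
		/* ... run the hardirq/softirq handler here ... */
		vtime_account_irq_exit(tsk);	/* charge the interrupt time that was just spent */
	}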
diff --git a/include/trace/events/context_tracking.h b/include/trace/events/context_tracking.h
new file mode 100644
index 000000000000..ce8007cf29cf
--- /dev/null
+++ b/include/trace/events/context_tracking.h
@@ -0,0 +1,58 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM context_tracking
3
4#if !defined(_TRACE_CONTEXT_TRACKING_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_CONTEXT_TRACKING_H
6
7#include <linux/tracepoint.h>
8
9DECLARE_EVENT_CLASS(context_tracking_user,
10
11 TP_PROTO(int dummy),
12
13 TP_ARGS(dummy),
14
15 TP_STRUCT__entry(
16 __field( int, dummy )
17 ),
18
19 TP_fast_assign(
20 __entry->dummy = dummy;
21 ),
22
23 TP_printk("%s", "")
24);
25
26/**
27 * user_enter - called when the kernel resumes to userspace
28 * @dummy: dummy arg to make trace event macro happy
29 *
30 * This event occurs when the kernel resumes to userspace after
31 * an exception or a syscall.
32 */
33DEFINE_EVENT(context_tracking_user, user_enter,
34
35 TP_PROTO(int dummy),
36
37 TP_ARGS(dummy)
38);
39
40/**
41 * user_exit - called when userspace enters the kernel
42 * @dummy: dummy arg to make trace event macro happy
43 *
44 * This event occurs when userspace enters the kernel through
45 * an exception or a syscall.
46 */
47DEFINE_EVENT(context_tracking_user, user_exit,
48
49 TP_PROTO(int dummy),
50
51 TP_ARGS(dummy)
52);
53
54
55#endif /* _TRACE_CONTEXT_TRACKING_H */
56
57/* This part must be outside protection */
58#include <trace/define_trace.h>
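
As with any tracepoint header, exactly one translation unit must instantiate these events; the kernel/context_tracking.c hunk further down does that, roughly as sketched here.

	/* In exactly one .c file: */
	#define CREATE_TRACE_POINTS
	#include <trace/events/context_tracking.h>

	/* Call sites then simply emit the events, e.g. trace_user_enter(0); */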
diff --git a/init/Kconfig b/init/Kconfig
index cc917d3ec858..0a2c4bcf179e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -528,13 +528,29 @@ config RCU_USER_QS
528config CONTEXT_TRACKING_FORCE 528config CONTEXT_TRACKING_FORCE
529 bool "Force context tracking" 529 bool "Force context tracking"
530 depends on CONTEXT_TRACKING 530 depends on CONTEXT_TRACKING
531 default CONTEXT_TRACKING 531 default y if !NO_HZ_FULL
532 help 532 help
 533 Probe on user/kernel boundaries by default in order to 533 The major prerequisite for full dynticks to work is to
534 test the features that rely on it such as userspace RCU extended 534 support the context tracking subsystem. But there are also
535 quiescent states. 535 other dependencies to provide in order to make the full
 536 This test is there for debugging until we have a real user like the 536 dynticks work.
537 full dynticks mode. 537
538 This option stands for testing when an arch implements the
 539 context tracking backend but doesn't yet fulfill all the
 540 requirements to make the full dynticks feature work.
541 Without the full dynticks, there is no way to test the support
542 for context tracking and the subsystems that rely on it: RCU
543 userspace extended quiescent state and tickless cputime
544 accounting. This option copes with the absence of the full
545 dynticks subsystem by forcing the context tracking on all
546 CPUs in the system.
547
 548 Say Y only if you're working on the development of an
549 architecture backend for the context tracking.
550
 551 Say N otherwise; this option brings an overhead that you
552 don't want in production.
553
538 554
539config RCU_FANOUT 555config RCU_FANOUT
540 int "Tree-based hierarchical RCU fanout value" 556 int "Tree-based hierarchical RCU fanout value"
diff --git a/init/main.c b/init/main.c
index d03d2ec2eacf..af310afbef28 100644
--- a/init/main.c
+++ b/init/main.c
@@ -75,6 +75,7 @@
75#include <linux/blkdev.h> 75#include <linux/blkdev.h>
76#include <linux/elevator.h> 76#include <linux/elevator.h>
77#include <linux/sched_clock.h> 77#include <linux/sched_clock.h>
78#include <linux/context_tracking.h>
78 79
79#include <asm/io.h> 80#include <asm/io.h>
80#include <asm/bugs.h> 81#include <asm/bugs.h>
@@ -545,6 +546,7 @@ asmlinkage void __init start_kernel(void)
545 idr_init_cache(); 546 idr_init_cache();
546 rcu_init(); 547 rcu_init();
547 tick_nohz_init(); 548 tick_nohz_init();
549 context_tracking_init();
548 radix_tree_init(); 550 radix_tree_init();
549 /* init some links before init_ISA_irqs() */ 551 /* init some links before init_ISA_irqs() */
550 early_irq_init(); 552 early_irq_init();
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 383f8231e436..247091bf0587 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -20,22 +20,33 @@
20#include <linux/hardirq.h> 20#include <linux/hardirq.h>
21#include <linux/export.h> 21#include <linux/export.h>
22 22
23DEFINE_PER_CPU(struct context_tracking, context_tracking) = { 23#define CREATE_TRACE_POINTS
24#ifdef CONFIG_CONTEXT_TRACKING_FORCE 24#include <trace/events/context_tracking.h>
25 .active = true, 25
26#endif 26struct static_key context_tracking_enabled = STATIC_KEY_INIT_FALSE;
27}; 27EXPORT_SYMBOL_GPL(context_tracking_enabled);
28
29DEFINE_PER_CPU(struct context_tracking, context_tracking);
30EXPORT_SYMBOL_GPL(context_tracking);
31
32void context_tracking_cpu_set(int cpu)
33{
34 if (!per_cpu(context_tracking.active, cpu)) {
35 per_cpu(context_tracking.active, cpu) = true;
36 static_key_slow_inc(&context_tracking_enabled);
37 }
38}
28 39
29/** 40/**
30 * user_enter - Inform the context tracking that the CPU is going to 41 * context_tracking_user_enter - Inform the context tracking that the CPU is going to
31 * enter userspace mode. 42 * enter userspace mode.
32 * 43 *
33 * This function must be called right before we switch from the kernel 44 * This function must be called right before we switch from the kernel
34 * to userspace, when it's guaranteed the remaining kernel instructions 45 * to userspace, when it's guaranteed the remaining kernel instructions
35 * to execute won't use any RCU read side critical section because this 46 * to execute won't use any RCU read side critical section because this
36 * function sets RCU in extended quiescent state. 47 * function sets RCU in extended quiescent state.
37 */ 48 */
38void user_enter(void) 49void context_tracking_user_enter(void)
39{ 50{
40 unsigned long flags; 51 unsigned long flags;
41 52
@@ -54,17 +65,32 @@ void user_enter(void)
54 WARN_ON_ONCE(!current->mm); 65 WARN_ON_ONCE(!current->mm);
55 66
56 local_irq_save(flags); 67 local_irq_save(flags);
 57 if (__this_cpu_read(context_tracking.active) && 68 if (__this_cpu_read(context_tracking.state) != IN_USER) {
58 __this_cpu_read(context_tracking.state) != IN_USER) { 69 if (__this_cpu_read(context_tracking.active)) {
70 trace_user_enter(0);
71 /*
72 * At this stage, only low level arch entry code remains and
73 * then we'll run in userspace. We can assume there won't be
74 * any RCU read-side critical section until the next call to
75 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
76 * on the tick.
77 */
78 vtime_user_enter(current);
79 rcu_user_enter();
80 }
59 /* 81 /*
60 * At this stage, only low level arch entry code remains and 82 * Even if context tracking is disabled on this CPU, because it's outside
61 * then we'll run in userspace. We can assume there won't be 83 * the full dynticks mask for example, we still have to keep track of the
62 * any RCU read-side critical section until the next call to 84 * context transitions and states to prevent inconsistency on those of
63 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency 85 * other CPUs.
 64 * on the tick. 86 * If a task triggers an exception in userspace, sleeps in the exception
 87 * handler and then migrates to another CPU, that new CPU must know where
88 * the exception returns by the time we call exception_exit().
89 * This information can only be provided by the previous CPU when it called
90 * exception_enter().
91 * OTOH we can spare the calls to vtime and RCU when context_tracking.active
92 * is false because we know that CPU is not tickless.
65 */ 93 */
66 vtime_user_enter(current);
67 rcu_user_enter();
68 __this_cpu_write(context_tracking.state, IN_USER); 94 __this_cpu_write(context_tracking.state, IN_USER);
69 } 95 }
70 local_irq_restore(flags); 96 local_irq_restore(flags);
@@ -87,10 +113,9 @@ void user_enter(void)
87 */ 113 */
88void __sched notrace preempt_schedule_context(void) 114void __sched notrace preempt_schedule_context(void)
89{ 115{
90 struct thread_info *ti = current_thread_info();
91 enum ctx_state prev_ctx; 116 enum ctx_state prev_ctx;
92 117
93 if (likely(ti->preempt_count || irqs_disabled())) 118 if (likely(!preemptible()))
94 return; 119 return;
95 120
96 /* 121 /*
@@ -112,8 +137,8 @@ EXPORT_SYMBOL_GPL(preempt_schedule_context);
112#endif /* CONFIG_PREEMPT */ 137#endif /* CONFIG_PREEMPT */
113 138
114/** 139/**
115 * user_exit - Inform the context tracking that the CPU is 140 * context_tracking_user_exit - Inform the context tracking that the CPU is
116 * exiting userspace mode and entering the kernel. 141 * exiting userspace mode and entering the kernel.
117 * 142 *
118 * This function must be called after we entered the kernel from userspace 143 * This function must be called after we entered the kernel from userspace
 119 * before any use of RCU read side critical section. This potentially includes 144 * before any use of RCU read side critical section. This potentially includes
@@ -122,7 +147,7 @@ EXPORT_SYMBOL_GPL(preempt_schedule_context);
122 * This call supports re-entrancy. This way it can be called from any exception 147 * This call supports re-entrancy. This way it can be called from any exception
123 * handler without needing to know if we came from userspace or not. 148 * handler without needing to know if we came from userspace or not.
124 */ 149 */
125void user_exit(void) 150void context_tracking_user_exit(void)
126{ 151{
127 unsigned long flags; 152 unsigned long flags;
128 153
@@ -131,38 +156,22 @@ void user_exit(void)
131 156
132 local_irq_save(flags); 157 local_irq_save(flags);
133 if (__this_cpu_read(context_tracking.state) == IN_USER) { 158 if (__this_cpu_read(context_tracking.state) == IN_USER) {
134 /* 159 if (__this_cpu_read(context_tracking.active)) {
135 * We are going to run code that may use RCU. Inform 160 /*
136 * RCU core about that (ie: we may need the tick again). 161 * We are going to run code that may use RCU. Inform
137 */ 162 * RCU core about that (ie: we may need the tick again).
138 rcu_user_exit(); 163 */
139 vtime_user_exit(current); 164 rcu_user_exit();
165 vtime_user_exit(current);
166 trace_user_exit(0);
167 }
140 __this_cpu_write(context_tracking.state, IN_KERNEL); 168 __this_cpu_write(context_tracking.state, IN_KERNEL);
141 } 169 }
142 local_irq_restore(flags); 170 local_irq_restore(flags);
143} 171}
144 172
145void guest_enter(void)
146{
147 if (vtime_accounting_enabled())
148 vtime_guest_enter(current);
149 else
150 __guest_enter();
151}
152EXPORT_SYMBOL_GPL(guest_enter);
153
154void guest_exit(void)
155{
156 if (vtime_accounting_enabled())
157 vtime_guest_exit(current);
158 else
159 __guest_exit();
160}
161EXPORT_SYMBOL_GPL(guest_exit);
162
163
164/** 173/**
165 * context_tracking_task_switch - context switch the syscall callbacks 174 * __context_tracking_task_switch - context switch the syscall callbacks
166 * @prev: the task that is being switched out 175 * @prev: the task that is being switched out
167 * @next: the task that is being switched in 176 * @next: the task that is being switched in
168 * 177 *
@@ -174,11 +183,19 @@ EXPORT_SYMBOL_GPL(guest_exit);
174 * migrate to some CPU that doesn't do the context tracking. As such the TIF 183 * migrate to some CPU that doesn't do the context tracking. As such the TIF
175 * flag may not be desired there. 184 * flag may not be desired there.
176 */ 185 */
177void context_tracking_task_switch(struct task_struct *prev, 186void __context_tracking_task_switch(struct task_struct *prev,
178 struct task_struct *next) 187 struct task_struct *next)
179{ 188{
180 if (__this_cpu_read(context_tracking.active)) { 189 clear_tsk_thread_flag(prev, TIF_NOHZ);
181 clear_tsk_thread_flag(prev, TIF_NOHZ); 190 set_tsk_thread_flag(next, TIF_NOHZ);
182 set_tsk_thread_flag(next, TIF_NOHZ);
183 }
184} 191}
192
193#ifdef CONFIG_CONTEXT_TRACKING_FORCE
194void __init context_tracking_init(void)
195{
196 int cpu;
197
198 for_each_possible_cpu(cpu)
199 context_tracking_cpu_set(cpu);
200}
201#endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b8e2162fc803..725aa067ad63 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2527,13 +2527,11 @@ void __sched schedule_preempt_disabled(void)
2527 */ 2527 */
2528asmlinkage void __sched notrace preempt_schedule(void) 2528asmlinkage void __sched notrace preempt_schedule(void)
2529{ 2529{
2530 struct thread_info *ti = current_thread_info();
2531
2532 /* 2530 /*
2533 * If there is a non-zero preempt_count or interrupts are disabled, 2531 * If there is a non-zero preempt_count or interrupts are disabled,
2534 * we do not want to preempt the current task. Just return.. 2532 * we do not want to preempt the current task. Just return..
2535 */ 2533 */
2536 if (likely(ti->preempt_count || irqs_disabled())) 2534 if (likely(!preemptible()))
2537 return; 2535 return;
2538 2536
2539 do { 2537 do {
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index e89ccefef278..ace34f95e200 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -378,11 +378,8 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_
378#ifdef CONFIG_VIRT_CPU_ACCOUNTING 378#ifdef CONFIG_VIRT_CPU_ACCOUNTING
379 379
380#ifndef __ARCH_HAS_VTIME_TASK_SWITCH 380#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
381void vtime_task_switch(struct task_struct *prev) 381void vtime_common_task_switch(struct task_struct *prev)
382{ 382{
383 if (!vtime_accounting_enabled())
384 return;
385
386 if (is_idle_task(prev)) 383 if (is_idle_task(prev))
387 vtime_account_idle(prev); 384 vtime_account_idle(prev);
388 else 385 else
@@ -404,11 +401,8 @@ void vtime_task_switch(struct task_struct *prev)
404 * vtime_account(). 401 * vtime_account().
405 */ 402 */
406#ifndef __ARCH_HAS_VTIME_ACCOUNT 403#ifndef __ARCH_HAS_VTIME_ACCOUNT
407void vtime_account_irq_enter(struct task_struct *tsk) 404void vtime_common_account_irq_enter(struct task_struct *tsk)
408{ 405{
409 if (!vtime_accounting_enabled())
410 return;
411
412 if (!in_interrupt()) { 406 if (!in_interrupt()) {
413 /* 407 /*
414 * If we interrupted user, context_tracking_in_user() 408 * If we interrupted user, context_tracking_in_user()
@@ -428,7 +422,7 @@ void vtime_account_irq_enter(struct task_struct *tsk)
428 } 422 }
429 vtime_account_system(tsk); 423 vtime_account_system(tsk);
430} 424}
431EXPORT_SYMBOL_GPL(vtime_account_irq_enter); 425EXPORT_SYMBOL_GPL(vtime_common_account_irq_enter);
432#endif /* __ARCH_HAS_VTIME_ACCOUNT */ 426#endif /* __ARCH_HAS_VTIME_ACCOUNT */
433#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ 427#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
434 428
@@ -559,12 +553,6 @@ static void cputime_adjust(struct task_cputime *curr,
559{ 553{
560 cputime_t rtime, stime, utime, total; 554 cputime_t rtime, stime, utime, total;
561 555
562 if (vtime_accounting_enabled()) {
563 *ut = curr->utime;
564 *st = curr->stime;
565 return;
566 }
567
568 stime = curr->stime; 556 stime = curr->stime;
569 total = stime + curr->utime; 557 total = stime + curr->utime;
570 558
@@ -664,23 +652,17 @@ static void __vtime_account_system(struct task_struct *tsk)
664 652
665void vtime_account_system(struct task_struct *tsk) 653void vtime_account_system(struct task_struct *tsk)
666{ 654{
667 if (!vtime_accounting_enabled())
668 return;
669
670 write_seqlock(&tsk->vtime_seqlock); 655 write_seqlock(&tsk->vtime_seqlock);
671 __vtime_account_system(tsk); 656 __vtime_account_system(tsk);
672 write_sequnlock(&tsk->vtime_seqlock); 657 write_sequnlock(&tsk->vtime_seqlock);
673} 658}
674 659
675void vtime_account_irq_exit(struct task_struct *tsk) 660void vtime_gen_account_irq_exit(struct task_struct *tsk)
676{ 661{
677 if (!vtime_accounting_enabled())
678 return;
679
680 write_seqlock(&tsk->vtime_seqlock); 662 write_seqlock(&tsk->vtime_seqlock);
663 __vtime_account_system(tsk);
681 if (context_tracking_in_user()) 664 if (context_tracking_in_user())
682 tsk->vtime_snap_whence = VTIME_USER; 665 tsk->vtime_snap_whence = VTIME_USER;
683 __vtime_account_system(tsk);
684 write_sequnlock(&tsk->vtime_seqlock); 666 write_sequnlock(&tsk->vtime_seqlock);
685} 667}
686 668
@@ -688,12 +670,8 @@ void vtime_account_user(struct task_struct *tsk)
688{ 670{
689 cputime_t delta_cpu; 671 cputime_t delta_cpu;
690 672
691 if (!vtime_accounting_enabled())
692 return;
693
694 delta_cpu = get_vtime_delta(tsk);
695
696 write_seqlock(&tsk->vtime_seqlock); 673 write_seqlock(&tsk->vtime_seqlock);
674 delta_cpu = get_vtime_delta(tsk);
697 tsk->vtime_snap_whence = VTIME_SYS; 675 tsk->vtime_snap_whence = VTIME_SYS;
698 account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); 676 account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
699 write_sequnlock(&tsk->vtime_seqlock); 677 write_sequnlock(&tsk->vtime_seqlock);
@@ -701,22 +679,27 @@ void vtime_account_user(struct task_struct *tsk)
701 679
702void vtime_user_enter(struct task_struct *tsk) 680void vtime_user_enter(struct task_struct *tsk)
703{ 681{
704 if (!vtime_accounting_enabled())
705 return;
706
707 write_seqlock(&tsk->vtime_seqlock); 682 write_seqlock(&tsk->vtime_seqlock);
708 tsk->vtime_snap_whence = VTIME_USER;
709 __vtime_account_system(tsk); 683 __vtime_account_system(tsk);
684 tsk->vtime_snap_whence = VTIME_USER;
710 write_sequnlock(&tsk->vtime_seqlock); 685 write_sequnlock(&tsk->vtime_seqlock);
711} 686}
712 687
713void vtime_guest_enter(struct task_struct *tsk) 688void vtime_guest_enter(struct task_struct *tsk)
714{ 689{
690 /*
691 * The flags must be updated under the lock with
692 * the vtime_snap flush and update.
693 * That enforces a right ordering and update sequence
694 * synchronization against the reader (task_gtime())
695 * that can thus safely catch up with a tickless delta.
696 */
715 write_seqlock(&tsk->vtime_seqlock); 697 write_seqlock(&tsk->vtime_seqlock);
716 __vtime_account_system(tsk); 698 __vtime_account_system(tsk);
717 current->flags |= PF_VCPU; 699 current->flags |= PF_VCPU;
718 write_sequnlock(&tsk->vtime_seqlock); 700 write_sequnlock(&tsk->vtime_seqlock);
719} 701}
702EXPORT_SYMBOL_GPL(vtime_guest_enter);
720 703
721void vtime_guest_exit(struct task_struct *tsk) 704void vtime_guest_exit(struct task_struct *tsk)
722{ 705{
@@ -725,6 +708,7 @@ void vtime_guest_exit(struct task_struct *tsk)
725 current->flags &= ~PF_VCPU; 708 current->flags &= ~PF_VCPU;
726 write_sequnlock(&tsk->vtime_seqlock); 709 write_sequnlock(&tsk->vtime_seqlock);
727} 710}
711EXPORT_SYMBOL_GPL(vtime_guest_exit);
728 712
729void vtime_account_idle(struct task_struct *tsk) 713void vtime_account_idle(struct task_struct *tsk)
730{ 714{
@@ -733,11 +717,6 @@ void vtime_account_idle(struct task_struct *tsk)
733 account_idle_time(delta_cpu); 717 account_idle_time(delta_cpu);
734} 718}
735 719
736bool vtime_accounting_enabled(void)
737{
738 return context_tracking_active();
739}
740
741void arch_vtime_task_switch(struct task_struct *prev) 720void arch_vtime_task_switch(struct task_struct *prev)
742{ 721{
743 write_seqlock(&prev->vtime_seqlock); 722 write_seqlock(&prev->vtime_seqlock);
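
The comment added to vtime_guest_enter() above refers to the seqlock reader that catches up with a tickless delta; a rough sketch of that read side (modeled on task_gtime(), simplified, with example_pending_vtime_delta() as a hypothetical helper for the not-yet-flushed time):

	#include <linux/sched.h>
	#include <linux/seqlock.h>

	extern cputime_t example_pending_vtime_delta(struct task_struct *t);

	static cputime_t example_task_gtime(struct task_struct *t)
	{
		unsigned int seq;
		cputime_t gtime;

		do {
			seq = read_seqbegin(&t->vtime_seqlock);
			gtime = t->gtime;
			if (t->flags & PF_VCPU)	/* still in guest: add the pending delta */
				gtime += example_pending_vtime_delta(t);
		} while (read_seqretry(&t->vtime_seqlock, seq));

		return gtime;
	}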
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 3381f098070f..2b62fe86f9ec 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -105,7 +105,6 @@ config NO_HZ_FULL
105 select RCU_USER_QS 105 select RCU_USER_QS
106 select RCU_NOCB_CPU 106 select RCU_NOCB_CPU
107 select VIRT_CPU_ACCOUNTING_GEN 107 select VIRT_CPU_ACCOUNTING_GEN
108 select CONTEXT_TRACKING_FORCE
109 select IRQ_WORK 108 select IRQ_WORK
110 help 109 help
111 Adaptively try to shutdown the tick whenever possible, even when 110 Adaptively try to shutdown the tick whenever possible, even when
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index e8a1516cc0a3..3612fc77f834 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -23,6 +23,7 @@
23#include <linux/irq_work.h> 23#include <linux/irq_work.h>
24#include <linux/posix-timers.h> 24#include <linux/posix-timers.h>
25#include <linux/perf_event.h> 25#include <linux/perf_event.h>
26#include <linux/context_tracking.h>
26 27
27#include <asm/irq_regs.h> 28#include <asm/irq_regs.h>
28 29
@@ -148,8 +149,8 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
148} 149}
149 150
150#ifdef CONFIG_NO_HZ_FULL 151#ifdef CONFIG_NO_HZ_FULL
151static cpumask_var_t nohz_full_mask; 152cpumask_var_t tick_nohz_full_mask;
152bool have_nohz_full_mask; 153bool tick_nohz_full_running;
153 154
154static bool can_stop_full_tick(void) 155static bool can_stop_full_tick(void)
155{ 156{
@@ -182,7 +183,7 @@ static bool can_stop_full_tick(void)
182 * Don't allow the user to think they can get 183 * Don't allow the user to think they can get
183 * full NO_HZ with this machine. 184 * full NO_HZ with this machine.
184 */ 185 */
185 WARN_ONCE(have_nohz_full_mask, 186 WARN_ONCE(tick_nohz_full_running,
186 "NO_HZ FULL will not work with unstable sched clock"); 187 "NO_HZ FULL will not work with unstable sched clock");
187 return false; 188 return false;
188 } 189 }
@@ -197,7 +198,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now);
197 * Re-evaluate the need for the tick on the current CPU 198 * Re-evaluate the need for the tick on the current CPU
198 * and restart it if necessary. 199 * and restart it if necessary.
199 */ 200 */
200void tick_nohz_full_check(void) 201void __tick_nohz_full_check(void)
201{ 202{
202 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 203 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
203 204
@@ -211,7 +212,7 @@ void tick_nohz_full_check(void)
211 212
212static void nohz_full_kick_work_func(struct irq_work *work) 213static void nohz_full_kick_work_func(struct irq_work *work)
213{ 214{
214 tick_nohz_full_check(); 215 __tick_nohz_full_check();
215} 216}
216 217
217static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { 218static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
@@ -230,7 +231,7 @@ void tick_nohz_full_kick(void)
230 231
231static void nohz_full_kick_ipi(void *info) 232static void nohz_full_kick_ipi(void *info)
232{ 233{
233 tick_nohz_full_check(); 234 __tick_nohz_full_check();
234} 235}
235 236
236/* 237/*
@@ -239,12 +240,13 @@ static void nohz_full_kick_ipi(void *info)
239 */ 240 */
240void tick_nohz_full_kick_all(void) 241void tick_nohz_full_kick_all(void)
241{ 242{
242 if (!have_nohz_full_mask) 243 if (!tick_nohz_full_running)
243 return; 244 return;
244 245
245 preempt_disable(); 246 preempt_disable();
246 smp_call_function_many(nohz_full_mask, 247 smp_call_function_many(tick_nohz_full_mask,
247 nohz_full_kick_ipi, NULL, false); 248 nohz_full_kick_ipi, NULL, false);
249 tick_nohz_full_kick();
248 preempt_enable(); 250 preempt_enable();
249} 251}
250 252
@@ -253,7 +255,7 @@ void tick_nohz_full_kick_all(void)
253 * It might need the tick due to per task/process properties: 255 * It might need the tick due to per task/process properties:
254 * perf events, posix cpu timers, ... 256 * perf events, posix cpu timers, ...
255 */ 257 */
256void tick_nohz_task_switch(struct task_struct *tsk) 258void __tick_nohz_task_switch(struct task_struct *tsk)
257{ 259{
258 unsigned long flags; 260 unsigned long flags;
259 261
@@ -269,31 +271,23 @@ out:
269 local_irq_restore(flags); 271 local_irq_restore(flags);
270} 272}
271 273
272int tick_nohz_full_cpu(int cpu)
273{
274 if (!have_nohz_full_mask)
275 return 0;
276
277 return cpumask_test_cpu(cpu, nohz_full_mask);
278}
279
280/* Parse the boot-time nohz CPU list from the kernel parameters. */ 274/* Parse the boot-time nohz CPU list from the kernel parameters. */
281static int __init tick_nohz_full_setup(char *str) 275static int __init tick_nohz_full_setup(char *str)
282{ 276{
283 int cpu; 277 int cpu;
284 278
285 alloc_bootmem_cpumask_var(&nohz_full_mask); 279 alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
286 if (cpulist_parse(str, nohz_full_mask) < 0) { 280 if (cpulist_parse(str, tick_nohz_full_mask) < 0) {
287 pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); 281 pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
288 return 1; 282 return 1;
289 } 283 }
290 284
291 cpu = smp_processor_id(); 285 cpu = smp_processor_id();
292 if (cpumask_test_cpu(cpu, nohz_full_mask)) { 286 if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
293 pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); 287 pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
294 cpumask_clear_cpu(cpu, nohz_full_mask); 288 cpumask_clear_cpu(cpu, tick_nohz_full_mask);
295 } 289 }
296 have_nohz_full_mask = true; 290 tick_nohz_full_running = true;
297 291
298 return 1; 292 return 1;
299} 293}
@@ -311,7 +305,7 @@ static int tick_nohz_cpu_down_callback(struct notifier_block *nfb,
311 * If we handle the timekeeping duty for full dynticks CPUs, 305 * If we handle the timekeeping duty for full dynticks CPUs,
312 * we can't safely shutdown that CPU. 306 * we can't safely shutdown that CPU.
313 */ 307 */
314 if (have_nohz_full_mask && tick_do_timer_cpu == cpu) 308 if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
315 return NOTIFY_BAD; 309 return NOTIFY_BAD;
316 break; 310 break;
317 } 311 }
@@ -330,31 +324,34 @@ static int tick_nohz_init_all(void)
330 int err = -1; 324 int err = -1;
331 325
332#ifdef CONFIG_NO_HZ_FULL_ALL 326#ifdef CONFIG_NO_HZ_FULL_ALL
333 if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) { 327 if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) {
334 pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); 328 pr_err("NO_HZ: Can't allocate full dynticks cpumask\n");
335 return err; 329 return err;
336 } 330 }
337 err = 0; 331 err = 0;
338 cpumask_setall(nohz_full_mask); 332 cpumask_setall(tick_nohz_full_mask);
339 cpumask_clear_cpu(smp_processor_id(), nohz_full_mask); 333 cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask);
340 have_nohz_full_mask = true; 334 tick_nohz_full_running = true;
341#endif 335#endif
342 return err; 336 return err;
343} 337}
344 338
345void __init tick_nohz_init(void) 339void __init tick_nohz_init(void)
346{ 340{
347 if (!have_nohz_full_mask) { 341 int cpu;
342
343 if (!tick_nohz_full_running) {
348 if (tick_nohz_init_all() < 0) 344 if (tick_nohz_init_all() < 0)
349 return; 345 return;
350 } 346 }
351 347
348 for_each_cpu(cpu, tick_nohz_full_mask)
349 context_tracking_cpu_set(cpu);
350
352 cpu_notifier(tick_nohz_cpu_down_callback, 0); 351 cpu_notifier(tick_nohz_cpu_down_callback, 0);
353 cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); 352 cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), tick_nohz_full_mask);
354 pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); 353 pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf);
355} 354}
356#else
357#define have_nohz_full_mask (0)
358#endif 355#endif
359 356
360/* 357/*
@@ -732,7 +729,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
732 return false; 729 return false;
733 } 730 }
734 731
735 if (have_nohz_full_mask) { 732 if (tick_nohz_full_enabled()) {
736 /* 733 /*
737 * Keep the tick alive to guarantee timekeeping progression 734 * Keep the tick alive to guarantee timekeeping progression
738 * if there are full dynticks CPUs around 735 * if there are full dynticks CPUs around