author    Ingo Molnar <mingo@kernel.org>    2015-03-27 05:09:21 -0400
committer Ingo Molnar <mingo@kernel.org>    2015-03-27 05:09:21 -0400
commit    4e6d7c2aa95158315902647963b359b32da5c295 (patch)
tree      5141f79302e1e653cde53bab6a981a1b7bfa47b0
parent    3c435c1e472ba344ee25f795f4807d4457e61f6c (diff)
parent    fe5fba05b46c791c95a9f34228ac495f81f72fc0 (diff)
Merge branch 'timers/core' into perf/timer, to apply dependent patch
An upcoming patch will depend on tai_ns() and NMI-safe ktime_get_raw_fast(), so merge timers/core here in a separate topic branch until it's all cooked and timers/core is merged upstream.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--  arch/arm/plat-omap/counter_32k.c       2
-rw-r--r--  arch/arm64/kernel/vdso.c              10
-rw-r--r--  arch/s390/kernel/time.c               20
-rw-r--r--  arch/sparc/kernel/time_32.c            6
-rw-r--r--  arch/tile/kernel/time.c               24
-rw-r--r--  arch/x86/kernel/vsyscall_gtod.c       24
-rw-r--r--  arch/x86/kvm/x86.c                    14
-rw-r--r--  drivers/clocksource/em_sti.c           2
-rw-r--r--  drivers/clocksource/sh_cmt.c           2
-rw-r--r--  drivers/clocksource/sh_tmu.c           2
-rw-r--r--  include/linux/clockchips.h            21
-rw-r--r--  include/linux/clocksource.h           25
-rw-r--r--  include/linux/timekeeper_internal.h   16
-rw-r--r--  include/linux/timekeeping.h            6
-rw-r--r--  kernel/time/clockevents.c             88
-rw-r--r--  kernel/time/clocksource.c            170
-rw-r--r--  kernel/time/jiffies.c                  5
-rw-r--r--  kernel/time/sched_clock.c            236
-rw-r--r--  kernel/time/timekeeping.c            345
-rw-r--r--  kernel/time/timer_list.c              32
-rw-r--r--  lib/Kconfig.debug                     13
21 files changed, 703 insertions(+), 360 deletions(-)
diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c
index 61b4d705c267..43cf74561cfd 100644
--- a/arch/arm/plat-omap/counter_32k.c
+++ b/arch/arm/plat-omap/counter_32k.c
@@ -103,7 +103,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase)
103 103
104 /* 104 /*
105 * 120000 rough estimate from the calculations in 105 * 120000 rough estimate from the calculations in
106 * __clocksource_updatefreq_scale. 106 * __clocksource_update_freq_scale.
107 */ 107 */
108 clocks_calc_mult_shift(&persistent_mult, &persistent_shift, 108 clocks_calc_mult_shift(&persistent_mult, &persistent_shift,
109 32768, NSEC_PER_SEC, 120000); 109 32768, NSEC_PER_SEC, 120000);
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 32aeea083d93..ec37ab3f524f 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -200,7 +200,7 @@ up_fail:
200void update_vsyscall(struct timekeeper *tk) 200void update_vsyscall(struct timekeeper *tk)
201{ 201{
202 struct timespec xtime_coarse; 202 struct timespec xtime_coarse;
203 u32 use_syscall = strcmp(tk->tkr.clock->name, "arch_sys_counter"); 203 u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter");
204 204
205 ++vdso_data->tb_seq_count; 205 ++vdso_data->tb_seq_count;
206 smp_wmb(); 206 smp_wmb();
@@ -213,11 +213,11 @@ void update_vsyscall(struct timekeeper *tk)
213 vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; 213 vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
214 214
215 if (!use_syscall) { 215 if (!use_syscall) {
216 vdso_data->cs_cycle_last = tk->tkr.cycle_last; 216 vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
217 vdso_data->xtime_clock_sec = tk->xtime_sec; 217 vdso_data->xtime_clock_sec = tk->xtime_sec;
218 vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec; 218 vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
219 vdso_data->cs_mult = tk->tkr.mult; 219 vdso_data->cs_mult = tk->tkr_mono.mult;
220 vdso_data->cs_shift = tk->tkr.shift; 220 vdso_data->cs_shift = tk->tkr_mono.shift;
221 } 221 }
222 222
223 smp_wmb(); 223 smp_wmb();
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 20660dddb2d6..170ddd2018b3 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -215,20 +215,20 @@ void update_vsyscall(struct timekeeper *tk)
215{ 215{
216 u64 nsecps; 216 u64 nsecps;
217 217
218 if (tk->tkr.clock != &clocksource_tod) 218 if (tk->tkr_mono.clock != &clocksource_tod)
219 return; 219 return;
220 220
221 /* Make userspace gettimeofday spin until we're done. */ 221 /* Make userspace gettimeofday spin until we're done. */
222 ++vdso_data->tb_update_count; 222 ++vdso_data->tb_update_count;
223 smp_wmb(); 223 smp_wmb();
224 vdso_data->xtime_tod_stamp = tk->tkr.cycle_last; 224 vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last;
225 vdso_data->xtime_clock_sec = tk->xtime_sec; 225 vdso_data->xtime_clock_sec = tk->xtime_sec;
226 vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec; 226 vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
227 vdso_data->wtom_clock_sec = 227 vdso_data->wtom_clock_sec =
228 tk->xtime_sec + tk->wall_to_monotonic.tv_sec; 228 tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
229 vdso_data->wtom_clock_nsec = tk->tkr.xtime_nsec + 229 vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec +
230 + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr.shift); 230 + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
231 nsecps = (u64) NSEC_PER_SEC << tk->tkr.shift; 231 nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift;
232 while (vdso_data->wtom_clock_nsec >= nsecps) { 232 while (vdso_data->wtom_clock_nsec >= nsecps) {
233 vdso_data->wtom_clock_nsec -= nsecps; 233 vdso_data->wtom_clock_nsec -= nsecps;
234 vdso_data->wtom_clock_sec++; 234 vdso_data->wtom_clock_sec++;
@@ -236,7 +236,7 @@ void update_vsyscall(struct timekeeper *tk)
236 236
237 vdso_data->xtime_coarse_sec = tk->xtime_sec; 237 vdso_data->xtime_coarse_sec = tk->xtime_sec;
238 vdso_data->xtime_coarse_nsec = 238 vdso_data->xtime_coarse_nsec =
239 (long)(tk->tkr.xtime_nsec >> tk->tkr.shift); 239 (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
240 vdso_data->wtom_coarse_sec = 240 vdso_data->wtom_coarse_sec =
241 vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec; 241 vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec;
242 vdso_data->wtom_coarse_nsec = 242 vdso_data->wtom_coarse_nsec =
@@ -246,8 +246,8 @@ void update_vsyscall(struct timekeeper *tk)
246 vdso_data->wtom_coarse_sec++; 246 vdso_data->wtom_coarse_sec++;
247 } 247 }
248 248
249 vdso_data->tk_mult = tk->tkr.mult; 249 vdso_data->tk_mult = tk->tkr_mono.mult;
250 vdso_data->tk_shift = tk->tkr.shift; 250 vdso_data->tk_shift = tk->tkr_mono.shift;
251 smp_wmb(); 251 smp_wmb();
252 ++vdso_data->tb_update_count; 252 ++vdso_data->tb_update_count;
253} 253}
@@ -283,7 +283,7 @@ void __init time_init(void)
283 if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt)) 283 if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
284 panic("Couldn't request external interrupt 0x1406"); 284 panic("Couldn't request external interrupt 0x1406");
285 285
286 if (clocksource_register(&clocksource_tod) != 0) 286 if (__clocksource_register(&clocksource_tod) != 0)
287 panic("Could not register TOD clock source"); 287 panic("Could not register TOD clock source");
288 288
289 /* Enable TOD clock interrupts on the boot cpu. */ 289 /* Enable TOD clock interrupts on the boot cpu. */
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 2f80d23a0a44..18147a5523d9 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -181,17 +181,13 @@ static struct clocksource timer_cs = {
181 .rating = 100, 181 .rating = 100,
182 .read = timer_cs_read, 182 .read = timer_cs_read,
183 .mask = CLOCKSOURCE_MASK(64), 183 .mask = CLOCKSOURCE_MASK(64),
184 .shift = 2,
185 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 184 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
186}; 185};
187 186
188static __init int setup_timer_cs(void) 187static __init int setup_timer_cs(void)
189{ 188{
190 timer_cs_enabled = 1; 189 timer_cs_enabled = 1;
191 timer_cs.mult = clocksource_hz2mult(sparc_config.clock_rate, 190 return clocksource_register_hz(&timer_cs, sparc_config.clock_rate);
192 timer_cs.shift);
193
194 return clocksource_register(&timer_cs);
195} 191}
196 192
197#ifdef CONFIG_SMP 193#ifdef CONFIG_SMP
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
index d412b0856c0a..00178ecf9aea 100644
--- a/arch/tile/kernel/time.c
+++ b/arch/tile/kernel/time.c
@@ -257,34 +257,34 @@ void update_vsyscall_tz(void)
257 257
258void update_vsyscall(struct timekeeper *tk) 258void update_vsyscall(struct timekeeper *tk)
259{ 259{
260 if (tk->tkr.clock != &cycle_counter_cs) 260 if (tk->tkr_mono.clock != &cycle_counter_cs)
261 return; 261 return;
262 262
263 write_seqcount_begin(&vdso_data->tb_seq); 263 write_seqcount_begin(&vdso_data->tb_seq);
264 264
265 vdso_data->cycle_last = tk->tkr.cycle_last; 265 vdso_data->cycle_last = tk->tkr_mono.cycle_last;
266 vdso_data->mask = tk->tkr.mask; 266 vdso_data->mask = tk->tkr_mono.mask;
267 vdso_data->mult = tk->tkr.mult; 267 vdso_data->mult = tk->tkr_mono.mult;
268 vdso_data->shift = tk->tkr.shift; 268 vdso_data->shift = tk->tkr_mono.shift;
269 269
270 vdso_data->wall_time_sec = tk->xtime_sec; 270 vdso_data->wall_time_sec = tk->xtime_sec;
271 vdso_data->wall_time_snsec = tk->tkr.xtime_nsec; 271 vdso_data->wall_time_snsec = tk->tkr_mono.xtime_nsec;
272 272
273 vdso_data->monotonic_time_sec = tk->xtime_sec 273 vdso_data->monotonic_time_sec = tk->xtime_sec
274 + tk->wall_to_monotonic.tv_sec; 274 + tk->wall_to_monotonic.tv_sec;
275 vdso_data->monotonic_time_snsec = tk->tkr.xtime_nsec 275 vdso_data->monotonic_time_snsec = tk->tkr_mono.xtime_nsec
276 + ((u64)tk->wall_to_monotonic.tv_nsec 276 + ((u64)tk->wall_to_monotonic.tv_nsec
277 << tk->tkr.shift); 277 << tk->tkr_mono.shift);
278 while (vdso_data->monotonic_time_snsec >= 278 while (vdso_data->monotonic_time_snsec >=
279 (((u64)NSEC_PER_SEC) << tk->tkr.shift)) { 279 (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
280 vdso_data->monotonic_time_snsec -= 280 vdso_data->monotonic_time_snsec -=
281 ((u64)NSEC_PER_SEC) << tk->tkr.shift; 281 ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
282 vdso_data->monotonic_time_sec++; 282 vdso_data->monotonic_time_sec++;
283 } 283 }
284 284
285 vdso_data->wall_time_coarse_sec = tk->xtime_sec; 285 vdso_data->wall_time_coarse_sec = tk->xtime_sec;
286 vdso_data->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >> 286 vdso_data->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
287 tk->tkr.shift); 287 tk->tkr_mono.shift);
288 288
289 vdso_data->monotonic_time_coarse_sec = 289 vdso_data->monotonic_time_coarse_sec =
290 vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; 290 vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
index c7d791f32b98..51e330416995 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -31,30 +31,30 @@ void update_vsyscall(struct timekeeper *tk)
31 gtod_write_begin(vdata); 31 gtod_write_begin(vdata);
32 32
33 /* copy vsyscall data */ 33 /* copy vsyscall data */
34 vdata->vclock_mode = tk->tkr.clock->archdata.vclock_mode; 34 vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
35 vdata->cycle_last = tk->tkr.cycle_last; 35 vdata->cycle_last = tk->tkr_mono.cycle_last;
36 vdata->mask = tk->tkr.mask; 36 vdata->mask = tk->tkr_mono.mask;
37 vdata->mult = tk->tkr.mult; 37 vdata->mult = tk->tkr_mono.mult;
38 vdata->shift = tk->tkr.shift; 38 vdata->shift = tk->tkr_mono.shift;
39 39
40 vdata->wall_time_sec = tk->xtime_sec; 40 vdata->wall_time_sec = tk->xtime_sec;
41 vdata->wall_time_snsec = tk->tkr.xtime_nsec; 41 vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec;
42 42
43 vdata->monotonic_time_sec = tk->xtime_sec 43 vdata->monotonic_time_sec = tk->xtime_sec
44 + tk->wall_to_monotonic.tv_sec; 44 + tk->wall_to_monotonic.tv_sec;
45 vdata->monotonic_time_snsec = tk->tkr.xtime_nsec 45 vdata->monotonic_time_snsec = tk->tkr_mono.xtime_nsec
46 + ((u64)tk->wall_to_monotonic.tv_nsec 46 + ((u64)tk->wall_to_monotonic.tv_nsec
47 << tk->tkr.shift); 47 << tk->tkr_mono.shift);
48 while (vdata->monotonic_time_snsec >= 48 while (vdata->monotonic_time_snsec >=
49 (((u64)NSEC_PER_SEC) << tk->tkr.shift)) { 49 (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
50 vdata->monotonic_time_snsec -= 50 vdata->monotonic_time_snsec -=
51 ((u64)NSEC_PER_SEC) << tk->tkr.shift; 51 ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
52 vdata->monotonic_time_sec++; 52 vdata->monotonic_time_sec++;
53 } 53 }
54 54
55 vdata->wall_time_coarse_sec = tk->xtime_sec; 55 vdata->wall_time_coarse_sec = tk->xtime_sec;
56 vdata->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >> 56 vdata->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
57 tk->tkr.shift); 57 tk->tkr_mono.shift);
58 58
59 vdata->monotonic_time_coarse_sec = 59 vdata->monotonic_time_coarse_sec =
60 vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; 60 vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 32bf19ef3115..0ee725f1896d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1070,19 +1070,19 @@ static void update_pvclock_gtod(struct timekeeper *tk)
1070 struct pvclock_gtod_data *vdata = &pvclock_gtod_data; 1070 struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
1071 u64 boot_ns; 1071 u64 boot_ns;
1072 1072
1073 boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot)); 1073 boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
1074 1074
1075 write_seqcount_begin(&vdata->seq); 1075 write_seqcount_begin(&vdata->seq);
1076 1076
1077 /* copy pvclock gtod data */ 1077 /* copy pvclock gtod data */
1078 vdata->clock.vclock_mode = tk->tkr.clock->archdata.vclock_mode; 1078 vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
1079 vdata->clock.cycle_last = tk->tkr.cycle_last; 1079 vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
1080 vdata->clock.mask = tk->tkr.mask; 1080 vdata->clock.mask = tk->tkr_mono.mask;
1081 vdata->clock.mult = tk->tkr.mult; 1081 vdata->clock.mult = tk->tkr_mono.mult;
1082 vdata->clock.shift = tk->tkr.shift; 1082 vdata->clock.shift = tk->tkr_mono.shift;
1083 1083
1084 vdata->boot_ns = boot_ns; 1084 vdata->boot_ns = boot_ns;
1085 vdata->nsec_base = tk->tkr.xtime_nsec; 1085 vdata->nsec_base = tk->tkr_mono.xtime_nsec;
1086 1086
1087 write_seqcount_end(&vdata->seq); 1087 write_seqcount_end(&vdata->seq);
1088} 1088}
diff --git a/drivers/clocksource/em_sti.c b/drivers/clocksource/em_sti.c
index d0a7bd66b8b9..dc3c6ee04aaa 100644
--- a/drivers/clocksource/em_sti.c
+++ b/drivers/clocksource/em_sti.c
@@ -210,7 +210,7 @@ static int em_sti_clocksource_enable(struct clocksource *cs)
210 210
211 ret = em_sti_start(p, USER_CLOCKSOURCE); 211 ret = em_sti_start(p, USER_CLOCKSOURCE);
212 if (!ret) 212 if (!ret)
213 __clocksource_updatefreq_hz(cs, p->rate); 213 __clocksource_update_freq_hz(cs, p->rate);
214 return ret; 214 return ret;
215} 215}
216 216
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index 2bd13b53b727..b8ff3c64cc45 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -641,7 +641,7 @@ static int sh_cmt_clocksource_enable(struct clocksource *cs)
641 641
642 ret = sh_cmt_start(ch, FLAG_CLOCKSOURCE); 642 ret = sh_cmt_start(ch, FLAG_CLOCKSOURCE);
643 if (!ret) { 643 if (!ret) {
644 __clocksource_updatefreq_hz(cs, ch->rate); 644 __clocksource_update_freq_hz(cs, ch->rate);
645 ch->cs_enabled = true; 645 ch->cs_enabled = true;
646 } 646 }
647 return ret; 647 return ret;
diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c
index f150ca82bfaf..b6b8fa3cd211 100644
--- a/drivers/clocksource/sh_tmu.c
+++ b/drivers/clocksource/sh_tmu.c
@@ -272,7 +272,7 @@ static int sh_tmu_clocksource_enable(struct clocksource *cs)
272 272
273 ret = sh_tmu_enable(ch); 273 ret = sh_tmu_enable(ch);
274 if (!ret) { 274 if (!ret) {
275 __clocksource_updatefreq_hz(cs, ch->rate); 275 __clocksource_update_freq_hz(cs, ch->rate);
276 ch->cs_enabled = true; 276 ch->cs_enabled = true;
277 } 277 }
278 278
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 2e4cb67f6e56..59af26b54d15 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -39,6 +39,8 @@ enum clock_event_mode {
39 CLOCK_EVT_MODE_PERIODIC, 39 CLOCK_EVT_MODE_PERIODIC,
40 CLOCK_EVT_MODE_ONESHOT, 40 CLOCK_EVT_MODE_ONESHOT,
41 CLOCK_EVT_MODE_RESUME, 41 CLOCK_EVT_MODE_RESUME,
42
43 /* Legacy ->set_mode() callback doesn't support below modes */
42}; 44};
43 45
44/* 46/*
@@ -81,7 +83,11 @@ enum clock_event_mode {
81 * @mode: operating mode assigned by the management code 83 * @mode: operating mode assigned by the management code
82 * @features: features 84 * @features: features
83 * @retries: number of forced programming retries 85 * @retries: number of forced programming retries
84 * @set_mode: set mode function 86 * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME.
87 * @set_mode_periodic: switch mode to periodic, if !set_mode
88 * @set_mode_oneshot: switch mode to oneshot, if !set_mode
89 * @set_mode_shutdown: switch mode to shutdown, if !set_mode
90 * @set_mode_resume: resume clkevt device, if !set_mode
85 * @broadcast: function to broadcast events 91 * @broadcast: function to broadcast events
86 * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration 92 * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration
87 * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration 93 * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration
@@ -108,9 +114,20 @@ struct clock_event_device {
108 unsigned int features; 114 unsigned int features;
109 unsigned long retries; 115 unsigned long retries;
110 116
111 void (*broadcast)(const struct cpumask *mask); 117 /*
118 * Mode transition callback(s): Only one of the two groups should be
119 * defined:
120 * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME.
121 * - set_mode_{shutdown|periodic|oneshot|resume}().
122 */
112 void (*set_mode)(enum clock_event_mode mode, 123 void (*set_mode)(enum clock_event_mode mode,
113 struct clock_event_device *); 124 struct clock_event_device *);
125 int (*set_mode_periodic)(struct clock_event_device *);
126 int (*set_mode_oneshot)(struct clock_event_device *);
127 int (*set_mode_shutdown)(struct clock_event_device *);
128 int (*set_mode_resume)(struct clock_event_device *);
129
130 void (*broadcast)(const struct cpumask *mask);
114 void (*suspend)(struct clock_event_device *); 131 void (*suspend)(struct clock_event_device *);
115 void (*resume)(struct clock_event_device *); 132 void (*resume)(struct clock_event_device *);
116 unsigned long min_delta_ticks; 133 unsigned long min_delta_ticks;
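For drivers, the practical effect of the new hooks above is that a clock event device can fill in set_mode_shutdown/periodic/oneshot/resume instead of the legacy multiplexed set_mode(). A minimal sketch of such a driver follows; all foo_* names, the rating and the (empty) register programming are made up for illustration:

#include <linux/clockchips.h>

/* Illustrative only: every foo_* identifier is hypothetical. */
static int foo_shutdown(struct clock_event_device *ced)
{
	/* stop the hardware timer */
	return 0;
}

static int foo_set_periodic(struct clock_event_device *ced)
{
	/* program the hardware for a periodic tick */
	return 0;
}

static int foo_set_oneshot(struct clock_event_device *ced)
{
	/* switch the hardware to one-shot mode */
	return 0;
}

static int foo_set_next_event(unsigned long delta, struct clock_event_device *ced)
{
	/* arm the comparator 'delta' ticks from now */
	return 0;
}

static struct clock_event_device foo_clockevent = {
	.name			= "foo-timer",
	.features		= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
	.rating			= 300,
	.set_mode_shutdown	= foo_shutdown,
	.set_mode_periodic	= foo_set_periodic,
	.set_mode_oneshot	= foo_set_oneshot,
	/* .set_mode_resume is optional */
	.set_next_event		= foo_set_next_event,
};

Note that the clockevents_sanity_check() added below in kernel/time/clockevents.c rejects devices that mix the two callback styles, or that advertise CLOCK_EVT_FEAT_PERIODIC/ONESHOT without the matching set_mode_* hook.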
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 9c78d15d33e4..135509821c39 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -56,6 +56,7 @@ struct module;
56 * @shift: cycle to nanosecond divisor (power of two) 56 * @shift: cycle to nanosecond divisor (power of two)
57 * @max_idle_ns: max idle time permitted by the clocksource (nsecs) 57 * @max_idle_ns: max idle time permitted by the clocksource (nsecs)
58 * @maxadj: maximum adjustment value to mult (~11%) 58 * @maxadj: maximum adjustment value to mult (~11%)
59 * @max_cycles: maximum safe cycle value which won't overflow on multiplication
59 * @flags: flags describing special properties 60 * @flags: flags describing special properties
60 * @archdata: arch-specific data 61 * @archdata: arch-specific data
61 * @suspend: suspend function for the clocksource, if necessary 62 * @suspend: suspend function for the clocksource, if necessary
@@ -76,7 +77,7 @@ struct clocksource {
76#ifdef CONFIG_ARCH_CLOCKSOURCE_DATA 77#ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
77 struct arch_clocksource_data archdata; 78 struct arch_clocksource_data archdata;
78#endif 79#endif
79 80 u64 max_cycles;
80 const char *name; 81 const char *name;
81 struct list_head list; 82 struct list_head list;
82 int rating; 83 int rating;
@@ -178,7 +179,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
178} 179}
179 180
180 181
181extern int clocksource_register(struct clocksource*);
182extern int clocksource_unregister(struct clocksource*); 182extern int clocksource_unregister(struct clocksource*);
183extern void clocksource_touch_watchdog(void); 183extern void clocksource_touch_watchdog(void);
184extern struct clocksource* clocksource_get_next(void); 184extern struct clocksource* clocksource_get_next(void);
@@ -189,7 +189,7 @@ extern struct clocksource * __init clocksource_default_clock(void);
189extern void clocksource_mark_unstable(struct clocksource *cs); 189extern void clocksource_mark_unstable(struct clocksource *cs);
190 190
191extern u64 191extern u64
192clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask); 192clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles);
193extern void 193extern void
194clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec); 194clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
195 195
@@ -200,7 +200,16 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
200extern int 200extern int
201__clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq); 201__clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq);
202extern void 202extern void
203__clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq); 203__clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq);
204
205/*
206 * Don't call this unless you are a default clocksource
207 * (AKA: jiffies) and absolutely have to.
208 */
209static inline int __clocksource_register(struct clocksource *cs)
210{
211 return __clocksource_register_scale(cs, 1, 0);
212}
204 213
205static inline int clocksource_register_hz(struct clocksource *cs, u32 hz) 214static inline int clocksource_register_hz(struct clocksource *cs, u32 hz)
206{ 215{
@@ -212,14 +221,14 @@ static inline int clocksource_register_khz(struct clocksource *cs, u32 khz)
212 return __clocksource_register_scale(cs, 1000, khz); 221 return __clocksource_register_scale(cs, 1000, khz);
213} 222}
214 223
215static inline void __clocksource_updatefreq_hz(struct clocksource *cs, u32 hz) 224static inline void __clocksource_update_freq_hz(struct clocksource *cs, u32 hz)
216{ 225{
217 __clocksource_updatefreq_scale(cs, 1, hz); 226 __clocksource_update_freq_scale(cs, 1, hz);
218} 227}
219 228
220static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz) 229static inline void __clocksource_update_freq_khz(struct clocksource *cs, u32 khz)
221{ 230{
222 __clocksource_updatefreq_scale(cs, 1000, khz); 231 __clocksource_update_freq_scale(cs, 1000, khz);
223} 232}
224 233
225 234
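With clocksource_register() removed (see the sparc32 conversion above), drivers are steered towards the frequency-based registration helpers, which let the core derive mult/shift and the new max_cycles limit. A hedged sketch with a made-up MMIO counter; the foo_* names and the 24 MHz rate are assumptions:

#include <linux/clocksource.h>
#include <linux/io.h>

static void __iomem *foo_counter_base;	/* assumed to be ioremap()ed elsewhere */

static cycle_t foo_cs_read(struct clocksource *cs)
{
	return (cycle_t)readl_relaxed(foo_counter_base);
}

static struct clocksource foo_cs = {
	.name	= "foo-counter",
	.rating	= 300,
	.read	= foo_cs_read,
	.mask	= CLOCKSOURCE_MASK(32),
	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
};

static int __init foo_cs_init(void)
{
	/* the core computes mult/shift, maxadj, max_idle_ns and max_cycles */
	return clocksource_register_hz(&foo_cs, 24000000);
}

Only self-defining clocksources such as jiffies should use the new __clocksource_register() wrapper, which passes freq == 0 and keeps the hand-rolled mult/shift.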
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 05af9a334893..fb86963859c7 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -16,16 +16,16 @@
16 * @read: Read function of @clock 16 * @read: Read function of @clock
17 * @mask: Bitmask for two's complement subtraction of non 64bit clocks 17 * @mask: Bitmask for two's complement subtraction of non 64bit clocks
18 * @cycle_last: @clock cycle value at last update 18 * @cycle_last: @clock cycle value at last update
19 * @mult: NTP adjusted multiplier for scaled math conversion 19 * @mult: (NTP adjusted) multiplier for scaled math conversion
20 * @shift: Shift value for scaled math conversion 20 * @shift: Shift value for scaled math conversion
21 * @xtime_nsec: Shifted (fractional) nano seconds offset for readout 21 * @xtime_nsec: Shifted (fractional) nano seconds offset for readout
22 * @base_mono: ktime_t (nanoseconds) base time for readout 22 * @base: ktime_t (nanoseconds) base time for readout
23 * 23 *
24 * This struct has size 56 byte on 64 bit. Together with a seqcount it 24 * This struct has size 56 byte on 64 bit. Together with a seqcount it
25 * occupies a single 64byte cache line. 25 * occupies a single 64byte cache line.
26 * 26 *
27 * The struct is separate from struct timekeeper as it is also used 27 * The struct is separate from struct timekeeper as it is also used
28 * for a fast NMI safe accessor to clock monotonic. 28 * for a fast NMI safe accessors.
29 */ 29 */
30struct tk_read_base { 30struct tk_read_base {
31 struct clocksource *clock; 31 struct clocksource *clock;
@@ -35,12 +35,13 @@ struct tk_read_base {
35 u32 mult; 35 u32 mult;
36 u32 shift; 36 u32 shift;
37 u64 xtime_nsec; 37 u64 xtime_nsec;
38 ktime_t base_mono; 38 ktime_t base;
39}; 39};
40 40
41/** 41/**
42 * struct timekeeper - Structure holding internal timekeeping values. 42 * struct timekeeper - Structure holding internal timekeeping values.
43 * @tkr: The readout base structure 43 * @tkr_mono: The readout base structure for CLOCK_MONOTONIC
44 * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW
44 * @xtime_sec: Current CLOCK_REALTIME time in seconds 45 * @xtime_sec: Current CLOCK_REALTIME time in seconds
45 * @ktime_sec: Current CLOCK_MONOTONIC time in seconds 46 * @ktime_sec: Current CLOCK_MONOTONIC time in seconds
46 * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset 47 * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset
@@ -48,7 +49,6 @@ struct tk_read_base {
48 * @offs_boot: Offset clock monotonic -> clock boottime 49 * @offs_boot: Offset clock monotonic -> clock boottime
49 * @offs_tai: Offset clock monotonic -> clock tai 50 * @offs_tai: Offset clock monotonic -> clock tai
50 * @tai_offset: The current UTC to TAI offset in seconds 51 * @tai_offset: The current UTC to TAI offset in seconds
51 * @base_raw: Monotonic raw base time in ktime_t format
52 * @raw_time: Monotonic raw base time in timespec64 format 52 * @raw_time: Monotonic raw base time in timespec64 format
53 * @cycle_interval: Number of clock cycles in one NTP interval 53 * @cycle_interval: Number of clock cycles in one NTP interval
54 * @xtime_interval: Number of clock shifted nano seconds in one NTP 54 * @xtime_interval: Number of clock shifted nano seconds in one NTP
@@ -76,7 +76,8 @@ struct tk_read_base {
76 * used instead. 76 * used instead.
77 */ 77 */
78struct timekeeper { 78struct timekeeper {
79 struct tk_read_base tkr; 79 struct tk_read_base tkr_mono;
80 struct tk_read_base tkr_raw;
80 u64 xtime_sec; 81 u64 xtime_sec;
81 unsigned long ktime_sec; 82 unsigned long ktime_sec;
82 struct timespec64 wall_to_monotonic; 83 struct timespec64 wall_to_monotonic;
@@ -84,7 +85,6 @@ struct timekeeper {
84 ktime_t offs_boot; 85 ktime_t offs_boot;
85 ktime_t offs_tai; 86 ktime_t offs_tai;
86 s32 tai_offset; 87 s32 tai_offset;
87 ktime_t base_raw;
88 struct timespec64 raw_time; 88 struct timespec64 raw_time;
89 89
90 /* The following members are for timekeeping internal use */ 90 /* The following members are for timekeeping internal use */
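The tkr_mono/tkr_raw split means every reader picks one tk_read_base and performs the same cycles-to-nanoseconds conversion against it. The sketch below is a simplified restatement of that conversion, not the in-tree helpers; it ignores arch_gettimeoffset() and the surrounding seqcount retry loop:

#include <linux/timekeeper_internal.h>

/* Simplified: how a tk_read_base turns a counter read into nanoseconds. */
static u64 tkr_sketch_get_ns(struct tk_read_base *tkr)
{
	cycle_t now = tkr->read(tkr->clock);
	cycle_t delta = (now - tkr->cycle_last) & tkr->mask;

	return ktime_to_ns(tkr->base) +
	       ((delta * tkr->mult + tkr->xtime_nsec) >> tkr->shift);
}

CLOCK_MONOTONIC readers use tk->tkr_mono, CLOCK_MONOTONIC_RAW readers use tk->tkr_raw, and the fast/NMI-safe paths keep latched per-clock copies of these bases (tk_fast_mono and the new tk_fast_raw in kernel/time/timekeeping.c below).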
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 3eaae4754275..5047b83483d6 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -214,12 +214,18 @@ static inline u64 ktime_get_boot_ns(void)
214 return ktime_to_ns(ktime_get_boottime()); 214 return ktime_to_ns(ktime_get_boottime());
215} 215}
216 216
217static inline u64 ktime_get_tai_ns(void)
218{
219 return ktime_to_ns(ktime_get_clocktai());
220}
221
217static inline u64 ktime_get_raw_ns(void) 222static inline u64 ktime_get_raw_ns(void)
218{ 223{
219 return ktime_to_ns(ktime_get_raw()); 224 return ktime_to_ns(ktime_get_raw());
220} 225}
221 226
222extern u64 ktime_get_mono_fast_ns(void); 227extern u64 ktime_get_mono_fast_ns(void);
228extern u64 ktime_get_raw_fast_ns(void);
223 229
224/* 230/*
225 * Timespec interfaces utilizing the ktime based ones 231 * Timespec interfaces utilizing the ktime based ones
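These are the two accessors the merge commit message refers to: ktime_get_tai_ns() for CLOCK_TAI timestamps and ktime_get_raw_fast_ns() as an NMI-safe monotonic-raw clock. A minimal usage sketch; the my_* wrapper is hypothetical:

#include <linux/timekeeping.h>

/* Hypothetical helper: timestamp an event with both clocks. */
static void my_stamp_event(u64 *tai_ns, u64 *raw_ns)
{
	*tai_ns = ktime_get_tai_ns();		/* CLOCK_TAI, in nanoseconds */
	*raw_ns = ktime_get_raw_fast_ns();	/* NMI-safe CLOCK_MONOTONIC_RAW */
}

ktime_get_raw_fast_ns() only reads the latched fast-timekeeper copies, so it is usable from NMI context, with the same small-jump-across-update caveats as ktime_get_mono_fast_ns().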
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 55449909f114..489642b08d64 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -94,6 +94,57 @@ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
94} 94}
95EXPORT_SYMBOL_GPL(clockevent_delta2ns); 95EXPORT_SYMBOL_GPL(clockevent_delta2ns);
96 96
97static int __clockevents_set_mode(struct clock_event_device *dev,
98 enum clock_event_mode mode)
99{
100 /* Transition with legacy set_mode() callback */
101 if (dev->set_mode) {
102 /* Legacy callback doesn't support new modes */
103 if (mode > CLOCK_EVT_MODE_RESUME)
104 return -ENOSYS;
105 dev->set_mode(mode, dev);
106 return 0;
107 }
108
109 if (dev->features & CLOCK_EVT_FEAT_DUMMY)
110 return 0;
111
112 /* Transition with new mode-specific callbacks */
113 switch (mode) {
114 case CLOCK_EVT_MODE_UNUSED:
115 /*
116 * This is an internal state, which is guaranteed to go from
117 * SHUTDOWN to UNUSED. No driver interaction required.
118 */
119 return 0;
120
121 case CLOCK_EVT_MODE_SHUTDOWN:
122 return dev->set_mode_shutdown(dev);
123
124 case CLOCK_EVT_MODE_PERIODIC:
125 /* Core internal bug */
126 if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC))
127 return -ENOSYS;
128 return dev->set_mode_periodic(dev);
129
130 case CLOCK_EVT_MODE_ONESHOT:
131 /* Core internal bug */
132 if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
133 return -ENOSYS;
134 return dev->set_mode_oneshot(dev);
135
136 case CLOCK_EVT_MODE_RESUME:
137 /* Optional callback */
138 if (dev->set_mode_resume)
139 return dev->set_mode_resume(dev);
140 else
141 return 0;
142
143 default:
144 return -ENOSYS;
145 }
146}
147
97/** 148/**
98 * clockevents_set_mode - set the operating mode of a clock event device 149 * clockevents_set_mode - set the operating mode of a clock event device
99 * @dev: device to modify 150 * @dev: device to modify
@@ -105,7 +156,9 @@ void clockevents_set_mode(struct clock_event_device *dev,
105 enum clock_event_mode mode) 156 enum clock_event_mode mode)
106{ 157{
107 if (dev->mode != mode) { 158 if (dev->mode != mode) {
108 dev->set_mode(mode, dev); 159 if (__clockevents_set_mode(dev, mode))
160 return;
161
109 dev->mode = mode; 162 dev->mode = mode;
110 163
111 /* 164 /*
@@ -373,6 +426,35 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu)
373} 426}
374EXPORT_SYMBOL_GPL(clockevents_unbind); 427EXPORT_SYMBOL_GPL(clockevents_unbind);
375 428
429/* Sanity check of mode transition callbacks */
430static int clockevents_sanity_check(struct clock_event_device *dev)
431{
432 /* Legacy set_mode() callback */
433 if (dev->set_mode) {
434 /* We shouldn't be supporting new modes now */
435 WARN_ON(dev->set_mode_periodic || dev->set_mode_oneshot ||
436 dev->set_mode_shutdown || dev->set_mode_resume);
437 return 0;
438 }
439
440 if (dev->features & CLOCK_EVT_FEAT_DUMMY)
441 return 0;
442
443 /* New mode-specific callbacks */
444 if (!dev->set_mode_shutdown)
445 return -EINVAL;
446
447 if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
448 !dev->set_mode_periodic)
449 return -EINVAL;
450
451 if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) &&
452 !dev->set_mode_oneshot)
453 return -EINVAL;
454
455 return 0;
456}
457
376/** 458/**
377 * clockevents_register_device - register a clock event device 459 * clockevents_register_device - register a clock event device
378 * @dev: device to register 460 * @dev: device to register
@@ -382,6 +464,8 @@ void clockevents_register_device(struct clock_event_device *dev)
382 unsigned long flags; 464 unsigned long flags;
383 465
384 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); 466 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
467 BUG_ON(clockevents_sanity_check(dev));
468
385 if (!dev->cpumask) { 469 if (!dev->cpumask) {
386 WARN_ON(num_possible_cpus() > 1); 470 WARN_ON(num_possible_cpus() > 1);
387 dev->cpumask = cpumask_of(smp_processor_id()); 471 dev->cpumask = cpumask_of(smp_processor_id());
@@ -449,7 +533,7 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq)
449 return clockevents_program_event(dev, dev->next_event, false); 533 return clockevents_program_event(dev, dev->next_event, false);
450 534
451 if (dev->mode == CLOCK_EVT_MODE_PERIODIC) 535 if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
452 dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev); 536 return __clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
453 537
454 return 0; 538 return 0;
455} 539}
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 4892352f0e49..c3be3c71bbad 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -142,13 +142,6 @@ static void __clocksource_unstable(struct clocksource *cs)
142 schedule_work(&watchdog_work); 142 schedule_work(&watchdog_work);
143} 143}
144 144
145static void clocksource_unstable(struct clocksource *cs, int64_t delta)
146{
147 printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
148 cs->name, delta);
149 __clocksource_unstable(cs);
150}
151
152/** 145/**
153 * clocksource_mark_unstable - mark clocksource unstable via watchdog 146 * clocksource_mark_unstable - mark clocksource unstable via watchdog
154 * @cs: clocksource to be marked unstable 147 * @cs: clocksource to be marked unstable
@@ -174,7 +167,7 @@ void clocksource_mark_unstable(struct clocksource *cs)
174static void clocksource_watchdog(unsigned long data) 167static void clocksource_watchdog(unsigned long data)
175{ 168{
176 struct clocksource *cs; 169 struct clocksource *cs;
177 cycle_t csnow, wdnow, delta; 170 cycle_t csnow, wdnow, cslast, wdlast, delta;
178 int64_t wd_nsec, cs_nsec; 171 int64_t wd_nsec, cs_nsec;
179 int next_cpu, reset_pending; 172 int next_cpu, reset_pending;
180 173
@@ -213,6 +206,8 @@ static void clocksource_watchdog(unsigned long data)
213 206
214 delta = clocksource_delta(csnow, cs->cs_last, cs->mask); 207 delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
215 cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift); 208 cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
209 wdlast = cs->wd_last; /* save these in case we print them */
210 cslast = cs->cs_last;
216 cs->cs_last = csnow; 211 cs->cs_last = csnow;
217 cs->wd_last = wdnow; 212 cs->wd_last = wdnow;
218 213
@@ -221,7 +216,12 @@ static void clocksource_watchdog(unsigned long data)
221 216
222 /* Check the deviation from the watchdog clocksource. */ 217 /* Check the deviation from the watchdog clocksource. */
223 if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) { 218 if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
224 clocksource_unstable(cs, cs_nsec - wd_nsec); 219 pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name);
220 pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
221 watchdog->name, wdnow, wdlast, watchdog->mask);
222 pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
223 cs->name, csnow, cslast, cs->mask);
224 __clocksource_unstable(cs);
225 continue; 225 continue;
226 } 226 }
227 227
@@ -469,26 +469,22 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
469 * @shift: cycle to nanosecond divisor (power of two) 469 * @shift: cycle to nanosecond divisor (power of two)
470 * @maxadj: maximum adjustment value to mult (~11%) 470 * @maxadj: maximum adjustment value to mult (~11%)
471 * @mask: bitmask for two's complement subtraction of non 64 bit counters 471 * @mask: bitmask for two's complement subtraction of non 64 bit counters
472 * @max_cyc: maximum cycle value before potential overflow (does not include
473 * any safety margin)
474 *
475 * NOTE: This function includes a safety margin of 50%, so that bad clock values
476 * can be detected.
472 */ 477 */
473u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask) 478u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
474{ 479{
475 u64 max_nsecs, max_cycles; 480 u64 max_nsecs, max_cycles;
476 481
477 /* 482 /*
478 * Calculate the maximum number of cycles that we can pass to the 483 * Calculate the maximum number of cycles that we can pass to the
479 * cyc2ns function without overflowing a 64-bit signed result. The 484 * cyc2ns() function without overflowing a 64-bit result.
480 * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)
481 * which is equivalent to the below.
482 * max_cycles < (2^63)/(mult + maxadj)
483 * max_cycles < 2^(log2((2^63)/(mult + maxadj)))
484 * max_cycles < 2^(log2(2^63) - log2(mult + maxadj))
485 * max_cycles < 2^(63 - log2(mult + maxadj))
486 * max_cycles < 1 << (63 - log2(mult + maxadj))
487 * Please note that we add 1 to the result of the log2 to account for
488 * any rounding errors, ensure the above inequality is satisfied and
489 * no overflow will occur.
490 */ 485 */
491 max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1)); 486 max_cycles = ULLONG_MAX;
487 do_div(max_cycles, mult+maxadj);
492 488
493 /* 489 /*
494 * The actual maximum number of cycles we can defer the clocksource is 490 * The actual maximum number of cycles we can defer the clocksource is
@@ -499,27 +495,26 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
499 max_cycles = min(max_cycles, mask); 495 max_cycles = min(max_cycles, mask);
500 max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift); 496 max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
501 497
498 /* return the max_cycles value as well if requested */
499 if (max_cyc)
500 *max_cyc = max_cycles;
501
502 /* Return 50% of the actual maximum, so we can detect bad values */
503 max_nsecs >>= 1;
504
502 return max_nsecs; 505 return max_nsecs;
503} 506}
504 507
505/** 508/**
506 * clocksource_max_deferment - Returns max time the clocksource can be deferred 509 * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
507 * @cs: Pointer to clocksource 510 * @cs: Pointer to clocksource to be updated
508 * 511 *
509 */ 512 */
510static u64 clocksource_max_deferment(struct clocksource *cs) 513static inline void clocksource_update_max_deferment(struct clocksource *cs)
511{ 514{
512 u64 max_nsecs; 515 cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
513 516 cs->maxadj, cs->mask,
514 max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj, 517 &cs->max_cycles);
515 cs->mask);
516 /*
517 * To ensure that the clocksource does not wrap whilst we are idle,
518 * limit the time the clocksource can be deferred by 12.5%. Please
519 * note a margin of 12.5% is used because this can be computed with
520 * a shift, versus say 10% which would require division.
521 */
522 return max_nsecs - (max_nsecs >> 3);
523} 518}
524 519
525#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET 520#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
@@ -648,7 +643,7 @@ static void clocksource_enqueue(struct clocksource *cs)
648} 643}
649 644
650/** 645/**
651 * __clocksource_updatefreq_scale - Used update clocksource with new freq 646 * __clocksource_update_freq_scale - Used update clocksource with new freq
652 * @cs: clocksource to be registered 647 * @cs: clocksource to be registered
653 * @scale: Scale factor multiplied against freq to get clocksource hz 648 * @scale: Scale factor multiplied against freq to get clocksource hz
654 * @freq: clocksource frequency (cycles per second) divided by scale 649 * @freq: clocksource frequency (cycles per second) divided by scale
@@ -656,48 +651,64 @@ static void clocksource_enqueue(struct clocksource *cs)
656 * This should only be called from the clocksource->enable() method. 651 * This should only be called from the clocksource->enable() method.
657 * 652 *
658 * This *SHOULD NOT* be called directly! Please use the 653 * This *SHOULD NOT* be called directly! Please use the
659 * clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions. 654 * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
655 * functions.
660 */ 656 */
661void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) 657void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
662{ 658{
663 u64 sec; 659 u64 sec;
660
664 /* 661 /*
665 * Calc the maximum number of seconds which we can run before 662 * Default clocksources are *special* and self-define their mult/shift.
666 * wrapping around. For clocksources which have a mask > 32bit 663 * But, you're not special, so you should specify a freq value.
667 * we need to limit the max sleep time to have a good
668 * conversion precision. 10 minutes is still a reasonable
669 * amount. That results in a shift value of 24 for a
670 * clocksource with mask >= 40bit and f >= 4GHz. That maps to
671 * ~ 0.06ppm granularity for NTP. We apply the same 12.5%
672 * margin as we do in clocksource_max_deferment()
673 */ 664 */
674 sec = (cs->mask - (cs->mask >> 3)); 665 if (freq) {
675 do_div(sec, freq); 666 /*
676 do_div(sec, scale); 667 * Calc the maximum number of seconds which we can run before
677 if (!sec) 668 * wrapping around. For clocksources which have a mask > 32-bit
678 sec = 1; 669 * we need to limit the max sleep time to have a good
679 else if (sec > 600 && cs->mask > UINT_MAX) 670 * conversion precision. 10 minutes is still a reasonable
680 sec = 600; 671 * amount. That results in a shift value of 24 for a
681 672 * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
682 clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, 673 * ~ 0.06ppm granularity for NTP.
683 NSEC_PER_SEC / scale, sec * scale); 674 */
684 675 sec = cs->mask;
676 do_div(sec, freq);
677 do_div(sec, scale);
678 if (!sec)
679 sec = 1;
680 else if (sec > 600 && cs->mask > UINT_MAX)
681 sec = 600;
682
683 clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
684 NSEC_PER_SEC / scale, sec * scale);
685 }
685 /* 686 /*
686 * for clocksources that have large mults, to avoid overflow. 687 * Ensure clocksources that have large 'mult' values don't overflow
687 * Since mult may be adjusted by ntp, add an safety extra margin 688 * when adjusted.
688 *
689 */ 689 */
690 cs->maxadj = clocksource_max_adjustment(cs); 690 cs->maxadj = clocksource_max_adjustment(cs);
691 while ((cs->mult + cs->maxadj < cs->mult) 691 while (freq && ((cs->mult + cs->maxadj < cs->mult)
692 || (cs->mult - cs->maxadj > cs->mult)) { 692 || (cs->mult - cs->maxadj > cs->mult))) {
693 cs->mult >>= 1; 693 cs->mult >>= 1;
694 cs->shift--; 694 cs->shift--;
695 cs->maxadj = clocksource_max_adjustment(cs); 695 cs->maxadj = clocksource_max_adjustment(cs);
696 } 696 }
697 697
698 cs->max_idle_ns = clocksource_max_deferment(cs); 698 /*
699 * Only warn for *special* clocksources that self-define
700 * their mult/shift values and don't specify a freq.
701 */
702 WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
703 "timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
704 cs->name);
705
706 clocksource_update_max_deferment(cs);
707
708 pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
709 cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
699} 710}
700EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); 711EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
701 712
702/** 713/**
703 * __clocksource_register_scale - Used to install new clocksources 714 * __clocksource_register_scale - Used to install new clocksources
@@ -714,7 +725,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
714{ 725{
715 726
716 /* Initialize mult/shift and max_idle_ns */ 727 /* Initialize mult/shift and max_idle_ns */
717 __clocksource_updatefreq_scale(cs, scale, freq); 728 __clocksource_update_freq_scale(cs, scale, freq);
718 729
719 /* Add clocksource to the clocksource list */ 730 /* Add clocksource to the clocksource list */
720 mutex_lock(&clocksource_mutex); 731 mutex_lock(&clocksource_mutex);
@@ -726,33 +737,6 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
726} 737}
727EXPORT_SYMBOL_GPL(__clocksource_register_scale); 738EXPORT_SYMBOL_GPL(__clocksource_register_scale);
728 739
729
730/**
731 * clocksource_register - Used to install new clocksources
732 * @cs: clocksource to be registered
733 *
734 * Returns -EBUSY if registration fails, zero otherwise.
735 */
736int clocksource_register(struct clocksource *cs)
737{
738 /* calculate max adjustment for given mult/shift */
739 cs->maxadj = clocksource_max_adjustment(cs);
740 WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
741 "Clocksource %s might overflow on 11%% adjustment\n",
742 cs->name);
743
744 /* calculate max idle time permitted for this clocksource */
745 cs->max_idle_ns = clocksource_max_deferment(cs);
746
747 mutex_lock(&clocksource_mutex);
748 clocksource_enqueue(cs);
749 clocksource_enqueue_watchdog(cs);
750 clocksource_select();
751 mutex_unlock(&clocksource_mutex);
752 return 0;
753}
754EXPORT_SYMBOL(clocksource_register);
755
756static void __clocksource_change_rating(struct clocksource *cs, int rating) 740static void __clocksource_change_rating(struct clocksource *cs, int rating)
757{ 741{
758 list_del(&cs->list); 742 list_del(&cs->list);
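To see what the reworked clocks_calc_max_nsecs() produces, here is a standalone user-space re-implementation of the same math with made-up parameters (a ~1 GHz counter with mult = 2^30, shift = 30, a 56-bit mask and an ~11% maxadj); the values are illustrative, not taken from any real clocksource:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t mult = 1ULL << 30, shift = 30;		/* ~1 ns per cycle */
	uint64_t maxadj = mult * 11 / 100;		/* ~11%, like clocksource_max_adjustment() */
	uint64_t mask = (1ULL << 56) - 1;

	/* largest cycle count that cannot overflow 64 bits in cyc2ns() */
	uint64_t max_cycles = UINT64_MAX / (mult + maxadj);
	if (max_cycles > mask)
		max_cycles = mask;

	/* cyc2ns() with the worst-case (smallest) adjusted multiplier */
	uint64_t max_nsecs = (max_cycles * (mult - maxadj)) >> shift;
	max_nsecs >>= 1;				/* 50% margin for bad-value detection */

	printf("max_cycles=%llu max_idle_ns=%llu (~%.1f s)\n",
	       (unsigned long long)max_cycles,
	       (unsigned long long)max_nsecs, max_nsecs / 1e9);
	return 0;
}

For these example numbers the halved max_idle_ns comes out at roughly 7 seconds; the clocksource additionally stores the un-halved cycle count in the new max_cycles field, which the new timekeeping debug checks (see the lib/Kconfig.debug entry in the diffstat) use to flag suspicious cycle deltas.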
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index a6a5bf53e86d..c4bb518725b5 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -71,6 +71,7 @@ static struct clocksource clocksource_jiffies = {
71 .mask = 0xffffffff, /*32bits*/ 71 .mask = 0xffffffff, /*32bits*/
72 .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ 72 .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
73 .shift = JIFFIES_SHIFT, 73 .shift = JIFFIES_SHIFT,
74 .max_cycles = 10,
74}; 75};
75 76
76__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock); 77__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
@@ -94,7 +95,7 @@ EXPORT_SYMBOL(jiffies);
94 95
95static int __init init_jiffies_clocksource(void) 96static int __init init_jiffies_clocksource(void)
96{ 97{
97 return clocksource_register(&clocksource_jiffies); 98 return __clocksource_register(&clocksource_jiffies);
98} 99}
99 100
100core_initcall(init_jiffies_clocksource); 101core_initcall(init_jiffies_clocksource);
@@ -130,6 +131,6 @@ int register_refined_jiffies(long cycles_per_second)
130 131
131 refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT; 132 refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
132 133
133 clocksource_register(&refined_jiffies); 134 __clocksource_register(&refined_jiffies);
134 return 0; 135 return 0;
135} 136}
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 01d2d15aa662..a26036d37a38 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * sched_clock.c: support for extending counters to full 64-bit ns counter 2 * sched_clock.c: Generic sched_clock() support, to extend low level
3 * hardware time counters to full 64-bit ns values.
3 * 4 *
4 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
@@ -18,15 +19,53 @@
18#include <linux/seqlock.h> 19#include <linux/seqlock.h>
19#include <linux/bitops.h> 20#include <linux/bitops.h>
20 21
21struct clock_data { 22/**
22 ktime_t wrap_kt; 23 * struct clock_read_data - data required to read from sched_clock()
24 *
25 * @epoch_ns: sched_clock() value at last update
26 * @epoch_cyc: Clock cycle value at last update.
27 * @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit
28 * clocks.
29 * @read_sched_clock: Current clock source (or dummy source when suspended).
30 * @mult: Multipler for scaled math conversion.
31 * @shift: Shift value for scaled math conversion.
32 *
33 * Care must be taken when updating this structure; it is read by
34 * some very hot code paths. It occupies <=40 bytes and, when combined
35 * with the seqcount used to synchronize access, comfortably fits into
36 * a 64 byte cache line.
37 */
38struct clock_read_data {
23 u64 epoch_ns; 39 u64 epoch_ns;
24 u64 epoch_cyc; 40 u64 epoch_cyc;
25 seqcount_t seq; 41 u64 sched_clock_mask;
26 unsigned long rate; 42 u64 (*read_sched_clock)(void);
27 u32 mult; 43 u32 mult;
28 u32 shift; 44 u32 shift;
29 bool suspended; 45};
46
47/**
48 * struct clock_data - all data needed for sched_clock() (including
49 * registration of a new clock source)
50 *
51 * @seq: Sequence counter for protecting updates. The lowest
52 * bit is the index for @read_data.
53 * @read_data: Data required to read from sched_clock.
54 * @wrap_kt: Duration for which clock can run before wrapping.
55 * @rate: Tick rate of the registered clock.
56 * @actual_read_sched_clock: Registered hardware level clock read function.
57 *
58 * The ordering of this structure has been chosen to optimize cache
59 * performance. In particular 'seq' and 'read_data[0]' (combined) should fit
60 * into a single 64-byte cache line.
61 */
62struct clock_data {
63 seqcount_t seq;
64 struct clock_read_data read_data[2];
65 ktime_t wrap_kt;
66 unsigned long rate;
67
68 u64 (*actual_read_sched_clock)(void);
30}; 69};
31 70
32static struct hrtimer sched_clock_timer; 71static struct hrtimer sched_clock_timer;
@@ -34,12 +73,6 @@ static int irqtime = -1;
34 73
35core_param(irqtime, irqtime, int, 0400); 74core_param(irqtime, irqtime, int, 0400);
36 75
37static struct clock_data cd = {
38 .mult = NSEC_PER_SEC / HZ,
39};
40
41static u64 __read_mostly sched_clock_mask;
42
43static u64 notrace jiffy_sched_clock_read(void) 76static u64 notrace jiffy_sched_clock_read(void)
44{ 77{
45 /* 78 /*
@@ -49,7 +82,11 @@ static u64 notrace jiffy_sched_clock_read(void)
49 return (u64)(jiffies - INITIAL_JIFFIES); 82 return (u64)(jiffies - INITIAL_JIFFIES);
50} 83}
51 84
52static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read; 85static struct clock_data cd ____cacheline_aligned = {
86 .read_data[0] = { .mult = NSEC_PER_SEC / HZ,
87 .read_sched_clock = jiffy_sched_clock_read, },
88 .actual_read_sched_clock = jiffy_sched_clock_read,
89};
53 90
54static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) 91static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
55{ 92{
@@ -58,111 +95,136 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
58 95
59unsigned long long notrace sched_clock(void) 96unsigned long long notrace sched_clock(void)
60{ 97{
61 u64 epoch_ns; 98 u64 cyc, res;
62 u64 epoch_cyc;
63 u64 cyc;
64 unsigned long seq; 99 unsigned long seq;
65 100 struct clock_read_data *rd;
66 if (cd.suspended)
67 return cd.epoch_ns;
68 101
69 do { 102 do {
70 seq = raw_read_seqcount_begin(&cd.seq); 103 seq = raw_read_seqcount(&cd.seq);
71 epoch_cyc = cd.epoch_cyc; 104 rd = cd.read_data + (seq & 1);
72 epoch_ns = cd.epoch_ns; 105
106 cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
107 rd->sched_clock_mask;
108 res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift);
73 } while (read_seqcount_retry(&cd.seq, seq)); 109 } while (read_seqcount_retry(&cd.seq, seq));
74 110
75 cyc = read_sched_clock(); 111 return res;
76 cyc = (cyc - epoch_cyc) & sched_clock_mask; 112}
77 return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift); 113
114/*
115 * Updating the data required to read the clock.
116 *
117 * sched_clock() will never observe mis-matched data even if called from
118 * an NMI. We do this by maintaining an odd/even copy of the data and
119 * steering sched_clock() to one or the other using a sequence counter.
120 * In order to preserve the data cache profile of sched_clock() as much
121 * as possible the system reverts back to the even copy when the update
122 * completes; the odd copy is used *only* during an update.
123 */
124static void update_clock_read_data(struct clock_read_data *rd)
125{
126 /* update the backup (odd) copy with the new data */
127 cd.read_data[1] = *rd;
128
129 /* steer readers towards the odd copy */
130 raw_write_seqcount_latch(&cd.seq);
131
132 /* now its safe for us to update the normal (even) copy */
133 cd.read_data[0] = *rd;
134
135 /* switch readers back to the even copy */
136 raw_write_seqcount_latch(&cd.seq);
78} 137}
79 138
80/* 139/*
81 * Atomically update the sched_clock epoch. 140 * Atomically update the sched_clock() epoch.
82 */ 141 */
83static void notrace update_sched_clock(void) 142static void update_sched_clock(void)
84{ 143{
85 unsigned long flags;
86 u64 cyc; 144 u64 cyc;
87 u64 ns; 145 u64 ns;
146 struct clock_read_data rd;
147
148 rd = cd.read_data[0];
149
150 cyc = cd.actual_read_sched_clock();
151 ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
152
153 rd.epoch_ns = ns;
154 rd.epoch_cyc = cyc;
88 155
89 cyc = read_sched_clock(); 156 update_clock_read_data(&rd);
90 ns = cd.epoch_ns +
91 cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
92 cd.mult, cd.shift);
93
94 raw_local_irq_save(flags);
95 raw_write_seqcount_begin(&cd.seq);
96 cd.epoch_ns = ns;
97 cd.epoch_cyc = cyc;
98 raw_write_seqcount_end(&cd.seq);
99 raw_local_irq_restore(flags);
100} 157}
101 158
102static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt) 159static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
103{ 160{
104 update_sched_clock(); 161 update_sched_clock();
105 hrtimer_forward_now(hrt, cd.wrap_kt); 162 hrtimer_forward_now(hrt, cd.wrap_kt);
163
106 return HRTIMER_RESTART; 164 return HRTIMER_RESTART;
107} 165}
108 166
109void __init sched_clock_register(u64 (*read)(void), int bits, 167void __init
110 unsigned long rate) 168sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
111{ 169{
112 u64 res, wrap, new_mask, new_epoch, cyc, ns; 170 u64 res, wrap, new_mask, new_epoch, cyc, ns;
113 u32 new_mult, new_shift; 171 u32 new_mult, new_shift;
114 ktime_t new_wrap_kt;
115 unsigned long r; 172 unsigned long r;
116 char r_unit; 173 char r_unit;
174 struct clock_read_data rd;
117 175
118 if (cd.rate > rate) 176 if (cd.rate > rate)
119 return; 177 return;
120 178
121 WARN_ON(!irqs_disabled()); 179 WARN_ON(!irqs_disabled());
122 180
123 /* calculate the mult/shift to convert counter ticks to ns. */ 181 /* Calculate the mult/shift to convert counter ticks to ns. */
124 clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600); 182 clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600);
125 183
126 new_mask = CLOCKSOURCE_MASK(bits); 184 new_mask = CLOCKSOURCE_MASK(bits);
185 cd.rate = rate;
186
187 /* Calculate how many nanosecs until we risk wrapping */
188 wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL);
189 cd.wrap_kt = ns_to_ktime(wrap);
127 190
128 /* calculate how many ns until we wrap */ 191 rd = cd.read_data[0];
129 wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask);
130 new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3));
131 192
132 /* update epoch for new counter and update epoch_ns from old counter*/ 193 /* Update epoch for new counter and update 'epoch_ns' from old counter*/
133 new_epoch = read(); 194 new_epoch = read();
134 cyc = read_sched_clock(); 195 cyc = cd.actual_read_sched_clock();
135 ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, 196 ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
136 cd.mult, cd.shift); 197 cd.actual_read_sched_clock = read;
137 198
138 raw_write_seqcount_begin(&cd.seq); 199 rd.read_sched_clock = read;
139 read_sched_clock = read; 200 rd.sched_clock_mask = new_mask;
140 sched_clock_mask = new_mask; 201 rd.mult = new_mult;
141 cd.rate = rate; 202 rd.shift = new_shift;
142 cd.wrap_kt = new_wrap_kt; 203 rd.epoch_cyc = new_epoch;
143 cd.mult = new_mult; 204 rd.epoch_ns = ns;
144 cd.shift = new_shift; 205
145 cd.epoch_cyc = new_epoch; 206 update_clock_read_data(&rd);
146 cd.epoch_ns = ns;
147 raw_write_seqcount_end(&cd.seq);
148 207
149 r = rate; 208 r = rate;
150 if (r >= 4000000) { 209 if (r >= 4000000) {
151 r /= 1000000; 210 r /= 1000000;
152 r_unit = 'M'; 211 r_unit = 'M';
153 } else if (r >= 1000) { 212 } else {
154 r /= 1000; 213 if (r >= 1000) {
155 r_unit = 'k'; 214 r /= 1000;
156 } else 215 r_unit = 'k';
157 r_unit = ' '; 216 } else {
158 217 r_unit = ' ';
159 /* calculate the ns resolution of this counter */ 218 }
219 }
220
221 /* Calculate the ns resolution of this counter */
160 res = cyc_to_ns(1ULL, new_mult, new_shift); 222 res = cyc_to_ns(1ULL, new_mult, new_shift);
161 223
162 pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n", 224 pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n",
163 bits, r, r_unit, res, wrap); 225 bits, r, r_unit, res, wrap);
164 226
165 /* Enable IRQ time accounting if we have a fast enough sched_clock */ 227 /* Enable IRQ time accounting if we have a fast enough sched_clock() */
166 if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) 228 if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
167 enable_sched_clock_irqtime(); 229 enable_sched_clock_irqtime();
168 230
@@ -172,10 +234,10 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
172void __init sched_clock_postinit(void) 234void __init sched_clock_postinit(void)
173{ 235{
174 /* 236 /*
175 * If no sched_clock function has been provided at that point, 237 * If no sched_clock() function has been provided at that point,
176 * make it the final one. 238 * make it the final one.

177 */ 239 */
178 if (read_sched_clock == jiffy_sched_clock_read) 240 if (cd.actual_read_sched_clock == jiffy_sched_clock_read)
179 sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ); 241 sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ);
180 242
181 update_sched_clock(); 243 update_sched_clock();
@@ -189,29 +251,53 @@ void __init sched_clock_postinit(void)
189 hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); 251 hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
190} 252}
191 253
254/*
255 * Clock read function for use when the clock is suspended.
256 *
257 * This function makes it appear to sched_clock() as if the clock
258 * stopped counting at its last update.
259 *
260 * This function must only be called from the critical
261 * section in sched_clock(). It relies on the read_seqcount_retry()
262 * at the end of the critical section to be sure we observe the
263 * correct copy of 'epoch_cyc'.
264 */
265static u64 notrace suspended_sched_clock_read(void)
266{
267 unsigned long seq = raw_read_seqcount(&cd.seq);
268
269 return cd.read_data[seq & 1].epoch_cyc;
270}
271
192static int sched_clock_suspend(void) 272static int sched_clock_suspend(void)
193{ 273{
274 struct clock_read_data *rd = &cd.read_data[0];
275
194 update_sched_clock(); 276 update_sched_clock();
195 hrtimer_cancel(&sched_clock_timer); 277 hrtimer_cancel(&sched_clock_timer);
196 cd.suspended = true; 278 rd->read_sched_clock = suspended_sched_clock_read;
279
197 return 0; 280 return 0;
198} 281}
199 282
200static void sched_clock_resume(void) 283static void sched_clock_resume(void)
201{ 284{
202 cd.epoch_cyc = read_sched_clock(); 285 struct clock_read_data *rd = &cd.read_data[0];
286
287 rd->epoch_cyc = cd.actual_read_sched_clock();
203 hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); 288 hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
204 cd.suspended = false; 289 rd->read_sched_clock = cd.actual_read_sched_clock;
205} 290}
206 291
207static struct syscore_ops sched_clock_ops = { 292static struct syscore_ops sched_clock_ops = {
208 .suspend = sched_clock_suspend, 293 .suspend = sched_clock_suspend,
209 .resume = sched_clock_resume, 294 .resume = sched_clock_resume,
210}; 295};
211 296
212static int __init sched_clock_syscore_init(void) 297static int __init sched_clock_syscore_init(void)
213{ 298{
214 register_syscore_ops(&sched_clock_ops); 299 register_syscore_ops(&sched_clock_ops);
300
215 return 0; 301 return 0;
216} 302}
217device_initcall(sched_clock_syscore_init); 303device_initcall(sched_clock_syscore_init);
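The double-buffered cd.read_data[] that update_clock_read_data() and suspended_sched_clock_read() operate on exists so that sched_clock() itself stays NMI-safe: readers use the copy selected by the low bit of the sequence counter and retry if an update raced with them. The reader side is not part of the hunks shown here; the following is a rough reconstruction of what it looks like under this scheme, a sketch rather than the literal source.

unsigned long long notrace sched_clock(void)
{
	u64 cyc, res;
	unsigned long seq;
	struct clock_read_data *rd;

	do {
		seq = raw_read_seqcount(&cd.seq);
		rd  = cd.read_data + (seq & 1);	/* latched copy for this seq */

		cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
		      rd->sched_clock_mask;
		res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift);
	} while (read_seqcount_retry(&cd.seq, seq));

	return res;
}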
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 91db94136c10..c3fcff06d30a 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -59,6 +59,7 @@ struct tk_fast {
59}; 59};
60 60
61static struct tk_fast tk_fast_mono ____cacheline_aligned; 61static struct tk_fast tk_fast_mono ____cacheline_aligned;
62static struct tk_fast tk_fast_raw ____cacheline_aligned;
62 63
63/* flag for if timekeeping is suspended */ 64/* flag for if timekeeping is suspended */
64int __read_mostly timekeeping_suspended; 65int __read_mostly timekeeping_suspended;
@@ -68,8 +69,8 @@ bool __read_mostly persistent_clock_exist = false;
68 69
69static inline void tk_normalize_xtime(struct timekeeper *tk) 70static inline void tk_normalize_xtime(struct timekeeper *tk)
70{ 71{
71 while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) { 72 while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
72 tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift; 73 tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
73 tk->xtime_sec++; 74 tk->xtime_sec++;
74 } 75 }
75} 76}
@@ -79,20 +80,20 @@ static inline struct timespec64 tk_xtime(struct timekeeper *tk)
79 struct timespec64 ts; 80 struct timespec64 ts;
80 81
81 ts.tv_sec = tk->xtime_sec; 82 ts.tv_sec = tk->xtime_sec;
82 ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift); 83 ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
83 return ts; 84 return ts;
84} 85}
85 86
86static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts) 87static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
87{ 88{
88 tk->xtime_sec = ts->tv_sec; 89 tk->xtime_sec = ts->tv_sec;
89 tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift; 90 tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;
90} 91}
91 92
92static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) 93static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
93{ 94{
94 tk->xtime_sec += ts->tv_sec; 95 tk->xtime_sec += ts->tv_sec;
95 tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift; 96 tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;
96 tk_normalize_xtime(tk); 97 tk_normalize_xtime(tk);
97} 98}
98 99
@@ -118,6 +119,117 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
118 tk->offs_boot = ktime_add(tk->offs_boot, delta); 119 tk->offs_boot = ktime_add(tk->offs_boot, delta);
119} 120}
120 121
122#ifdef CONFIG_DEBUG_TIMEKEEPING
123#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
124/*
125 * These simple flag variables are managed
126 * without locks, which is racy, but ok since
127 * we don't really care about being super
128 * precise about how many events were seen,
129 * just that a problem was observed.
130 */
131static int timekeeping_underflow_seen;
132static int timekeeping_overflow_seen;
133
134/* last_warning is only modified under the timekeeping lock */
135static long timekeeping_last_warning;
136
137static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
138{
139
140 cycle_t max_cycles = tk->tkr_mono.clock->max_cycles;
141 const char *name = tk->tkr_mono.clock->name;
142
143 if (offset > max_cycles) {
144 printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
145 offset, name, max_cycles);
146 printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
147 } else {
148 if (offset > (max_cycles >> 1)) {
149 printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n",
150 offset, name, max_cycles >> 1);
151 printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
152 }
153 }
154
155 if (timekeeping_underflow_seen) {
156 if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
157 printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
158 printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
159 printk_deferred(" Your kernel is probably still fine.\n");
160 timekeeping_last_warning = jiffies;
161 }
162 timekeeping_underflow_seen = 0;
163 }
164
165 if (timekeeping_overflow_seen) {
166 if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
167 printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
168 printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
169 printk_deferred(" Your kernel is probably still fine.\n");
170 timekeeping_last_warning = jiffies;
171 }
172 timekeeping_overflow_seen = 0;
173 }
174}
175
176static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
177{
178 cycle_t now, last, mask, max, delta;
179 unsigned int seq;
180
181 /*
182 * Since we're called holding a seqlock, the data may shift
183 * under us while we're doing the calculation. This can cause
184 * false positives, since we'd note a problem but throw the
185 * results away. So nest another seqlock here to atomically
186 * grab the points we are checking with.
187 */
188 do {
189 seq = read_seqcount_begin(&tk_core.seq);
190 now = tkr->read(tkr->clock);
191 last = tkr->cycle_last;
192 mask = tkr->mask;
193 max = tkr->clock->max_cycles;
194 } while (read_seqcount_retry(&tk_core.seq, seq));
195
196 delta = clocksource_delta(now, last, mask);
197
198 /*
199 * Try to catch underflows by checking if we are seeing small
200 * mask-relative negative values.
201 */
202 if (unlikely((~delta & mask) < (mask >> 3))) {
203 timekeeping_underflow_seen = 1;
204 delta = 0;
205 }
206
207 /* Cap delta value to the max_cycles value to avoid mult overflows */
208 if (unlikely(delta > max)) {
209 timekeeping_overflow_seen = 1;
210 delta = tkr->clock->max_cycles;
211 }
212
213 return delta;
214}
215#else
216static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
217{
218}
219static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
220{
221 cycle_t cycle_now, delta;
222
223 /* read clocksource */
224 cycle_now = tkr->read(tkr->clock);
225
226 /* calculate the delta since the last update_wall_time */
227 delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
228
229 return delta;
230}
231#endif
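The underflow heuristic above leans on modular arithmetic: with a wrapping counter, a 'now' sample that has slipped slightly behind cycle_last produces a masked delta just under the mask, so (~delta & mask) comes out tiny. A standalone userspace illustration of that test follows; the 32-bit mask and the sample values are made up, and the helper mirrors only the default (non-validating) form of the kernel's clocksource_delta().

#include <stdint.h>
#include <stdio.h>

/* The modular delta: (now - last) reduced onto the counter's width. */
static uint64_t clocksource_delta(uint64_t now, uint64_t last, uint64_t mask)
{
	return (now - last) & mask;
}

int main(void)
{
	uint64_t mask = (1ULL << 32) - 1;	/* pretend 32-bit clocksource */
	uint64_t last = 1000000;
	uint64_t now  = last - 5;		/* counter appears to have gone backwards */
	uint64_t delta = clocksource_delta(now, last, mask);

	/* delta == 0xfffffffb: huge, but within mask >> 3 of wrapping back to 0 */
	if ((~delta & mask) < (mask >> 3))
		printf("underflow suspected, delta would be capped to 0\n");
	return 0;
}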
232
121/** 233/**
122 * tk_setup_internals - Set up internals to use clocksource clock. 234 * tk_setup_internals - Set up internals to use clocksource clock.
123 * 235 *
@@ -135,11 +247,16 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
135 u64 tmp, ntpinterval; 247 u64 tmp, ntpinterval;
136 struct clocksource *old_clock; 248 struct clocksource *old_clock;
137 249
138 old_clock = tk->tkr.clock; 250 old_clock = tk->tkr_mono.clock;
139 tk->tkr.clock = clock; 251 tk->tkr_mono.clock = clock;
140 tk->tkr.read = clock->read; 252 tk->tkr_mono.read = clock->read;
141 tk->tkr.mask = clock->mask; 253 tk->tkr_mono.mask = clock->mask;
142 tk->tkr.cycle_last = tk->tkr.read(clock); 254 tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
255
256 tk->tkr_raw.clock = clock;
257 tk->tkr_raw.read = clock->read;
258 tk->tkr_raw.mask = clock->mask;
259 tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
143 260
144 /* Do the ns -> cycle conversion first, using original mult */ 261 /* Do the ns -> cycle conversion first, using original mult */
145 tmp = NTP_INTERVAL_LENGTH; 262 tmp = NTP_INTERVAL_LENGTH;
@@ -163,11 +280,14 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
163 if (old_clock) { 280 if (old_clock) {
164 int shift_change = clock->shift - old_clock->shift; 281 int shift_change = clock->shift - old_clock->shift;
165 if (shift_change < 0) 282 if (shift_change < 0)
166 tk->tkr.xtime_nsec >>= -shift_change; 283 tk->tkr_mono.xtime_nsec >>= -shift_change;
167 else 284 else
168 tk->tkr.xtime_nsec <<= shift_change; 285 tk->tkr_mono.xtime_nsec <<= shift_change;
169 } 286 }
170 tk->tkr.shift = clock->shift; 287 tk->tkr_raw.xtime_nsec = 0;
288
289 tk->tkr_mono.shift = clock->shift;
290 tk->tkr_raw.shift = clock->shift;
171 291
172 tk->ntp_error = 0; 292 tk->ntp_error = 0;
173 tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; 293 tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
@@ -178,7 +298,8 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
178 * active clocksource. These values will be adjusted via NTP 298
179 * to counteract clock drifting. 299 * to counteract clock drifting.
180 */ 300 */
181 tk->tkr.mult = clock->mult; 301 tk->tkr_mono.mult = clock->mult;
302 tk->tkr_raw.mult = clock->mult;
182 tk->ntp_err_mult = 0; 303 tk->ntp_err_mult = 0;
183} 304}
184 305
@@ -193,14 +314,10 @@ static inline u32 arch_gettimeoffset(void) { return 0; }
193 314
194static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) 315static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
195{ 316{
196 cycle_t cycle_now, delta; 317 cycle_t delta;
197 s64 nsec; 318 s64 nsec;
198 319
199 /* read clocksource: */ 320 delta = timekeeping_get_delta(tkr);
200 cycle_now = tkr->read(tkr->clock);
201
202 /* calculate the delta since the last update_wall_time: */
203 delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
204 321
205 nsec = delta * tkr->mult + tkr->xtime_nsec; 322 nsec = delta * tkr->mult + tkr->xtime_nsec;
206 nsec >>= tkr->shift; 323 nsec >>= tkr->shift;
@@ -209,25 +326,6 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
209 return nsec + arch_gettimeoffset(); 326 return nsec + arch_gettimeoffset();
210} 327}
211 328
212static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
213{
214 struct clocksource *clock = tk->tkr.clock;
215 cycle_t cycle_now, delta;
216 s64 nsec;
217
218 /* read clocksource: */
219 cycle_now = tk->tkr.read(clock);
220
221 /* calculate the delta since the last update_wall_time: */
222 delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
223
224 /* convert delta to nanoseconds. */
225 nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
226
227 /* If arch requires, add in get_arch_timeoffset() */
228 return nsec + arch_gettimeoffset();
229}
230
231/** 329/**
232 * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper. 330 * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
233 * @tkr: Timekeeping readout base from which we take the update 331 * @tkr: Timekeeping readout base from which we take the update
@@ -267,18 +365,18 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
267 * slightly wrong timestamp (a few nanoseconds). See 365 * slightly wrong timestamp (a few nanoseconds). See
268 * @ktime_get_mono_fast_ns. 366 * @ktime_get_mono_fast_ns.
269 */ 367 */
270static void update_fast_timekeeper(struct tk_read_base *tkr) 368static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf)
271{ 369{
272 struct tk_read_base *base = tk_fast_mono.base; 370 struct tk_read_base *base = tkf->base;
273 371
274 /* Force readers off to base[1] */ 372 /* Force readers off to base[1] */
275 raw_write_seqcount_latch(&tk_fast_mono.seq); 373 raw_write_seqcount_latch(&tkf->seq);
276 374
277 /* Update base[0] */ 375 /* Update base[0] */
278 memcpy(base, tkr, sizeof(*base)); 376 memcpy(base, tkr, sizeof(*base));
279 377
280 /* Force readers back to base[0] */ 378 /* Force readers back to base[0] */
281 raw_write_seqcount_latch(&tk_fast_mono.seq); 379 raw_write_seqcount_latch(&tkf->seq);
282 380
283 /* Update base[1] */ 381 /* Update base[1] */
284 memcpy(base + 1, base, sizeof(*base)); 382 memcpy(base + 1, base, sizeof(*base));
@@ -316,22 +414,33 @@ static void update_fast_timekeeper(struct tk_read_base *tkr)
316 * of the following timestamps. Callers need to be aware of that and 414 * of the following timestamps. Callers need to be aware of that and
317 * deal with it. 415 * deal with it.
318 */ 416 */
319u64 notrace ktime_get_mono_fast_ns(void) 417static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
320{ 418{
321 struct tk_read_base *tkr; 419 struct tk_read_base *tkr;
322 unsigned int seq; 420 unsigned int seq;
323 u64 now; 421 u64 now;
324 422
325 do { 423 do {
326 seq = raw_read_seqcount(&tk_fast_mono.seq); 424 seq = raw_read_seqcount(&tkf->seq);
327 tkr = tk_fast_mono.base + (seq & 0x01); 425 tkr = tkf->base + (seq & 0x01);
328 now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr); 426 now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
427 } while (read_seqcount_retry(&tkf->seq, seq));
329 428
330 } while (read_seqcount_retry(&tk_fast_mono.seq, seq));
331 return now; 429 return now;
332} 430}
431
432u64 ktime_get_mono_fast_ns(void)
433{
434 return __ktime_get_fast_ns(&tk_fast_mono);
435}
333EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); 436EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
334 437
438u64 ktime_get_raw_fast_ns(void)
439{
440 return __ktime_get_fast_ns(&tk_fast_raw);
441}
442EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
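ktime_get_mono_fast_ns() and the new ktime_get_raw_fast_ns() are for callers that cannot take tk_core.seq as ordinary readers, e.g. NMI handlers or tracing hooks that may fire in the middle of a timekeeper update. A hypothetical caller is sketched below; the function and variable names (example_nmi_timestamp, nmi_last_stamp) are invented for illustration.

#include <linux/timekeeping.h>
#include <linux/types.h>

static u64 nmi_last_stamp;	/* hypothetical per-event timestamp */

/* Imagined NMI-context hook that wants a timestamp. */
static void example_nmi_timestamp(void)
{
	/*
	 * Safe here: the fast accessors only perform a latched seqcount
	 * read, so they can interrupt update_fast_timekeeper() without
	 * blocking or seeing torn data -- at worst the timestamp is a few
	 * nanoseconds stale, as the comment above notes.
	 */
	nmi_last_stamp = ktime_get_raw_fast_ns();
}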
443
335/* Suspend-time cycles value for halted fast timekeeper. */ 444/* Suspend-time cycles value for halted fast timekeeper. */
336static cycle_t cycles_at_suspend; 445static cycle_t cycles_at_suspend;
337 446
@@ -353,12 +462,17 @@ static cycle_t dummy_clock_read(struct clocksource *cs)
353static void halt_fast_timekeeper(struct timekeeper *tk) 462static void halt_fast_timekeeper(struct timekeeper *tk)
354{ 463{
355 static struct tk_read_base tkr_dummy; 464 static struct tk_read_base tkr_dummy;
356 struct tk_read_base *tkr = &tk->tkr; 465 struct tk_read_base *tkr = &tk->tkr_mono;
357 466
358 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); 467 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
359 cycles_at_suspend = tkr->read(tkr->clock); 468 cycles_at_suspend = tkr->read(tkr->clock);
360 tkr_dummy.read = dummy_clock_read; 469 tkr_dummy.read = dummy_clock_read;
361 update_fast_timekeeper(&tkr_dummy); 470 update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
471
472 tkr = &tk->tkr_raw;
473 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
474 tkr_dummy.read = dummy_clock_read;
475 update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
362} 476}
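halt_fast_timekeeper() swaps dummy_clock_read() into both fast timekeepers. Its body is not shown in these hunks, but given cycles_at_suspend above it is presumably just a replay of the cycle count sampled at suspend, along these lines, so the fast accessors keep returning the pre-suspend timestamp instead of touching hardware that may already be powered down.

/* Rough reconstruction -- not part of the hunks shown here. */
static cycle_t dummy_clock_read(struct clocksource *cs)
{
	return cycles_at_suspend;
}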
363 477
364#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD 478#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
@@ -369,8 +483,8 @@ static inline void update_vsyscall(struct timekeeper *tk)
369 483
370 xt = timespec64_to_timespec(tk_xtime(tk)); 484 xt = timespec64_to_timespec(tk_xtime(tk));
371 wm = timespec64_to_timespec(tk->wall_to_monotonic); 485 wm = timespec64_to_timespec(tk->wall_to_monotonic);
372 update_vsyscall_old(&xt, &wm, tk->tkr.clock, tk->tkr.mult, 486 update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult,
373 tk->tkr.cycle_last); 487 tk->tkr_mono.cycle_last);
374} 488}
375 489
376static inline void old_vsyscall_fixup(struct timekeeper *tk) 490static inline void old_vsyscall_fixup(struct timekeeper *tk)
@@ -387,11 +501,11 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk)
387 * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD 501 * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
388 * users are removed, this can be killed. 502 * users are removed, this can be killed.
389 */ 503 */
390 remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1); 504 remainder = tk->tkr_mono.xtime_nsec & ((1ULL << tk->tkr_mono.shift) - 1);
391 tk->tkr.xtime_nsec -= remainder; 505 tk->tkr_mono.xtime_nsec -= remainder;
392 tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift; 506 tk->tkr_mono.xtime_nsec += 1ULL << tk->tkr_mono.shift;
393 tk->ntp_error += remainder << tk->ntp_error_shift; 507 tk->ntp_error += remainder << tk->ntp_error_shift;
394 tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift; 508 tk->ntp_error -= (1ULL << tk->tkr_mono.shift) << tk->ntp_error_shift;
395} 509}
396#else 510#else
397#define old_vsyscall_fixup(tk) 511#define old_vsyscall_fixup(tk)
@@ -456,17 +570,17 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
456 */ 570 */
457 seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec); 571 seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
458 nsec = (u32) tk->wall_to_monotonic.tv_nsec; 572 nsec = (u32) tk->wall_to_monotonic.tv_nsec;
459 tk->tkr.base_mono = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); 573 tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
460 574
461 /* Update the monotonic raw base */ 575 /* Update the monotonic raw base */
462 tk->base_raw = timespec64_to_ktime(tk->raw_time); 576 tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time);
463 577
464 /* 578 /*
465 * The sum of the nanoseconds portions of xtime and 579 * The sum of the nanoseconds portions of xtime and
466 * wall_to_monotonic can be greater/equal one second. Take 580 * wall_to_monotonic can be greater/equal one second. Take
467 * this into account before updating tk->ktime_sec. 581 * this into account before updating tk->ktime_sec.
468 */ 582 */
469 nsec += (u32)(tk->tkr.xtime_nsec >> tk->tkr.shift); 583 nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
470 if (nsec >= NSEC_PER_SEC) 584 if (nsec >= NSEC_PER_SEC)
471 seconds++; 585 seconds++;
472 tk->ktime_sec = seconds; 586 tk->ktime_sec = seconds;
@@ -489,7 +603,8 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
489 memcpy(&shadow_timekeeper, &tk_core.timekeeper, 603 memcpy(&shadow_timekeeper, &tk_core.timekeeper,
490 sizeof(tk_core.timekeeper)); 604 sizeof(tk_core.timekeeper));
491 605
492 update_fast_timekeeper(&tk->tkr); 606 update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
607 update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw);
493} 608}
494 609
495/** 610/**
@@ -501,22 +616,23 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
501 */ 616 */
502static void timekeeping_forward_now(struct timekeeper *tk) 617static void timekeeping_forward_now(struct timekeeper *tk)
503{ 618{
504 struct clocksource *clock = tk->tkr.clock; 619 struct clocksource *clock = tk->tkr_mono.clock;
505 cycle_t cycle_now, delta; 620 cycle_t cycle_now, delta;
506 s64 nsec; 621 s64 nsec;
507 622
508 cycle_now = tk->tkr.read(clock); 623 cycle_now = tk->tkr_mono.read(clock);
509 delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask); 624 delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
510 tk->tkr.cycle_last = cycle_now; 625 tk->tkr_mono.cycle_last = cycle_now;
626 tk->tkr_raw.cycle_last = cycle_now;
511 627
512 tk->tkr.xtime_nsec += delta * tk->tkr.mult; 628 tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult;
513 629
514 /* If arch requires, add in get_arch_timeoffset() */ 630 /* If arch requires, add in get_arch_timeoffset() */
515 tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift; 631 tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;
516 632
517 tk_normalize_xtime(tk); 633 tk_normalize_xtime(tk);
518 634
519 nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); 635 nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift);
520 timespec64_add_ns(&tk->raw_time, nsec); 636 timespec64_add_ns(&tk->raw_time, nsec);
521} 637}
522 638
@@ -537,7 +653,7 @@ int __getnstimeofday64(struct timespec64 *ts)
537 seq = read_seqcount_begin(&tk_core.seq); 653 seq = read_seqcount_begin(&tk_core.seq);
538 654
539 ts->tv_sec = tk->xtime_sec; 655 ts->tv_sec = tk->xtime_sec;
540 nsecs = timekeeping_get_ns(&tk->tkr); 656 nsecs = timekeeping_get_ns(&tk->tkr_mono);
541 657
542 } while (read_seqcount_retry(&tk_core.seq, seq)); 658 } while (read_seqcount_retry(&tk_core.seq, seq));
543 659
@@ -577,8 +693,8 @@ ktime_t ktime_get(void)
577 693
578 do { 694 do {
579 seq = read_seqcount_begin(&tk_core.seq); 695 seq = read_seqcount_begin(&tk_core.seq);
580 base = tk->tkr.base_mono; 696 base = tk->tkr_mono.base;
581 nsecs = timekeeping_get_ns(&tk->tkr); 697 nsecs = timekeeping_get_ns(&tk->tkr_mono);
582 698
583 } while (read_seqcount_retry(&tk_core.seq, seq)); 699 } while (read_seqcount_retry(&tk_core.seq, seq));
584 700
@@ -603,8 +719,8 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs)
603 719
604 do { 720 do {
605 seq = read_seqcount_begin(&tk_core.seq); 721 seq = read_seqcount_begin(&tk_core.seq);
606 base = ktime_add(tk->tkr.base_mono, *offset); 722 base = ktime_add(tk->tkr_mono.base, *offset);
607 nsecs = timekeeping_get_ns(&tk->tkr); 723 nsecs = timekeeping_get_ns(&tk->tkr_mono);
608 724
609 } while (read_seqcount_retry(&tk_core.seq, seq)); 725 } while (read_seqcount_retry(&tk_core.seq, seq));
610 726
@@ -645,8 +761,8 @@ ktime_t ktime_get_raw(void)
645 761
646 do { 762 do {
647 seq = read_seqcount_begin(&tk_core.seq); 763 seq = read_seqcount_begin(&tk_core.seq);
648 base = tk->base_raw; 764 base = tk->tkr_raw.base;
649 nsecs = timekeeping_get_ns_raw(tk); 765 nsecs = timekeeping_get_ns(&tk->tkr_raw);
650 766
651 } while (read_seqcount_retry(&tk_core.seq, seq)); 767 } while (read_seqcount_retry(&tk_core.seq, seq));
652 768
@@ -674,7 +790,7 @@ void ktime_get_ts64(struct timespec64 *ts)
674 do { 790 do {
675 seq = read_seqcount_begin(&tk_core.seq); 791 seq = read_seqcount_begin(&tk_core.seq);
676 ts->tv_sec = tk->xtime_sec; 792 ts->tv_sec = tk->xtime_sec;
677 nsec = timekeeping_get_ns(&tk->tkr); 793 nsec = timekeeping_get_ns(&tk->tkr_mono);
678 tomono = tk->wall_to_monotonic; 794 tomono = tk->wall_to_monotonic;
679 795
680 } while (read_seqcount_retry(&tk_core.seq, seq)); 796 } while (read_seqcount_retry(&tk_core.seq, seq));
@@ -759,8 +875,8 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
759 ts_real->tv_sec = tk->xtime_sec; 875 ts_real->tv_sec = tk->xtime_sec;
760 ts_real->tv_nsec = 0; 876 ts_real->tv_nsec = 0;
761 877
762 nsecs_raw = timekeeping_get_ns_raw(tk); 878 nsecs_raw = timekeeping_get_ns(&tk->tkr_raw);
763 nsecs_real = timekeeping_get_ns(&tk->tkr); 879 nsecs_real = timekeeping_get_ns(&tk->tkr_mono);
764 880
765 } while (read_seqcount_retry(&tk_core.seq, seq)); 881 } while (read_seqcount_retry(&tk_core.seq, seq));
766 882
@@ -943,7 +1059,7 @@ static int change_clocksource(void *data)
943 */ 1059 */
944 if (try_module_get(new->owner)) { 1060 if (try_module_get(new->owner)) {
945 if (!new->enable || new->enable(new) == 0) { 1061 if (!new->enable || new->enable(new) == 0) {
946 old = tk->tkr.clock; 1062 old = tk->tkr_mono.clock;
947 tk_setup_internals(tk, new); 1063 tk_setup_internals(tk, new);
948 if (old->disable) 1064 if (old->disable)
949 old->disable(old); 1065 old->disable(old);
@@ -971,11 +1087,11 @@ int timekeeping_notify(struct clocksource *clock)
971{ 1087{
972 struct timekeeper *tk = &tk_core.timekeeper; 1088 struct timekeeper *tk = &tk_core.timekeeper;
973 1089
974 if (tk->tkr.clock == clock) 1090 if (tk->tkr_mono.clock == clock)
975 return 0; 1091 return 0;
976 stop_machine(change_clocksource, clock, NULL); 1092 stop_machine(change_clocksource, clock, NULL);
977 tick_clock_notify(); 1093 tick_clock_notify();
978 return tk->tkr.clock == clock ? 0 : -1; 1094 return tk->tkr_mono.clock == clock ? 0 : -1;
979} 1095}
980 1096
981/** 1097/**
@@ -993,7 +1109,7 @@ void getrawmonotonic64(struct timespec64 *ts)
993 1109
994 do { 1110 do {
995 seq = read_seqcount_begin(&tk_core.seq); 1111 seq = read_seqcount_begin(&tk_core.seq);
996 nsecs = timekeeping_get_ns_raw(tk); 1112 nsecs = timekeeping_get_ns(&tk->tkr_raw);
997 ts64 = tk->raw_time; 1113 ts64 = tk->raw_time;
998 1114
999 } while (read_seqcount_retry(&tk_core.seq, seq)); 1115 } while (read_seqcount_retry(&tk_core.seq, seq));
@@ -1016,7 +1132,7 @@ int timekeeping_valid_for_hres(void)
1016 do { 1132 do {
1017 seq = read_seqcount_begin(&tk_core.seq); 1133 seq = read_seqcount_begin(&tk_core.seq);
1018 1134
1019 ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; 1135 ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
1020 1136
1021 } while (read_seqcount_retry(&tk_core.seq, seq)); 1137 } while (read_seqcount_retry(&tk_core.seq, seq));
1022 1138
@@ -1035,7 +1151,7 @@ u64 timekeeping_max_deferment(void)
1035 do { 1151 do {
1036 seq = read_seqcount_begin(&tk_core.seq); 1152 seq = read_seqcount_begin(&tk_core.seq);
1037 1153
1038 ret = tk->tkr.clock->max_idle_ns; 1154 ret = tk->tkr_mono.clock->max_idle_ns;
1039 1155
1040 } while (read_seqcount_retry(&tk_core.seq, seq)); 1156 } while (read_seqcount_retry(&tk_core.seq, seq));
1041 1157
@@ -1114,7 +1230,6 @@ void __init timekeeping_init(void)
1114 tk_set_xtime(tk, &now); 1230 tk_set_xtime(tk, &now);
1115 tk->raw_time.tv_sec = 0; 1231 tk->raw_time.tv_sec = 0;
1116 tk->raw_time.tv_nsec = 0; 1232 tk->raw_time.tv_nsec = 0;
1117 tk->base_raw.tv64 = 0;
1118 if (boot.tv_sec == 0 && boot.tv_nsec == 0) 1233 if (boot.tv_sec == 0 && boot.tv_nsec == 0)
1119 boot = tk_xtime(tk); 1234 boot = tk_xtime(tk);
1120 1235
@@ -1200,7 +1315,7 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta)
1200void timekeeping_resume(void) 1315void timekeeping_resume(void)
1201{ 1316{
1202 struct timekeeper *tk = &tk_core.timekeeper; 1317 struct timekeeper *tk = &tk_core.timekeeper;
1203 struct clocksource *clock = tk->tkr.clock; 1318 struct clocksource *clock = tk->tkr_mono.clock;
1204 unsigned long flags; 1319 unsigned long flags;
1205 struct timespec64 ts_new, ts_delta; 1320 struct timespec64 ts_new, ts_delta;
1206 struct timespec tmp; 1321 struct timespec tmp;
@@ -1228,16 +1343,16 @@ void timekeeping_resume(void)
1228 * The less preferred source will only be tried if there is no better 1343 * The less preferred source will only be tried if there is no better
1229 * usable source. The rtc part is handled separately in rtc core code. 1344 * usable source. The rtc part is handled separately in rtc core code.
1230 */ 1345 */
1231 cycle_now = tk->tkr.read(clock); 1346 cycle_now = tk->tkr_mono.read(clock);
1232 if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && 1347 if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
1233 cycle_now > tk->tkr.cycle_last) { 1348 cycle_now > tk->tkr_mono.cycle_last) {
1234 u64 num, max = ULLONG_MAX; 1349 u64 num, max = ULLONG_MAX;
1235 u32 mult = clock->mult; 1350 u32 mult = clock->mult;
1236 u32 shift = clock->shift; 1351 u32 shift = clock->shift;
1237 s64 nsec = 0; 1352 s64 nsec = 0;
1238 1353
1239 cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, 1354 cycle_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,
1240 tk->tkr.mask); 1355 tk->tkr_mono.mask);
1241 1356
1242 /* 1357 /*
1243 * "cycle_delta * mult" may cause 64-bit overflow, if the 1358
@@ -1263,7 +1378,9 @@ void timekeeping_resume(void)
1263 __timekeeping_inject_sleeptime(tk, &ts_delta); 1378 __timekeeping_inject_sleeptime(tk, &ts_delta);
1264 1379
1265 /* Re-base the last cycle value */ 1380 /* Re-base the last cycle value */
1266 tk->tkr.cycle_last = cycle_now; 1381 tk->tkr_mono.cycle_last = cycle_now;
1382 tk->tkr_raw.cycle_last = cycle_now;
1383
1267 tk->ntp_error = 0; 1384 tk->ntp_error = 0;
1268 timekeeping_suspended = 0; 1385 timekeeping_suspended = 0;
1269 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); 1386 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
@@ -1416,15 +1533,15 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
1416 * 1533 *
1417 * XXX - TODO: Doc ntp_error calculation. 1534 * XXX - TODO: Doc ntp_error calculation.
1418 */ 1535 */
1419 if ((mult_adj > 0) && (tk->tkr.mult + mult_adj < mult_adj)) { 1536 if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) {
1420 /* NTP adjustment caused clocksource mult overflow */ 1537 /* NTP adjustment caused clocksource mult overflow */
1421 WARN_ON_ONCE(1); 1538 WARN_ON_ONCE(1);
1422 return; 1539 return;
1423 } 1540 }
1424 1541
1425 tk->tkr.mult += mult_adj; 1542 tk->tkr_mono.mult += mult_adj;
1426 tk->xtime_interval += interval; 1543 tk->xtime_interval += interval;
1427 tk->tkr.xtime_nsec -= offset; 1544 tk->tkr_mono.xtime_nsec -= offset;
1428 tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; 1545 tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
1429} 1546}
1430 1547
@@ -1486,13 +1603,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
1486 tk->ntp_err_mult = 0; 1603 tk->ntp_err_mult = 0;
1487 } 1604 }
1488 1605
1489 if (unlikely(tk->tkr.clock->maxadj && 1606 if (unlikely(tk->tkr_mono.clock->maxadj &&
1490 (abs(tk->tkr.mult - tk->tkr.clock->mult) 1607 (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
1491 > tk->tkr.clock->maxadj))) { 1608 > tk->tkr_mono.clock->maxadj))) {
1492 printk_once(KERN_WARNING 1609 printk_once(KERN_WARNING
1493 "Adjusting %s more than 11%% (%ld vs %ld)\n", 1610 "Adjusting %s more than 11%% (%ld vs %ld)\n",
1494 tk->tkr.clock->name, (long)tk->tkr.mult, 1611 tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,
1495 (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj); 1612 (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);
1496 } 1613 }
1497 1614
1498 /* 1615 /*
@@ -1509,9 +1626,9 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
1509 * We'll correct this error next time through this function, when 1626 * We'll correct this error next time through this function, when
1510 * xtime_nsec is not as small. 1627 * xtime_nsec is not as small.
1511 */ 1628 */
1512 if (unlikely((s64)tk->tkr.xtime_nsec < 0)) { 1629 if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
1513 s64 neg = -(s64)tk->tkr.xtime_nsec; 1630 s64 neg = -(s64)tk->tkr_mono.xtime_nsec;
1514 tk->tkr.xtime_nsec = 0; 1631 tk->tkr_mono.xtime_nsec = 0;
1515 tk->ntp_error += neg << tk->ntp_error_shift; 1632 tk->ntp_error += neg << tk->ntp_error_shift;
1516 } 1633 }
1517} 1634}
@@ -1526,13 +1643,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
1526 */ 1643 */
1527static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) 1644static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
1528{ 1645{
1529 u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift; 1646 u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
1530 unsigned int clock_set = 0; 1647 unsigned int clock_set = 0;
1531 1648
1532 while (tk->tkr.xtime_nsec >= nsecps) { 1649 while (tk->tkr_mono.xtime_nsec >= nsecps) {
1533 int leap; 1650 int leap;
1534 1651
1535 tk->tkr.xtime_nsec -= nsecps; 1652 tk->tkr_mono.xtime_nsec -= nsecps;
1536 tk->xtime_sec++; 1653 tk->xtime_sec++;
1537 1654
1538 /* Figure out if it's a leap sec and apply if needed */ 1655
@@ -1577,9 +1694,10 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
1577 1694
1578 /* Accumulate one shifted interval */ 1695 /* Accumulate one shifted interval */
1579 offset -= interval; 1696 offset -= interval;
1580 tk->tkr.cycle_last += interval; 1697 tk->tkr_mono.cycle_last += interval;
1698 tk->tkr_raw.cycle_last += interval;
1581 1699
1582 tk->tkr.xtime_nsec += tk->xtime_interval << shift; 1700 tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;
1583 *clock_set |= accumulate_nsecs_to_secs(tk); 1701 *clock_set |= accumulate_nsecs_to_secs(tk);
1584 1702
1585 /* Accumulate raw time */ 1703 /* Accumulate raw time */
@@ -1622,14 +1740,17 @@ void update_wall_time(void)
1622#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET 1740#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
1623 offset = real_tk->cycle_interval; 1741 offset = real_tk->cycle_interval;
1624#else 1742#else
1625 offset = clocksource_delta(tk->tkr.read(tk->tkr.clock), 1743 offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
1626 tk->tkr.cycle_last, tk->tkr.mask); 1744 tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
1627#endif 1745#endif
1628 1746
1629 /* Check if there's really nothing to do */ 1747 /* Check if there's really nothing to do */
1630 if (offset < real_tk->cycle_interval) 1748 if (offset < real_tk->cycle_interval)
1631 goto out; 1749 goto out;
1632 1750
1751 /* Do some additional sanity checking */
1752 timekeeping_check_update(real_tk, offset);
1753
1633 /* 1754 /*
1634 * With NO_HZ we may have to accumulate many cycle_intervals 1755 * With NO_HZ we may have to accumulate many cycle_intervals
1635 * (think "ticks") worth of time at once. To do this efficiently, 1756 * (think "ticks") worth of time at once. To do this efficiently,
@@ -1784,8 +1905,8 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot,
1784 do { 1905 do {
1785 seq = read_seqcount_begin(&tk_core.seq); 1906 seq = read_seqcount_begin(&tk_core.seq);
1786 1907
1787 base = tk->tkr.base_mono; 1908 base = tk->tkr_mono.base;
1788 nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift; 1909 nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
1789 1910
1790 *offs_real = tk->offs_real; 1911 *offs_real = tk->offs_real;
1791 *offs_boot = tk->offs_boot; 1912 *offs_boot = tk->offs_boot;
@@ -1816,8 +1937,8 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot,
1816 do { 1937 do {
1817 seq = read_seqcount_begin(&tk_core.seq); 1938 seq = read_seqcount_begin(&tk_core.seq);
1818 1939
1819 base = tk->tkr.base_mono; 1940 base = tk->tkr_mono.base;
1820 nsecs = timekeeping_get_ns(&tk->tkr); 1941 nsecs = timekeeping_get_ns(&tk->tkr_mono);
1821 1942
1822 *offs_real = tk->offs_real; 1943 *offs_real = tk->offs_real;
1823 *offs_boot = tk->offs_boot; 1944 *offs_boot = tk->offs_boot;
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 61ed862cdd37..2cfd19485824 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -228,9 +228,35 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
228 print_name_offset(m, dev->set_next_event); 228 print_name_offset(m, dev->set_next_event);
229 SEQ_printf(m, "\n"); 229 SEQ_printf(m, "\n");
230 230
231 SEQ_printf(m, " set_mode: "); 231 if (dev->set_mode) {
232 print_name_offset(m, dev->set_mode); 232 SEQ_printf(m, " set_mode: ");
233 SEQ_printf(m, "\n"); 233 print_name_offset(m, dev->set_mode);
234 SEQ_printf(m, "\n");
235 } else {
236 if (dev->set_mode_shutdown) {
237 SEQ_printf(m, " shutdown: ");
238 print_name_offset(m, dev->set_mode_shutdown);
239 SEQ_printf(m, "\n");
240 }
241
242 if (dev->set_mode_periodic) {
243 SEQ_printf(m, " periodic: ");
244 print_name_offset(m, dev->set_mode_periodic);
245 SEQ_printf(m, "\n");
246 }
247
248 if (dev->set_mode_oneshot) {
249 SEQ_printf(m, " oneshot: ");
250 print_name_offset(m, dev->set_mode_oneshot);
251 SEQ_printf(m, "\n");
252 }
253
254 if (dev->set_mode_resume) {
255 SEQ_printf(m, " resume: ");
256 print_name_offset(m, dev->set_mode_resume);
257 SEQ_printf(m, "\n");
258 }
259 }
234 260
235 SEQ_printf(m, " event_handler: "); 261 SEQ_printf(m, " event_handler: ");
236 print_name_offset(m, dev->event_handler); 262 print_name_offset(m, dev->event_handler);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c5cefb3c009c..36b6fa88ce5b 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -865,6 +865,19 @@ config SCHED_STACK_END_CHECK
865 data corruption or a sporadic crash at a later stage once the region 865 data corruption or a sporadic crash at a later stage once the region
866 is examined. The runtime overhead introduced is minimal. 866 is examined. The runtime overhead introduced is minimal.
867 867
868config DEBUG_TIMEKEEPING
869 bool "Enable extra timekeeping sanity checking"
870 help
871 This option will enable additional timekeeping sanity checks
872 which may be helpful when diagnosing issues where timekeeping
873 problems are suspected.
874
875 This may include checks in the timekeeping hotpaths, so this
876 option may have a (very small) performance impact on some
877 workloads.
878
879 If unsure, say N.
880
868config TIMER_STATS 881config TIMER_STATS
869 bool "Collect kernel timers statistics" 882 bool "Collect kernel timers statistics"
870 depends on DEBUG_KERNEL && PROC_FS 883 depends on DEBUG_KERNEL && PROC_FS