author     Linus Torvalds <torvalds@linux-foundation.org>   2014-08-05 20:46:42 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2014-08-05 20:46:42 -0400
commit     e7fda6c4c3c1a7d6996dd75fd84670fa0b5d448f
tree       daa51c16462c318b890acf7f01fba5827275dd74 /kernel/time
parent     08d69a25714429850cf9ef71f22d8cdc9189d93f
parent     953dec21aed4038464fec02f96a2f1b8701a5bce
Merge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer and time updates from Thomas Gleixner:
 "A rather large update of timers, timekeeping & co

   - Core timekeeping code is year-2038 safe now for 32bit machines.
     Now we just need to fix all in kernel users and the gazillion of
     user space interfaces which rely on timespec/timeval :)

   - Better cache layout for the timekeeping internal data structures.

   - Proper nanosecond based interfaces for in kernel users.

   - Tree wide cleanup of code which wants nanoseconds but does hoops
     and loops to convert back and forth from timespecs.  Some of it
     definitely belongs into the ugly code museum.

   - Consolidation of the timekeeping interface zoo.

   - A fast NMI safe accessor to clock monotonic for tracing.  This is
     a long standing request to support correlated user/kernel space
     traces.  With proper NTP frequency correction it's also suitable
     for correlation of traces across separate machines.

   - Checkpoint/restart support for timerfd.

   - A few NOHZ[_FULL] improvements in the [hr]timer code.

   - Code move from kernel to kernel/time of all time* related code.

   - New clocksource/event drivers from the ARM universe.  I'm really
     impressed that despite an architected timer in the newer chips SoC
     manufacturers insist on inventing new and differently broken SoC
     specific timers.

     [ Ed. "Impressed"?  I don't think that word means what you think
       it means ]

   - Another round of code move from arch to drivers.  Looks like most
     of the legacy mess in ARM regarding timers is sorted out except
     for a few obnoxious strongholds.

   - The usual updates and fixlets all over the place"

* 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (114 commits)
  timekeeping: Fixup typo in update_vsyscall_old definition
  clocksource: document some basic timekeeping concepts
  timekeeping: Use cached ntp_tick_length when accumulating error
  timekeeping: Rework frequency adjustments to work better w/ nohz
  timekeeping: Minor fixup for timespec64->timespec assignment
  ftrace: Provide trace clocks monotonic
  timekeeping: Provide fast and NMI safe access to CLOCK_MONOTONIC
  seqcount: Add raw_write_seqcount_latch()
  seqcount: Provide raw_read_seqcount()
  timekeeping: Use tk_read_base as argument for timekeeping_get_ns()
  timekeeping: Create struct tk_read_base and use it in struct timekeeper
  timekeeping: Restructure the timekeeper some more
  clocksource: Get rid of cycle_last
  clocksource: Move cycle_last validation to core code
  clocksource: Make delta calculation a function
  wireless: ath9k: Get rid of timespec conversions
  drm: vmwgfx: Use nsec based interfaces
  drm: i915: Use nsec based interfaces
  timekeeping: Provide ktime_get_raw()
  hangcheck-timer: Use ktime_get_ns()
  ...
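[Editor's note: the "proper nanosecond based interfaces for in kernel users" mentioned above are the accessors the shortlog entries "hangcheck-timer: Use ktime_get_ns()" and "timekeeping: Provide ktime_get_raw()" refer to. Below is a minimal usage sketch, not part of this merge; some_operation() is a hypothetical workload and the include of <linux/timekeeping.h> assumes the post-merge header layout.]

#include <linux/ktime.h>
#include <linux/timekeeping.h>

extern void some_operation(void);	/* hypothetical workload */

/*
 * Sketch: time an operation in plain nanoseconds, with no
 * timespec/timeval round trips.
 */
static u64 time_some_operation(void)
{
	u64 start = ktime_get_ns();	/* CLOCK_MONOTONIC, in ns */

	some_operation();

	return ktime_get_ns() - start;	/* elapsed nanoseconds */
}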
Diffstat (limited to 'kernel/time')
-rw-r--r--  kernel/time/Kconfig                     9
-rw-r--r--  kernel/time/Makefile                   19
-rw-r--r--  kernel/time/clocksource.c              12
-rw-r--r--  kernel/time/hrtimer.c                1866
-rw-r--r--  kernel/time/itimer.c                  301
-rw-r--r--  kernel/time/ntp.c                      15
-rw-r--r--  kernel/time/ntp_internal.h              2
-rw-r--r--  kernel/time/posix-cpu-timers.c       1490
-rw-r--r--  kernel/time/posix-timers.c           1123
-rw-r--r--  kernel/time/tick-internal.h             2
-rw-r--r--  kernel/time/time.c                    778
-rw-r--r--  kernel/time/timeconst.bc              108
-rw-r--r--  kernel/time/timekeeping.c            1147
-rw-r--r--  kernel/time/timekeeping.h              20
-rw-r--r--  kernel/time/timekeeping_debug.c         2
-rw-r--r--  kernel/time/timekeeping_internal.h     17
-rw-r--r--  kernel/time/timer.c                  1736
-rw-r--r--  kernel/time/udelay_test.c             168
18 files changed, 8260 insertions, 555 deletions
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index f448513a45ed..d626dc98e8df 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -12,6 +12,11 @@ config CLOCKSOURCE_WATCHDOG
 config ARCH_CLOCKSOURCE_DATA
 	bool
 
+# Clocksources require validation of the clocksource against the last
+# cycle update - x86/TSC misfeature
+config CLOCKSOURCE_VALIDATE_LAST_CYCLE
+	bool
+
 # Timekeeping vsyscall support
 config GENERIC_TIME_VSYSCALL
 	bool
@@ -20,10 +25,6 @@ config GENERIC_TIME_VSYSCALL
 config GENERIC_TIME_VSYSCALL_OLD
 	bool
 
-# ktime_t scalar 64bit nsec representation
-config KTIME_SCALAR
-	bool
-
 # Old style timekeeping
 config ARCH_USES_GETTIMEOFFSET
 	bool
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 57a413fd0ebf..7347426fa68d 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,3 +1,4 @@
+obj-y += time.o timer.o hrtimer.o itimer.o posix-timers.o posix-cpu-timers.o
 obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
 obj-y += timeconv.o posix-clock.o alarmtimer.o
 
@@ -12,3 +13,21 @@ obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o
 obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o
 obj-$(CONFIG_TIMER_STATS) += timer_stats.o
 obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
+obj-$(CONFIG_TEST_UDELAY) += udelay_test.o
+
+$(obj)/time.o: $(obj)/timeconst.h
+
+quiet_cmd_hzfile = HZFILE $@
+      cmd_hzfile = echo "hz=$(CONFIG_HZ)" > $@
+
+targets += hz.bc
+$(obj)/hz.bc: $(objtree)/include/config/hz.h FORCE
+	$(call if_changed,hzfile)
+
+quiet_cmd_bc = BC $@
+      cmd_bc = bc -q $(filter-out FORCE,$^) > $@
+
+targets += timeconst.h
+$(obj)/timeconst.h: $(obj)/hz.bc $(src)/timeconst.bc FORCE
+	$(call if_changed,bc)
+
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index ba3e502c955a..2e949cc9c9f1 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -32,6 +32,7 @@
 #include <linux/kthread.h>
 
 #include "tick-internal.h"
+#include "timekeeping_internal.h"
 
 void timecounter_init(struct timecounter *tc,
 		      const struct cyclecounter *cc,
@@ -249,7 +250,7 @@ void clocksource_mark_unstable(struct clocksource *cs)
 static void clocksource_watchdog(unsigned long data)
 {
 	struct clocksource *cs;
-	cycle_t csnow, wdnow;
+	cycle_t csnow, wdnow, delta;
 	int64_t wd_nsec, cs_nsec;
 	int next_cpu, reset_pending;
 
@@ -282,11 +283,12 @@ static void clocksource_watchdog(unsigned long data)
 			continue;
 		}
 
-		wd_nsec = clocksource_cyc2ns((wdnow - cs->wd_last) & watchdog->mask,
-					     watchdog->mult, watchdog->shift);
+		delta = clocksource_delta(wdnow, cs->wd_last, watchdog->mask);
+		wd_nsec = clocksource_cyc2ns(delta, watchdog->mult,
+					     watchdog->shift);
 
-		cs_nsec = clocksource_cyc2ns((csnow - cs->cs_last) &
-					     cs->mask, cs->mult, cs->shift);
+		delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
+		cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
 		cs->cs_last = csnow;
 		cs->wd_last = wdnow;
 
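[Editor's note: the hunk above replaces the open coded "(now - last) & mask" arithmetic with clocksource_delta(), which the diffstat shows being added to kernel/time/timekeeping_internal.h (not reproduced in this excerpt). The sketch below illustrates the idea; the guard name comes from the Kconfig hunk earlier, and the body should be treated as illustrative rather than a verbatim copy of the merged helper.]

#include <linux/clocksource.h>	/* cycle_t */

#ifdef CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE
static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask)
{
	cycle_t ret = (now - last) & mask;

	/*
	 * Clamp an apparently negative delta (e.g. slightly
	 * unsynchronized TSCs) to zero instead of returning a huge
	 * wrapped value.
	 */
	return (s64) ret > 0 ? ret : 0;
}
#else
static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask)
{
	return (now - last) & mask;
}
#endif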
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
new file mode 100644
index 000000000000..1c2fe7de2842
--- /dev/null
+++ b/kernel/time/hrtimer.c
@@ -0,0 +1,1866 @@
1/*
2 * linux/kernel/hrtimer.c
3 *
4 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
5 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
6 * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
7 *
8 * High-resolution kernel timers
9 *
10 * In contrast to the low-resolution timeout API implemented in
11 * kernel/timer.c, hrtimers provide finer resolution and accuracy
12 * depending on system configuration and capabilities.
13 *
14 * These timers are currently used for:
15 * - itimers
16 * - POSIX timers
17 * - nanosleep
18 * - precise in-kernel timing
19 *
20 * Started by: Thomas Gleixner and Ingo Molnar
21 *
22 * Credits:
23 * based on kernel/timer.c
24 *
25 * Help, testing, suggestions, bugfixes, improvements were
26 * provided by:
27 *
28 * George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
29 * et. al.
30 *
31 * For licencing details see kernel-base/COPYING
32 */
33
34#include <linux/cpu.h>
35#include <linux/export.h>
36#include <linux/percpu.h>
37#include <linux/hrtimer.h>
38#include <linux/notifier.h>
39#include <linux/syscalls.h>
40#include <linux/kallsyms.h>
41#include <linux/interrupt.h>
42#include <linux/tick.h>
43#include <linux/seq_file.h>
44#include <linux/err.h>
45#include <linux/debugobjects.h>
46#include <linux/sched.h>
47#include <linux/sched/sysctl.h>
48#include <linux/sched/rt.h>
49#include <linux/sched/deadline.h>
50#include <linux/timer.h>
51#include <linux/freezer.h>
52
53#include <asm/uaccess.h>
54
55#include <trace/events/timer.h>
56
57#include "timekeeping.h"
58
59/*
60 * The timer bases:
61 *
62 * There are more clockids than hrtimer bases. Thus, we index
63 * into the timer bases by the hrtimer_base_type enum. When trying
64 * to reach a base using a clockid, hrtimer_clockid_to_base()
65 * is used to convert from clockid to the proper hrtimer_base_type.
66 */
67DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
68{
69
70 .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
71 .clock_base =
72 {
73 {
74 .index = HRTIMER_BASE_MONOTONIC,
75 .clockid = CLOCK_MONOTONIC,
76 .get_time = &ktime_get,
77 .resolution = KTIME_LOW_RES,
78 },
79 {
80 .index = HRTIMER_BASE_REALTIME,
81 .clockid = CLOCK_REALTIME,
82 .get_time = &ktime_get_real,
83 .resolution = KTIME_LOW_RES,
84 },
85 {
86 .index = HRTIMER_BASE_BOOTTIME,
87 .clockid = CLOCK_BOOTTIME,
88 .get_time = &ktime_get_boottime,
89 .resolution = KTIME_LOW_RES,
90 },
91 {
92 .index = HRTIMER_BASE_TAI,
93 .clockid = CLOCK_TAI,
94 .get_time = &ktime_get_clocktai,
95 .resolution = KTIME_LOW_RES,
96 },
97 }
98};
99
100static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
101 [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME,
102 [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC,
103 [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME,
104 [CLOCK_TAI] = HRTIMER_BASE_TAI,
105};
106
107static inline int hrtimer_clockid_to_base(clockid_t clock_id)
108{
109 return hrtimer_clock_to_base_table[clock_id];
110}
111
112
113/*
114 * Get the coarse grained time at the softirq based on xtime and
115 * wall_to_monotonic.
116 */
117static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
118{
119 ktime_t xtim, mono, boot, tai;
120 ktime_t off_real, off_boot, off_tai;
121
122 mono = ktime_get_update_offsets_tick(&off_real, &off_boot, &off_tai);
123 boot = ktime_add(mono, off_boot);
124 xtim = ktime_add(mono, off_real);
125 tai = ktime_add(xtim, off_tai);
126
127 base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
128 base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
129 base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
130 base->clock_base[HRTIMER_BASE_TAI].softirq_time = tai;
131}
132
133/*
134 * Functions and macros which are different for UP/SMP systems are kept in a
135 * single place
136 */
137#ifdef CONFIG_SMP
138
139/*
140 * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
141 * means that all timers which are tied to this base via timer->base are
142 * locked, and the base itself is locked too.
143 *
144 * So __run_timers/migrate_timers can safely modify all timers which could
145 * be found on the lists/queues.
146 *
147 * When the timer's base is locked, and the timer removed from list, it is
148 * possible to set timer->base = NULL and drop the lock: the timer remains
149 * locked.
150 */
151static
152struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
153 unsigned long *flags)
154{
155 struct hrtimer_clock_base *base;
156
157 for (;;) {
158 base = timer->base;
159 if (likely(base != NULL)) {
160 raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
161 if (likely(base == timer->base))
162 return base;
163 /* The timer has migrated to another CPU: */
164 raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
165 }
166 cpu_relax();
167 }
168}
169
170/*
171 * With HIGHRES=y we do not migrate the timer when it is expiring
172 * before the next event on the target cpu because we cannot reprogram
173 * the target cpu hardware and we would cause it to fire late.
174 *
175 * Called with cpu_base->lock of target cpu held.
176 */
177static int
178hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
179{
180#ifdef CONFIG_HIGH_RES_TIMERS
181 ktime_t expires;
182
183 if (!new_base->cpu_base->hres_active)
184 return 0;
185
186 expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
187 return expires.tv64 <= new_base->cpu_base->expires_next.tv64;
188#else
189 return 0;
190#endif
191}
192
193/*
194 * Switch the timer base to the current CPU when possible.
195 */
196static inline struct hrtimer_clock_base *
197switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
198 int pinned)
199{
200 struct hrtimer_clock_base *new_base;
201 struct hrtimer_cpu_base *new_cpu_base;
202 int this_cpu = smp_processor_id();
203 int cpu = get_nohz_timer_target(pinned);
204 int basenum = base->index;
205
206again:
207 new_cpu_base = &per_cpu(hrtimer_bases, cpu);
208 new_base = &new_cpu_base->clock_base[basenum];
209
210 if (base != new_base) {
211 /*
212 * We are trying to move timer to new_base.
213 * However we can't change timer's base while it is running,
214 * so we keep it on the same CPU. No hassle vs. reprogramming
215 * the event source in the high resolution case. The softirq
216 * code will take care of this when the timer function has
217 * completed. There is no conflict as we hold the lock until
218 * the timer is enqueued.
219 */
220 if (unlikely(hrtimer_callback_running(timer)))
221 return base;
222
223 /* See the comment in lock_timer_base() */
224 timer->base = NULL;
225 raw_spin_unlock(&base->cpu_base->lock);
226 raw_spin_lock(&new_base->cpu_base->lock);
227
228 if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
229 cpu = this_cpu;
230 raw_spin_unlock(&new_base->cpu_base->lock);
231 raw_spin_lock(&base->cpu_base->lock);
232 timer->base = base;
233 goto again;
234 }
235 timer->base = new_base;
236 } else {
237 if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
238 cpu = this_cpu;
239 goto again;
240 }
241 }
242 return new_base;
243}
244
245#else /* CONFIG_SMP */
246
247static inline struct hrtimer_clock_base *
248lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
249{
250 struct hrtimer_clock_base *base = timer->base;
251
252 raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
253
254 return base;
255}
256
257# define switch_hrtimer_base(t, b, p) (b)
258
259#endif /* !CONFIG_SMP */
260
261/*
262 * Functions for the union type storage format of ktime_t which are
263 * too large for inlining:
264 */
265#if BITS_PER_LONG < 64
266/*
267 * Divide a ktime value by a nanosecond value
268 */
269u64 ktime_divns(const ktime_t kt, s64 div)
270{
271 u64 dclc;
272 int sft = 0;
273
274 dclc = ktime_to_ns(kt);
275 /* Make sure the divisor is less than 2^32: */
276 while (div >> 32) {
277 sft++;
278 div >>= 1;
279 }
280 dclc >>= sft;
281 do_div(dclc, (unsigned long) div);
282
283 return dclc;
284}
285EXPORT_SYMBOL_GPL(ktime_divns);
286#endif /* BITS_PER_LONG < 64 */
287
288/*
289 * Add two ktime values and do a safety check for overflow:
290 */
291ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
292{
293 ktime_t res = ktime_add(lhs, rhs);
294
295 /*
296 * We use KTIME_SEC_MAX here, the maximum timeout which we can
297 * return to user space in a timespec:
298 */
299 if (res.tv64 < 0 || res.tv64 < lhs.tv64 || res.tv64 < rhs.tv64)
300 res = ktime_set(KTIME_SEC_MAX, 0);
301
302 return res;
303}
304
305EXPORT_SYMBOL_GPL(ktime_add_safe);
306
307#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
308
309static struct debug_obj_descr hrtimer_debug_descr;
310
311static void *hrtimer_debug_hint(void *addr)
312{
313 return ((struct hrtimer *) addr)->function;
314}
315
316/*
317 * fixup_init is called when:
318 * - an active object is initialized
319 */
320static int hrtimer_fixup_init(void *addr, enum debug_obj_state state)
321{
322 struct hrtimer *timer = addr;
323
324 switch (state) {
325 case ODEBUG_STATE_ACTIVE:
326 hrtimer_cancel(timer);
327 debug_object_init(timer, &hrtimer_debug_descr);
328 return 1;
329 default:
330 return 0;
331 }
332}
333
334/*
335 * fixup_activate is called when:
336 * - an active object is activated
337 * - an unknown object is activated (might be a statically initialized object)
338 */
339static int hrtimer_fixup_activate(void *addr, enum debug_obj_state state)
340{
341 switch (state) {
342
343 case ODEBUG_STATE_NOTAVAILABLE:
344 WARN_ON_ONCE(1);
345 return 0;
346
347 case ODEBUG_STATE_ACTIVE:
348 WARN_ON(1);
349
350 default:
351 return 0;
352 }
353}
354
355/*
356 * fixup_free is called when:
357 * - an active object is freed
358 */
359static int hrtimer_fixup_free(void *addr, enum debug_obj_state state)
360{
361 struct hrtimer *timer = addr;
362
363 switch (state) {
364 case ODEBUG_STATE_ACTIVE:
365 hrtimer_cancel(timer);
366 debug_object_free(timer, &hrtimer_debug_descr);
367 return 1;
368 default:
369 return 0;
370 }
371}
372
373static struct debug_obj_descr hrtimer_debug_descr = {
374 .name = "hrtimer",
375 .debug_hint = hrtimer_debug_hint,
376 .fixup_init = hrtimer_fixup_init,
377 .fixup_activate = hrtimer_fixup_activate,
378 .fixup_free = hrtimer_fixup_free,
379};
380
381static inline void debug_hrtimer_init(struct hrtimer *timer)
382{
383 debug_object_init(timer, &hrtimer_debug_descr);
384}
385
386static inline void debug_hrtimer_activate(struct hrtimer *timer)
387{
388 debug_object_activate(timer, &hrtimer_debug_descr);
389}
390
391static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
392{
393 debug_object_deactivate(timer, &hrtimer_debug_descr);
394}
395
396static inline void debug_hrtimer_free(struct hrtimer *timer)
397{
398 debug_object_free(timer, &hrtimer_debug_descr);
399}
400
401static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
402 enum hrtimer_mode mode);
403
404void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
405 enum hrtimer_mode mode)
406{
407 debug_object_init_on_stack(timer, &hrtimer_debug_descr);
408 __hrtimer_init(timer, clock_id, mode);
409}
410EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
411
412void destroy_hrtimer_on_stack(struct hrtimer *timer)
413{
414 debug_object_free(timer, &hrtimer_debug_descr);
415}
416
417#else
418static inline void debug_hrtimer_init(struct hrtimer *timer) { }
419static inline void debug_hrtimer_activate(struct hrtimer *timer) { }
420static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
421#endif
422
423static inline void
424debug_init(struct hrtimer *timer, clockid_t clockid,
425 enum hrtimer_mode mode)
426{
427 debug_hrtimer_init(timer);
428 trace_hrtimer_init(timer, clockid, mode);
429}
430
431static inline void debug_activate(struct hrtimer *timer)
432{
433 debug_hrtimer_activate(timer);
434 trace_hrtimer_start(timer);
435}
436
437static inline void debug_deactivate(struct hrtimer *timer)
438{
439 debug_hrtimer_deactivate(timer);
440 trace_hrtimer_cancel(timer);
441}
442
443/* High resolution timer related functions */
444#ifdef CONFIG_HIGH_RES_TIMERS
445
446/*
447 * High resolution timer enabled ?
448 */
449static int hrtimer_hres_enabled __read_mostly = 1;
450
451/*
452 * Enable / Disable high resolution mode
453 */
454static int __init setup_hrtimer_hres(char *str)
455{
456 if (!strcmp(str, "off"))
457 hrtimer_hres_enabled = 0;
458 else if (!strcmp(str, "on"))
459 hrtimer_hres_enabled = 1;
460 else
461 return 0;
462 return 1;
463}
464
465__setup("highres=", setup_hrtimer_hres);
466
467/*
468 * hrtimer_is_hres_enabled - query, if the highres mode is enabled
469 */
470static inline int hrtimer_is_hres_enabled(void)
471{
472 return hrtimer_hres_enabled;
473}
474
475/*
476 * Is the high resolution mode active ?
477 */
478static inline int hrtimer_hres_active(void)
479{
480 return __this_cpu_read(hrtimer_bases.hres_active);
481}
482
483/*
484 * Reprogram the event source with checking both queues for the
485 * next event
486 * Called with interrupts disabled and base->lock held
487 */
488static void
489hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
490{
491 int i;
492 struct hrtimer_clock_base *base = cpu_base->clock_base;
493 ktime_t expires, expires_next;
494
495 expires_next.tv64 = KTIME_MAX;
496
497 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
498 struct hrtimer *timer;
499 struct timerqueue_node *next;
500
501 next = timerqueue_getnext(&base->active);
502 if (!next)
503 continue;
504 timer = container_of(next, struct hrtimer, node);
505
506 expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
507 /*
508 * clock_was_set() has changed base->offset so the
509 * result might be negative. Fix it up to prevent a
510 * false positive in clockevents_program_event()
511 */
512 if (expires.tv64 < 0)
513 expires.tv64 = 0;
514 if (expires.tv64 < expires_next.tv64)
515 expires_next = expires;
516 }
517
518 if (skip_equal && expires_next.tv64 == cpu_base->expires_next.tv64)
519 return;
520
521 cpu_base->expires_next.tv64 = expires_next.tv64;
522
523 /*
524 * If a hang was detected in the last timer interrupt then we
525 * leave the hang delay active in the hardware. We want the
526 * system to make progress. That also prevents the following
527 * scenario:
528 * T1 expires 50ms from now
529 * T2 expires 5s from now
530 *
531 * T1 is removed, so this code is called and would reprogram
532 * the hardware to 5s from now. Any hrtimer_start after that
533 * will not reprogram the hardware due to hang_detected being
534 * set. So we'd effectively block all timers until the T2 event
535 * fires.
536 */
537 if (cpu_base->hang_detected)
538 return;
539
540 if (cpu_base->expires_next.tv64 != KTIME_MAX)
541 tick_program_event(cpu_base->expires_next, 1);
542}
543
544/*
545 * Shared reprogramming for clock_realtime and clock_monotonic
546 *
547 * When a timer is enqueued and expires earlier than the already enqueued
548 * timers, we have to check, whether it expires earlier than the timer for
549 * which the clock event device was armed.
550 *
551 * Note, that in case the state has HRTIMER_STATE_CALLBACK set, no reprogramming
552 * and no expiry check happens. The timer gets enqueued into the rbtree. The
553 * reprogramming and expiry check is done in the hrtimer_interrupt or in the
554 * softirq.
555 *
556 * Called with interrupts disabled and base->cpu_base.lock held
557 */
558static int hrtimer_reprogram(struct hrtimer *timer,
559 struct hrtimer_clock_base *base)
560{
561 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
562 ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
563 int res;
564
565 WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
566
567 /*
568 * When the callback is running, we do not reprogram the clock event
569 * device. The timer callback is either running on a different CPU or
570 * the callback is executed in the hrtimer_interrupt context. The
571 * reprogramming is handled either by the softirq, which called the
572 * callback or at the end of the hrtimer_interrupt.
573 */
574 if (hrtimer_callback_running(timer))
575 return 0;
576
577 /*
578 * CLOCK_REALTIME timer might be requested with an absolute
579 * expiry time which is less than base->offset. Nothing wrong
580 * about that, just avoid to call into the tick code, which
581 * has now objections against negative expiry values.
582 */
583 if (expires.tv64 < 0)
584 return -ETIME;
585
586 if (expires.tv64 >= cpu_base->expires_next.tv64)
587 return 0;
588
589 /*
590 * If a hang was detected in the last timer interrupt then we
591 * do not schedule a timer which is earlier than the expiry
592 * which we enforced in the hang detection. We want the system
593 * to make progress.
594 */
595 if (cpu_base->hang_detected)
596 return 0;
597
598 /*
599 * Clockevents returns -ETIME, when the event was in the past.
600 */
601 res = tick_program_event(expires, 0);
602 if (!IS_ERR_VALUE(res))
603 cpu_base->expires_next = expires;
604 return res;
605}
606
607/*
608 * Initialize the high resolution related parts of cpu_base
609 */
610static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
611{
612 base->expires_next.tv64 = KTIME_MAX;
613 base->hres_active = 0;
614}
615
616static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
617{
618 ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
619 ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
620 ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
621
622 return ktime_get_update_offsets_now(offs_real, offs_boot, offs_tai);
623}
624
625/*
626 * Retrigger next event is called after clock was set
627 *
628 * Called with interrupts disabled via on_each_cpu()
629 */
630static void retrigger_next_event(void *arg)
631{
632 struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
633
634 if (!hrtimer_hres_active())
635 return;
636
637 raw_spin_lock(&base->lock);
638 hrtimer_update_base(base);
639 hrtimer_force_reprogram(base, 0);
640 raw_spin_unlock(&base->lock);
641}
642
643/*
644 * Switch to high resolution mode
645 */
646static int hrtimer_switch_to_hres(void)
647{
648 int i, cpu = smp_processor_id();
649 struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu);
650 unsigned long flags;
651
652 if (base->hres_active)
653 return 1;
654
655 local_irq_save(flags);
656
657 if (tick_init_highres()) {
658 local_irq_restore(flags);
659 printk(KERN_WARNING "Could not switch to high resolution "
660 "mode on CPU %d\n", cpu);
661 return 0;
662 }
663 base->hres_active = 1;
664 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
665 base->clock_base[i].resolution = KTIME_HIGH_RES;
666
667 tick_setup_sched_timer();
668 /* "Retrigger" the interrupt to get things going */
669 retrigger_next_event(NULL);
670 local_irq_restore(flags);
671 return 1;
672}
673
674static void clock_was_set_work(struct work_struct *work)
675{
676 clock_was_set();
677}
678
679static DECLARE_WORK(hrtimer_work, clock_was_set_work);
680
681/*
682 * Called from timekeeping and resume code to reprogram the hrtimer
683 * interrupt device on all cpus.
684 */
685void clock_was_set_delayed(void)
686{
687 schedule_work(&hrtimer_work);
688}
689
690#else
691
692static inline int hrtimer_hres_active(void) { return 0; }
693static inline int hrtimer_is_hres_enabled(void) { return 0; }
694static inline int hrtimer_switch_to_hres(void) { return 0; }
695static inline void
696hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
697static inline int hrtimer_reprogram(struct hrtimer *timer,
698 struct hrtimer_clock_base *base)
699{
700 return 0;
701}
702static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
703static inline void retrigger_next_event(void *arg) { }
704
705#endif /* CONFIG_HIGH_RES_TIMERS */
706
707/*
708 * Clock realtime was set
709 *
710 * Change the offset of the realtime clock vs. the monotonic
711 * clock.
712 *
713 * We might have to reprogram the high resolution timer interrupt. On
714 * SMP we call the architecture specific code to retrigger _all_ high
715 * resolution timer interrupts. On UP we just disable interrupts and
716 * call the high resolution interrupt code.
717 */
718void clock_was_set(void)
719{
720#ifdef CONFIG_HIGH_RES_TIMERS
721 /* Retrigger the CPU local events everywhere */
722 on_each_cpu(retrigger_next_event, NULL, 1);
723#endif
724 timerfd_clock_was_set();
725}
726
727/*
728 * During resume we might have to reprogram the high resolution timer
729 * interrupt on all online CPUs. However, all other CPUs will be
730 * stopped with interrupts disabled so the clock_was_set() call
731 * must be deferred.
732 */
733void hrtimers_resume(void)
734{
735 WARN_ONCE(!irqs_disabled(),
736 KERN_INFO "hrtimers_resume() called with IRQs enabled!");
737
738 /* Retrigger on the local CPU */
739 retrigger_next_event(NULL);
740 /* And schedule a retrigger for all others */
741 clock_was_set_delayed();
742}
743
744static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
745{
746#ifdef CONFIG_TIMER_STATS
747 if (timer->start_site)
748 return;
749 timer->start_site = __builtin_return_address(0);
750 memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
751 timer->start_pid = current->pid;
752#endif
753}
754
755static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
756{
757#ifdef CONFIG_TIMER_STATS
758 timer->start_site = NULL;
759#endif
760}
761
762static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
763{
764#ifdef CONFIG_TIMER_STATS
765 if (likely(!timer_stats_active))
766 return;
767 timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
768 timer->function, timer->start_comm, 0);
769#endif
770}
771
772/*
773 * Counterpart to lock_hrtimer_base above:
774 */
775static inline
776void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
777{
778 raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
779}
780
781/**
782 * hrtimer_forward - forward the timer expiry
783 * @timer: hrtimer to forward
784 * @now: forward past this time
785 * @interval: the interval to forward
786 *
787 * Forward the timer expiry so it will expire in the future.
788 * Returns the number of overruns.
789 */
790u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
791{
792 u64 orun = 1;
793 ktime_t delta;
794
795 delta = ktime_sub(now, hrtimer_get_expires(timer));
796
797 if (delta.tv64 < 0)
798 return 0;
799
800 if (interval.tv64 < timer->base->resolution.tv64)
801 interval.tv64 = timer->base->resolution.tv64;
802
803 if (unlikely(delta.tv64 >= interval.tv64)) {
804 s64 incr = ktime_to_ns(interval);
805
806 orun = ktime_divns(delta, incr);
807 hrtimer_add_expires_ns(timer, incr * orun);
808 if (hrtimer_get_expires_tv64(timer) > now.tv64)
809 return orun;
810 /*
811 * This (and the ktime_add() below) is the
812 * correction for exact:
813 */
814 orun++;
815 }
816 hrtimer_add_expires(timer, interval);
817
818 return orun;
819}
820EXPORT_SYMBOL_GPL(hrtimer_forward);
821
822/*
823 * enqueue_hrtimer - internal function to (re)start a timer
824 *
825 * The timer is inserted in expiry order. Insertion into the
826 * red black tree is O(log(n)). Must hold the base lock.
827 *
828 * Returns 1 when the new timer is the leftmost timer in the tree.
829 */
830static int enqueue_hrtimer(struct hrtimer *timer,
831 struct hrtimer_clock_base *base)
832{
833 debug_activate(timer);
834
835 timerqueue_add(&base->active, &timer->node);
836 base->cpu_base->active_bases |= 1 << base->index;
837
838 /*
839 * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
840 * state of a possibly running callback.
841 */
842 timer->state |= HRTIMER_STATE_ENQUEUED;
843
844 return (&timer->node == base->active.next);
845}
846
847/*
848 * __remove_hrtimer - internal function to remove a timer
849 *
850 * Caller must hold the base lock.
851 *
852 * High resolution timer mode reprograms the clock event device when the
853 * timer is the one which expires next. The caller can disable this by setting
854 * reprogram to zero. This is useful, when the context does a reprogramming
855 * anyway (e.g. timer interrupt)
856 */
857static void __remove_hrtimer(struct hrtimer *timer,
858 struct hrtimer_clock_base *base,
859 unsigned long newstate, int reprogram)
860{
861 struct timerqueue_node *next_timer;
862 if (!(timer->state & HRTIMER_STATE_ENQUEUED))
863 goto out;
864
865 next_timer = timerqueue_getnext(&base->active);
866 timerqueue_del(&base->active, &timer->node);
867 if (&timer->node == next_timer) {
868#ifdef CONFIG_HIGH_RES_TIMERS
869 /* Reprogram the clock event device, if enabled */
870 if (reprogram && hrtimer_hres_active()) {
871 ktime_t expires;
872
873 expires = ktime_sub(hrtimer_get_expires(timer),
874 base->offset);
875 if (base->cpu_base->expires_next.tv64 == expires.tv64)
876 hrtimer_force_reprogram(base->cpu_base, 1);
877 }
878#endif
879 }
880 if (!timerqueue_getnext(&base->active))
881 base->cpu_base->active_bases &= ~(1 << base->index);
882out:
883 timer->state = newstate;
884}
885
886/*
887 * remove hrtimer, called with base lock held
888 */
889static inline int
890remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
891{
892 if (hrtimer_is_queued(timer)) {
893 unsigned long state;
894 int reprogram;
895
896 /*
897 * Remove the timer and force reprogramming when high
898 * resolution mode is active and the timer is on the current
899 * CPU. If we remove a timer on another CPU, reprogramming is
900 * skipped. The interrupt event on this CPU is fired and
901 * reprogramming happens in the interrupt handler. This is a
902 * rare case and less expensive than a smp call.
903 */
904 debug_deactivate(timer);
905 timer_stats_hrtimer_clear_start_info(timer);
906 reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases);
907 /*
908 * We must preserve the CALLBACK state flag here,
909 * otherwise we could move the timer base in
910 * switch_hrtimer_base.
911 */
912 state = timer->state & HRTIMER_STATE_CALLBACK;
913 __remove_hrtimer(timer, base, state, reprogram);
914 return 1;
915 }
916 return 0;
917}
918
919int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
920 unsigned long delta_ns, const enum hrtimer_mode mode,
921 int wakeup)
922{
923 struct hrtimer_clock_base *base, *new_base;
924 unsigned long flags;
925 int ret, leftmost;
926
927 base = lock_hrtimer_base(timer, &flags);
928
929 /* Remove an active timer from the queue: */
930 ret = remove_hrtimer(timer, base);
931
932 if (mode & HRTIMER_MODE_REL) {
933 tim = ktime_add_safe(tim, base->get_time());
934 /*
935 * CONFIG_TIME_LOW_RES is a temporary way for architectures
936 * to signal that they simply return xtime in
937 * do_gettimeoffset(). In this case we want to round up by
938 * resolution when starting a relative timer, to avoid short
939 * timeouts. This will go away with the GTOD framework.
940 */
941#ifdef CONFIG_TIME_LOW_RES
942 tim = ktime_add_safe(tim, base->resolution);
943#endif
944 }
945
946 hrtimer_set_expires_range_ns(timer, tim, delta_ns);
947
948 /* Switch the timer base, if necessary: */
949 new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
950
951 timer_stats_hrtimer_set_start_info(timer);
952
953 leftmost = enqueue_hrtimer(timer, new_base);
954
955 if (!leftmost) {
956 unlock_hrtimer_base(timer, &flags);
957 return ret;
958 }
959
960 if (!hrtimer_is_hres_active(timer)) {
961 /*
962 * Kick to reschedule the next tick to handle the new timer
963 * on dynticks target.
964 */
965 wake_up_nohz_cpu(new_base->cpu_base->cpu);
966 } else if (new_base->cpu_base == &__get_cpu_var(hrtimer_bases) &&
967 hrtimer_reprogram(timer, new_base)) {
968 /*
969 * Only allow reprogramming if the new base is on this CPU.
970 * (it might still be on another CPU if the timer was pending)
971 *
972 * XXX send_remote_softirq() ?
973 */
974 if (wakeup) {
975 /*
976 * We need to drop cpu_base->lock to avoid a
977 * lock ordering issue vs. rq->lock.
978 */
979 raw_spin_unlock(&new_base->cpu_base->lock);
980 raise_softirq_irqoff(HRTIMER_SOFTIRQ);
981 local_irq_restore(flags);
982 return ret;
983 } else {
984 __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
985 }
986 }
987
988 unlock_hrtimer_base(timer, &flags);
989
990 return ret;
991}
992EXPORT_SYMBOL_GPL(__hrtimer_start_range_ns);
993
994/**
995 * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
996 * @timer: the timer to be added
997 * @tim: expiry time
998 * @delta_ns: "slack" range for the timer
999 * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or
1000 * relative (HRTIMER_MODE_REL)
1001 *
1002 * Returns:
1003 * 0 on success
1004 * 1 when the timer was active
1005 */
1006int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
1007 unsigned long delta_ns, const enum hrtimer_mode mode)
1008{
1009 return __hrtimer_start_range_ns(timer, tim, delta_ns, mode, 1);
1010}
1011EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
1012
1013/**
1014 * hrtimer_start - (re)start an hrtimer on the current CPU
1015 * @timer: the timer to be added
1016 * @tim: expiry time
1017 * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or
1018 * relative (HRTIMER_MODE_REL)
1019 *
1020 * Returns:
1021 * 0 on success
1022 * 1 when the timer was active
1023 */
1024int
1025hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
1026{
1027 return __hrtimer_start_range_ns(timer, tim, 0, mode, 1);
1028}
1029EXPORT_SYMBOL_GPL(hrtimer_start);
1030
1031
1032/**
1033 * hrtimer_try_to_cancel - try to deactivate a timer
1034 * @timer: hrtimer to stop
1035 *
1036 * Returns:
1037 * 0 when the timer was not active
1038 * 1 when the timer was active
1039 * -1 when the timer is currently executing the callback function and
1040 * cannot be stopped
1041 */
1042int hrtimer_try_to_cancel(struct hrtimer *timer)
1043{
1044 struct hrtimer_clock_base *base;
1045 unsigned long flags;
1046 int ret = -1;
1047
1048 base = lock_hrtimer_base(timer, &flags);
1049
1050 if (!hrtimer_callback_running(timer))
1051 ret = remove_hrtimer(timer, base);
1052
1053 unlock_hrtimer_base(timer, &flags);
1054
1055 return ret;
1056
1057}
1058EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
1059
1060/**
1061 * hrtimer_cancel - cancel a timer and wait for the handler to finish.
1062 * @timer: the timer to be cancelled
1063 *
1064 * Returns:
1065 * 0 when the timer was not active
1066 * 1 when the timer was active
1067 */
1068int hrtimer_cancel(struct hrtimer *timer)
1069{
1070 for (;;) {
1071 int ret = hrtimer_try_to_cancel(timer);
1072
1073 if (ret >= 0)
1074 return ret;
1075 cpu_relax();
1076 }
1077}
1078EXPORT_SYMBOL_GPL(hrtimer_cancel);
1079
1080/**
1081 * hrtimer_get_remaining - get remaining time for the timer
1082 * @timer: the timer to read
1083 */
1084ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
1085{
1086 unsigned long flags;
1087 ktime_t rem;
1088
1089 lock_hrtimer_base(timer, &flags);
1090 rem = hrtimer_expires_remaining(timer);
1091 unlock_hrtimer_base(timer, &flags);
1092
1093 return rem;
1094}
1095EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
1096
1097#ifdef CONFIG_NO_HZ_COMMON
1098/**
1099 * hrtimer_get_next_event - get the time until next expiry event
1100 *
1101 * Returns the delta to the next expiry event or KTIME_MAX if no timer
1102 * is pending.
1103 */
1104ktime_t hrtimer_get_next_event(void)
1105{
1106 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1107 struct hrtimer_clock_base *base = cpu_base->clock_base;
1108 ktime_t delta, mindelta = { .tv64 = KTIME_MAX };
1109 unsigned long flags;
1110 int i;
1111
1112 raw_spin_lock_irqsave(&cpu_base->lock, flags);
1113
1114 if (!hrtimer_hres_active()) {
1115 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
1116 struct hrtimer *timer;
1117 struct timerqueue_node *next;
1118
1119 next = timerqueue_getnext(&base->active);
1120 if (!next)
1121 continue;
1122
1123 timer = container_of(next, struct hrtimer, node);
1124 delta.tv64 = hrtimer_get_expires_tv64(timer);
1125 delta = ktime_sub(delta, base->get_time());
1126 if (delta.tv64 < mindelta.tv64)
1127 mindelta.tv64 = delta.tv64;
1128 }
1129 }
1130
1131 raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1132
1133 if (mindelta.tv64 < 0)
1134 mindelta.tv64 = 0;
1135 return mindelta;
1136}
1137#endif
1138
1139static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1140 enum hrtimer_mode mode)
1141{
1142 struct hrtimer_cpu_base *cpu_base;
1143 int base;
1144
1145 memset(timer, 0, sizeof(struct hrtimer));
1146
1147 cpu_base = &__raw_get_cpu_var(hrtimer_bases);
1148
1149 if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS)
1150 clock_id = CLOCK_MONOTONIC;
1151
1152 base = hrtimer_clockid_to_base(clock_id);
1153 timer->base = &cpu_base->clock_base[base];
1154 timerqueue_init(&timer->node);
1155
1156#ifdef CONFIG_TIMER_STATS
1157 timer->start_site = NULL;
1158 timer->start_pid = -1;
1159 memset(timer->start_comm, 0, TASK_COMM_LEN);
1160#endif
1161}
1162
1163/**
1164 * hrtimer_init - initialize a timer to the given clock
1165 * @timer: the timer to be initialized
1166 * @clock_id: the clock to be used
1167 * @mode: timer mode abs/rel
1168 */
1169void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1170 enum hrtimer_mode mode)
1171{
1172 debug_init(timer, clock_id, mode);
1173 __hrtimer_init(timer, clock_id, mode);
1174}
1175EXPORT_SYMBOL_GPL(hrtimer_init);
1176
1177/**
1178 * hrtimer_get_res - get the timer resolution for a clock
1179 * @which_clock: which clock to query
1180 * @tp: pointer to timespec variable to store the resolution
1181 *
1182 * Store the resolution of the clock selected by @which_clock in the
1183 * variable pointed to by @tp.
1184 */
1185int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
1186{
1187 struct hrtimer_cpu_base *cpu_base;
1188 int base = hrtimer_clockid_to_base(which_clock);
1189
1190 cpu_base = &__raw_get_cpu_var(hrtimer_bases);
1191 *tp = ktime_to_timespec(cpu_base->clock_base[base].resolution);
1192
1193 return 0;
1194}
1195EXPORT_SYMBOL_GPL(hrtimer_get_res);
1196
1197static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
1198{
1199 struct hrtimer_clock_base *base = timer->base;
1200 struct hrtimer_cpu_base *cpu_base = base->cpu_base;
1201 enum hrtimer_restart (*fn)(struct hrtimer *);
1202 int restart;
1203
1204 WARN_ON(!irqs_disabled());
1205
1206 debug_deactivate(timer);
1207 __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
1208 timer_stats_account_hrtimer(timer);
1209 fn = timer->function;
1210
1211 /*
1212 * Because we run timers from hardirq context, there is no chance
1213 * they get migrated to another cpu, therefore it's safe to unlock
1214 * the timer base.
1215 */
1216 raw_spin_unlock(&cpu_base->lock);
1217 trace_hrtimer_expire_entry(timer, now);
1218 restart = fn(timer);
1219 trace_hrtimer_expire_exit(timer);
1220 raw_spin_lock(&cpu_base->lock);
1221
1222 /*
1223 * Note: We clear the CALLBACK bit after enqueue_hrtimer and
1224 * we do not reprogram the event hardware. Happens either in
1225 * hrtimer_start_range_ns() or in hrtimer_interrupt()
1226 */
1227 if (restart != HRTIMER_NORESTART) {
1228 BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
1229 enqueue_hrtimer(timer, base);
1230 }
1231
1232 WARN_ON_ONCE(!(timer->state & HRTIMER_STATE_CALLBACK));
1233
1234 timer->state &= ~HRTIMER_STATE_CALLBACK;
1235}
1236
1237#ifdef CONFIG_HIGH_RES_TIMERS
1238
1239/*
1240 * High resolution timer interrupt
1241 * Called with interrupts disabled
1242 */
1243void hrtimer_interrupt(struct clock_event_device *dev)
1244{
1245 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1246 ktime_t expires_next, now, entry_time, delta;
1247 int i, retries = 0;
1248
1249 BUG_ON(!cpu_base->hres_active);
1250 cpu_base->nr_events++;
1251 dev->next_event.tv64 = KTIME_MAX;
1252
1253 raw_spin_lock(&cpu_base->lock);
1254 entry_time = now = hrtimer_update_base(cpu_base);
1255retry:
1256 expires_next.tv64 = KTIME_MAX;
1257 /*
1258 * We set expires_next to KTIME_MAX here with cpu_base->lock
1259 * held to prevent that a timer is enqueued in our queue via
1260 * the migration code. This does not affect enqueueing of
1261 * timers which run their callback and need to be requeued on
1262 * this CPU.
1263 */
1264 cpu_base->expires_next.tv64 = KTIME_MAX;
1265
1266 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
1267 struct hrtimer_clock_base *base;
1268 struct timerqueue_node *node;
1269 ktime_t basenow;
1270
1271 if (!(cpu_base->active_bases & (1 << i)))
1272 continue;
1273
1274 base = cpu_base->clock_base + i;
1275 basenow = ktime_add(now, base->offset);
1276
1277 while ((node = timerqueue_getnext(&base->active))) {
1278 struct hrtimer *timer;
1279
1280 timer = container_of(node, struct hrtimer, node);
1281
1282 /*
1283 * The immediate goal for using the softexpires is
1284 * minimizing wakeups, not running timers at the
1285 * earliest interrupt after their soft expiration.
1286 * This allows us to avoid using a Priority Search
1287 * Tree, which can answer a stabbing query for
1288 * overlapping intervals and instead use the simple
1289 * BST we already have.
1290 * We don't add extra wakeups by delaying timers that
1291 * are right-of a not yet expired timer, because that
1292 * timer will have to trigger a wakeup anyway.
1293 */
1294
1295 if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) {
1296 ktime_t expires;
1297
1298 expires = ktime_sub(hrtimer_get_expires(timer),
1299 base->offset);
1300 if (expires.tv64 < 0)
1301 expires.tv64 = KTIME_MAX;
1302 if (expires.tv64 < expires_next.tv64)
1303 expires_next = expires;
1304 break;
1305 }
1306
1307 __run_hrtimer(timer, &basenow);
1308 }
1309 }
1310
1311 /*
1312 * Store the new expiry value so the migration code can verify
1313 * against it.
1314 */
1315 cpu_base->expires_next = expires_next;
1316 raw_spin_unlock(&cpu_base->lock);
1317
1318 /* Reprogramming necessary ? */
1319 if (expires_next.tv64 == KTIME_MAX ||
1320 !tick_program_event(expires_next, 0)) {
1321 cpu_base->hang_detected = 0;
1322 return;
1323 }
1324
1325 /*
1326 * The next timer was already expired due to:
1327 * - tracing
1328 * - long lasting callbacks
1329 * - being scheduled away when running in a VM
1330 *
1331 * We need to prevent that we loop forever in the hrtimer
1332 * interrupt routine. We give it 3 attempts to avoid
1333 * overreacting on some spurious event.
1334 *
1335 * Acquire base lock for updating the offsets and retrieving
1336 * the current time.
1337 */
1338 raw_spin_lock(&cpu_base->lock);
1339 now = hrtimer_update_base(cpu_base);
1340 cpu_base->nr_retries++;
1341 if (++retries < 3)
1342 goto retry;
1343 /*
1344 * Give the system a chance to do something else than looping
1345 * here. We stored the entry time, so we know exactly how long
1346 * we spent here. We schedule the next event this amount of
1347 * time away.
1348 */
1349 cpu_base->nr_hangs++;
1350 cpu_base->hang_detected = 1;
1351 raw_spin_unlock(&cpu_base->lock);
1352 delta = ktime_sub(now, entry_time);
1353 if (delta.tv64 > cpu_base->max_hang_time.tv64)
1354 cpu_base->max_hang_time = delta;
1355 /*
1356 * Limit it to a sensible value as we enforce a longer
1357 * delay. Give the CPU at least 100ms to catch up.
1358 */
1359 if (delta.tv64 > 100 * NSEC_PER_MSEC)
1360 expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
1361 else
1362 expires_next = ktime_add(now, delta);
1363 tick_program_event(expires_next, 1);
1364 printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
1365 ktime_to_ns(delta));
1366}
1367
1368/*
1369 * local version of hrtimer_peek_ahead_timers() called with interrupts
1370 * disabled.
1371 */
1372static void __hrtimer_peek_ahead_timers(void)
1373{
1374 struct tick_device *td;
1375
1376 if (!hrtimer_hres_active())
1377 return;
1378
1379 td = &__get_cpu_var(tick_cpu_device);
1380 if (td && td->evtdev)
1381 hrtimer_interrupt(td->evtdev);
1382}
1383
1384/**
1385 * hrtimer_peek_ahead_timers -- run soft-expired timers now
1386 *
1387 * hrtimer_peek_ahead_timers will peek at the timer queue of
1388 * the current cpu and check if there are any timers for which
1389 * the soft expires time has passed. If any such timers exist,
1390 * they are run immediately and then removed from the timer queue.
1391 *
1392 */
1393void hrtimer_peek_ahead_timers(void)
1394{
1395 unsigned long flags;
1396
1397 local_irq_save(flags);
1398 __hrtimer_peek_ahead_timers();
1399 local_irq_restore(flags);
1400}
1401
1402static void run_hrtimer_softirq(struct softirq_action *h)
1403{
1404 hrtimer_peek_ahead_timers();
1405}
1406
1407#else /* CONFIG_HIGH_RES_TIMERS */
1408
1409static inline void __hrtimer_peek_ahead_timers(void) { }
1410
1411#endif /* !CONFIG_HIGH_RES_TIMERS */
1412
1413/*
1414 * Called from timer softirq every jiffy, expire hrtimers:
1415 *
1416 * For HRT it's the fallback code to run the softirq in the timer
1417 * softirq context in case the hrtimer initialization failed or has
1418 * not been done yet.
1419 */
1420void hrtimer_run_pending(void)
1421{
1422 if (hrtimer_hres_active())
1423 return;
1424
1425 /*
1426 * This _is_ ugly: We have to check in the softirq context,
1427 * whether we can switch to highres and / or nohz mode. The
1428 * clocksource switch happens in the timer interrupt with
1429 * xtime_lock held. Notification from there only sets the
1430 * check bit in the tick_oneshot code, otherwise we might
1431 * deadlock vs. xtime_lock.
1432 */
1433 if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
1434 hrtimer_switch_to_hres();
1435}
1436
1437/*
1438 * Called from hardirq context every jiffy
1439 */
1440void hrtimer_run_queues(void)
1441{
1442 struct timerqueue_node *node;
1443 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1444 struct hrtimer_clock_base *base;
1445 int index, gettime = 1;
1446
1447 if (hrtimer_hres_active())
1448 return;
1449
1450 for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
1451 base = &cpu_base->clock_base[index];
1452 if (!timerqueue_getnext(&base->active))
1453 continue;
1454
1455 if (gettime) {
1456 hrtimer_get_softirq_time(cpu_base);
1457 gettime = 0;
1458 }
1459
1460 raw_spin_lock(&cpu_base->lock);
1461
1462 while ((node = timerqueue_getnext(&base->active))) {
1463 struct hrtimer *timer;
1464
1465 timer = container_of(node, struct hrtimer, node);
1466 if (base->softirq_time.tv64 <=
1467 hrtimer_get_expires_tv64(timer))
1468 break;
1469
1470 __run_hrtimer(timer, &base->softirq_time);
1471 }
1472 raw_spin_unlock(&cpu_base->lock);
1473 }
1474}
1475
1476/*
1477 * Sleep related functions:
1478 */
1479static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
1480{
1481 struct hrtimer_sleeper *t =
1482 container_of(timer, struct hrtimer_sleeper, timer);
1483 struct task_struct *task = t->task;
1484
1485 t->task = NULL;
1486 if (task)
1487 wake_up_process(task);
1488
1489 return HRTIMER_NORESTART;
1490}
1491
1492void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
1493{
1494 sl->timer.function = hrtimer_wakeup;
1495 sl->task = task;
1496}
1497EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
1498
1499static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
1500{
1501 hrtimer_init_sleeper(t, current);
1502
1503 do {
1504 set_current_state(TASK_INTERRUPTIBLE);
1505 hrtimer_start_expires(&t->timer, mode);
1506 if (!hrtimer_active(&t->timer))
1507 t->task = NULL;
1508
1509 if (likely(t->task))
1510 freezable_schedule();
1511
1512 hrtimer_cancel(&t->timer);
1513 mode = HRTIMER_MODE_ABS;
1514
1515 } while (t->task && !signal_pending(current));
1516
1517 __set_current_state(TASK_RUNNING);
1518
1519 return t->task == NULL;
1520}
1521
1522static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp)
1523{
1524 struct timespec rmt;
1525 ktime_t rem;
1526
1527 rem = hrtimer_expires_remaining(timer);
1528 if (rem.tv64 <= 0)
1529 return 0;
1530 rmt = ktime_to_timespec(rem);
1531
1532 if (copy_to_user(rmtp, &rmt, sizeof(*rmtp)))
1533 return -EFAULT;
1534
1535 return 1;
1536}
1537
1538long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
1539{
1540 struct hrtimer_sleeper t;
1541 struct timespec __user *rmtp;
1542 int ret = 0;
1543
1544 hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid,
1545 HRTIMER_MODE_ABS);
1546 hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
1547
1548 if (do_nanosleep(&t, HRTIMER_MODE_ABS))
1549 goto out;
1550
1551 rmtp = restart->nanosleep.rmtp;
1552 if (rmtp) {
1553 ret = update_rmtp(&t.timer, rmtp);
1554 if (ret <= 0)
1555 goto out;
1556 }
1557
1558 /* The other values in restart are already filled in */
1559 ret = -ERESTART_RESTARTBLOCK;
1560out:
1561 destroy_hrtimer_on_stack(&t.timer);
1562 return ret;
1563}
1564
1565long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
1566 const enum hrtimer_mode mode, const clockid_t clockid)
1567{
1568 struct restart_block *restart;
1569 struct hrtimer_sleeper t;
1570 int ret = 0;
1571 unsigned long slack;
1572
1573 slack = current->timer_slack_ns;
1574 if (dl_task(current) || rt_task(current))
1575 slack = 0;
1576
1577 hrtimer_init_on_stack(&t.timer, clockid, mode);
1578 hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
1579 if (do_nanosleep(&t, mode))
1580 goto out;
1581
1582 /* Absolute timers do not update the rmtp value and restart: */
1583 if (mode == HRTIMER_MODE_ABS) {
1584 ret = -ERESTARTNOHAND;
1585 goto out;
1586 }
1587
1588 if (rmtp) {
1589 ret = update_rmtp(&t.timer, rmtp);
1590 if (ret <= 0)
1591 goto out;
1592 }
1593
1594 restart = &current_thread_info()->restart_block;
1595 restart->fn = hrtimer_nanosleep_restart;
1596 restart->nanosleep.clockid = t.timer.base->clockid;
1597 restart->nanosleep.rmtp = rmtp;
1598 restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
1599
1600 ret = -ERESTART_RESTARTBLOCK;
1601out:
1602 destroy_hrtimer_on_stack(&t.timer);
1603 return ret;
1604}
1605
1606SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
1607 struct timespec __user *, rmtp)
1608{
1609 struct timespec tu;
1610
1611 if (copy_from_user(&tu, rqtp, sizeof(tu)))
1612 return -EFAULT;
1613
1614 if (!timespec_valid(&tu))
1615 return -EINVAL;
1616
1617 return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
1618}
1619
1620/*
1621 * Functions related to boot-time initialization:
1622 */
1623static void init_hrtimers_cpu(int cpu)
1624{
1625 struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
1626 int i;
1627
1628 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
1629 cpu_base->clock_base[i].cpu_base = cpu_base;
1630 timerqueue_init_head(&cpu_base->clock_base[i].active);
1631 }
1632
1633 cpu_base->cpu = cpu;
1634 hrtimer_init_hres(cpu_base);
1635}
1636
1637#ifdef CONFIG_HOTPLUG_CPU
1638
1639static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
1640 struct hrtimer_clock_base *new_base)
1641{
1642 struct hrtimer *timer;
1643 struct timerqueue_node *node;
1644
1645 while ((node = timerqueue_getnext(&old_base->active))) {
1646 timer = container_of(node, struct hrtimer, node);
1647 BUG_ON(hrtimer_callback_running(timer));
1648 debug_deactivate(timer);
1649
1650 /*
1651 * Mark it as STATE_MIGRATE not INACTIVE otherwise the
1652 * timer could be seen as !active and just vanish away
1653 * under us on another CPU
1654 */
1655 __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0);
1656 timer->base = new_base;
1657 /*
1658 * Enqueue the timers on the new cpu. This does not
1659 * reprogram the event device in case the timer
1660 * expires before the earliest on this CPU, but we run
1661 * hrtimer_interrupt after we migrated everything to
1662 * sort out already expired timers and reprogram the
1663 * event device.
1664 */
1665 enqueue_hrtimer(timer, new_base);
1666
1667 /* Clear the migration state bit */
1668 timer->state &= ~HRTIMER_STATE_MIGRATE;
1669 }
1670}
1671
1672static void migrate_hrtimers(int scpu)
1673{
1674 struct hrtimer_cpu_base *old_base, *new_base;
1675 int i;
1676
1677 BUG_ON(cpu_online(scpu));
1678 tick_cancel_sched_timer(scpu);
1679
1680 local_irq_disable();
1681 old_base = &per_cpu(hrtimer_bases, scpu);
1682 new_base = &__get_cpu_var(hrtimer_bases);
1683 /*
1684 * The caller is globally serialized and nobody else
1685 * takes two locks at once, deadlock is not possible.
1686 */
1687 raw_spin_lock(&new_base->lock);
1688 raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
1689
1690 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
1691 migrate_hrtimer_list(&old_base->clock_base[i],
1692 &new_base->clock_base[i]);
1693 }
1694
1695 raw_spin_unlock(&old_base->lock);
1696 raw_spin_unlock(&new_base->lock);
1697
1698 /* Check, if we got expired work to do */
1699 __hrtimer_peek_ahead_timers();
1700 local_irq_enable();
1701}
1702
1703#endif /* CONFIG_HOTPLUG_CPU */
1704
1705static int hrtimer_cpu_notify(struct notifier_block *self,
1706 unsigned long action, void *hcpu)
1707{
1708 int scpu = (long)hcpu;
1709
1710 switch (action) {
1711
1712 case CPU_UP_PREPARE:
1713 case CPU_UP_PREPARE_FROZEN:
1714 init_hrtimers_cpu(scpu);
1715 break;
1716
1717#ifdef CONFIG_HOTPLUG_CPU
1718 case CPU_DYING:
1719 case CPU_DYING_FROZEN:
1720 clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu);
1721 break;
1722 case CPU_DEAD:
1723 case CPU_DEAD_FROZEN:
1724 {
1725 clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu);
1726 migrate_hrtimers(scpu);
1727 break;
1728 }
1729#endif
1730
1731 default:
1732 break;
1733 }
1734
1735 return NOTIFY_OK;
1736}
1737
1738static struct notifier_block hrtimers_nb = {
1739 .notifier_call = hrtimer_cpu_notify,
1740};
1741
1742void __init hrtimers_init(void)
1743{
1744 hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
1745 (void *)(long)smp_processor_id());
1746 register_cpu_notifier(&hrtimers_nb);
1747#ifdef CONFIG_HIGH_RES_TIMERS
1748 open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
1749#endif
1750}
1751
1752/**
1753 * schedule_hrtimeout_range_clock - sleep until timeout
1754 * @expires: timeout value (ktime_t)
1755 * @delta: slack in expires timeout (ktime_t)
1756 * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
1757 * @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
1758 */
1759int __sched
1760schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
1761 const enum hrtimer_mode mode, int clock)
1762{
1763 struct hrtimer_sleeper t;
1764
1765 /*
1766 * Optimize when a zero timeout value is given. It does not
1767 * matter whether this is an absolute or a relative time.
1768 */
1769 if (expires && !expires->tv64) {
1770 __set_current_state(TASK_RUNNING);
1771 return 0;
1772 }
1773
1774 /*
1775 * A NULL parameter means "infinite"
1776 */
1777 if (!expires) {
1778 schedule();
1779 __set_current_state(TASK_RUNNING);
1780 return -EINTR;
1781 }
1782
1783 hrtimer_init_on_stack(&t.timer, clock, mode);
1784 hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
1785
1786 hrtimer_init_sleeper(&t, current);
1787
1788 hrtimer_start_expires(&t.timer, mode);
1789 if (!hrtimer_active(&t.timer))
1790 t.task = NULL;
1791
1792 if (likely(t.task))
1793 schedule();
1794
1795 hrtimer_cancel(&t.timer);
1796 destroy_hrtimer_on_stack(&t.timer);
1797
1798 __set_current_state(TASK_RUNNING);
1799
1800 return !t.task ? 0 : -EINTR;
1801}
1802
1803/**
1804 * schedule_hrtimeout_range - sleep until timeout
1805 * @expires: timeout value (ktime_t)
1806 * @delta: slack in expires timeout (ktime_t)
1807 * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
1808 *
1809 * Make the current task sleep until the given expiry time has
1810 * elapsed. The routine will return immediately unless
1811 * the current task state has been set (see set_current_state()).
1812 *
1813 * The @delta argument gives the kernel the freedom to schedule the
1814 * actual wakeup to a time that is both power and performance friendly.
1815 * The kernel gives the normal best effort behavior for "@expires+@delta",
1816 * but may decide to fire the timer earlier, though never earlier than @expires.
1817 *
1818 * You can set the task state as follows -
1819 *
1820 * %TASK_UNINTERRUPTIBLE - at least @expires time is guaranteed to
1821 * pass before the routine returns.
1822 *
1823 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1824 * delivered to the current task.
1825 *
1826 * The current task state is guaranteed to be TASK_RUNNING when this
1827 * routine returns.
1828 *
1829 * Returns 0 when the timer has expired otherwise -EINTR
1830 */
1831int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
1832 const enum hrtimer_mode mode)
1833{
1834 return schedule_hrtimeout_range_clock(expires, delta, mode,
1835 CLOCK_MONOTONIC);
1836}
1837EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
1838
1839/**
1840 * schedule_hrtimeout - sleep until timeout
1841 * @expires: timeout value (ktime_t)
1842 * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
1843 *
1844 * Make the current task sleep until the given expiry time has
1845 * elapsed. The routine will return immediately unless
1846 * the current task state has been set (see set_current_state()).
1847 *
1848 * You can set the task state as follows -
1849 *
1850 * %TASK_UNINTERRUPTIBLE - at least @expires time is guaranteed to
1851 * pass before the routine returns.
1852 *
1853 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1854 * delivered to the current task.
1855 *
1856 * The current task state is guaranteed to be TASK_RUNNING when this
1857 * routine returns.
1858 *
1859 * Returns 0 when the timer has expired otherwise -EINTR
1860 */
1861int __sched schedule_hrtimeout(ktime_t *expires,
1862 const enum hrtimer_mode mode)
1863{
1864 return schedule_hrtimeout_range(expires, 0, mode);
1865}
1866EXPORT_SYMBOL_GPL(schedule_hrtimeout);
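/*
 * Editor's illustration (not part of the kernel source above): typical
 * in-kernel use of schedule_hrtimeout_range(). The task state must be
 * set before calling, and @delta gives the timer framework room to
 * coalesce the wakeup anywhere in [expires, expires + delta]. The
 * function name and constants below are illustrative only.
 */
#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/sched.h>

/* Wait up to 500us for an event, allowing 100us of wakeup slack. */
static int wait_for_event_example(void)
{
	ktime_t timeout = ktime_set(0, 500 * NSEC_PER_USEC);

	set_current_state(TASK_INTERRUPTIBLE);
	if (!schedule_hrtimeout_range(&timeout, 100 * NSEC_PER_USEC,
				      HRTIMER_MODE_REL))
		return 0;	/* timer expired */
	return -EINTR;		/* woken before the timeout elapsed */
}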
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
new file mode 100644
index 000000000000..8d262b467573
--- /dev/null
+++ b/kernel/time/itimer.c
@@ -0,0 +1,301 @@
1/*
2 * linux/kernel/itimer.c
3 *
4 * Copyright (C) 1992 Darren Senn
5 */
6
7/* These are all the functions necessary to implement itimers */
8
9#include <linux/mm.h>
10#include <linux/interrupt.h>
11#include <linux/syscalls.h>
12#include <linux/time.h>
13#include <linux/posix-timers.h>
14#include <linux/hrtimer.h>
15#include <trace/events/timer.h>
16
17#include <asm/uaccess.h>
18
19/**
20 * itimer_get_remtime - get remaining time for the timer
21 *
22 * @timer: the timer to read
23 *
24 * Returns the delta between the expiry time and now, which can be
 25 * less than zero, or 1usec for a pending timer that has already expired
26 */
27static struct timeval itimer_get_remtime(struct hrtimer *timer)
28{
29 ktime_t rem = hrtimer_get_remaining(timer);
30
31 /*
32 * Racy but safe: if the itimer expires after the above
 33 * hrtimer_get_remaining() call but before this condition
34 * then we return 0 - which is correct.
35 */
36 if (hrtimer_active(timer)) {
37 if (rem.tv64 <= 0)
38 rem.tv64 = NSEC_PER_USEC;
39 } else
40 rem.tv64 = 0;
41
42 return ktime_to_timeval(rem);
43}
44
45static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
46 struct itimerval *const value)
47{
48 cputime_t cval, cinterval;
49 struct cpu_itimer *it = &tsk->signal->it[clock_id];
50
51 spin_lock_irq(&tsk->sighand->siglock);
52
53 cval = it->expires;
54 cinterval = it->incr;
55 if (cval) {
56 struct task_cputime cputime;
57 cputime_t t;
58
59 thread_group_cputimer(tsk, &cputime);
60 if (clock_id == CPUCLOCK_PROF)
61 t = cputime.utime + cputime.stime;
62 else
63 /* CPUCLOCK_VIRT */
64 t = cputime.utime;
65
66 if (cval < t)
67 /* about to fire */
68 cval = cputime_one_jiffy;
69 else
70 cval = cval - t;
71 }
72
73 spin_unlock_irq(&tsk->sighand->siglock);
74
75 cputime_to_timeval(cval, &value->it_value);
76 cputime_to_timeval(cinterval, &value->it_interval);
77}
78
79int do_getitimer(int which, struct itimerval *value)
80{
81 struct task_struct *tsk = current;
82
83 switch (which) {
84 case ITIMER_REAL:
85 spin_lock_irq(&tsk->sighand->siglock);
86 value->it_value = itimer_get_remtime(&tsk->signal->real_timer);
87 value->it_interval =
88 ktime_to_timeval(tsk->signal->it_real_incr);
89 spin_unlock_irq(&tsk->sighand->siglock);
90 break;
91 case ITIMER_VIRTUAL:
92 get_cpu_itimer(tsk, CPUCLOCK_VIRT, value);
93 break;
94 case ITIMER_PROF:
95 get_cpu_itimer(tsk, CPUCLOCK_PROF, value);
96 break;
97 default:
 98 return -EINVAL;
99 }
100 return 0;
101}
102
103SYSCALL_DEFINE2(getitimer, int, which, struct itimerval __user *, value)
104{
105 int error = -EFAULT;
106 struct itimerval get_buffer;
107
108 if (value) {
109 error = do_getitimer(which, &get_buffer);
110 if (!error &&
111 copy_to_user(value, &get_buffer, sizeof(get_buffer)))
112 error = -EFAULT;
113 }
114 return error;
115}
116
117
118/*
119 * The timer is automagically restarted when interval != 0
120 */
121enum hrtimer_restart it_real_fn(struct hrtimer *timer)
122{
123 struct signal_struct *sig =
124 container_of(timer, struct signal_struct, real_timer);
125
126 trace_itimer_expire(ITIMER_REAL, sig->leader_pid, 0);
127 kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid);
128
129 return HRTIMER_NORESTART;
130}
131
132static inline u32 cputime_sub_ns(cputime_t ct, s64 real_ns)
133{
134 struct timespec ts;
135 s64 cpu_ns;
136
137 cputime_to_timespec(ct, &ts);
138 cpu_ns = timespec_to_ns(&ts);
139
140 return (cpu_ns <= real_ns) ? 0 : cpu_ns - real_ns;
141}
142
143static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
144 const struct itimerval *const value,
145 struct itimerval *const ovalue)
146{
147 cputime_t cval, nval, cinterval, ninterval;
148 s64 ns_ninterval, ns_nval;
149 u32 error, incr_error;
150 struct cpu_itimer *it = &tsk->signal->it[clock_id];
151
152 nval = timeval_to_cputime(&value->it_value);
153 ns_nval = timeval_to_ns(&value->it_value);
154 ninterval = timeval_to_cputime(&value->it_interval);
155 ns_ninterval = timeval_to_ns(&value->it_interval);
156
157 error = cputime_sub_ns(nval, ns_nval);
158 incr_error = cputime_sub_ns(ninterval, ns_ninterval);
159
160 spin_lock_irq(&tsk->sighand->siglock);
161
162 cval = it->expires;
163 cinterval = it->incr;
164 if (cval || nval) {
165 if (nval > 0)
166 nval += cputime_one_jiffy;
167 set_process_cpu_timer(tsk, clock_id, &nval, &cval);
168 }
169 it->expires = nval;
170 it->incr = ninterval;
171 it->error = error;
172 it->incr_error = incr_error;
173 trace_itimer_state(clock_id == CPUCLOCK_VIRT ?
174 ITIMER_VIRTUAL : ITIMER_PROF, value, nval);
175
176 spin_unlock_irq(&tsk->sighand->siglock);
177
178 if (ovalue) {
179 cputime_to_timeval(cval, &ovalue->it_value);
180 cputime_to_timeval(cinterval, &ovalue->it_interval);
181 }
182}
183
184/*
185 * Returns true if the timeval is in canonical form
186 */
187#define timeval_valid(t) \
188 (((t)->tv_sec >= 0) && (((unsigned long) (t)->tv_usec) < USEC_PER_SEC))
189
190int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
191{
192 struct task_struct *tsk = current;
193 struct hrtimer *timer;
194 ktime_t expires;
195
196 /*
197 * Validate the timevals in value.
198 */
199 if (!timeval_valid(&value->it_value) ||
200 !timeval_valid(&value->it_interval))
201 return -EINVAL;
202
203 switch (which) {
204 case ITIMER_REAL:
205again:
206 spin_lock_irq(&tsk->sighand->siglock);
207 timer = &tsk->signal->real_timer;
208 if (ovalue) {
209 ovalue->it_value = itimer_get_remtime(timer);
210 ovalue->it_interval
211 = ktime_to_timeval(tsk->signal->it_real_incr);
212 }
213 /* We are sharing ->siglock with it_real_fn() */
214 if (hrtimer_try_to_cancel(timer) < 0) {
215 spin_unlock_irq(&tsk->sighand->siglock);
216 goto again;
217 }
218 expires = timeval_to_ktime(value->it_value);
219 if (expires.tv64 != 0) {
220 tsk->signal->it_real_incr =
221 timeval_to_ktime(value->it_interval);
222 hrtimer_start(timer, expires, HRTIMER_MODE_REL);
223 } else
224 tsk->signal->it_real_incr.tv64 = 0;
225
226 trace_itimer_state(ITIMER_REAL, value, 0);
227 spin_unlock_irq(&tsk->sighand->siglock);
228 break;
229 case ITIMER_VIRTUAL:
230 set_cpu_itimer(tsk, CPUCLOCK_VIRT, value, ovalue);
231 break;
232 case ITIMER_PROF:
233 set_cpu_itimer(tsk, CPUCLOCK_PROF, value, ovalue);
234 break;
235 default:
236 return -EINVAL;
237 }
238 return 0;
239}
240
241/**
242 * alarm_setitimer - set alarm in seconds
243 *
244 * @seconds: number of seconds until alarm
245 * 0 disables the alarm
246 *
247 * Returns the remaining time in seconds of a pending timer or 0 when
248 * the timer is not active.
249 *
 250 * On 32 bit machines the seconds value is limited to INT_MAX to avoid
251 * negative timeval settings which would cause immediate expiry.
252 */
253unsigned int alarm_setitimer(unsigned int seconds)
254{
255 struct itimerval it_new, it_old;
256
257#if BITS_PER_LONG < 64
258 if (seconds > INT_MAX)
259 seconds = INT_MAX;
260#endif
261 it_new.it_value.tv_sec = seconds;
262 it_new.it_value.tv_usec = 0;
263 it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
264
265 do_setitimer(ITIMER_REAL, &it_new, &it_old);
266
267 /*
268 * We can't return 0 if we have an alarm pending ... And we'd
269 * better return too much than too little anyway
270 */
271 if ((!it_old.it_value.tv_sec && it_old.it_value.tv_usec) ||
272 it_old.it_value.tv_usec >= 500000)
273 it_old.it_value.tv_sec++;
274
275 return it_old.it_value.tv_sec;
276}
277
278SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value,
279 struct itimerval __user *, ovalue)
280{
281 struct itimerval set_buffer, get_buffer;
282 int error;
283
284 if (value) {
 285 if (copy_from_user(&set_buffer, value, sizeof(set_buffer)))
286 return -EFAULT;
287 } else {
288 memset(&set_buffer, 0, sizeof(set_buffer));
289 printk_once(KERN_WARNING "%s calls setitimer() with new_value NULL pointer."
290 " Misfeature support will be removed\n",
291 current->comm);
292 }
293
294 error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL);
295 if (error || !ovalue)
296 return error;
297
298 if (copy_to_user(ovalue, &get_buffer, sizeof(get_buffer)))
299 return -EFAULT;
300 return 0;
301}
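/*
 * Editor's illustration (not part of the kernel source above): the
 * user-space view of the setitimer() path implemented in this file.
 * ITIMER_REAL counts wall-clock time and delivers SIGALRM; the interval
 * field re-arms the timer automatically, as it_real_fn()/do_setitimer()
 * above implement. Names below are illustrative only.
 */
#include <signal.h>
#include <stdio.h>
#include <sys/time.h>
#include <unistd.h>

static volatile sig_atomic_t ticks;

static void on_alarm(int sig)
{
	(void)sig;
	ticks++;			/* async-signal-safe bookkeeping only */
}

int main(void)
{
	struct itimerval it = {
		.it_value    = { .tv_sec = 0, .tv_usec = 250000 },	/* first expiry after 250ms */
		.it_interval = { .tv_sec = 1, .tv_usec = 0 },		/* then every second */
	};

	signal(SIGALRM, on_alarm);
	if (setitimer(ITIMER_REAL, &it, NULL))
		perror("setitimer");

	while (ticks < 5)
		pause();		/* each SIGALRM interrupts pause() */
	return 0;
}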
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 33db43a39515..87a346fd6d61 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -466,7 +466,8 @@ static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
 
 static void sync_cmos_clock(struct work_struct *work)
 {
-	struct timespec now, next;
+	struct timespec64 now;
+	struct timespec next;
 	int fail = 1;
 
 	/*
@@ -485,9 +486,9 @@ static void sync_cmos_clock(struct work_struct *work)
 		return;
 	}
 
-	getnstimeofday(&now);
+	getnstimeofday64(&now);
 	if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec * 5) {
-		struct timespec adjust = now;
+		struct timespec adjust = timespec64_to_timespec(now);
 
 		fail = -ENODEV;
 		if (persistent_clock_is_local)
@@ -531,7 +532,7 @@ void ntp_notify_cmos_timer(void) { }
 /*
  * Propagate a new txc->status value into the NTP state:
  */
-static inline void process_adj_status(struct timex *txc, struct timespec *ts)
+static inline void process_adj_status(struct timex *txc, struct timespec64 *ts)
 {
 	if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) {
 		time_state = TIME_OK;
@@ -554,7 +555,7 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
 
 
 static inline void process_adjtimex_modes(struct timex *txc,
-					  struct timespec *ts,
+					  struct timespec64 *ts,
 					  s32 *time_tai)
 {
 	if (txc->modes & ADJ_STATUS)
@@ -640,7 +641,7 @@ int ntp_validate_timex(struct timex *txc)
  * adjtimex mainly allows reading (and writing, if superuser) of
  * kernel time-keeping variables. used by xntpd.
  */
-int __do_adjtimex(struct timex *txc, struct timespec *ts, s32 *time_tai)
+int __do_adjtimex(struct timex *txc, struct timespec64 *ts, s32 *time_tai)
 {
 	int result;
 
@@ -684,7 +685,7 @@ int __do_adjtimex(struct timex *txc, struct timespec *ts, s32 *time_tai)
 	/* fill PPS status fields */
 	pps_fill_timex(txc);
 
-	txc->time.tv_sec = ts->tv_sec;
+	txc->time.tv_sec = (time_t)ts->tv_sec;
 	txc->time.tv_usec = ts->tv_nsec;
 	if (!(time_status & STA_NANO))
 		txc->time.tv_usec /= NSEC_PER_USEC;
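/*
 * Editor's note (illustration, not part of the patch): the hunks above are
 * part of the year-2038 conversion mentioned in the merge message. On
 * 32-bit builds struct timespec carries a 32-bit tv_sec that overflows in
 * 2038, while struct timespec64 always uses a 64-bit seconds field; code
 * that still needs the legacy layout converts explicitly, as the CMOS sync
 * path does with timespec64_to_timespec(). A minimal sketch (header
 * placement assumed for this illustration):
 */
#include <linux/time64.h>

static struct timespec legacy_timespec_view(struct timespec64 now64)
{
	/* Truncates tv_sec on 32-bit after 2038; fine for the CMOS path above. */
	return timespec64_to_timespec(now64);
}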
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index 1950cb4ca2a4..bbd102ad9df7 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -7,6 +7,6 @@ extern void ntp_clear(void);
 extern u64 ntp_tick_length(void);
 extern int second_overflow(unsigned long secs);
 extern int ntp_validate_timex(struct timex *);
-extern int __do_adjtimex(struct timex *, struct timespec *, s32 *);
+extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *);
 extern void __hardpps(const struct timespec *, const struct timespec *);
 #endif /* _LINUX_NTP_INTERNAL_H */
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
new file mode 100644
index 000000000000..3b8946416a5f
--- /dev/null
+++ b/kernel/time/posix-cpu-timers.c
@@ -0,0 +1,1490 @@
1/*
2 * Implement CPU time clocks for the POSIX clock interface.
3 */
4
5#include <linux/sched.h>
6#include <linux/posix-timers.h>
7#include <linux/errno.h>
8#include <linux/math64.h>
9#include <asm/uaccess.h>
10#include <linux/kernel_stat.h>
11#include <trace/events/timer.h>
12#include <linux/random.h>
13#include <linux/tick.h>
14#include <linux/workqueue.h>
15
16/*
17 * Called after updating RLIMIT_CPU to run cpu timer and update
18 * tsk->signal->cputime_expires expiration cache if necessary. Needs
19 * siglock protection since other code may update expiration cache as
20 * well.
21 */
22void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
23{
24 cputime_t cputime = secs_to_cputime(rlim_new);
25
26 spin_lock_irq(&task->sighand->siglock);
27 set_process_cpu_timer(task, CPUCLOCK_PROF, &cputime, NULL);
28 spin_unlock_irq(&task->sighand->siglock);
29}
30
31static int check_clock(const clockid_t which_clock)
32{
33 int error = 0;
34 struct task_struct *p;
35 const pid_t pid = CPUCLOCK_PID(which_clock);
36
37 if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX)
38 return -EINVAL;
39
40 if (pid == 0)
41 return 0;
42
43 rcu_read_lock();
44 p = find_task_by_vpid(pid);
45 if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
46 same_thread_group(p, current) : has_group_leader_pid(p))) {
47 error = -EINVAL;
48 }
49 rcu_read_unlock();
50
51 return error;
52}
53
54static inline unsigned long long
55timespec_to_sample(const clockid_t which_clock, const struct timespec *tp)
56{
57 unsigned long long ret;
58
59 ret = 0; /* high half always zero when .cpu used */
60 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
61 ret = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
62 } else {
63 ret = cputime_to_expires(timespec_to_cputime(tp));
64 }
65 return ret;
66}
67
68static void sample_to_timespec(const clockid_t which_clock,
69 unsigned long long expires,
70 struct timespec *tp)
71{
72 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED)
73 *tp = ns_to_timespec(expires);
74 else
75 cputime_to_timespec((__force cputime_t)expires, tp);
76}
77
78/*
79 * Update expiry time from increment, and increase overrun count,
80 * given the current clock sample.
81 */
82static void bump_cpu_timer(struct k_itimer *timer,
83 unsigned long long now)
84{
85 int i;
86 unsigned long long delta, incr;
87
88 if (timer->it.cpu.incr == 0)
89 return;
90
91 if (now < timer->it.cpu.expires)
92 return;
93
94 incr = timer->it.cpu.incr;
95 delta = now + incr - timer->it.cpu.expires;
96
97 /* Don't use (incr*2 < delta), incr*2 might overflow. */
98 for (i = 0; incr < delta - incr; i++)
99 incr = incr << 1;
100
101 for (; i >= 0; incr >>= 1, i--) {
102 if (delta < incr)
103 continue;
104
105 timer->it.cpu.expires += incr;
106 timer->it_overrun += 1 << i;
107 delta -= incr;
108 }
109}
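/*
 * Editor's worked example (not part of the kernel source above): with
 * expires = 10, incr = 4 and now = 23, bump_cpu_timer() computes
 * delta = now + incr - expires = 17. The first loop doubles incr while
 * incr < delta - incr, stopping with incr = 16 at i = 2. The second loop
 * then peels off descending powers of two: 16 fits (expires = 26,
 * it_overrun += 4, delta = 1), while 8 and 4 do not. The result,
 * expires = 26 with 4 overruns, matches the missed expirations at
 * 10, 14, 18 and 22, and the doubling form never computes incr * 2
 * in a way that could overflow.
 */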
110
111/**
112 * task_cputime_zero - Check a task_cputime struct for all zero fields.
113 *
114 * @cputime: The struct to compare.
115 *
116 * Checks @cputime to see if all fields are zero. Returns true if all fields
117 * are zero, false if any field is nonzero.
118 */
119static inline int task_cputime_zero(const struct task_cputime *cputime)
120{
121 if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime)
122 return 1;
123 return 0;
124}
125
126static inline unsigned long long prof_ticks(struct task_struct *p)
127{
128 cputime_t utime, stime;
129
130 task_cputime(p, &utime, &stime);
131
132 return cputime_to_expires(utime + stime);
133}
134static inline unsigned long long virt_ticks(struct task_struct *p)
135{
136 cputime_t utime;
137
138 task_cputime(p, &utime, NULL);
139
140 return cputime_to_expires(utime);
141}
142
143static int
144posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
145{
146 int error = check_clock(which_clock);
147 if (!error) {
148 tp->tv_sec = 0;
149 tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
150 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
151 /*
152 * If sched_clock is using a cycle counter, its
153 * true resolution is not exported anywhere, but
154 * it is certainly much finer than 1s/HZ.
155 */
156 tp->tv_nsec = 1;
157 }
158 }
159 return error;
160}
161
162static int
163posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
164{
165 /*
166 * You can never reset a CPU clock, but we check for other errors
167 * in the call before failing with EPERM.
168 */
169 int error = check_clock(which_clock);
170 if (error == 0) {
171 error = -EPERM;
172 }
173 return error;
174}
175
176
177/*
178 * Sample a per-thread clock for the given task.
179 */
180static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
181 unsigned long long *sample)
182{
183 switch (CPUCLOCK_WHICH(which_clock)) {
184 default:
185 return -EINVAL;
186 case CPUCLOCK_PROF:
187 *sample = prof_ticks(p);
188 break;
189 case CPUCLOCK_VIRT:
190 *sample = virt_ticks(p);
191 break;
192 case CPUCLOCK_SCHED:
193 *sample = task_sched_runtime(p);
194 break;
195 }
196 return 0;
197}
198
199static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
200{
201 if (b->utime > a->utime)
202 a->utime = b->utime;
203
204 if (b->stime > a->stime)
205 a->stime = b->stime;
206
207 if (b->sum_exec_runtime > a->sum_exec_runtime)
208 a->sum_exec_runtime = b->sum_exec_runtime;
209}
210
211void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
212{
213 struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
214 struct task_cputime sum;
215 unsigned long flags;
216
217 if (!cputimer->running) {
218 /*
219 * The POSIX timer interface allows for absolute time expiry
220 * values through the TIMER_ABSTIME flag, therefore we have
221 * to synchronize the timer to the clock every time we start
222 * it.
223 */
224 thread_group_cputime(tsk, &sum);
225 raw_spin_lock_irqsave(&cputimer->lock, flags);
226 cputimer->running = 1;
227 update_gt_cputime(&cputimer->cputime, &sum);
228 } else
229 raw_spin_lock_irqsave(&cputimer->lock, flags);
230 *times = cputimer->cputime;
231 raw_spin_unlock_irqrestore(&cputimer->lock, flags);
232}
233
234/*
235 * Sample a process (thread group) clock for the given group_leader task.
236 * Must be called with task sighand lock held for safe while_each_thread()
237 * traversal.
238 */
239static int cpu_clock_sample_group(const clockid_t which_clock,
240 struct task_struct *p,
241 unsigned long long *sample)
242{
243 struct task_cputime cputime;
244
245 switch (CPUCLOCK_WHICH(which_clock)) {
246 default:
247 return -EINVAL;
248 case CPUCLOCK_PROF:
249 thread_group_cputime(p, &cputime);
250 *sample = cputime_to_expires(cputime.utime + cputime.stime);
251 break;
252 case CPUCLOCK_VIRT:
253 thread_group_cputime(p, &cputime);
254 *sample = cputime_to_expires(cputime.utime);
255 break;
256 case CPUCLOCK_SCHED:
257 thread_group_cputime(p, &cputime);
258 *sample = cputime.sum_exec_runtime;
259 break;
260 }
261 return 0;
262}
263
264static int posix_cpu_clock_get_task(struct task_struct *tsk,
265 const clockid_t which_clock,
266 struct timespec *tp)
267{
268 int err = -EINVAL;
269 unsigned long long rtn;
270
271 if (CPUCLOCK_PERTHREAD(which_clock)) {
272 if (same_thread_group(tsk, current))
273 err = cpu_clock_sample(which_clock, tsk, &rtn);
274 } else {
275 unsigned long flags;
276 struct sighand_struct *sighand;
277
278 /*
279 * while_each_thread() is not yet entirely RCU safe,
280 * keep locking the group while sampling process
281 * clock for now.
282 */
283 sighand = lock_task_sighand(tsk, &flags);
284 if (!sighand)
285 return err;
286
287 if (tsk == current || thread_group_leader(tsk))
288 err = cpu_clock_sample_group(which_clock, tsk, &rtn);
289
290 unlock_task_sighand(tsk, &flags);
291 }
292
293 if (!err)
294 sample_to_timespec(which_clock, rtn, tp);
295
296 return err;
297}
298
299
300static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
301{
302 const pid_t pid = CPUCLOCK_PID(which_clock);
303 int err = -EINVAL;
304
305 if (pid == 0) {
306 /*
307 * Special case constant value for our own clocks.
308 * We don't have to do any lookup to find ourselves.
309 */
310 err = posix_cpu_clock_get_task(current, which_clock, tp);
311 } else {
312 /*
313 * Find the given PID, and validate that the caller
314 * should be able to see it.
315 */
316 struct task_struct *p;
317 rcu_read_lock();
318 p = find_task_by_vpid(pid);
319 if (p)
320 err = posix_cpu_clock_get_task(p, which_clock, tp);
321 rcu_read_unlock();
322 }
323
324 return err;
325}
326
327
328/*
329 * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
330 * This is called from sys_timer_create() and do_cpu_nanosleep() with the
331 * new timer already all-zeros initialized.
332 */
333static int posix_cpu_timer_create(struct k_itimer *new_timer)
334{
335 int ret = 0;
336 const pid_t pid = CPUCLOCK_PID(new_timer->it_clock);
337 struct task_struct *p;
338
339 if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX)
340 return -EINVAL;
341
342 INIT_LIST_HEAD(&new_timer->it.cpu.entry);
343
344 rcu_read_lock();
345 if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
346 if (pid == 0) {
347 p = current;
348 } else {
349 p = find_task_by_vpid(pid);
350 if (p && !same_thread_group(p, current))
351 p = NULL;
352 }
353 } else {
354 if (pid == 0) {
355 p = current->group_leader;
356 } else {
357 p = find_task_by_vpid(pid);
358 if (p && !has_group_leader_pid(p))
359 p = NULL;
360 }
361 }
362 new_timer->it.cpu.task = p;
363 if (p) {
364 get_task_struct(p);
365 } else {
366 ret = -EINVAL;
367 }
368 rcu_read_unlock();
369
370 return ret;
371}
372
373/*
374 * Clean up a CPU-clock timer that is about to be destroyed.
375 * This is called from timer deletion with the timer already locked.
376 * If we return TIMER_RETRY, it's necessary to release the timer's lock
377 * and try again. (This happens when the timer is in the middle of firing.)
378 */
379static int posix_cpu_timer_del(struct k_itimer *timer)
380{
381 int ret = 0;
382 unsigned long flags;
383 struct sighand_struct *sighand;
384 struct task_struct *p = timer->it.cpu.task;
385
386 WARN_ON_ONCE(p == NULL);
387
388 /*
389 * Protect against sighand release/switch in exit/exec and process/
390 * thread timer list entry concurrent read/writes.
391 */
392 sighand = lock_task_sighand(p, &flags);
393 if (unlikely(sighand == NULL)) {
394 /*
395 * We raced with the reaping of the task.
396 * The deletion should have cleared us off the list.
397 */
398 WARN_ON_ONCE(!list_empty(&timer->it.cpu.entry));
399 } else {
400 if (timer->it.cpu.firing)
401 ret = TIMER_RETRY;
402 else
403 list_del(&timer->it.cpu.entry);
404
405 unlock_task_sighand(p, &flags);
406 }
407
408 if (!ret)
409 put_task_struct(p);
410
411 return ret;
412}
413
414static void cleanup_timers_list(struct list_head *head)
415{
416 struct cpu_timer_list *timer, *next;
417
418 list_for_each_entry_safe(timer, next, head, entry)
419 list_del_init(&timer->entry);
420}
421
422/*
423 * Clean out CPU timers still ticking when a thread exits: the timers
424 * are simply unlinked from the per-thread timer lists, so nothing is
425 * left pointing at the task once it is reaped.
426 * This must be called with the siglock held.
427 */
428static void cleanup_timers(struct list_head *head)
429{
430 cleanup_timers_list(head);
431 cleanup_timers_list(++head);
432 cleanup_timers_list(++head);
433}
434
435/*
436 * These are both called with the siglock held, when the current thread
437 * is being reaped. When the final (leader) thread in the group is reaped,
438 * posix_cpu_timers_exit_group will be called after posix_cpu_timers_exit.
439 */
440void posix_cpu_timers_exit(struct task_struct *tsk)
441{
442 add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
443 sizeof(unsigned long long));
444 cleanup_timers(tsk->cpu_timers);
445
446}
447void posix_cpu_timers_exit_group(struct task_struct *tsk)
448{
449 cleanup_timers(tsk->signal->cpu_timers);
450}
451
452static inline int expires_gt(cputime_t expires, cputime_t new_exp)
453{
454 return expires == 0 || expires > new_exp;
455}
456
457/*
458 * Insert the timer on the appropriate list before any timers that
459 * expire later. This must be called with the sighand lock held.
460 */
461static void arm_timer(struct k_itimer *timer)
462{
463 struct task_struct *p = timer->it.cpu.task;
464 struct list_head *head, *listpos;
465 struct task_cputime *cputime_expires;
466 struct cpu_timer_list *const nt = &timer->it.cpu;
467 struct cpu_timer_list *next;
468
469 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
470 head = p->cpu_timers;
471 cputime_expires = &p->cputime_expires;
472 } else {
473 head = p->signal->cpu_timers;
474 cputime_expires = &p->signal->cputime_expires;
475 }
476 head += CPUCLOCK_WHICH(timer->it_clock);
477
478 listpos = head;
479 list_for_each_entry(next, head, entry) {
480 if (nt->expires < next->expires)
481 break;
482 listpos = &next->entry;
483 }
484 list_add(&nt->entry, listpos);
485
486 if (listpos == head) {
487 unsigned long long exp = nt->expires;
488
489 /*
490 * We are the new earliest-expiring POSIX 1.b timer, hence
491 * we need to update the expiration cache. Take into account that
492 * for process timers we share expiration cache with itimers
493 * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
494 */
495
496 switch (CPUCLOCK_WHICH(timer->it_clock)) {
497 case CPUCLOCK_PROF:
498 if (expires_gt(cputime_expires->prof_exp, expires_to_cputime(exp)))
499 cputime_expires->prof_exp = expires_to_cputime(exp);
500 break;
501 case CPUCLOCK_VIRT:
502 if (expires_gt(cputime_expires->virt_exp, expires_to_cputime(exp)))
503 cputime_expires->virt_exp = expires_to_cputime(exp);
504 break;
505 case CPUCLOCK_SCHED:
506 if (cputime_expires->sched_exp == 0 ||
507 cputime_expires->sched_exp > exp)
508 cputime_expires->sched_exp = exp;
509 break;
510 }
511 }
512}
513
514/*
515 * The timer is locked, fire it and arrange for its reload.
516 */
517static void cpu_timer_fire(struct k_itimer *timer)
518{
519 if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
520 /*
521 * The user doesn't want any signal.
522 */
523 timer->it.cpu.expires = 0;
524 } else if (unlikely(timer->sigq == NULL)) {
525 /*
526 * This is a special case for clock_nanosleep,
527 * not a normal timer from sys_timer_create.
528 */
529 wake_up_process(timer->it_process);
530 timer->it.cpu.expires = 0;
531 } else if (timer->it.cpu.incr == 0) {
532 /*
533 * One-shot timer. Clear it as soon as it's fired.
534 */
535 posix_timer_event(timer, 0);
536 timer->it.cpu.expires = 0;
537 } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
538 /*
539 * The signal did not get queued because the signal
540 * was ignored, so we won't get any callback to
541 * reload the timer. But we need to keep it
542 * ticking in case the signal is deliverable next time.
543 */
544 posix_cpu_timer_schedule(timer);
545 }
546}
547
548/*
549 * Sample a process (thread group) timer for the given group_leader task.
550 * Must be called with task sighand lock held for safe while_each_thread()
551 * traversal.
552 */
553static int cpu_timer_sample_group(const clockid_t which_clock,
554 struct task_struct *p,
555 unsigned long long *sample)
556{
557 struct task_cputime cputime;
558
559 thread_group_cputimer(p, &cputime);
560 switch (CPUCLOCK_WHICH(which_clock)) {
561 default:
562 return -EINVAL;
563 case CPUCLOCK_PROF:
564 *sample = cputime_to_expires(cputime.utime + cputime.stime);
565 break;
566 case CPUCLOCK_VIRT:
567 *sample = cputime_to_expires(cputime.utime);
568 break;
569 case CPUCLOCK_SCHED:
570 *sample = cputime.sum_exec_runtime + task_delta_exec(p);
571 break;
572 }
573 return 0;
574}
575
576#ifdef CONFIG_NO_HZ_FULL
577static void nohz_kick_work_fn(struct work_struct *work)
578{
579 tick_nohz_full_kick_all();
580}
581
582static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);
583
584/*
585 * We need the IPIs to be sent from sane process context.
586 * The posix cpu timers are always set with irqs disabled.
587 */
588static void posix_cpu_timer_kick_nohz(void)
589{
590 if (context_tracking_is_enabled())
591 schedule_work(&nohz_kick_work);
592}
593
594bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
595{
596 if (!task_cputime_zero(&tsk->cputime_expires))
597 return false;
598
599 if (tsk->signal->cputimer.running)
600 return false;
601
602 return true;
603}
604#else
605static inline void posix_cpu_timer_kick_nohz(void) { }
606#endif
607
608/*
609 * Guts of sys_timer_settime for CPU timers.
610 * This is called with the timer locked and interrupts disabled.
611 * If we return TIMER_RETRY, it's necessary to release the timer's lock
612 * and try again. (This happens when the timer is in the middle of firing.)
613 */
614static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
615 struct itimerspec *new, struct itimerspec *old)
616{
617 unsigned long flags;
618 struct sighand_struct *sighand;
619 struct task_struct *p = timer->it.cpu.task;
620 unsigned long long old_expires, new_expires, old_incr, val;
621 int ret;
622
623 WARN_ON_ONCE(p == NULL);
624
625 new_expires = timespec_to_sample(timer->it_clock, &new->it_value);
626
627 /*
628 * Protect against sighand release/switch in exit/exec and p->cpu_timers
629 * and p->signal->cpu_timers read/write in arm_timer()
630 */
631 sighand = lock_task_sighand(p, &flags);
632 /*
633 * If p has just been reaped, we can no
634 * longer get any information about it at all.
635 */
636 if (unlikely(sighand == NULL)) {
637 return -ESRCH;
638 }
639
640 /*
641 * Disarm any old timer after extracting its expiry time.
642 */
643 WARN_ON_ONCE(!irqs_disabled());
644
645 ret = 0;
646 old_incr = timer->it.cpu.incr;
647 old_expires = timer->it.cpu.expires;
648 if (unlikely(timer->it.cpu.firing)) {
649 timer->it.cpu.firing = -1;
650 ret = TIMER_RETRY;
651 } else
652 list_del_init(&timer->it.cpu.entry);
653
654 /*
655 * We need to sample the current value to convert the new
656 * value from relative to absolute, and to convert the
657 * old value from absolute to relative. To set a process
658 * timer, we need a sample to balance the thread expiry
659 * times (in arm_timer). With an absolute time, we must
660 * check if it's already passed. In short, we need a sample.
661 */
662 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
663 cpu_clock_sample(timer->it_clock, p, &val);
664 } else {
665 cpu_timer_sample_group(timer->it_clock, p, &val);
666 }
667
668 if (old) {
669 if (old_expires == 0) {
670 old->it_value.tv_sec = 0;
671 old->it_value.tv_nsec = 0;
672 } else {
673 /*
674 * Update the timer in case it has
675 * overrun already. If it has,
676 * we'll report it as having overrun
677 * and with the next reloaded timer
678 * already ticking, though we are
679 * swallowing that pending
680 * notification here to install the
681 * new setting.
682 */
683 bump_cpu_timer(timer, val);
684 if (val < timer->it.cpu.expires) {
685 old_expires = timer->it.cpu.expires - val;
686 sample_to_timespec(timer->it_clock,
687 old_expires,
688 &old->it_value);
689 } else {
690 old->it_value.tv_nsec = 1;
691 old->it_value.tv_sec = 0;
692 }
693 }
694 }
695
696 if (unlikely(ret)) {
697 /*
698 * We are colliding with the timer actually firing.
699 * Punt after filling in the timer's old value, and
700 * disable this firing since we are already reporting
701 * it as an overrun (thanks to bump_cpu_timer above).
702 */
703 unlock_task_sighand(p, &flags);
704 goto out;
705 }
706
707 if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) {
708 new_expires += val;
709 }
710
711 /*
712 * Install the new expiry time (or zero).
713 * For a timer with no notification action, we don't actually
714 * arm the timer (we'll just fake it for timer_gettime).
715 */
716 timer->it.cpu.expires = new_expires;
717 if (new_expires != 0 && val < new_expires) {
718 arm_timer(timer);
719 }
720
721 unlock_task_sighand(p, &flags);
722 /*
723 * Install the new reload setting, and
724 * set up the signal and overrun bookkeeping.
725 */
726 timer->it.cpu.incr = timespec_to_sample(timer->it_clock,
727 &new->it_interval);
728
729 /*
730 * This acts as a modification timestamp for the timer,
731 * so any automatic reload attempt will punt on seeing
732 * that we have reset the timer manually.
733 */
734 timer->it_requeue_pending = (timer->it_requeue_pending + 2) &
735 ~REQUEUE_PENDING;
736 timer->it_overrun_last = 0;
737 timer->it_overrun = -1;
738
739 if (new_expires != 0 && !(val < new_expires)) {
740 /*
741 * The designated time already passed, so we notify
742 * immediately, even if the thread never runs to
743 * accumulate more time on this clock.
744 */
745 cpu_timer_fire(timer);
746 }
747
748 ret = 0;
749 out:
750 if (old) {
751 sample_to_timespec(timer->it_clock,
752 old_incr, &old->it_interval);
753 }
754 if (!ret)
755 posix_cpu_timer_kick_nohz();
756 return ret;
757}
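/*
 * Editor's illustration (not part of the kernel source above): the
 * user-space path that ends up in posix_cpu_timer_create() and
 * posix_cpu_timer_set(). A process-wide CPU-time timer delivers SIGPROF
 * after the thread group consumes two more seconds of CPU time, then
 * every further 500ms of CPU. Names below are illustrative only; link
 * with -lrt on older glibc.
 */
#include <signal.h>
#include <time.h>

static timer_t arm_cpu_profiling_timer(void)
{
	struct sigevent sev = {
		.sigev_notify = SIGEV_SIGNAL,
		.sigev_signo  = SIGPROF,
	};
	struct itimerspec its = {
		.it_value    = { .tv_sec = 2, .tv_nsec = 0 },
		.it_interval = { .tv_sec = 0, .tv_nsec = 500 * 1000 * 1000 },
	};
	timer_t id;

	timer_create(CLOCK_PROCESS_CPUTIME_ID, &sev, &id);
	timer_settime(id, 0, &its, NULL);	/* flags == 0: relative expiry */
	return id;
}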
758
759static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
760{
761 unsigned long long now;
762 struct task_struct *p = timer->it.cpu.task;
763
764 WARN_ON_ONCE(p == NULL);
765
766 /*
767 * Easy part: convert the reload time.
768 */
769 sample_to_timespec(timer->it_clock,
770 timer->it.cpu.incr, &itp->it_interval);
771
772 if (timer->it.cpu.expires == 0) { /* Timer not armed at all. */
773 itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
774 return;
775 }
776
777 /*
778 * Sample the clock to take the difference with the expiry time.
779 */
780 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
781 cpu_clock_sample(timer->it_clock, p, &now);
782 } else {
783 struct sighand_struct *sighand;
784 unsigned long flags;
785
786 /*
787 * Protect against sighand release/switch in exit/exec and
788 * also make timer sampling safe if it ends up calling
789 * thread_group_cputime().
790 */
791 sighand = lock_task_sighand(p, &flags);
792 if (unlikely(sighand == NULL)) {
793 /*
794 * The process has been reaped.
795 * We can't even collect a sample any more.
796 * Call the timer disarmed, nothing else to do.
797 */
798 timer->it.cpu.expires = 0;
799 sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
800 &itp->it_value);
801 } else {
802 cpu_timer_sample_group(timer->it_clock, p, &now);
803 unlock_task_sighand(p, &flags);
804 }
805 }
806
807 if (now < timer->it.cpu.expires) {
808 sample_to_timespec(timer->it_clock,
809 timer->it.cpu.expires - now,
810 &itp->it_value);
811 } else {
812 /*
813 * The timer should have expired already, but the firing
814 * hasn't taken place yet. Say it's just about to expire.
815 */
816 itp->it_value.tv_nsec = 1;
817 itp->it_value.tv_sec = 0;
818 }
819}
820
821static unsigned long long
822check_timers_list(struct list_head *timers,
823 struct list_head *firing,
824 unsigned long long curr)
825{
826 int maxfire = 20;
827
828 while (!list_empty(timers)) {
829 struct cpu_timer_list *t;
830
831 t = list_first_entry(timers, struct cpu_timer_list, entry);
832
833 if (!--maxfire || curr < t->expires)
834 return t->expires;
835
836 t->firing = 1;
837 list_move_tail(&t->entry, firing);
838 }
839
840 return 0;
841}
842
843/*
844 * Check for any per-thread CPU timers that have fired and move them off
845 * the tsk->cpu_timers[N] list onto the firing list. Here we update the
846 * tsk->it_*_expires values to reflect the remaining thread CPU timers.
847 */
848static void check_thread_timers(struct task_struct *tsk,
849 struct list_head *firing)
850{
851 struct list_head *timers = tsk->cpu_timers;
852 struct signal_struct *const sig = tsk->signal;
853 struct task_cputime *tsk_expires = &tsk->cputime_expires;
854 unsigned long long expires;
855 unsigned long soft;
856
857 expires = check_timers_list(timers, firing, prof_ticks(tsk));
858 tsk_expires->prof_exp = expires_to_cputime(expires);
859
860 expires = check_timers_list(++timers, firing, virt_ticks(tsk));
861 tsk_expires->virt_exp = expires_to_cputime(expires);
862
863 tsk_expires->sched_exp = check_timers_list(++timers, firing,
864 tsk->se.sum_exec_runtime);
865
866 /*
867 * Check for the special case thread timers.
868 */
869 soft = ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur);
870 if (soft != RLIM_INFINITY) {
871 unsigned long hard =
872 ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max);
873
874 if (hard != RLIM_INFINITY &&
875 tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
876 /*
877 * At the hard limit, we just die.
878 * No need to calculate anything else now.
879 */
880 __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
881 return;
882 }
883 if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
884 /*
885 * At the soft limit, send a SIGXCPU every second.
886 */
887 if (soft < hard) {
888 soft += USEC_PER_SEC;
889 sig->rlim[RLIMIT_RTTIME].rlim_cur = soft;
890 }
891 printk(KERN_INFO
892 "RT Watchdog Timeout: %s[%d]\n",
893 tsk->comm, task_pid_nr(tsk));
894 __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
895 }
896 }
897}
898
899static void stop_process_timers(struct signal_struct *sig)
900{
901 struct thread_group_cputimer *cputimer = &sig->cputimer;
902 unsigned long flags;
903
904 raw_spin_lock_irqsave(&cputimer->lock, flags);
905 cputimer->running = 0;
906 raw_spin_unlock_irqrestore(&cputimer->lock, flags);
907}
908
909static u32 onecputick;
910
911static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
912 unsigned long long *expires,
913 unsigned long long cur_time, int signo)
914{
915 if (!it->expires)
916 return;
917
918 if (cur_time >= it->expires) {
919 if (it->incr) {
920 it->expires += it->incr;
921 it->error += it->incr_error;
922 if (it->error >= onecputick) {
923 it->expires -= cputime_one_jiffy;
924 it->error -= onecputick;
925 }
926 } else {
927 it->expires = 0;
928 }
929
930 trace_itimer_expire(signo == SIGPROF ?
931 ITIMER_PROF : ITIMER_VIRTUAL,
932 tsk->signal->leader_pid, cur_time);
933 __group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
934 }
935
936 if (it->expires && (!*expires || it->expires < *expires)) {
937 *expires = it->expires;
938 }
939}
940
941/*
942 * Check for any process-wide (thread group) CPU timers that have fired
943 * and move them off the tsk->signal->cpu_timers[N] lists onto the firing
944 * list. Per-thread timers have already been taken off.
945 */
946static void check_process_timers(struct task_struct *tsk,
947 struct list_head *firing)
948{
949 struct signal_struct *const sig = tsk->signal;
950 unsigned long long utime, ptime, virt_expires, prof_expires;
951 unsigned long long sum_sched_runtime, sched_expires;
952 struct list_head *timers = sig->cpu_timers;
953 struct task_cputime cputime;
954 unsigned long soft;
955
956 /*
957 * Collect the current process totals.
958 */
959 thread_group_cputimer(tsk, &cputime);
960 utime = cputime_to_expires(cputime.utime);
961 ptime = utime + cputime_to_expires(cputime.stime);
962 sum_sched_runtime = cputime.sum_exec_runtime;
963
964 prof_expires = check_timers_list(timers, firing, ptime);
965 virt_expires = check_timers_list(++timers, firing, utime);
966 sched_expires = check_timers_list(++timers, firing, sum_sched_runtime);
967
968 /*
969 * Check for the special case process timers.
970 */
971 check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime,
972 SIGPROF);
973 check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime,
974 SIGVTALRM);
975 soft = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
976 if (soft != RLIM_INFINITY) {
977 unsigned long psecs = cputime_to_secs(ptime);
978 unsigned long hard =
979 ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_max);
980 cputime_t x;
981 if (psecs >= hard) {
982 /*
983 * At the hard limit, we just die.
984 * No need to calculate anything else now.
985 */
986 __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
987 return;
988 }
989 if (psecs >= soft) {
990 /*
991 * At the soft limit, send a SIGXCPU every second.
992 */
993 __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
994 if (soft < hard) {
995 soft++;
996 sig->rlim[RLIMIT_CPU].rlim_cur = soft;
997 }
998 }
999 x = secs_to_cputime(soft);
1000 if (!prof_expires || x < prof_expires) {
1001 prof_expires = x;
1002 }
1003 }
1004
1005 sig->cputime_expires.prof_exp = expires_to_cputime(prof_expires);
1006 sig->cputime_expires.virt_exp = expires_to_cputime(virt_expires);
1007 sig->cputime_expires.sched_exp = sched_expires;
1008 if (task_cputime_zero(&sig->cputime_expires))
1009 stop_process_timers(sig);
1010}
1011
1012/*
1013 * This is called from the signal code (via do_schedule_next_timer)
1014 * when the last timer signal was delivered and we have to reload the timer.
1015 */
1016void posix_cpu_timer_schedule(struct k_itimer *timer)
1017{
1018 struct sighand_struct *sighand;
1019 unsigned long flags;
1020 struct task_struct *p = timer->it.cpu.task;
1021 unsigned long long now;
1022
1023 WARN_ON_ONCE(p == NULL);
1024
1025 /*
1026 * Fetch the current sample and update the timer's expiry time.
1027 */
1028 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
1029 cpu_clock_sample(timer->it_clock, p, &now);
1030 bump_cpu_timer(timer, now);
1031 if (unlikely(p->exit_state))
1032 goto out;
1033
1034 /* Protect timer list r/w in arm_timer() */
1035 sighand = lock_task_sighand(p, &flags);
1036 if (!sighand)
1037 goto out;
1038 } else {
1039 /*
1040 * Protect arm_timer() and timer sampling in case of call to
1041 * thread_group_cputime().
1042 */
1043 sighand = lock_task_sighand(p, &flags);
1044 if (unlikely(sighand == NULL)) {
1045 /*
1046 * The process has been reaped.
1047 * We can't even collect a sample any more.
1048 */
1049 timer->it.cpu.expires = 0;
1050 goto out;
1051 } else if (unlikely(p->exit_state) && thread_group_empty(p)) {
1052 unlock_task_sighand(p, &flags);
1053 /* Optimization: if the process is dying, no need to rearm */
1054 goto out;
1055 }
1056 cpu_timer_sample_group(timer->it_clock, p, &now);
1057 bump_cpu_timer(timer, now);
1058 /* Leave the sighand locked for the call below. */
1059 }
1060
1061 /*
1062 * Now re-arm for the new expiry time.
1063 */
1064 WARN_ON_ONCE(!irqs_disabled());
1065 arm_timer(timer);
1066 unlock_task_sighand(p, &flags);
1067
1068 /* Kick full dynticks CPUs in case they need to tick on the new timer */
1069 posix_cpu_timer_kick_nohz();
1070out:
1071 timer->it_overrun_last = timer->it_overrun;
1072 timer->it_overrun = -1;
1073 ++timer->it_requeue_pending;
1074}
1075
1076/**
1077 * task_cputime_expired - Compare two task_cputime entities.
1078 *
1079 * @sample: The task_cputime structure to be checked for expiration.
1080 * @expires: Expiration times, against which @sample will be checked.
1081 *
1082 * Checks @sample against @expires to see if any field of @sample has expired.
1083 * Returns true if any field of the former is greater than the corresponding
1084 * field of the latter if the latter field is set. Otherwise returns false.
1085 */
1086static inline int task_cputime_expired(const struct task_cputime *sample,
1087 const struct task_cputime *expires)
1088{
1089 if (expires->utime && sample->utime >= expires->utime)
1090 return 1;
1091 if (expires->stime && sample->utime + sample->stime >= expires->stime)
1092 return 1;
1093 if (expires->sum_exec_runtime != 0 &&
1094 sample->sum_exec_runtime >= expires->sum_exec_runtime)
1095 return 1;
1096 return 0;
1097}
1098
1099/**
1100 * fastpath_timer_check - POSIX CPU timers fast path.
1101 *
1102 * @tsk: The task (thread) being checked.
1103 *
1104 * Check the task and thread group timers. If both are zero (there are no
1105 * timers set) return false. Otherwise snapshot the task and thread group
1106 * timers and compare them with the corresponding expiration times. Return
1107 * true if a timer has expired, else return false.
1108 */
1109static inline int fastpath_timer_check(struct task_struct *tsk)
1110{
1111 struct signal_struct *sig;
1112 cputime_t utime, stime;
1113
1114 task_cputime(tsk, &utime, &stime);
1115
1116 if (!task_cputime_zero(&tsk->cputime_expires)) {
1117 struct task_cputime task_sample = {
1118 .utime = utime,
1119 .stime = stime,
1120 .sum_exec_runtime = tsk->se.sum_exec_runtime
1121 };
1122
1123 if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
1124 return 1;
1125 }
1126
1127 sig = tsk->signal;
1128 if (sig->cputimer.running) {
1129 struct task_cputime group_sample;
1130
1131 raw_spin_lock(&sig->cputimer.lock);
1132 group_sample = sig->cputimer.cputime;
1133 raw_spin_unlock(&sig->cputimer.lock);
1134
1135 if (task_cputime_expired(&group_sample, &sig->cputime_expires))
1136 return 1;
1137 }
1138
1139 return 0;
1140}
1141
1142/*
1143 * This is called from the timer interrupt handler. The irq handler has
1144 * already updated our counts. We need to check if any timers fire now.
1145 * Interrupts are disabled.
1146 */
1147void run_posix_cpu_timers(struct task_struct *tsk)
1148{
1149 LIST_HEAD(firing);
1150 struct k_itimer *timer, *next;
1151 unsigned long flags;
1152
1153 WARN_ON_ONCE(!irqs_disabled());
1154
1155 /*
1156 * The fast path checks that there are no expired thread or thread
1157 * group timers. If that's so, just return.
1158 */
1159 if (!fastpath_timer_check(tsk))
1160 return;
1161
1162 if (!lock_task_sighand(tsk, &flags))
1163 return;
1164 /*
1165 * Here we take all the timers that are firing off the
1166 * tsk->signal->cpu_timers[N] and tsk->cpu_timers[N] lists
1167 * and put them on the firing list.
1168 */
1169 check_thread_timers(tsk, &firing);
1170 /*
1171 * If there are any active process wide timers (POSIX 1.b, itimers,
1172 * RLIMIT_CPU) cputimer must be running.
1173 */
1174 if (tsk->signal->cputimer.running)
1175 check_process_timers(tsk, &firing);
1176
1177 /*
1178 * We must release these locks before taking any timer's lock.
1179 * There is a potential race with timer deletion here, as the
1180 * siglock now protects our private firing list. We have set
1181 * the firing flag in each timer, so that a deletion attempt
1182 * that gets the timer lock before we do will give it up and
1183 * spin until we've taken care of that timer below.
1184 */
1185 unlock_task_sighand(tsk, &flags);
1186
1187 /*
1188 * Now that all the timers on our list have the firing flag,
1189 * no one will touch their list entries but us. We'll take
1190 * each timer's lock before clearing its firing flag, so no
1191 * timer call will interfere.
1192 */
1193 list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
1194 int cpu_firing;
1195
1196 spin_lock(&timer->it_lock);
1197 list_del_init(&timer->it.cpu.entry);
1198 cpu_firing = timer->it.cpu.firing;
1199 timer->it.cpu.firing = 0;
1200 /*
1201 * The firing flag is -1 if we collided with a reset
1202 * of the timer, which already reported this
1203 * almost-firing as an overrun. So don't generate an event.
1204 */
1205 if (likely(cpu_firing >= 0))
1206 cpu_timer_fire(timer);
1207 spin_unlock(&timer->it_lock);
1208 }
1209}
1210
1211/*
1212 * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
1213 * The tsk->sighand->siglock must be held by the caller.
1214 */
1215void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1216 cputime_t *newval, cputime_t *oldval)
1217{
1218 unsigned long long now;
1219
1220 WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED);
1221 cpu_timer_sample_group(clock_idx, tsk, &now);
1222
1223 if (oldval) {
1224 /*
1225 * We are setting an itimer. The *oldval argument is absolute and we
1226 * update it to be relative; the *newval argument is relative and we update
1227 * it to be absolute.
1228 */
1229 if (*oldval) {
1230 if (*oldval <= now) {
1231 /* Just about to fire. */
1232 *oldval = cputime_one_jiffy;
1233 } else {
1234 *oldval -= now;
1235 }
1236 }
1237
1238 if (!*newval)
1239 goto out;
1240 *newval += now;
1241 }
1242
1243 /*
1244 * Update the expiration cache if we are the earliest timer, or if the
1245 * RLIMIT_CPU limit is earlier than the prof_exp cpu timer expiry.
1246 */
1247 switch (clock_idx) {
1248 case CPUCLOCK_PROF:
1249 if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
1250 tsk->signal->cputime_expires.prof_exp = *newval;
1251 break;
1252 case CPUCLOCK_VIRT:
1253 if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
1254 tsk->signal->cputime_expires.virt_exp = *newval;
1255 break;
1256 }
1257out:
1258 posix_cpu_timer_kick_nohz();
1259}
1260
1261static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
1262 struct timespec *rqtp, struct itimerspec *it)
1263{
1264 struct k_itimer timer;
1265 int error;
1266
1267 /*
1268 * Set up a temporary timer and then wait for it to go off.
1269 */
1270 memset(&timer, 0, sizeof timer);
1271 spin_lock_init(&timer.it_lock);
1272 timer.it_clock = which_clock;
1273 timer.it_overrun = -1;
1274 error = posix_cpu_timer_create(&timer);
1275 timer.it_process = current;
1276 if (!error) {
1277 static struct itimerspec zero_it;
1278
1279 memset(it, 0, sizeof *it);
1280 it->it_value = *rqtp;
1281
1282 spin_lock_irq(&timer.it_lock);
1283 error = posix_cpu_timer_set(&timer, flags, it, NULL);
1284 if (error) {
1285 spin_unlock_irq(&timer.it_lock);
1286 return error;
1287 }
1288
1289 while (!signal_pending(current)) {
1290 if (timer.it.cpu.expires == 0) {
1291 /*
1292 * Our timer fired and was reset; the
1293 * deletion below cannot fail.
1294 */
1295 posix_cpu_timer_del(&timer);
1296 spin_unlock_irq(&timer.it_lock);
1297 return 0;
1298 }
1299
1300 /*
1301 * Block until cpu_timer_fire (or a signal) wakes us.
1302 */
1303 __set_current_state(TASK_INTERRUPTIBLE);
1304 spin_unlock_irq(&timer.it_lock);
1305 schedule();
1306 spin_lock_irq(&timer.it_lock);
1307 }
1308
1309 /*
1310 * We were interrupted by a signal.
1311 */
1312 sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp);
1313 error = posix_cpu_timer_set(&timer, 0, &zero_it, it);
1314 if (!error) {
1315 /*
1316 * The timer is now unarmed; deletion cannot fail.
1317 */
1318 posix_cpu_timer_del(&timer);
1319 }
1320 spin_unlock_irq(&timer.it_lock);
1321
1322 while (error == TIMER_RETRY) {
1323 /*
1324 * We need to handle the case where the timer was or is in the
1325 * middle of firing. In all other cases the resources have
1326 * already been freed.
1327 */
1328 spin_lock_irq(&timer.it_lock);
1329 error = posix_cpu_timer_del(&timer);
1330 spin_unlock_irq(&timer.it_lock);
1331 }
1332
1333 if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) {
1334 /*
1335 * It actually did fire already.
1336 */
1337 return 0;
1338 }
1339
1340 error = -ERESTART_RESTARTBLOCK;
1341 }
1342
1343 return error;
1344}
1345
1346static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
1347
1348static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
1349 struct timespec *rqtp, struct timespec __user *rmtp)
1350{
1351 struct restart_block *restart_block =
1352 &current_thread_info()->restart_block;
1353 struct itimerspec it;
1354 int error;
1355
1356 /*
1357 * Diagnose required errors first.
1358 */
1359 if (CPUCLOCK_PERTHREAD(which_clock) &&
1360 (CPUCLOCK_PID(which_clock) == 0 ||
1361 CPUCLOCK_PID(which_clock) == current->pid))
1362 return -EINVAL;
1363
1364 error = do_cpu_nanosleep(which_clock, flags, rqtp, &it);
1365
1366 if (error == -ERESTART_RESTARTBLOCK) {
1367
1368 if (flags & TIMER_ABSTIME)
1369 return -ERESTARTNOHAND;
1370 /*
1371 * Report back to the user the time still remaining.
1372 */
1373 if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
1374 return -EFAULT;
1375
1376 restart_block->fn = posix_cpu_nsleep_restart;
1377 restart_block->nanosleep.clockid = which_clock;
1378 restart_block->nanosleep.rmtp = rmtp;
1379 restart_block->nanosleep.expires = timespec_to_ns(rqtp);
1380 }
1381 return error;
1382}
1383
1384static long posix_cpu_nsleep_restart(struct restart_block *restart_block)
1385{
1386 clockid_t which_clock = restart_block->nanosleep.clockid;
1387 struct timespec t;
1388 struct itimerspec it;
1389 int error;
1390
1391 t = ns_to_timespec(restart_block->nanosleep.expires);
1392
1393 error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it);
1394
1395 if (error == -ERESTART_RESTARTBLOCK) {
1396 struct timespec __user *rmtp = restart_block->nanosleep.rmtp;
1397 /*
1398 * Report back to the user the time still remaining.
1399 */
1400 if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
1401 return -EFAULT;
1402
1403 restart_block->nanosleep.expires = timespec_to_ns(&t);
1404 }
1405 return error;
1406
1407}
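/*
 * For illustration only (not part of this file): a user-space sketch of
 * the path above -- a relative clock_nanosleep() on the process CPU
 * clock. A second thread has to burn CPU, otherwise the process clock
 * never advances and the sleep never ends. Assumes the librt/pthread
 * wrappers (link with -lrt -lpthread).
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>

static void *spin(void *unused)
{
	(void)unused;
	for (;;)
		;	/* consume CPU so CLOCK_PROCESS_CPUTIME_ID advances */
}

int main(void)
{
	struct timespec req = { .tv_sec = 1, .tv_nsec = 0 }, rem;
	pthread_t worker;
	int err;

	pthread_create(&worker, NULL, spin, NULL);

	/* Returns once the whole process has consumed one more CPU second. */
	while ((err = clock_nanosleep(CLOCK_PROCESS_CPUTIME_ID, 0,
				      &req, &rem)) == EINTR)
		req = rem;	/* interrupted: retry with the remainder */

	printf("done, err=%d\n", err);
	return err ? 1 : 0;
}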
1408
1409#define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
1410#define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED)
1411
1412static int process_cpu_clock_getres(const clockid_t which_clock,
1413 struct timespec *tp)
1414{
1415 return posix_cpu_clock_getres(PROCESS_CLOCK, tp);
1416}
1417static int process_cpu_clock_get(const clockid_t which_clock,
1418 struct timespec *tp)
1419{
1420 return posix_cpu_clock_get(PROCESS_CLOCK, tp);
1421}
1422static int process_cpu_timer_create(struct k_itimer *timer)
1423{
1424 timer->it_clock = PROCESS_CLOCK;
1425 return posix_cpu_timer_create(timer);
1426}
1427static int process_cpu_nsleep(const clockid_t which_clock, int flags,
1428 struct timespec *rqtp,
1429 struct timespec __user *rmtp)
1430{
1431 return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp);
1432}
1433static long process_cpu_nsleep_restart(struct restart_block *restart_block)
1434{
1435 return -EINVAL;
1436}
1437static int thread_cpu_clock_getres(const clockid_t which_clock,
1438 struct timespec *tp)
1439{
1440 return posix_cpu_clock_getres(THREAD_CLOCK, tp);
1441}
1442static int thread_cpu_clock_get(const clockid_t which_clock,
1443 struct timespec *tp)
1444{
1445 return posix_cpu_clock_get(THREAD_CLOCK, tp);
1446}
1447static int thread_cpu_timer_create(struct k_itimer *timer)
1448{
1449 timer->it_clock = THREAD_CLOCK;
1450 return posix_cpu_timer_create(timer);
1451}
1452
1453struct k_clock clock_posix_cpu = {
1454 .clock_getres = posix_cpu_clock_getres,
1455 .clock_set = posix_cpu_clock_set,
1456 .clock_get = posix_cpu_clock_get,
1457 .timer_create = posix_cpu_timer_create,
1458 .nsleep = posix_cpu_nsleep,
1459 .nsleep_restart = posix_cpu_nsleep_restart,
1460 .timer_set = posix_cpu_timer_set,
1461 .timer_del = posix_cpu_timer_del,
1462 .timer_get = posix_cpu_timer_get,
1463};
1464
1465static __init int init_posix_cpu_timers(void)
1466{
1467 struct k_clock process = {
1468 .clock_getres = process_cpu_clock_getres,
1469 .clock_get = process_cpu_clock_get,
1470 .timer_create = process_cpu_timer_create,
1471 .nsleep = process_cpu_nsleep,
1472 .nsleep_restart = process_cpu_nsleep_restart,
1473 };
1474 struct k_clock thread = {
1475 .clock_getres = thread_cpu_clock_getres,
1476 .clock_get = thread_cpu_clock_get,
1477 .timer_create = thread_cpu_timer_create,
1478 };
1479 struct timespec ts;
1480
1481 posix_timers_register_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
1482 posix_timers_register_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
1483
1484 cputime_to_timespec(cputime_one_jiffy, &ts);
1485 onecputick = ts.tv_nsec;
1486 WARN_ON(ts.tv_sec != 0);
1487
1488 return 0;
1489}
1490__initcall(init_posix_cpu_timers);
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
new file mode 100644
index 000000000000..42b463ad90f2
--- /dev/null
+++ b/kernel/time/posix-timers.c
@@ -0,0 +1,1123 @@
1/*
2 * linux/kernel/posix-timers.c
3 *
4 *
5 * 2002-10-15 Posix Clocks & timers
6 * by George Anzinger george@mvista.com
7 *
8 * Copyright (C) 2002 2003 by MontaVista Software.
9 *
10 * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
11 * Copyright (C) 2004 Boris Hu
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or (at
16 * your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful, but
19 * WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 *
27 * MontaVista Software | 1237 East Arques Avenue | Sunnyvale | CA 94085 | USA
28 */
29
30/* These are all the functions necessary to implement
31 * POSIX clocks & timers
32 */
33#include <linux/mm.h>
34#include <linux/interrupt.h>
35#include <linux/slab.h>
36#include <linux/time.h>
37#include <linux/mutex.h>
38
39#include <asm/uaccess.h>
40#include <linux/list.h>
41#include <linux/init.h>
42#include <linux/compiler.h>
43#include <linux/hash.h>
44#include <linux/posix-clock.h>
45#include <linux/posix-timers.h>
46#include <linux/syscalls.h>
47#include <linux/wait.h>
48#include <linux/workqueue.h>
49#include <linux/export.h>
50#include <linux/hashtable.h>
51
52#include "timekeeping.h"
53
54/*
55 * Management arrays for POSIX timers. Timers are kept in a static hash
56 * table with 512 entries.
57 * Timer ids are allocated by a local routine, which selects the proper
58 * hash head by a key constructed from the current->signal address and a
59 * per-signal-struct counter. This keeps timer ids unique per process,
60 * but ids may now collide between processes.
61 */
62
63/*
64 * Lets keep our timers in a slab cache :-)
65 */
66static struct kmem_cache *posix_timers_cache;
67
68static DEFINE_HASHTABLE(posix_timers_hashtable, 9);
69static DEFINE_SPINLOCK(hash_lock);
70
71/*
72 * We assume that the new SIGEV_THREAD_ID shares no bits with the other
73 * SIGEV values. The check below fails the build if this assumption breaks.
74 */
75#if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \
76 ~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD))
77#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!"
78#endif
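/*
 * Illustrative user-space sketch (assumptions only, not taken from this
 * file): because the bits are disjoint, SIGEV_THREAD_ID can be OR'ed onto
 * SIGEV_SIGNAL to direct the signal at one specific thread. The
 * sigev_notify_thread_id accessor and gettid() may need the raw
 * definitions below on older glibc.
 */
#define _GNU_SOURCE
#include <signal.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

#ifndef sigev_notify_thread_id
# define sigev_notify_thread_id _sigev_un._tid
#endif

static int create_thread_directed_timer(timer_t *id)
{
	struct sigevent sev = { 0 };

	sev.sigev_notify = SIGEV_SIGNAL | SIGEV_THREAD_ID;
	sev.sigev_signo = SIGRTMIN;
	sev.sigev_notify_thread_id = syscall(SYS_gettid);

	return timer_create(CLOCK_MONOTONIC, &sev, id);
}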
79
80/*
81 * parisc wants ENOTSUP instead of EOPNOTSUPP
82 */
83#ifndef ENOTSUP
84# define ENANOSLEEP_NOTSUP EOPNOTSUPP
85#else
86# define ENANOSLEEP_NOTSUP ENOTSUP
87#endif
88
89/*
90 * The timer ID is turned into a timer address by posix_timer_by_id().
91 * Verifying a valid ID consists of:
92 *
93 * a) checking that the lookup returns a timer rather than NULL.
94 * b) checking that the timer id matches the one in the timer itself.
95 * c) checking that the timer owner is in the caller's thread group.
96 */
97
98/*
99 * CLOCKs: The POSIX standard calls for a couple of clocks and allows us
100 * to implement others. This structure defines the various
101 * clocks.
102 *
103 * RESOLUTION: Clock resolution is used to round up timer and interval
104 * times, NOT to report clock times, which are reported with as
105 * much resolution as the system can muster. In some cases this
106 * resolution may depend on the underlying clock hardware and
107 * may not be quantifiable until run time, and only then can the
108 * necessary code be written. The standard says we should say
109 * something about this issue in the documentation...
110 *
111 * FUNCTIONS: The CLOCKs structure defines possible functions to
112 * handle various clock functions.
113 *
114 * The standard POSIX timer management code assumes the
115 * following: 1.) The k_itimer struct (sched.h) is used for
116 * the timer. 2.) The list, it_lock, it_clock, it_id and
117 * it_pid fields are not modified by timer code.
118 *
119 * Permissions: It is assumed that the clock_settime() function defined
120 * for each clock will take care of permission checks. Some
121 * clocks may be settable by any user (i.e. local process
122 * clocks), others not. Currently the only settable clock we
123 * have is CLOCK_REALTIME and its high-res counterpart, both of
124 * which we beg off on and pass to do_sys_settimeofday().
125 */
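/*
 * Illustration (user-space sketch, not from this file): clock_getres()
 * reports the rounding granularity described above, while clock_gettime()
 * still returns the best reading the clock can provide.
 */
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec res, now;

	clock_getres(CLOCK_MONOTONIC, &res);
	clock_gettime(CLOCK_MONOTONIC, &now);
	printf("resolution: %ld ns, now: %lld.%09ld s\n",
	       res.tv_nsec, (long long)now.tv_sec, now.tv_nsec);
	return 0;
}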
126
127static struct k_clock posix_clocks[MAX_CLOCKS];
128
129/*
130 * These ones are defined below.
131 */
132static int common_nsleep(const clockid_t, int flags, struct timespec *t,
133 struct timespec __user *rmtp);
134static int common_timer_create(struct k_itimer *new_timer);
135static void common_timer_get(struct k_itimer *, struct itimerspec *);
136static int common_timer_set(struct k_itimer *, int,
137 struct itimerspec *, struct itimerspec *);
138static int common_timer_del(struct k_itimer *timer);
139
140static enum hrtimer_restart posix_timer_fn(struct hrtimer *data);
141
142static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags);
143
144#define lock_timer(tid, flags) \
145({ struct k_itimer *__timr; \
146 __cond_lock(&__timr->it_lock, __timr = __lock_timer(tid, flags)); \
147 __timr; \
148})
149
150static int hash(struct signal_struct *sig, unsigned int nr)
151{
152 return hash_32(hash32_ptr(sig) ^ nr, HASH_BITS(posix_timers_hashtable));
153}
154
155static struct k_itimer *__posix_timers_find(struct hlist_head *head,
156 struct signal_struct *sig,
157 timer_t id)
158{
159 struct k_itimer *timer;
160
161 hlist_for_each_entry_rcu(timer, head, t_hash) {
162 if ((timer->it_signal == sig) && (timer->it_id == id))
163 return timer;
164 }
165 return NULL;
166}
167
168static struct k_itimer *posix_timer_by_id(timer_t id)
169{
170 struct signal_struct *sig = current->signal;
171 struct hlist_head *head = &posix_timers_hashtable[hash(sig, id)];
172
173 return __posix_timers_find(head, sig, id);
174}
175
176static int posix_timer_add(struct k_itimer *timer)
177{
178 struct signal_struct *sig = current->signal;
179 int first_free_id = sig->posix_timer_id;
180 struct hlist_head *head;
181 int ret = -ENOENT;
182
183 do {
184 spin_lock(&hash_lock);
185 head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)];
186 if (!__posix_timers_find(head, sig, sig->posix_timer_id)) {
187 hlist_add_head_rcu(&timer->t_hash, head);
188 ret = sig->posix_timer_id;
189 }
190 if (++sig->posix_timer_id < 0)
191 sig->posix_timer_id = 0;
192 if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT))
193 /* Loop over all possible ids completed */
194 ret = -EAGAIN;
195 spin_unlock(&hash_lock);
196 } while (ret == -ENOENT);
197 return ret;
198}
199
200static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
201{
202 spin_unlock_irqrestore(&timr->it_lock, flags);
203}
204
205/* Get clock_realtime */
206static int posix_clock_realtime_get(clockid_t which_clock, struct timespec *tp)
207{
208 ktime_get_real_ts(tp);
209 return 0;
210}
211
212/* Set clock_realtime */
213static int posix_clock_realtime_set(const clockid_t which_clock,
214 const struct timespec *tp)
215{
216 return do_sys_settimeofday(tp, NULL);
217}
218
219static int posix_clock_realtime_adj(const clockid_t which_clock,
220 struct timex *t)
221{
222 return do_adjtimex(t);
223}
224
225/*
226 * Get monotonic time for posix timers
227 */
228static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp)
229{
230 ktime_get_ts(tp);
231 return 0;
232}
233
234/*
235 * Get monotonic-raw time for posix timers
236 */
237static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp)
238{
239 getrawmonotonic(tp);
240 return 0;
241}
242
243
244static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec *tp)
245{
246 *tp = current_kernel_time();
247 return 0;
248}
249
250static int posix_get_monotonic_coarse(clockid_t which_clock,
251 struct timespec *tp)
252{
253 *tp = get_monotonic_coarse();
254 return 0;
255}
256
257static int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp)
258{
259 *tp = ktime_to_timespec(KTIME_LOW_RES);
260 return 0;
261}
262
263static int posix_get_boottime(const clockid_t which_clock, struct timespec *tp)
264{
265 get_monotonic_boottime(tp);
266 return 0;
267}
268
269static int posix_get_tai(clockid_t which_clock, struct timespec *tp)
270{
271 timekeeping_clocktai(tp);
272 return 0;
273}
274
275/*
276 * Initialize everything, well, just everything in Posix clocks/timers ;)
277 */
278static __init int init_posix_timers(void)
279{
280 struct k_clock clock_realtime = {
281 .clock_getres = hrtimer_get_res,
282 .clock_get = posix_clock_realtime_get,
283 .clock_set = posix_clock_realtime_set,
284 .clock_adj = posix_clock_realtime_adj,
285 .nsleep = common_nsleep,
286 .nsleep_restart = hrtimer_nanosleep_restart,
287 .timer_create = common_timer_create,
288 .timer_set = common_timer_set,
289 .timer_get = common_timer_get,
290 .timer_del = common_timer_del,
291 };
292 struct k_clock clock_monotonic = {
293 .clock_getres = hrtimer_get_res,
294 .clock_get = posix_ktime_get_ts,
295 .nsleep = common_nsleep,
296 .nsleep_restart = hrtimer_nanosleep_restart,
297 .timer_create = common_timer_create,
298 .timer_set = common_timer_set,
299 .timer_get = common_timer_get,
300 .timer_del = common_timer_del,
301 };
302 struct k_clock clock_monotonic_raw = {
303 .clock_getres = hrtimer_get_res,
304 .clock_get = posix_get_monotonic_raw,
305 };
306 struct k_clock clock_realtime_coarse = {
307 .clock_getres = posix_get_coarse_res,
308 .clock_get = posix_get_realtime_coarse,
309 };
310 struct k_clock clock_monotonic_coarse = {
311 .clock_getres = posix_get_coarse_res,
312 .clock_get = posix_get_monotonic_coarse,
313 };
314 struct k_clock clock_tai = {
315 .clock_getres = hrtimer_get_res,
316 .clock_get = posix_get_tai,
317 .nsleep = common_nsleep,
318 .nsleep_restart = hrtimer_nanosleep_restart,
319 .timer_create = common_timer_create,
320 .timer_set = common_timer_set,
321 .timer_get = common_timer_get,
322 .timer_del = common_timer_del,
323 };
324 struct k_clock clock_boottime = {
325 .clock_getres = hrtimer_get_res,
326 .clock_get = posix_get_boottime,
327 .nsleep = common_nsleep,
328 .nsleep_restart = hrtimer_nanosleep_restart,
329 .timer_create = common_timer_create,
330 .timer_set = common_timer_set,
331 .timer_get = common_timer_get,
332 .timer_del = common_timer_del,
333 };
334
335 posix_timers_register_clock(CLOCK_REALTIME, &clock_realtime);
336 posix_timers_register_clock(CLOCK_MONOTONIC, &clock_monotonic);
337 posix_timers_register_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
338 posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
339 posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
340 posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime);
341 posix_timers_register_clock(CLOCK_TAI, &clock_tai);
342
343 posix_timers_cache = kmem_cache_create("posix_timers_cache",
344 sizeof (struct k_itimer), 0, SLAB_PANIC,
345 NULL);
346 return 0;
347}
348
349__initcall(init_posix_timers);
350
351static void schedule_next_timer(struct k_itimer *timr)
352{
353 struct hrtimer *timer = &timr->it.real.timer;
354
355 if (timr->it.real.interval.tv64 == 0)
356 return;
357
358 timr->it_overrun += (unsigned int) hrtimer_forward(timer,
359 timer->base->get_time(),
360 timr->it.real.interval);
361
362 timr->it_overrun_last = timr->it_overrun;
363 timr->it_overrun = -1;
364 ++timr->it_requeue_pending;
365 hrtimer_restart(timer);
366}
367
368/*
369 * This function is exported for use by the signal delivery code. It is
370 * called just prior to the info block being released and passes that
371 * block to us. Its function is to update the overrun entry AND to
372 * restart the timer. It should only be called if the timer is to be
373 * restarted (i.e. we have flagged this in the sys_private entry of the
374 * info block).
375 *
376 * To protect against the timer going away while the interrupt is queued,
377 * we require that the it_requeue_pending flag be set.
378 */
379void do_schedule_next_timer(struct siginfo *info)
380{
381 struct k_itimer *timr;
382 unsigned long flags;
383
384 timr = lock_timer(info->si_tid, &flags);
385
386 if (timr && timr->it_requeue_pending == info->si_sys_private) {
387 if (timr->it_clock < 0)
388 posix_cpu_timer_schedule(timr);
389 else
390 schedule_next_timer(timr);
391
392 info->si_overrun += timr->it_overrun_last;
393 }
394
395 if (timr)
396 unlock_timer(timr, flags);
397}
398
399int posix_timer_event(struct k_itimer *timr, int si_private)
400{
401 struct task_struct *task;
402 int shared, ret = -1;
403 /*
404 * FIXME: if ->sigq is queued we can race with
405 * dequeue_signal()->do_schedule_next_timer().
406 *
407 * If dequeue_signal() sees the "right" value of
408 * si_sys_private it calls do_schedule_next_timer().
409 * We re-queue ->sigq and drop ->it_lock().
410 * do_schedule_next_timer() locks the timer
411 * and re-schedules it while ->sigq is pending.
412 * Not really bad, but not what we want.
413 */
414 timr->sigq->info.si_sys_private = si_private;
415
416 rcu_read_lock();
417 task = pid_task(timr->it_pid, PIDTYPE_PID);
418 if (task) {
419 shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
420 ret = send_sigqueue(timr->sigq, task, shared);
421 }
422 rcu_read_unlock();
423 /* If we failed to send the signal the timer stops. */
424 return ret > 0;
425}
426EXPORT_SYMBOL_GPL(posix_timer_event);
427
428/*
429 * This function gets called when a POSIX.1b interval timer expires. It
430 * is used as a callback from the kernel internal timer. The
431 * run_timer_list code ALWAYS calls with interrupts on.
432 *
433 * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers.
434 */
435static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
436{
437 struct k_itimer *timr;
438 unsigned long flags;
439 int si_private = 0;
440 enum hrtimer_restart ret = HRTIMER_NORESTART;
441
442 timr = container_of(timer, struct k_itimer, it.real.timer);
443 spin_lock_irqsave(&timr->it_lock, flags);
444
445 if (timr->it.real.interval.tv64 != 0)
446 si_private = ++timr->it_requeue_pending;
447
448 if (posix_timer_event(timr, si_private)) {
449 /*
450 * The signal was not sent because of SIG_IGN;
451 * we will not get a callback to restart it AND
452 * it should be restarted.
453 */
454 if (timr->it.real.interval.tv64 != 0) {
455 ktime_t now = hrtimer_cb_get_time(timer);
456
457 /*
458 * FIXME: What we really want, is to stop this
459 * timer completely and restart it in case the
460 * SIG_IGN is removed. This is a non trivial
461 * change which involves sighand locking
462 * (sigh !), which we don't want to do late in
463 * the release cycle.
464 *
465 * For now we just let timers with an interval
466 * less than a jiffie expire every jiffie to
467 * avoid softirq starvation in case of SIG_IGN
468 * and a very small interval, which would put
469 * the timer right back on the softirq pending
470 * list. By moving now ahead of time we trick
471 * hrtimer_forward() to expire the timer
472 * later, while we still maintain the overrun
473 * accuracy, but have some inconsistency in
474 * the timer_gettime() case. This is at least
475 * better than a starved softirq. A more
476 * complex fix which solves also another related
477 * inconsistency is already in the pipeline.
478 */
479#ifdef CONFIG_HIGH_RES_TIMERS
480 {
481 ktime_t kj = ktime_set(0, NSEC_PER_SEC / HZ);
482
483 if (timr->it.real.interval.tv64 < kj.tv64)
484 now = ktime_add(now, kj);
485 }
486#endif
487 timr->it_overrun += (unsigned int)
488 hrtimer_forward(timer, now,
489 timr->it.real.interval);
490 ret = HRTIMER_RESTART;
491 ++timr->it_requeue_pending;
492 }
493 }
494
495 unlock_timer(timr, flags);
496 return ret;
497}
498
499static struct pid *good_sigevent(sigevent_t * event)
500{
501 struct task_struct *rtn = current->group_leader;
502
503 if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
504 (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
505 !same_thread_group(rtn, current) ||
506 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
507 return NULL;
508
509 if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
510 ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
511 return NULL;
512
513 return task_pid(rtn);
514}
515
516void posix_timers_register_clock(const clockid_t clock_id,
517 struct k_clock *new_clock)
518{
519 if ((unsigned) clock_id >= MAX_CLOCKS) {
520 printk(KERN_WARNING "POSIX clock register failed for clock_id %d\n",
521 clock_id);
522 return;
523 }
524
525 if (!new_clock->clock_get) {
526 printk(KERN_WARNING "POSIX clock id %d lacks clock_get()\n",
527 clock_id);
528 return;
529 }
530 if (!new_clock->clock_getres) {
531 printk(KERN_WARNING "POSIX clock id %d lacks clock_getres()\n",
532 clock_id);
533 return;
534 }
535
536 posix_clocks[clock_id] = *new_clock;
537}
538EXPORT_SYMBOL_GPL(posix_timers_register_clock);
539
540static struct k_itimer * alloc_posix_timer(void)
541{
542 struct k_itimer *tmr;
543 tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL);
544 if (!tmr)
545 return tmr;
546 if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
547 kmem_cache_free(posix_timers_cache, tmr);
548 return NULL;
549 }
550 memset(&tmr->sigq->info, 0, sizeof(siginfo_t));
551 return tmr;
552}
553
554static void k_itimer_rcu_free(struct rcu_head *head)
555{
556 struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu);
557
558 kmem_cache_free(posix_timers_cache, tmr);
559}
560
561#define IT_ID_SET 1
562#define IT_ID_NOT_SET 0
563static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
564{
565 if (it_id_set) {
566 unsigned long flags;
567 spin_lock_irqsave(&hash_lock, flags);
568 hlist_del_rcu(&tmr->t_hash);
569 spin_unlock_irqrestore(&hash_lock, flags);
570 }
571 put_pid(tmr->it_pid);
572 sigqueue_free(tmr->sigq);
573 call_rcu(&tmr->it.rcu, k_itimer_rcu_free);
574}
575
576static struct k_clock *clockid_to_kclock(const clockid_t id)
577{
578 if (id < 0)
579 return (id & CLOCKFD_MASK) == CLOCKFD ?
580 &clock_posix_dynamic : &clock_posix_cpu;
581
582 if (id >= MAX_CLOCKS || !posix_clocks[id].clock_getres)
583 return NULL;
584 return &posix_clocks[id];
585}
586
587static int common_timer_create(struct k_itimer *new_timer)
588{
589 hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
590 return 0;
591}
592
593/* Create a POSIX.1b interval timer. */
594
595SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
596 struct sigevent __user *, timer_event_spec,
597 timer_t __user *, created_timer_id)
598{
599 struct k_clock *kc = clockid_to_kclock(which_clock);
600 struct k_itimer *new_timer;
601 int error, new_timer_id;
602 sigevent_t event;
603 int it_id_set = IT_ID_NOT_SET;
604
605 if (!kc)
606 return -EINVAL;
607 if (!kc->timer_create)
608 return -EOPNOTSUPP;
609
610 new_timer = alloc_posix_timer();
611 if (unlikely(!new_timer))
612 return -EAGAIN;
613
614 spin_lock_init(&new_timer->it_lock);
615 new_timer_id = posix_timer_add(new_timer);
616 if (new_timer_id < 0) {
617 error = new_timer_id;
618 goto out;
619 }
620
621 it_id_set = IT_ID_SET;
622 new_timer->it_id = (timer_t) new_timer_id;
623 new_timer->it_clock = which_clock;
624 new_timer->it_overrun = -1;
625
626 if (timer_event_spec) {
627 if (copy_from_user(&event, timer_event_spec, sizeof (event))) {
628 error = -EFAULT;
629 goto out;
630 }
631 rcu_read_lock();
632 new_timer->it_pid = get_pid(good_sigevent(&event));
633 rcu_read_unlock();
634 if (!new_timer->it_pid) {
635 error = -EINVAL;
636 goto out;
637 }
638 } else {
639 event.sigev_notify = SIGEV_SIGNAL;
640 event.sigev_signo = SIGALRM;
641 event.sigev_value.sival_int = new_timer->it_id;
642 new_timer->it_pid = get_pid(task_tgid(current));
643 }
644
645 new_timer->it_sigev_notify = event.sigev_notify;
646 new_timer->sigq->info.si_signo = event.sigev_signo;
647 new_timer->sigq->info.si_value = event.sigev_value;
648 new_timer->sigq->info.si_tid = new_timer->it_id;
649 new_timer->sigq->info.si_code = SI_TIMER;
650
651 if (copy_to_user(created_timer_id,
652 &new_timer_id, sizeof (new_timer_id))) {
653 error = -EFAULT;
654 goto out;
655 }
656
657 error = kc->timer_create(new_timer);
658 if (error)
659 goto out;
660
661 spin_lock_irq(&current->sighand->siglock);
662 new_timer->it_signal = current->signal;
663 list_add(&new_timer->list, &current->signal->posix_timers);
664 spin_unlock_irq(&current->sighand->siglock);
665
666 return 0;
667 /*
668 * In the case of the timer belonging to another task, after
669 * the task is unlocked, the timer is owned by the other task
670 * and may cease to exist at any time. Don't use or modify
671 * new_timer after the unlock call.
672 */
673out:
674 release_posix_timer(new_timer, it_id_set);
675 return error;
676}
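/*
 * For illustration (user-space sketch, not part of this file): passing a
 * NULL sigevent to timer_create() takes the default branch above, i.e.
 * SIGEV_SIGNAL with SIGALRM and the new timer id in sigev_value. Assumes
 * the librt wrapper (link with -lrt).
 */
#include <signal.h>
#include <time.h>
#include <unistd.h>

static void on_alarm(int sig, siginfo_t *si, void *uc)
{
	/* si->si_code is SI_TIMER, si->si_value.sival_int is the timer id */
	(void)sig; (void)si; (void)uc;
}

int main(void)
{
	struct itimerspec its = {
		.it_value    = { .tv_sec = 1 },	/* first expiry after 1s */
		.it_interval = { .tv_sec = 1 },	/* then every second */
	};
	struct sigaction sa;
	timer_t tid;

	sa.sa_flags = SA_SIGINFO;
	sa.sa_sigaction = on_alarm;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGALRM, &sa, NULL);

	if (timer_create(CLOCK_MONOTONIC, NULL, &tid))
		return 1;
	timer_settime(tid, 0, &its, NULL);
	pause();			/* wait for the first SIGALRM */
	timer_delete(tid);
	return 0;
}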
677
678/*
679 * Locking issues: We need to protect the result of the id lookup until
680 * we get the timer locked down so it is not deleted under us. The
681 * removal is done under the hash_lock spinlock, so we use that here to
682 * bridge the find to the timer lock. To avoid a deadlock, the timer id
683 * MUST be released without holding the timer lock.
684 */
685static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
686{
687 struct k_itimer *timr;
688
689 /*
690 * timer_t could be any type >= int and we want to make sure any
691 * @timer_id outside positive int range fails lookup.
692 */
693 if ((unsigned long long)timer_id > INT_MAX)
694 return NULL;
695
696 rcu_read_lock();
697 timr = posix_timer_by_id(timer_id);
698 if (timr) {
699 spin_lock_irqsave(&timr->it_lock, *flags);
700 if (timr->it_signal == current->signal) {
701 rcu_read_unlock();
702 return timr;
703 }
704 spin_unlock_irqrestore(&timr->it_lock, *flags);
705 }
706 rcu_read_unlock();
707
708 return NULL;
709}
710
711/*
712 * Get the time remaining on a POSIX.1b interval timer. This function
713 * is ALWAYS called with spin_lock_irq on the timer, thus it must not
714 * mess with irq.
715 *
716 * We have a couple of messes to clean up here. First there is the case
717 * of a timer that has a requeue pending. These timers should appear to
718 * be in the timer list with an expiry as if we were to requeue them
719 * now.
720 *
721 * The second issue is the SIGEV_NONE timer which may be active but is
722 * not really ever put in the timer list (to save system resources).
723 * This timer may already have expired, and if so, we expire it here.
724 * Otherwise it is treated the same as a requeue-pending timer with
725 * respect to what we report.
726 */
727static void
728common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
729{
730 ktime_t now, remaining, iv;
731 struct hrtimer *timer = &timr->it.real.timer;
732
733 memset(cur_setting, 0, sizeof(struct itimerspec));
734
735 iv = timr->it.real.interval;
736
737 /* interval timer ? */
738 if (iv.tv64)
739 cur_setting->it_interval = ktime_to_timespec(iv);
740 else if (!hrtimer_active(timer) &&
741 (timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)
742 return;
743
744 now = timer->base->get_time();
745
746 /*
747 * When a requeue is pending or this is a SIGEV_NONE
748 * timer move the expiry time forward by intervals, so
749 * expiry is > now.
750 */
751 if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING ||
752 (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
753 timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
754
755 remaining = ktime_sub(hrtimer_get_expires(timer), now);
756 /* Return 0 only, when the timer is expired and not pending */
757 if (remaining.tv64 <= 0) {
758 /*
759 * A single shot SIGEV_NONE timer must return 0, when
760 * it is expired !
761 */
762 if ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)
763 cur_setting->it_value.tv_nsec = 1;
764 } else
765 cur_setting->it_value = ktime_to_timespec(remaining);
766}
767
768/* Get the time remaining on a POSIX.1b interval timer. */
769SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
770 struct itimerspec __user *, setting)
771{
772 struct itimerspec cur_setting;
773 struct k_itimer *timr;
774 struct k_clock *kc;
775 unsigned long flags;
776 int ret = 0;
777
778 timr = lock_timer(timer_id, &flags);
779 if (!timr)
780 return -EINVAL;
781
782 kc = clockid_to_kclock(timr->it_clock);
783 if (WARN_ON_ONCE(!kc || !kc->timer_get))
784 ret = -EINVAL;
785 else
786 kc->timer_get(timr, &cur_setting);
787
788 unlock_timer(timr, flags);
789
790 if (!ret && copy_to_user(setting, &cur_setting, sizeof (cur_setting)))
791 return -EFAULT;
792
793 return ret;
794}
795
796/*
797 * Get the number of overruns of a POSIX.1b interval timer. This is
798 * the overrun count of the last delivered expiry; at the same time we
799 * are accumulating overruns for the next one. The overrun is frozen when
800 * the signal is delivered, either at the notify time (if the info block
801 * is not queued) or at the actual delivery time (as we are informed by
802 * the callback to do_schedule_next_timer()). So all we need to do is
803 * pick up the frozen overrun.
804 */
805SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
806{
807 struct k_itimer *timr;
808 int overrun;
809 unsigned long flags;
810
811 timr = lock_timer(timer_id, &flags);
812 if (!timr)
813 return -EINVAL;
814
815 overrun = timr->it_overrun_last;
816 unlock_timer(timr, flags);
817
818 return overrun;
819}
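/*
 * Illustrative sketch (not from this file; assumes the timer was created
 * with sigev_value.sival_ptr pointing at its timer_t): picking up the
 * frozen overrun count from the signal handler.
 */
#include <signal.h>
#include <time.h>

static void timer_handler(int sig, siginfo_t *si, void *ctx)
{
	timer_t *tidp = si->si_value.sival_ptr;
	int missed = timer_getoverrun(*tidp);

	(void)sig; (void)ctx; (void)missed;
	/* 'missed' expiries were coalesced into this one signal */
}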
820
821/* Set a POSIX.1b interval timer. */
822/* timr->it_lock is taken. */
823static int
824common_timer_set(struct k_itimer *timr, int flags,
825 struct itimerspec *new_setting, struct itimerspec *old_setting)
826{
827 struct hrtimer *timer = &timr->it.real.timer;
828 enum hrtimer_mode mode;
829
830 if (old_setting)
831 common_timer_get(timr, old_setting);
832
833 /* disable the timer */
834 timr->it.real.interval.tv64 = 0;
835 /*
836 * careful here. If smp we could be in the "fire" routine which will
837 * be spinning as we hold the lock. But this is ONLY an SMP issue.
838 */
839 if (hrtimer_try_to_cancel(timer) < 0)
840 return TIMER_RETRY;
841
842 timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
843 ~REQUEUE_PENDING;
844 timr->it_overrun_last = 0;
845
846 /* switch off the timer when it_value is zero */
847 if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec)
848 return 0;
849
850 mode = flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL;
851 hrtimer_init(&timr->it.real.timer, timr->it_clock, mode);
852 timr->it.real.timer.function = posix_timer_fn;
853
854 hrtimer_set_expires(timer, timespec_to_ktime(new_setting->it_value));
855
856 /* Convert interval */
857 timr->it.real.interval = timespec_to_ktime(new_setting->it_interval);
858
859 /* SIGEV_NONE timers are not queued ! See common_timer_get */
860 if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
861 /* Setup correct expiry time for relative timers */
862 if (mode == HRTIMER_MODE_REL) {
863 hrtimer_add_expires(timer, timer->base->get_time());
864 }
865 return 0;
866 }
867
868 hrtimer_start_expires(timer, mode);
869 return 0;
870}
871
872/* Set a POSIX.1b interval timer */
873SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
874 const struct itimerspec __user *, new_setting,
875 struct itimerspec __user *, old_setting)
876{
877 struct k_itimer *timr;
878 struct itimerspec new_spec, old_spec;
879 int error = 0;
880 unsigned long flag;
881 struct itimerspec *rtn = old_setting ? &old_spec : NULL;
882 struct k_clock *kc;
883
884 if (!new_setting)
885 return -EINVAL;
886
887 if (copy_from_user(&new_spec, new_setting, sizeof (new_spec)))
888 return -EFAULT;
889
890 if (!timespec_valid(&new_spec.it_interval) ||
891 !timespec_valid(&new_spec.it_value))
892 return -EINVAL;
893retry:
894 timr = lock_timer(timer_id, &flag);
895 if (!timr)
896 return -EINVAL;
897
898 kc = clockid_to_kclock(timr->it_clock);
899 if (WARN_ON_ONCE(!kc || !kc->timer_set))
900 error = -EINVAL;
901 else
902 error = kc->timer_set(timr, flags, &new_spec, rtn);
903
904 unlock_timer(timr, flag);
905 if (error == TIMER_RETRY) {
906 rtn = NULL; // We already got the old time...
907 goto retry;
908 }
909
910 if (old_setting && !error &&
911 copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
912 error = -EFAULT;
913
914 return error;
915}
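/*
 * Illustrative user-space sketch (assumes a timer 'tid' already created
 * on CLOCK_REALTIME; not taken from this file): arming an absolute
 * one-shot expiry five seconds from now with TIMER_ABSTIME, while
 * retrieving the previous setting in the same call.
 */
#include <time.h>

static int arm_absolute(timer_t tid)
{
	struct itimerspec new_its = { 0 }, old_its;

	clock_gettime(CLOCK_REALTIME, &new_its.it_value);
	new_its.it_value.tv_sec += 5;	/* one-shot: it_interval stays zero */

	return timer_settime(tid, TIMER_ABSTIME, &new_its, &old_its);
}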
916
917static int common_timer_del(struct k_itimer *timer)
918{
919 timer->it.real.interval.tv64 = 0;
920
921 if (hrtimer_try_to_cancel(&timer->it.real.timer) < 0)
922 return TIMER_RETRY;
923 return 0;
924}
925
926static inline int timer_delete_hook(struct k_itimer *timer)
927{
928 struct k_clock *kc = clockid_to_kclock(timer->it_clock);
929
930 if (WARN_ON_ONCE(!kc || !kc->timer_del))
931 return -EINVAL;
932 return kc->timer_del(timer);
933}
934
935/* Delete a POSIX.1b interval timer. */
936SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
937{
938 struct k_itimer *timer;
939 unsigned long flags;
940
941retry_delete:
942 timer = lock_timer(timer_id, &flags);
943 if (!timer)
944 return -EINVAL;
945
946 if (timer_delete_hook(timer) == TIMER_RETRY) {
947 unlock_timer(timer, flags);
948 goto retry_delete;
949 }
950
951 spin_lock(&current->sighand->siglock);
952 list_del(&timer->list);
953 spin_unlock(&current->sighand->siglock);
954 /*
955 * This keeps any tasks waiting on the spin lock from thinking
956 * they got something (see the lock code above).
957 */
958 timer->it_signal = NULL;
959
960 unlock_timer(timer, flags);
961 release_posix_timer(timer, IT_ID_SET);
962 return 0;
963}
964
965/*
966 * Delete a timer owned by the process; used by exit_itimers().
967 */
968static void itimer_delete(struct k_itimer *timer)
969{
970 unsigned long flags;
971
972retry_delete:
973 spin_lock_irqsave(&timer->it_lock, flags);
974
975 if (timer_delete_hook(timer) == TIMER_RETRY) {
976 unlock_timer(timer, flags);
977 goto retry_delete;
978 }
979 list_del(&timer->list);
980 /*
981 * This keeps any tasks waiting on the spin lock from thinking
982 * they got something (see the lock code above).
983 */
984 timer->it_signal = NULL;
985
986 unlock_timer(timer, flags);
987 release_posix_timer(timer, IT_ID_SET);
988}
989
990/*
991 * This is called by do_exit or de_thread, only when there are no more
992 * references to the shared signal_struct.
993 */
994void exit_itimers(struct signal_struct *sig)
995{
996 struct k_itimer *tmr;
997
998 while (!list_empty(&sig->posix_timers)) {
999 tmr = list_entry(sig->posix_timers.next, struct k_itimer, list);
1000 itimer_delete(tmr);
1001 }
1002}
1003
1004SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
1005 const struct timespec __user *, tp)
1006{
1007 struct k_clock *kc = clockid_to_kclock(which_clock);
1008 struct timespec new_tp;
1009
1010 if (!kc || !kc->clock_set)
1011 return -EINVAL;
1012
1013 if (copy_from_user(&new_tp, tp, sizeof (*tp)))
1014 return -EFAULT;
1015
1016 return kc->clock_set(which_clock, &new_tp);
1017}
1018
1019SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
1020 struct timespec __user *,tp)
1021{
1022 struct k_clock *kc = clockid_to_kclock(which_clock);
1023 struct timespec kernel_tp;
1024 int error;
1025
1026 if (!kc)
1027 return -EINVAL;
1028
1029 error = kc->clock_get(which_clock, &kernel_tp);
1030
1031 if (!error && copy_to_user(tp, &kernel_tp, sizeof (kernel_tp)))
1032 error = -EFAULT;
1033
1034 return error;
1035}
1036
1037SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
1038 struct timex __user *, utx)
1039{
1040 struct k_clock *kc = clockid_to_kclock(which_clock);
1041 struct timex ktx;
1042 int err;
1043
1044 if (!kc)
1045 return -EINVAL;
1046 if (!kc->clock_adj)
1047 return -EOPNOTSUPP;
1048
1049 if (copy_from_user(&ktx, utx, sizeof(ktx)))
1050 return -EFAULT;
1051
1052 err = kc->clock_adj(which_clock, &ktx);
1053
1054 if (err >= 0 && copy_to_user(utx, &ktx, sizeof(ktx)))
1055 return -EFAULT;
1056
1057 return err;
1058}
1059
1060SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
1061 struct timespec __user *, tp)
1062{
1063 struct k_clock *kc = clockid_to_kclock(which_clock);
1064 struct timespec rtn_tp;
1065 int error;
1066
1067 if (!kc)
1068 return -EINVAL;
1069
1070 error = kc->clock_getres(which_clock, &rtn_tp);
1071
1072 if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp)))
1073 error = -EFAULT;
1074
1075 return error;
1076}
1077
1078/*
1079 * nanosleep for monotonic and realtime clocks
1080 */
1081static int common_nsleep(const clockid_t which_clock, int flags,
1082 struct timespec *tsave, struct timespec __user *rmtp)
1083{
1084 return hrtimer_nanosleep(tsave, rmtp, flags & TIMER_ABSTIME ?
1085 HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
1086 which_clock);
1087}
1088
1089SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
1090 const struct timespec __user *, rqtp,
1091 struct timespec __user *, rmtp)
1092{
1093 struct k_clock *kc = clockid_to_kclock(which_clock);
1094 struct timespec t;
1095
1096 if (!kc)
1097 return -EINVAL;
1098 if (!kc->nsleep)
1099 return -ENANOSLEEP_NOTSUP;
1100
1101 if (copy_from_user(&t, rqtp, sizeof (struct timespec)))
1102 return -EFAULT;
1103
1104 if (!timespec_valid(&t))
1105 return -EINVAL;
1106
1107 return kc->nsleep(which_clock, flags, &t, rmtp);
1108}
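/*
 * Illustration (user-space sketch, not from this file): an absolute sleep
 * needs no rmtp -- when a signal interrupts it, the call can simply be
 * reissued with the same target time, which is why the TIMER_ABSTIME case
 * never sets up a restart block with a remaining time.
 */
#include <errno.h>
#include <time.h>

static void sleep_until(const struct timespec *deadline)
{
	while (clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME,
			       deadline, NULL) == EINTR)
		;	/* interrupted: retry with the unchanged deadline */
}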
1109
1110/*
1111 * This will restart clock_nanosleep. This is required only by
1112 * compat_clock_nanosleep_restart for now.
1113 */
1114long clock_nanosleep_restart(struct restart_block *restart_block)
1115{
1116 clockid_t which_clock = restart_block->nanosleep.clockid;
1117 struct k_clock *kc = clockid_to_kclock(which_clock);
1118
1119 if (WARN_ON_ONCE(!kc || !kc->nsleep_restart))
1120 return -EINVAL;
1121
1122 return kc->nsleep_restart(restart_block);
1123}
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 7ab92b19965a..c19c1d84b6f3 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -4,6 +4,8 @@
 #include <linux/hrtimer.h>
 #include <linux/tick.h>
 
+#include "timekeeping.h"
+
 extern seqlock_t jiffies_lock;
 
 #define CS_NAME_LEN 32
diff --git a/kernel/time/time.c b/kernel/time/time.c
new file mode 100644
index 000000000000..f0294ba14634
--- /dev/null
+++ b/kernel/time/time.c
@@ -0,0 +1,778 @@
1/*
2 * linux/kernel/time.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 *
6 * This file contains the interface functions for the various
7 * time related system calls: time, stime, gettimeofday, settimeofday,
8 * adjtime
9 */
10/*
11 * Modification history kernel/time.c
12 *
13 * 1993-09-02 Philip Gladstone
14 * Created file with time related functions from sched/core.c and adjtimex()
15 * 1993-10-08 Torsten Duwe
16 * adjtime interface update and CMOS clock write code
17 * 1995-08-13 Torsten Duwe
18 * kernel PLL updated to 1994-12-13 specs (rfc-1589)
19 * 1999-01-16 Ulrich Windl
20 * Introduced error checking for many cases in adjtimex().
21 * Updated NTP code according to technical memorandum Jan '96
22 * "A Kernel Model for Precision Timekeeping" by Dave Mills
23 * Allow time_constant larger than MAXTC(6) for NTP v4 (MAXTC == 10)
24 * (Even though the technical memorandum forbids it)
25 * 2004-07-14 Christoph Lameter
26 * Added getnstimeofday to allow the posix timer functions to return
27 * with nanosecond accuracy
28 */
29
30#include <linux/export.h>
31#include <linux/timex.h>
32#include <linux/capability.h>
33#include <linux/timekeeper_internal.h>
34#include <linux/errno.h>
35#include <linux/syscalls.h>
36#include <linux/security.h>
37#include <linux/fs.h>
38#include <linux/math64.h>
39#include <linux/ptrace.h>
40
41#include <asm/uaccess.h>
42#include <asm/unistd.h>
43
44#include "timeconst.h"
45#include "timekeeping.h"
46
47/*
48 * The timezone where the local system is located. Used as a default by some
49 * programs that obtain this value by using gettimeofday.
50 */
51struct timezone sys_tz;
52
53EXPORT_SYMBOL(sys_tz);
54
55#ifdef __ARCH_WANT_SYS_TIME
56
57/*
58 * sys_time() can be implemented in user-level using
59 * sys_gettimeofday(). Is this for backwards compatibility? If so,
60 * why not move it into the appropriate arch directory (for those
61 * architectures that need it).
62 */
63SYSCALL_DEFINE1(time, time_t __user *, tloc)
64{
65 time_t i = get_seconds();
66
67 if (tloc) {
68 if (put_user(i,tloc))
69 return -EFAULT;
70 }
71 force_successful_syscall_return();
72 return i;
73}
74
75/*
76 * sys_stime() can be implemented in user-level using
77 * sys_settimeofday(). Is this for backwards compatibility? If so,
78 * why not move it into the appropriate arch directory (for those
79 * architectures that need it).
80 */
81
82SYSCALL_DEFINE1(stime, time_t __user *, tptr)
83{
84 struct timespec tv;
85 int err;
86
87 if (get_user(tv.tv_sec, tptr))
88 return -EFAULT;
89
90 tv.tv_nsec = 0;
91
92 err = security_settime(&tv, NULL);
93 if (err)
94 return err;
95
96 do_settimeofday(&tv);
97 return 0;
98}
99
100#endif /* __ARCH_WANT_SYS_TIME */
101
102SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
103 struct timezone __user *, tz)
104{
105 if (likely(tv != NULL)) {
106 struct timeval ktv;
107 do_gettimeofday(&ktv);
108 if (copy_to_user(tv, &ktv, sizeof(ktv)))
109 return -EFAULT;
110 }
111 if (unlikely(tz != NULL)) {
112 if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
113 return -EFAULT;
114 }
115 return 0;
116}
117
118/*
119 * Indicates if there is an offset between the system clock and the hardware
120 * clock/persistent clock/rtc.
121 */
122int persistent_clock_is_local;
123
124/*
125 * Adjust the time obtained from the CMOS to be UTC time instead of
126 * local time.
127 *
128 * This is ugly, but preferable to the alternatives. Otherwise we
129 * would either need to write a program to do it in /etc/rc (and risk
130 * confusion if the program gets run more than once; it would also be
131 * hard to make the program warp the clock precisely n hours) or
132 * compile in the timezone information into the kernel. Bad, bad....
133 *
134 * - TYT, 1992-01-01
135 *
136 * The best thing to do is to keep the CMOS clock in universal time (UTC)
137 * as real UNIX machines always do it. This avoids all headaches about
138 * daylight saving times and warping kernel clocks.
139 */
140static inline void warp_clock(void)
141{
142 if (sys_tz.tz_minuteswest != 0) {
143 struct timespec adjust;
144
145 persistent_clock_is_local = 1;
146 adjust.tv_sec = sys_tz.tz_minuteswest * 60;
147 adjust.tv_nsec = 0;
148 timekeeping_inject_offset(&adjust);
149 }
150}
151
152/*
153 * In case for some reason the CMOS clock has not already been running
154 * in UTC, but in some local time: The first time we set the timezone,
155 * we will warp the clock so that it is ticking UTC time instead of
156 * local time. Presumably, if someone is setting the timezone then we
157 * are running in an environment where the programs understand about
158 * timezones. This should be done at boot time in the /etc/rc script,
159 * as soon as possible, so that the clock can be set right. Otherwise,
160 * various programs will get confused when the clock gets warped.
161 */
162
163int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz)
164{
165 static int firsttime = 1;
166 int error = 0;
167
168 if (tv && !timespec_valid(tv))
169 return -EINVAL;
170
171 error = security_settime(tv, tz);
172 if (error)
173 return error;
174
175 if (tz) {
176 sys_tz = *tz;
177 update_vsyscall_tz();
178 if (firsttime) {
179 firsttime = 0;
180 if (!tv)
181 warp_clock();
182 }
183 }
184 if (tv)
185 return do_settimeofday(tv);
186 return 0;
187}
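/*
 * User-space sketch of the warp described above (illustrative only,
 * needs CAP_SYS_TIME): the first settimeofday() call since boot that
 * supplies a timezone but no time makes the kernel shift the clock by
 * tz_minuteswest, on the assumption that the RTC was running local time.
 */
#include <sys/time.h>

static int declare_rtc_is_local(int minutes_west_of_utc)
{
	struct timezone tz = { .tz_minuteswest = minutes_west_of_utc };

	return settimeofday(NULL, &tz);
}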
188
189SYSCALL_DEFINE2(settimeofday, struct timeval __user *, tv,
190 struct timezone __user *, tz)
191{
192 struct timeval user_tv;
193 struct timespec new_ts;
194 struct timezone new_tz;
195
196 if (tv) {
197 if (copy_from_user(&user_tv, tv, sizeof(*tv)))
198 return -EFAULT;
199 new_ts.tv_sec = user_tv.tv_sec;
200 new_ts.tv_nsec = user_tv.tv_usec * NSEC_PER_USEC;
201 }
202 if (tz) {
203 if (copy_from_user(&new_tz, tz, sizeof(*tz)))
204 return -EFAULT;
205 }
206
207 return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL);
208}
209
210SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
211{
212 struct timex txc; /* Local copy of parameter */
213 int ret;
214
215 /* Copy the user data space into the kernel copy
216 * structure. But bear in mind that the structures
217 * may change
218 */
219 if(copy_from_user(&txc, txc_p, sizeof(struct timex)))
220 return -EFAULT;
221 ret = do_adjtimex(&txc);
222 return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
223}
224
225/**
226 * current_fs_time - Return FS time
227 * @sb: Superblock.
228 *
229 * Return the current time truncated to the time granularity supported by
230 * the fs.
231 */
232struct timespec current_fs_time(struct super_block *sb)
233{
234 struct timespec now = current_kernel_time();
235 return timespec_trunc(now, sb->s_time_gran);
236}
237EXPORT_SYMBOL(current_fs_time);
238
239/*
240 * Convert jiffies to milliseconds and back.
241 *
242 * Avoid unnecessary multiplications/divisions in the
243 * two most common HZ cases:
244 */
245unsigned int jiffies_to_msecs(const unsigned long j)
246{
247#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
248 return (MSEC_PER_SEC / HZ) * j;
249#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
250 return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
251#else
252# if BITS_PER_LONG == 32
253 return (HZ_TO_MSEC_MUL32 * j) >> HZ_TO_MSEC_SHR32;
254# else
255 return (j * HZ_TO_MSEC_NUM) / HZ_TO_MSEC_DEN;
256# endif
257#endif
258}
259EXPORT_SYMBOL(jiffies_to_msecs);
260
261unsigned int jiffies_to_usecs(const unsigned long j)
262{
263#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ)
264 return (USEC_PER_SEC / HZ) * j;
265#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC)
266 return (j + (HZ / USEC_PER_SEC) - 1)/(HZ / USEC_PER_SEC);
267#else
268# if BITS_PER_LONG == 32
269 return (HZ_TO_USEC_MUL32 * j) >> HZ_TO_USEC_SHR32;
270# else
271 return (j * HZ_TO_USEC_NUM) / HZ_TO_USEC_DEN;
272# endif
273#endif
274}
275EXPORT_SYMBOL(jiffies_to_usecs);
276
277/**
278 * timespec_trunc - Truncate timespec to a granularity
279 * @t: Timespec
280 * @gran: Granularity in ns.
281 *
282 * Truncate a timespec to a granularity. gran must be smaller than a second.
283 * Always rounds down.
284 *
285 * This function should only be used for timestamps returned by
286 * current_kernel_time() or CURRENT_TIME, not with do_gettimeofday() because
287 * it doesn't handle the better resolution of the latter.
288 */
289struct timespec timespec_trunc(struct timespec t, unsigned gran)
290{
291 /*
292 * Division is pretty slow so avoid it for common cases.
293 * Currently current_kernel_time() never returns better than
294 * jiffies resolution. Exploit that.
295 */
296 if (gran <= jiffies_to_usecs(1) * 1000) {
297 /* nothing */
298 } else if (gran == 1000000000) {
299 t.tv_nsec = 0;
300 } else {
301 t.tv_nsec -= t.tv_nsec % gran;
302 }
303 return t;
304}
305EXPORT_SYMBOL(timespec_trunc);
306
307/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
308 * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
309 * => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
310 *
311 * [For the Julian calendar (which was used in Russia before 1917,
312 * Britain & colonies before 1752, anywhere else before 1582,
313 * and is still in use by some communities) leave out the
314 * -year/100+year/400 terms, and add 10.]
315 *
316 * This algorithm was first published by Gauss (I think).
317 *
318 * WARNING: this function will overflow on 2106-02-07 06:28:16 on
319 * machines where long is 32-bit! (However, as time_t is signed, we
320 * will already get problems at other places on 2038-01-19 03:14:08)
321 */
322unsigned long
323mktime(const unsigned int year0, const unsigned int mon0,
324 const unsigned int day, const unsigned int hour,
325 const unsigned int min, const unsigned int sec)
326{
327 unsigned int mon = mon0, year = year0;
328
329 /* 1..12 -> 11,12,1..10 */
330 if (0 >= (int) (mon -= 2)) {
331 mon += 12; /* Puts Feb last since it has leap day */
332 year -= 1;
333 }
334
335 return ((((unsigned long)
336 (year/4 - year/100 + year/400 + 367*mon/12 + day) +
337 year*365 - 719499
338 )*24 + hour /* now have hours */
339 )*60 + min /* now have minutes */
340 )*60 + sec; /* finally seconds */
341}
342
343EXPORT_SYMBOL(mktime);
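/*
 * Spot checks for the formula above (illustrative):
 *
 *	mktime(1970,  1,  1, 0, 0, 0) == 0		(the epoch)
 *	mktime(1970,  1,  2, 0, 0, 0) == 86400		(one day later)
 *	mktime(2000,  3,  1, 0, 0, 0) == 951868800	(leap day counted,
 *							 Feb shifted to "month 12")
 */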
344
345/**
346 * set_normalized_timespec - set timespec sec and nsec parts and normalize
347 *
348 * @ts: pointer to timespec variable to be set
349 * @sec: seconds to set
350 * @nsec: nanoseconds to set
351 *
352 * Set seconds and nanoseconds field of a timespec variable and
353 * normalize to the timespec storage format
354 *
355 * Note: The tv_nsec part is always in the range of
356 * 0 <= tv_nsec < NSEC_PER_SEC
357 * For negative values only the tv_sec field is negative !
358 */
359void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec)
360{
361 while (nsec >= NSEC_PER_SEC) {
362 /*
363 * The following asm() prevents the compiler from
364 * optimising this loop into a modulo operation. See
365 * also __iter_div_u64_rem() in include/linux/time.h
366 */
367 asm("" : "+rm"(nsec));
368 nsec -= NSEC_PER_SEC;
369 ++sec;
370 }
371 while (nsec < 0) {
372 asm("" : "+rm"(nsec));
373 nsec += NSEC_PER_SEC;
374 --sec;
375 }
376 ts->tv_sec = sec;
377 ts->tv_nsec = nsec;
378}
379EXPORT_SYMBOL(set_normalized_timespec);
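/*
 * Normalization examples (illustrative): an out-of-range input such as
 * { .tv_sec = 1, .tv_nsec = 1500000000 } becomes { 2, 500000000 }, and
 * { .tv_sec = 0, .tv_nsec = -300000000 } becomes { -1, 700000000 },
 * i.e. only tv_sec goes negative, as noted above.
 */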
380
381/**
382 * ns_to_timespec - Convert nanoseconds to timespec
383 * @nsec: the nanoseconds value to be converted
384 *
385 * Returns the timespec representation of the nsec parameter.
386 */
387struct timespec ns_to_timespec(const s64 nsec)
388{
389 struct timespec ts;
390 s32 rem;
391
392 if (!nsec)
393 return (struct timespec) {0, 0};
394
395 ts.tv_sec = div_s64_rem(nsec, NSEC_PER_SEC, &rem);
396 if (unlikely(rem < 0)) {
397 ts.tv_sec--;
398 rem += NSEC_PER_SEC;
399 }
400 ts.tv_nsec = rem;
401
402 return ts;
403}
404EXPORT_SYMBOL(ns_to_timespec);
405
406/**
407 * ns_to_timeval - Convert nanoseconds to timeval
408 * @nsec: the nanoseconds value to be converted
409 *
410 * Returns the timeval representation of the nsec parameter.
411 */
412struct timeval ns_to_timeval(const s64 nsec)
413{
414 struct timespec ts = ns_to_timespec(nsec);
415 struct timeval tv;
416
417 tv.tv_sec = ts.tv_sec;
418 tv.tv_usec = (suseconds_t) ts.tv_nsec / 1000;
419
420 return tv;
421}
422EXPORT_SYMBOL(ns_to_timeval);
423
424#if BITS_PER_LONG == 32
425/**
426 * set_normalized_timespec64 - set timespec64 sec and nsec parts and normalize
427 *
428 * @ts: pointer to timespec64 variable to be set
429 * @sec: seconds to set
430 * @nsec: nanoseconds to set
431 *
432 * Set seconds and nanoseconds field of a timespec64 variable and
433 * normalize to the timespec64 storage format
434 *
435 * Note: The tv_nsec part is always in the range of
436 * 0 <= tv_nsec < NSEC_PER_SEC
437 * For negative values only the tv_sec field is negative !
438 */
439void set_normalized_timespec64(struct timespec64 *ts, time64_t sec, s64 nsec)
440{
441 while (nsec >= NSEC_PER_SEC) {
442 /*
443 * The following asm() prevents the compiler from
444 * optimising this loop into a modulo operation. See
445 * also __iter_div_u64_rem() in include/linux/time.h
446 */
447 asm("" : "+rm"(nsec));
448 nsec -= NSEC_PER_SEC;
449 ++sec;
450 }
451 while (nsec < 0) {
452 asm("" : "+rm"(nsec));
453 nsec += NSEC_PER_SEC;
454 --sec;
455 }
456 ts->tv_sec = sec;
457 ts->tv_nsec = nsec;
458}
459EXPORT_SYMBOL(set_normalized_timespec64);
460
461/**
462 * ns_to_timespec64 - Convert nanoseconds to timespec64
463 * @nsec: the nanoseconds value to be converted
464 *
465 * Returns the timespec64 representation of the nsec parameter.
466 */
467struct timespec64 ns_to_timespec64(const s64 nsec)
468{
469 struct timespec64 ts;
470 s32 rem;
471
472 if (!nsec)
473 return (struct timespec64) {0, 0};
474
475 ts.tv_sec = div_s64_rem(nsec, NSEC_PER_SEC, &rem);
476 if (unlikely(rem < 0)) {
477 ts.tv_sec--;
478 rem += NSEC_PER_SEC;
479 }
480 ts.tv_nsec = rem;
481
482 return ts;
483}
484EXPORT_SYMBOL(ns_to_timespec64);
485#endif
486/*
487 * When we convert to jiffies then we interpret incoming values
488 * the following way:
489 *
490 * - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET)
491 *
492 * - 'too large' values [that would result in larger than
493 * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too.
494 *
495 * - all other values are converted to jiffies by either multiplying
496 * the input value by a factor or dividing it with a factor
497 *
498 * We must also be careful about 32-bit overflows.
499 */
500unsigned long msecs_to_jiffies(const unsigned int m)
501{
502 /*
503 * Negative value, means infinite timeout:
504 */
505 if ((int)m < 0)
506 return MAX_JIFFY_OFFSET;
507
508#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
509 /*
510 * HZ is equal to or smaller than 1000, and 1000 is a nice
511 * round multiple of HZ, divide with the factor between them,
512 * but round upwards:
513 */
514 return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
515#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
516 /*
517 * HZ is larger than 1000, and HZ is a nice round multiple of
518 * 1000 - simply multiply with the factor between them.
519 *
520 * But first make sure the multiplication result cannot
521 * overflow:
522 */
523 if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
524 return MAX_JIFFY_OFFSET;
525
526 return m * (HZ / MSEC_PER_SEC);
527#else
528 /*
529 * Generic case - multiply, round and divide. But first
530 * check that if we are doing a net multiplication, that
531 * we wouldn't overflow:
532 */
533 if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
534 return MAX_JIFFY_OFFSET;
535
536 return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32)
537 >> MSEC_TO_HZ_SHR32;
538#endif
539}
540EXPORT_SYMBOL(msecs_to_jiffies);
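/*
 * Illustration (assuming HZ=250, a 4ms tick): msecs_to_jiffies(1) rounds
 * up to 1 jiffy, msecs_to_jiffies(10) gives 3 jiffies (12ms -- always
 * rounded up, never early), and any negative input is treated as an
 * infinite timeout (MAX_JIFFY_OFFSET).
 */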
541
542unsigned long usecs_to_jiffies(const unsigned int u)
543{
544 if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET))
545 return MAX_JIFFY_OFFSET;
546#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ)
547 return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ);
548#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC)
549 return u * (HZ / USEC_PER_SEC);
550#else
551 return (USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32)
552 >> USEC_TO_HZ_SHR32;
553#endif
554}
555EXPORT_SYMBOL(usecs_to_jiffies);
556
557/*
558 * The TICK_NSEC - 1 rounds up the value to the next resolution. Note
559 * that a remainder subtract here would not do the right thing as the
560 * resolution values don't fall on second boundaries. I.e. the line:
561 * nsec -= nsec % TICK_NSEC; is NOT a correct resolution rounding.
562 *
563 * Rather, we just shift the bits off the right.
564 *
565 * The >> (NSEC_JIFFIE_SC - SEC_JIFFIE_SC) converts the scaled nsec
566 * value to a scaled second value.
567 */
568unsigned long
569timespec_to_jiffies(const struct timespec *value)
570{
571 unsigned long sec = value->tv_sec;
572 long nsec = value->tv_nsec + TICK_NSEC - 1;
573
574 if (sec >= MAX_SEC_IN_JIFFIES){
575 sec = MAX_SEC_IN_JIFFIES;
576 nsec = 0;
577 }
578 return (((u64)sec * SEC_CONVERSION) +
579 (((u64)nsec * NSEC_CONVERSION) >>
580 (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
581
582}
583EXPORT_SYMBOL(timespec_to_jiffies);
584
585void
586jiffies_to_timespec(const unsigned long jiffies, struct timespec *value)
587{
588 /*
589 * Convert jiffies to nanoseconds and separate with
590 * one divide.
591 */
592 u32 rem;
593 value->tv_sec = div_u64_rem((u64)jiffies * TICK_NSEC,
594 NSEC_PER_SEC, &rem);
595 value->tv_nsec = rem;
596}
597EXPORT_SYMBOL(jiffies_to_timespec);
598
599/* Same for "timeval"
600 *
601 * Well, almost. The problem here is that the real system resolution is
602 * in nanoseconds and the value being converted is in micro seconds.
603 * Also for some machines (those that use HZ = 1024, in particular),
604 * there is a LARGE error in the tick size in microseconds.
605 *
606 * The solution we use is to do the rounding AFTER we convert the
607 * microsecond part. Thus the USEC_ROUND, the bits to be shifted off.
608 * Instruction-wise, this should cost only an additional add-with-carry
609 * instruction over the timespec conversion above.
610 */
611unsigned long
612timeval_to_jiffies(const struct timeval *value)
613{
614 unsigned long sec = value->tv_sec;
615 long usec = value->tv_usec;
616
617 if (sec >= MAX_SEC_IN_JIFFIES){
618 sec = MAX_SEC_IN_JIFFIES;
619 usec = 0;
620 }
621 return (((u64)sec * SEC_CONVERSION) +
622 (((u64)usec * USEC_CONVERSION + USEC_ROUND) >>
623 (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
624}
625EXPORT_SYMBOL(timeval_to_jiffies);
626
627void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value)
628{
629 /*
630 * Convert jiffies to nanoseconds and separate with
631 * one divide.
632 */
633 u32 rem;
634
635 value->tv_sec = div_u64_rem((u64)jiffies * TICK_NSEC,
636 NSEC_PER_SEC, &rem);
637 value->tv_usec = rem / NSEC_PER_USEC;
638}
639EXPORT_SYMBOL(jiffies_to_timeval);
640
641/*
642 * Convert jiffies/jiffies_64 to clock_t and back.
643 */
644clock_t jiffies_to_clock_t(unsigned long x)
645{
646#if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0
647# if HZ < USER_HZ
648 return x * (USER_HZ / HZ);
649# else
650 return x / (HZ / USER_HZ);
651# endif
652#else
653 return div_u64((u64)x * TICK_NSEC, NSEC_PER_SEC / USER_HZ);
654#endif
655}
656EXPORT_SYMBOL(jiffies_to_clock_t);
657
658unsigned long clock_t_to_jiffies(unsigned long x)
659{
660#if (HZ % USER_HZ)==0
661 if (x >= ~0UL / (HZ / USER_HZ))
662 return ~0UL;
663 return x * (HZ / USER_HZ);
664#else
665 /* Don't worry about loss of precision here .. */
666 if (x >= ~0UL / HZ * USER_HZ)
667 return ~0UL;
668
669 /* .. but do try to contain it here */
670 return div_u64((u64)x * HZ, USER_HZ);
671#endif
672}
673EXPORT_SYMBOL(clock_t_to_jiffies);
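
For a configuration such as HZ = 250 with USER_HZ = 100, both conversions above fall through to their #else branches. A hedged user-space model (hypothetical model_* names, plain division standing in for div_u64) illustrates the round trip, including the truncation in clock_t_to_jiffies():

/* Hypothetical model of the non-divisible branches for HZ = 250, USER_HZ = 100. */
#include <stdio.h>
#include <stdint.h>

#define MODEL_HZ		250ULL
#define MODEL_USER_HZ		100ULL
#define MODEL_NSEC_PER_SEC	1000000000ULL
#define MODEL_TICK_NSEC		((MODEL_NSEC_PER_SEC + MODEL_HZ / 2) / MODEL_HZ)

static uint64_t model_jiffies_to_clock_t(uint64_t x)
{
	/* x * TICK_NSEC / (NSEC_PER_SEC / USER_HZ), as in the #else branch above */
	return x * MODEL_TICK_NSEC / (MODEL_NSEC_PER_SEC / MODEL_USER_HZ);
}

static uint64_t model_clock_t_to_jiffies(uint64_t x)
{
	/* x * HZ / USER_HZ, ignoring the ~0UL overflow clamp */
	return x * MODEL_HZ / MODEL_USER_HZ;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)model_jiffies_to_clock_t(250));	/* 100 */
	printf("%llu\n", (unsigned long long)model_clock_t_to_jiffies(3));	/* 7 (7.5 truncated) */
	return 0;
}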
674
675u64 jiffies_64_to_clock_t(u64 x)
676{
677#if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0
678# if HZ < USER_HZ
679 x = div_u64(x * USER_HZ, HZ);
680# elif HZ > USER_HZ
681 x = div_u64(x, HZ / USER_HZ);
682# else
683 /* Nothing to do */
684# endif
685#else
686 /*
687 * There are better ways that don't overflow early,
688 * but even this doesn't overflow in hundreds of years
689 * in 64 bits, so..
690 */
691 x = div_u64(x * TICK_NSEC, (NSEC_PER_SEC / USER_HZ));
692#endif
693 return x;
694}
695EXPORT_SYMBOL(jiffies_64_to_clock_t);
696
697u64 nsec_to_clock_t(u64 x)
698{
699#if (NSEC_PER_SEC % USER_HZ) == 0
700 return div_u64(x, NSEC_PER_SEC / USER_HZ);
701#elif (USER_HZ % 512) == 0
702 return div_u64(x * USER_HZ / 512, NSEC_PER_SEC / 512);
703#else
704 /*
705 * max relative error 5.7e-8 (1.8s per year) for USER_HZ <= 1024,
706 * overflow after 64.99 years.
707 * exact for HZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ...
708 */
709 return div_u64(x * 9, (9ull * NSEC_PER_SEC + (USER_HZ / 2)) / USER_HZ);
710#endif
711}
712
713/**
714 * nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64
715 *
716 * @n: nsecs in u64
717 *
718 * Unlike {m,u}secs_to_jiffies, the type of the input is not unsigned int but u64.
719 * It also doesn't return MAX_JIFFY_OFFSET, since this function is designed
720 * for the scheduler, not for use in device drivers to calculate timeout values.
721 *
722 * note:
723 * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
724 * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
725 */
726u64 nsecs_to_jiffies64(u64 n)
727{
728#if (NSEC_PER_SEC % HZ) == 0
729 /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */
730 return div_u64(n, NSEC_PER_SEC / HZ);
731#elif (HZ % 512) == 0
732 /* overflow after 292 years if HZ = 1024 */
733 return div_u64(n * HZ / 512, NSEC_PER_SEC / 512);
734#else
735 /*
736 * Generic case - optimized for cases where HZ is a multiple of 3.
737 * overflow after 64.99 years, exact for HZ = 60, 72, 90, 120 etc.
738 */
739 return div_u64(n * 9, (9ull * NSEC_PER_SEC + HZ / 2) / HZ);
740#endif
741}
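
In the (HZ % 512) == 0 branch above, dividing both numerator and denominator by 512 keeps the intermediate n * HZ / 512 inside 64 bits for roughly 292 years of nanoseconds while leaving the result exact for HZ = 1024. A quick stand-alone check, with hypothetical model_* names:

/* Hypothetical check of the (HZ % 512) == 0 branch for HZ = 1024. */
#include <stdio.h>
#include <stdint.h>

#define MODEL_HZ		1024ULL
#define MODEL_NSEC_PER_SEC	1000000000ULL	/* 1953125 * 512 */

static uint64_t model_nsecs_to_jiffies64(uint64_t n)
{
	/* n * HZ / 512 stays in 64 bits until n is about 2^63 ns (~292 years). */
	return (n * MODEL_HZ / 512) / (MODEL_NSEC_PER_SEC / 512);
}

int main(void)
{
	/* One second of nanoseconds maps to exactly HZ jiffies. */
	printf("%llu\n", (unsigned long long)model_nsecs_to_jiffies64(MODEL_NSEC_PER_SEC));
	return 0;
}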
742
743/**
744 * nsecs_to_jiffies - Convert nsecs in u64 to jiffies
745 *
746 * @n: nsecs in u64
747 *
748 * Unlike {m,u}secs_to_jiffies, the type of the input is not unsigned int but u64.
749 * It also doesn't return MAX_JIFFY_OFFSET, since this function is designed
750 * for the scheduler, not for use in device drivers to calculate timeout values.
751 *
752 * note:
753 * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
754 * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
755 */
756unsigned long nsecs_to_jiffies(u64 n)
757{
758 return (unsigned long)nsecs_to_jiffies64(n);
759}
760EXPORT_SYMBOL_GPL(nsecs_to_jiffies);
761
762/*
763 * Add two timespec values and do a safety check for overflow.
764 * It's assumed that both values are valid (>= 0)
765 */
766struct timespec timespec_add_safe(const struct timespec lhs,
767 const struct timespec rhs)
768{
769 struct timespec res;
770
771 set_normalized_timespec(&res, lhs.tv_sec + rhs.tv_sec,
772 lhs.tv_nsec + rhs.tv_nsec);
773
774 if (res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec)
775 res.tv_sec = TIME_T_MAX;
776
777 return res;
778}
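
The overflow test relies on the usual wrap-around property: if the addition wrapped, the sum compares smaller than either operand, and the result is clamped to TIME_T_MAX. The sketch below illustrates that test with unsigned 32-bit arithmetic, where wrap-around is well defined; the real tv_sec is a signed time_t and is normalized by set_normalized_timespec() first:

/* Hypothetical demonstration of the "sum smaller than an operand" overflow test. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t lhs = UINT32_MAX - 5, rhs = 10;
	uint32_t sum = lhs + rhs;		/* wraps around to 4 */

	if (sum < lhs || sum < rhs)
		sum = UINT32_MAX;		/* clamp, as timespec_add_safe() does */

	printf("%u\n", (unsigned)sum);		/* prints 4294967295 */
	return 0;
}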
diff --git a/kernel/time/timeconst.bc b/kernel/time/timeconst.bc
new file mode 100644
index 000000000000..511bdf2cafda
--- /dev/null
+++ b/kernel/time/timeconst.bc
@@ -0,0 +1,108 @@
1scale=0
2
3define gcd(a,b) {
4 auto t;
5 while (b) {
6 t = b;
7 b = a % b;
8 a = t;
9 }
10 return a;
11}
12
13/* Division by reciprocal multiplication. */
14define fmul(b,n,d) {
15 return (2^b*n+d-1)/d;
16}
17
18/* Adjustment factor when a ceiling value is used. Use as:
19 (imul * n) + ((fmulxx * n + fadjxx) >> xx) */
20define fadj(b,n,d) {
21 auto v;
22 d = d/gcd(n,d);
23 v = 2^b*(d-1)/d;
24 return v;
25}
26
27/* Compute the appropriate mul/adj values as well as a shift count,
28 which brings the mul value into the range 2^(b-1) <= x < 2^b. Such
29 a shift value will be correct in the signed integer range and off
30 by at most one in the upper half of the unsigned range. */
31define fmuls(b,n,d) {
32 auto s, m;
33 for (s = 0; 1; s++) {
34 m = fmul(s,n,d);
35 if (m >= 2^(b-1))
36 return s;
37 }
38 return 0;
39}
40
41define timeconst(hz) {
42 print "/* Automatically generated by kernel/timeconst.bc */\n"
43 print "/* Time conversion constants for HZ == ", hz, " */\n"
44 print "\n"
45
46 print "#ifndef KERNEL_TIMECONST_H\n"
47 print "#define KERNEL_TIMECONST_H\n\n"
48
49 print "#include <linux/param.h>\n"
50 print "#include <linux/types.h>\n\n"
51
52 print "#if HZ != ", hz, "\n"
53 print "#error \qkernel/timeconst.h has the wrong HZ value!\q\n"
54 print "#endif\n\n"
55
56 if (hz < 2) {
57 print "#error Totally bogus HZ value!\n"
58 } else {
59 s=fmuls(32,1000,hz)
60 obase=16
61 print "#define HZ_TO_MSEC_MUL32\tU64_C(0x", fmul(s,1000,hz), ")\n"
62 print "#define HZ_TO_MSEC_ADJ32\tU64_C(0x", fadj(s,1000,hz), ")\n"
63 obase=10
64 print "#define HZ_TO_MSEC_SHR32\t", s, "\n"
65
66 s=fmuls(32,hz,1000)
67 obase=16
68 print "#define MSEC_TO_HZ_MUL32\tU64_C(0x", fmul(s,hz,1000), ")\n"
69 print "#define MSEC_TO_HZ_ADJ32\tU64_C(0x", fadj(s,hz,1000), ")\n"
70 obase=10
71 print "#define MSEC_TO_HZ_SHR32\t", s, "\n"
72
73 obase=10
74 cd=gcd(hz,1000)
75 print "#define HZ_TO_MSEC_NUM\t\t", 1000/cd, "\n"
76 print "#define HZ_TO_MSEC_DEN\t\t", hz/cd, "\n"
77 print "#define MSEC_TO_HZ_NUM\t\t", hz/cd, "\n"
78 print "#define MSEC_TO_HZ_DEN\t\t", 1000/cd, "\n"
79 print "\n"
80
81 s=fmuls(32,1000000,hz)
82 obase=16
83 print "#define HZ_TO_USEC_MUL32\tU64_C(0x", fmul(s,1000000,hz), ")\n"
84 print "#define HZ_TO_USEC_ADJ32\tU64_C(0x", fadj(s,1000000,hz), ")\n"
85 obase=10
86 print "#define HZ_TO_USEC_SHR32\t", s, "\n"
87
88 s=fmuls(32,hz,1000000)
89 obase=16
90 print "#define USEC_TO_HZ_MUL32\tU64_C(0x", fmul(s,hz,1000000), ")\n"
91 print "#define USEC_TO_HZ_ADJ32\tU64_C(0x", fadj(s,hz,1000000), ")\n"
92 obase=10
93 print "#define USEC_TO_HZ_SHR32\t", s, "\n"
94
95 obase=10
96 cd=gcd(hz,1000000)
97 print "#define HZ_TO_USEC_NUM\t\t", 1000000/cd, "\n"
98 print "#define HZ_TO_USEC_DEN\t\t", hz/cd, "\n"
99 print "#define USEC_TO_HZ_NUM\t\t", hz/cd, "\n"
100 print "#define USEC_TO_HZ_DEN\t\t", 1000000/cd, "\n"
101 print "\n"
102
103 print "#endif /* KERNEL_TIMECONST_H */\n"
104 }
105 halt
106}
107
108timeconst(hz)
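
The constants emitted by this script implement division by reciprocal multiplication: x * n / d becomes (mul * x + adj) >> shift, with adj chosen so the result rounds up, which is exactly the shape of the (USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32) >> USEC_TO_HZ_SHR32 expression in usecs_to_jiffies() above. A hedged user-space re-implementation of fmul()/fadj()/fmuls() that checks the identity for HZ = 1000 (model code only, not part of the kernel build):

/* Hypothetical user-space check of the reciprocal-multiplication constants. */
#include <stdio.h>
#include <stdint.h>

static uint64_t gcd(uint64_t a, uint64_t b)
{
	while (b) {
		uint64_t t = b;
		b = a % b;
		a = t;
	}
	return a;
}

/* Same math as fmul()/fadj()/fmuls() in timeconst.bc, using 64-bit intermediates. */
static uint64_t fmul(unsigned int s, uint64_t n, uint64_t d)
{
	return ((1ULL << s) * n + d - 1) / d;
}

static uint64_t fadj(unsigned int s, uint64_t n, uint64_t d)
{
	d /= gcd(n, d);
	return (1ULL << s) * (d - 1) / d;
}

static unsigned int fmuls(unsigned int b, uint64_t n, uint64_t d)
{
	unsigned int s = 0;

	while (fmul(s, n, d) < (1ULL << (b - 1)))
		s++;
	return s;
}

int main(void)
{
	uint64_t hz = 1000, usec = 1500;		/* 1.5 ms */
	unsigned int s = fmuls(32, hz, 1000000);
	uint64_t mul = fmul(s, hz, 1000000);
	uint64_t adj = fadj(s, hz, 1000000);

	/* Same shape as usecs_to_jiffies(): (MUL32 * u + ADJ32) >> SHR32 */
	printf("%llu\n", (unsigned long long)((mul * usec + adj) >> s));
	return 0;
}

Compiled and run, this prints 2, the rounded-up number of HZ=1000 jiffies in 1.5 ms, and the computed mul/shift pair matches the generated USEC_TO_HZ constants for that HZ.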
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 32d8d6aaedb8..f36b02838a47 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -32,11 +32,34 @@
32#define TK_MIRROR (1 << 1) 32#define TK_MIRROR (1 << 1)
33#define TK_CLOCK_WAS_SET (1 << 2) 33#define TK_CLOCK_WAS_SET (1 << 2)
34 34
35static struct timekeeper timekeeper; 35/*
36 * The most important data for readout fits into a single 64 byte
37 * cache line.
38 */
39static struct {
40 seqcount_t seq;
41 struct timekeeper timekeeper;
42} tk_core ____cacheline_aligned;
43
36static DEFINE_RAW_SPINLOCK(timekeeper_lock); 44static DEFINE_RAW_SPINLOCK(timekeeper_lock);
37static seqcount_t timekeeper_seq;
38static struct timekeeper shadow_timekeeper; 45static struct timekeeper shadow_timekeeper;
39 46
47/**
48 * struct tk_fast - NMI safe timekeeper
49 * @seq: Sequence counter for protecting updates. The lowest bit
50 * is the index for the tk_read_base array
51 * @base: tk_read_base array. Access is indexed by the lowest bit of
52 * @seq.
53 *
54 * See @update_fast_timekeeper() below.
55 */
56struct tk_fast {
57 seqcount_t seq;
58 struct tk_read_base base[2];
59};
60
61static struct tk_fast tk_fast_mono ____cacheline_aligned;
62
40/* flag for if timekeeping is suspended */ 63/* flag for if timekeeping is suspended */
41int __read_mostly timekeeping_suspended; 64int __read_mostly timekeeping_suspended;
42 65
@@ -45,49 +68,54 @@ bool __read_mostly persistent_clock_exist = false;
45 68
46static inline void tk_normalize_xtime(struct timekeeper *tk) 69static inline void tk_normalize_xtime(struct timekeeper *tk)
47{ 70{
48 while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) { 71 while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) {
49 tk->xtime_nsec -= (u64)NSEC_PER_SEC << tk->shift; 72 tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift;
50 tk->xtime_sec++; 73 tk->xtime_sec++;
51 } 74 }
52} 75}
53 76
54static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts) 77static inline struct timespec64 tk_xtime(struct timekeeper *tk)
78{
79 struct timespec64 ts;
80
81 ts.tv_sec = tk->xtime_sec;
82 ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
83 return ts;
84}
85
86static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
55{ 87{
56 tk->xtime_sec = ts->tv_sec; 88 tk->xtime_sec = ts->tv_sec;
57 tk->xtime_nsec = (u64)ts->tv_nsec << tk->shift; 89 tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift;
58} 90}
59 91
60static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts) 92static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
61{ 93{
62 tk->xtime_sec += ts->tv_sec; 94 tk->xtime_sec += ts->tv_sec;
63 tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift; 95 tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift;
64 tk_normalize_xtime(tk); 96 tk_normalize_xtime(tk);
65} 97}
66 98
67static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm) 99static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
68{ 100{
69 struct timespec tmp; 101 struct timespec64 tmp;
70 102
71 /* 103 /*
72 * Verify consistency of: offset_real = -wall_to_monotonic 104 * Verify consistency of: offset_real = -wall_to_monotonic
73 * before modifying anything 105 * before modifying anything
74 */ 106 */
75 set_normalized_timespec(&tmp, -tk->wall_to_monotonic.tv_sec, 107 set_normalized_timespec64(&tmp, -tk->wall_to_monotonic.tv_sec,
76 -tk->wall_to_monotonic.tv_nsec); 108 -tk->wall_to_monotonic.tv_nsec);
77 WARN_ON_ONCE(tk->offs_real.tv64 != timespec_to_ktime(tmp).tv64); 109 WARN_ON_ONCE(tk->offs_real.tv64 != timespec64_to_ktime(tmp).tv64);
78 tk->wall_to_monotonic = wtm; 110 tk->wall_to_monotonic = wtm;
79 set_normalized_timespec(&tmp, -wtm.tv_sec, -wtm.tv_nsec); 111 set_normalized_timespec64(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
80 tk->offs_real = timespec_to_ktime(tmp); 112 tk->offs_real = timespec64_to_ktime(tmp);
81 tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0)); 113 tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0));
82} 114}
83 115
84static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t) 116static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
85{ 117{
86 /* Verify consistency before modifying */ 118 tk->offs_boot = ktime_add(tk->offs_boot, delta);
87 WARN_ON_ONCE(tk->offs_boot.tv64 != timespec_to_ktime(tk->total_sleep_time).tv64);
88
89 tk->total_sleep_time = t;
90 tk->offs_boot = timespec_to_ktime(t);
91} 119}
92 120
93/** 121/**
@@ -107,9 +135,11 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
107 u64 tmp, ntpinterval; 135 u64 tmp, ntpinterval;
108 struct clocksource *old_clock; 136 struct clocksource *old_clock;
109 137
110 old_clock = tk->clock; 138 old_clock = tk->tkr.clock;
111 tk->clock = clock; 139 tk->tkr.clock = clock;
112 tk->cycle_last = clock->cycle_last = clock->read(clock); 140 tk->tkr.read = clock->read;
141 tk->tkr.mask = clock->mask;
142 tk->tkr.cycle_last = tk->tkr.read(clock);
113 143
114 /* Do the ns -> cycle conversion first, using original mult */ 144 /* Do the ns -> cycle conversion first, using original mult */
115 tmp = NTP_INTERVAL_LENGTH; 145 tmp = NTP_INTERVAL_LENGTH;
@@ -133,78 +163,212 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
133 if (old_clock) { 163 if (old_clock) {
134 int shift_change = clock->shift - old_clock->shift; 164 int shift_change = clock->shift - old_clock->shift;
135 if (shift_change < 0) 165 if (shift_change < 0)
136 tk->xtime_nsec >>= -shift_change; 166 tk->tkr.xtime_nsec >>= -shift_change;
137 else 167 else
138 tk->xtime_nsec <<= shift_change; 168 tk->tkr.xtime_nsec <<= shift_change;
139 } 169 }
140 tk->shift = clock->shift; 170 tk->tkr.shift = clock->shift;
141 171
142 tk->ntp_error = 0; 172 tk->ntp_error = 0;
143 tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; 173 tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
174 tk->ntp_tick = ntpinterval << tk->ntp_error_shift;
144 175
145 /* 176 /*
146 * The timekeeper keeps its own mult values for the currently 177 * The timekeeper keeps its own mult values for the currently
147 * active clocksource. These values will be adjusted via NTP 178
148 * to counteract clock drifting. 179 * to counteract clock drifting.
149 */ 180 */
150 tk->mult = clock->mult; 181 tk->tkr.mult = clock->mult;
182 tk->ntp_err_mult = 0;
151} 183}
152 184
153/* Timekeeper helper functions. */ 185/* Timekeeper helper functions. */
154 186
155#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET 187#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
156u32 (*arch_gettimeoffset)(void); 188static u32 default_arch_gettimeoffset(void) { return 0; }
157 189u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset;
158u32 get_arch_timeoffset(void)
159{
160 if (likely(arch_gettimeoffset))
161 return arch_gettimeoffset();
162 return 0;
163}
164#else 190#else
165static inline u32 get_arch_timeoffset(void) { return 0; } 191static inline u32 arch_gettimeoffset(void) { return 0; }
166#endif 192#endif
167 193
168static inline s64 timekeeping_get_ns(struct timekeeper *tk) 194static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
169{ 195{
170 cycle_t cycle_now, cycle_delta; 196 cycle_t cycle_now, delta;
171 struct clocksource *clock;
172 s64 nsec; 197 s64 nsec;
173 198
174 /* read clocksource: */ 199 /* read clocksource: */
175 clock = tk->clock; 200 cycle_now = tkr->read(tkr->clock);
176 cycle_now = clock->read(clock);
177 201
178 /* calculate the delta since the last update_wall_time: */ 202 /* calculate the delta since the last update_wall_time: */
179 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; 203 delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
180 204
181 nsec = cycle_delta * tk->mult + tk->xtime_nsec; 205 nsec = delta * tkr->mult + tkr->xtime_nsec;
182 nsec >>= tk->shift; 206 nsec >>= tkr->shift;
183 207
184 /* If arch requires, add in get_arch_timeoffset() */ 208 /* If arch requires, add in get_arch_timeoffset() */
185 return nsec + get_arch_timeoffset(); 209 return nsec + arch_gettimeoffset();
186} 210}
187 211
188static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) 212static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
189{ 213{
190 cycle_t cycle_now, cycle_delta; 214 struct clocksource *clock = tk->tkr.clock;
191 struct clocksource *clock; 215 cycle_t cycle_now, delta;
192 s64 nsec; 216 s64 nsec;
193 217
194 /* read clocksource: */ 218 /* read clocksource: */
195 clock = tk->clock; 219 cycle_now = tk->tkr.read(clock);
196 cycle_now = clock->read(clock);
197 220
198 /* calculate the delta since the last update_wall_time: */ 221 /* calculate the delta since the last update_wall_time: */
199 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; 222 delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
200 223
201 /* convert delta to nanoseconds. */ 224 /* convert delta to nanoseconds. */
202 nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); 225 nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
203 226
204 /* If arch requires, add in get_arch_timeoffset() */ 227 /* If arch requires, add in get_arch_timeoffset() */
205 return nsec + get_arch_timeoffset(); 228 return nsec + arch_gettimeoffset();
229}
230
231/**
232 * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
233 * @tk: The timekeeper from which we take the update
234 * @tkf: The fast timekeeper to update
235 * @tbase: The time base for the fast timekeeper (mono/raw)
236 *
237 * We want to use this from any context including NMI and tracing /
238 * instrumenting the timekeeping code itself.
239 *
240 * So we handle this differently than the other timekeeping accessor
241 * functions which retry when the sequence count has changed. The
242 * update side does:
243 *
244 * smp_wmb(); <- Ensure that the last base[1] update is visible
245 * tkf->seq++;
246 * smp_wmb(); <- Ensure that the seqcount update is visible
247 * update(tkf->base[0], tk);
248 * smp_wmb(); <- Ensure that the base[0] update is visible
249 * tkf->seq++;
250 * smp_wmb(); <- Ensure that the seqcount update is visible
251 * update(tkf->base[1], tk);
252 *
253 * The reader side does:
254 *
255 * do {
256 * seq = tkf->seq;
257 * smp_rmb();
258 * idx = seq & 0x01;
259 * now = now(tkf->base[idx]);
260 * smp_rmb();
261 * } while (seq != tkf->seq)
262 *
263 * As long as we update base[0] readers are forced off to
264 * base[1]. Once base[0] is updated readers are redirected to base[0]
265 * and the base[1] update takes place.
266 *
267 * So if an NMI hits the update of base[0] then it will use base[1]
268 * which is still consistent. In the worst case this can result in a
269 * slightly wrong timestamp (a few nanoseconds). See
270 * @ktime_get_mono_fast_ns.
271 */
272static void update_fast_timekeeper(struct timekeeper *tk)
273{
274 struct tk_read_base *base = tk_fast_mono.base;
275
276 /* Force readers off to base[1] */
277 raw_write_seqcount_latch(&tk_fast_mono.seq);
278
279 /* Update base[0] */
280 memcpy(base, &tk->tkr, sizeof(*base));
281
282 /* Force readers back to base[0] */
283 raw_write_seqcount_latch(&tk_fast_mono.seq);
284
285 /* Update base[1] */
286 memcpy(base + 1, base, sizeof(*base));
206} 287}
207 288
289/**
290 * ktime_get_mono_fast_ns - Fast NMI safe access to clock monotonic
291 *
292 * This timestamp is not guaranteed to be monotonic across an update.
293 * The timestamp is calculated by:
294 *
295 * now = base_mono + clock_delta * slope
296 *
297 * So if the update lowers the slope, readers who are forced to the
298 * not yet updated second array are still using the old steeper slope.
299 *
300 * tmono
301 * ^
302 * | o n
303 * | o n
304 * | u
305 * | o
306 * |o
307 * |12345678---> reader order
308 *
309 * o = old slope
310 * u = update
311 * n = new slope
312 *
313 * So reader 6 will observe time going backwards versus reader 5.
314 *
315 * While other CPUs are likely to be able to observe that, the only way
316 * for a CPU local observation is when an NMI hits in the middle of
317 * the update. Timestamps taken from that NMI context might be ahead
318 * of the following timestamps. Callers need to be aware of that and
319 * deal with it.
320 */
321u64 notrace ktime_get_mono_fast_ns(void)
322{
323 struct tk_read_base *tkr;
324 unsigned int seq;
325 u64 now;
326
327 do {
328 seq = raw_read_seqcount(&tk_fast_mono.seq);
329 tkr = tk_fast_mono.base + (seq & 0x01);
330 now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr);
331
332 } while (read_seqcount_retry(&tk_fast_mono.seq, seq));
333 return now;
334}
335EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
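
The protocol described in the comment above can be condensed into a compilable sketch: the writer bumps the sequence count before touching each half, so a reader indexing with seq & 1 always lands on the half that is not currently being rewritten. This is a single-threaded user-space approximation with hypothetical model_* names; the real code depends on raw_write_seqcount_latch() and the smp_wmb()/smp_rmb() placement shown above, which is only marked in comments here:

/* Hypothetical model of the seqcount latch; barriers are marked, not implemented. */
#include <stdio.h>
#include <stdint.h>

struct model_base {
	uint64_t base_mono;	/* accumulated nanoseconds */
	uint64_t mult;		/* cycles -> nanoseconds slope (shift omitted) */
};

struct model_fast {
	unsigned int seq;
	struct model_base base[2];
};

static void model_update(struct model_fast *f, const struct model_base *b)
{
	f->seq++;		/* kernel: raw_write_seqcount_latch(); readers move to base[1] */
	f->base[0] = *b;
	f->seq++;		/* readers move back to base[0] */
	f->base[1] = *b;
}

static uint64_t model_read(struct model_fast *f, uint64_t cycle_delta)
{
	unsigned int seq;
	uint64_t now;

	do {
		seq = f->seq;	/* kernel: raw_read_seqcount(), followed by smp_rmb() */
		now = f->base[seq & 1].base_mono + cycle_delta * f->base[seq & 1].mult;
	} while (seq != f->seq);	/* kernel: read_seqcount_retry() */

	return now;
}

int main(void)
{
	struct model_fast f = { 0, { { 0, 1 }, { 0, 1 } } };
	struct model_base b = { 1000000, 2 };

	model_update(&f, &b);
	printf("%llu\n", (unsigned long long)model_read(&f, 10));	/* 1000020 */
	return 0;
}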
336
337#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
338
339static inline void update_vsyscall(struct timekeeper *tk)
340{
341 struct timespec xt;
342
343 xt = timespec64_to_timespec(tk_xtime(tk));
344 update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->tkr.clock, tk->tkr.mult,
345 tk->tkr.cycle_last);
346}
347
348static inline void old_vsyscall_fixup(struct timekeeper *tk)
349{
350 s64 remainder;
351
352 /*
353 * Store only full nanoseconds into xtime_nsec after rounding
354 * it up and add the remainder to the error difference.
355 * XXX - This is necessary to avoid small 1ns inconsistencies caused
356 * by truncating the remainder in vsyscalls. However, it causes
357 * additional work to be done in timekeeping_adjust(). Once
358 * the vsyscall implementations are converted to use xtime_nsec
359 * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
360 * users are removed, this can be killed.
361 */
362 remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1);
363 tk->tkr.xtime_nsec -= remainder;
364 tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift;
365 tk->ntp_error += remainder << tk->ntp_error_shift;
366 tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift;
367}
368#else
369#define old_vsyscall_fixup(tk)
370#endif
371
208static RAW_NOTIFIER_HEAD(pvclock_gtod_chain); 372static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
209 373
210static void update_pvclock_gtod(struct timekeeper *tk, bool was_set) 374static void update_pvclock_gtod(struct timekeeper *tk, bool was_set)
@@ -217,7 +381,7 @@ static void update_pvclock_gtod(struct timekeeper *tk, bool was_set)
217 */ 381 */
218int pvclock_gtod_register_notifier(struct notifier_block *nb) 382int pvclock_gtod_register_notifier(struct notifier_block *nb)
219{ 383{
220 struct timekeeper *tk = &timekeeper; 384 struct timekeeper *tk = &tk_core.timekeeper;
221 unsigned long flags; 385 unsigned long flags;
222 int ret; 386 int ret;
223 387
@@ -247,6 +411,29 @@ int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
247} 411}
248EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier); 412EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
249 413
414/*
415 * Update the ktime_t based scalar nsec members of the timekeeper
416 */
417static inline void tk_update_ktime_data(struct timekeeper *tk)
418{
419 s64 nsec;
420
421 /*
422 * The xtime based monotonic readout is:
423 * nsec = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec + now();
424 * The ktime based monotonic readout is:
425 * nsec = base_mono + now();
426 * ==> base_mono = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec
427 */
428 nsec = (s64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
429 nsec *= NSEC_PER_SEC;
430 nsec += tk->wall_to_monotonic.tv_nsec;
431 tk->tkr.base_mono = ns_to_ktime(nsec);
432
433 /* Update the monotonic raw base */
434 tk->base_raw = timespec64_to_ktime(tk->raw_time);
435}
436
250/* must hold timekeeper_lock */ 437/* must hold timekeeper_lock */
251static void timekeeping_update(struct timekeeper *tk, unsigned int action) 438static void timekeeping_update(struct timekeeper *tk, unsigned int action)
252{ 439{
@@ -257,8 +444,13 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
257 update_vsyscall(tk); 444 update_vsyscall(tk);
258 update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); 445 update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
259 446
447 tk_update_ktime_data(tk);
448
260 if (action & TK_MIRROR) 449 if (action & TK_MIRROR)
261 memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper)); 450 memcpy(&shadow_timekeeper, &tk_core.timekeeper,
451 sizeof(tk_core.timekeeper));
452
453 update_fast_timekeeper(tk);
262} 454}
263 455
264/** 456/**
@@ -270,49 +462,48 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
270 */ 462 */
271static void timekeeping_forward_now(struct timekeeper *tk) 463static void timekeeping_forward_now(struct timekeeper *tk)
272{ 464{
273 cycle_t cycle_now, cycle_delta; 465 struct clocksource *clock = tk->tkr.clock;
274 struct clocksource *clock; 466 cycle_t cycle_now, delta;
275 s64 nsec; 467 s64 nsec;
276 468
277 clock = tk->clock; 469 cycle_now = tk->tkr.read(clock);
278 cycle_now = clock->read(clock); 470 delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
279 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; 471 tk->tkr.cycle_last = cycle_now;
280 tk->cycle_last = clock->cycle_last = cycle_now;
281 472
282 tk->xtime_nsec += cycle_delta * tk->mult; 473 tk->tkr.xtime_nsec += delta * tk->tkr.mult;
283 474
284 /* If arch requires, add in get_arch_timeoffset() */ 475 /* If arch requires, add in get_arch_timeoffset() */
285 tk->xtime_nsec += (u64)get_arch_timeoffset() << tk->shift; 476 tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift;
286 477
287 tk_normalize_xtime(tk); 478 tk_normalize_xtime(tk);
288 479
289 nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); 480 nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
290 timespec_add_ns(&tk->raw_time, nsec); 481 timespec64_add_ns(&tk->raw_time, nsec);
291} 482}
292 483
293/** 484/**
294 * __getnstimeofday - Returns the time of day in a timespec. 485 * __getnstimeofday64 - Returns the time of day in a timespec64.
295 * @ts: pointer to the timespec to be set 486 * @ts: pointer to the timespec to be set
296 * 487 *
297 * Updates the time of day in the timespec. 488 * Updates the time of day in the timespec.
298 * Returns 0 on success, or -ve when suspended (timespec will be undefined). 489 * Returns 0 on success, or -ve when suspended (timespec will be undefined).
299 */ 490 */
300int __getnstimeofday(struct timespec *ts) 491int __getnstimeofday64(struct timespec64 *ts)
301{ 492{
302 struct timekeeper *tk = &timekeeper; 493 struct timekeeper *tk = &tk_core.timekeeper;
303 unsigned long seq; 494 unsigned long seq;
304 s64 nsecs = 0; 495 s64 nsecs = 0;
305 496
306 do { 497 do {
307 seq = read_seqcount_begin(&timekeeper_seq); 498 seq = read_seqcount_begin(&tk_core.seq);
308 499
309 ts->tv_sec = tk->xtime_sec; 500 ts->tv_sec = tk->xtime_sec;
310 nsecs = timekeeping_get_ns(tk); 501 nsecs = timekeeping_get_ns(&tk->tkr);
311 502
312 } while (read_seqcount_retry(&timekeeper_seq, seq)); 503 } while (read_seqcount_retry(&tk_core.seq, seq));
313 504
314 ts->tv_nsec = 0; 505 ts->tv_nsec = 0;
315 timespec_add_ns(ts, nsecs); 506 timespec64_add_ns(ts, nsecs);
316 507
317 /* 508 /*
318 * Do not bail out early, in case there were callers still using 509 * Do not bail out early, in case there were callers still using
@@ -322,116 +513,138 @@ int __getnstimeofday(struct timespec *ts)
322 return -EAGAIN; 513 return -EAGAIN;
323 return 0; 514 return 0;
324} 515}
325EXPORT_SYMBOL(__getnstimeofday); 516EXPORT_SYMBOL(__getnstimeofday64);
326 517
327/** 518/**
328 * getnstimeofday - Returns the time of day in a timespec. 519 * getnstimeofday64 - Returns the time of day in a timespec64.
329 * @ts: pointer to the timespec to be set 520 * @ts: pointer to the timespec to be set
330 * 521 *
331 * Returns the time of day in a timespec (WARN if suspended). 522 * Returns the time of day in a timespec (WARN if suspended).
332 */ 523 */
333void getnstimeofday(struct timespec *ts) 524void getnstimeofday64(struct timespec64 *ts)
334{ 525{
335 WARN_ON(__getnstimeofday(ts)); 526 WARN_ON(__getnstimeofday64(ts));
336} 527}
337EXPORT_SYMBOL(getnstimeofday); 528EXPORT_SYMBOL(getnstimeofday64);
338 529
339ktime_t ktime_get(void) 530ktime_t ktime_get(void)
340{ 531{
341 struct timekeeper *tk = &timekeeper; 532 struct timekeeper *tk = &tk_core.timekeeper;
342 unsigned int seq; 533 unsigned int seq;
343 s64 secs, nsecs; 534 ktime_t base;
535 s64 nsecs;
344 536
345 WARN_ON(timekeeping_suspended); 537 WARN_ON(timekeeping_suspended);
346 538
347 do { 539 do {
348 seq = read_seqcount_begin(&timekeeper_seq); 540 seq = read_seqcount_begin(&tk_core.seq);
349 secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; 541 base = tk->tkr.base_mono;
350 nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec; 542 nsecs = timekeeping_get_ns(&tk->tkr);
351 543
352 } while (read_seqcount_retry(&timekeeper_seq, seq)); 544 } while (read_seqcount_retry(&tk_core.seq, seq));
353 /* 545
354 * Use ktime_set/ktime_add_ns to create a proper ktime on 546 return ktime_add_ns(base, nsecs);
355 * 32-bit architectures without CONFIG_KTIME_SCALAR.
356 */
357 return ktime_add_ns(ktime_set(secs, 0), nsecs);
358} 547}
359EXPORT_SYMBOL_GPL(ktime_get); 548EXPORT_SYMBOL_GPL(ktime_get);
360 549
361/** 550static ktime_t *offsets[TK_OFFS_MAX] = {
362 * ktime_get_ts - get the monotonic clock in timespec format 551 [TK_OFFS_REAL] = &tk_core.timekeeper.offs_real,
363 * @ts: pointer to timespec variable 552 [TK_OFFS_BOOT] = &tk_core.timekeeper.offs_boot,
364 * 553 [TK_OFFS_TAI] = &tk_core.timekeeper.offs_tai,
365 * The function calculates the monotonic clock from the realtime 554};
366 * clock and the wall_to_monotonic offset and stores the result 555
367 * in normalized timespec format in the variable pointed to by @ts. 556ktime_t ktime_get_with_offset(enum tk_offsets offs)
368 */
369void ktime_get_ts(struct timespec *ts)
370{ 557{
371 struct timekeeper *tk = &timekeeper; 558 struct timekeeper *tk = &tk_core.timekeeper;
372 struct timespec tomono;
373 s64 nsec;
374 unsigned int seq; 559 unsigned int seq;
560 ktime_t base, *offset = offsets[offs];
561 s64 nsecs;
375 562
376 WARN_ON(timekeeping_suspended); 563 WARN_ON(timekeeping_suspended);
377 564
378 do { 565 do {
379 seq = read_seqcount_begin(&timekeeper_seq); 566 seq = read_seqcount_begin(&tk_core.seq);
380 ts->tv_sec = tk->xtime_sec; 567 base = ktime_add(tk->tkr.base_mono, *offset);
381 nsec = timekeeping_get_ns(tk); 568 nsecs = timekeeping_get_ns(&tk->tkr);
382 tomono = tk->wall_to_monotonic;
383 569
384 } while (read_seqcount_retry(&timekeeper_seq, seq)); 570 } while (read_seqcount_retry(&tk_core.seq, seq));
385 571
386 ts->tv_sec += tomono.tv_sec; 572 return ktime_add_ns(base, nsecs);
387 ts->tv_nsec = 0;
388 timespec_add_ns(ts, nsec + tomono.tv_nsec);
389}
390EXPORT_SYMBOL_GPL(ktime_get_ts);
391 573
574}
575EXPORT_SYMBOL_GPL(ktime_get_with_offset);
392 576
393/** 577/**
394 * timekeeping_clocktai - Returns the TAI time of day in a timespec 578 * ktime_mono_to_any() - convert monotonic time to any other time
395 * @ts: pointer to the timespec to be set 579 * @tmono: time to convert.
396 * 580 * @offs: which offset to use
397 * Returns the time of day in a timespec.
398 */ 581 */
399void timekeeping_clocktai(struct timespec *ts) 582ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs)
400{ 583{
401 struct timekeeper *tk = &timekeeper; 584 ktime_t *offset = offsets[offs];
402 unsigned long seq; 585 unsigned long seq;
403 u64 nsecs; 586 ktime_t tconv;
404
405 WARN_ON(timekeeping_suspended);
406 587
407 do { 588 do {
408 seq = read_seqcount_begin(&timekeeper_seq); 589 seq = read_seqcount_begin(&tk_core.seq);
590 tconv = ktime_add(tmono, *offset);
591 } while (read_seqcount_retry(&tk_core.seq, seq));
409 592
410 ts->tv_sec = tk->xtime_sec + tk->tai_offset; 593 return tconv;
411 nsecs = timekeeping_get_ns(tk); 594}
595EXPORT_SYMBOL_GPL(ktime_mono_to_any);
412 596
413 } while (read_seqcount_retry(&timekeeper_seq, seq)); 597/**
598 * ktime_get_raw - Returns the raw monotonic time in ktime_t format
599 */
600ktime_t ktime_get_raw(void)
601{
602 struct timekeeper *tk = &tk_core.timekeeper;
603 unsigned int seq;
604 ktime_t base;
605 s64 nsecs;
414 606
415 ts->tv_nsec = 0; 607 do {
416 timespec_add_ns(ts, nsecs); 608 seq = read_seqcount_begin(&tk_core.seq);
609 base = tk->base_raw;
610 nsecs = timekeeping_get_ns_raw(tk);
417 611
418} 612 } while (read_seqcount_retry(&tk_core.seq, seq));
419EXPORT_SYMBOL(timekeeping_clocktai);
420 613
614 return ktime_add_ns(base, nsecs);
615}
616EXPORT_SYMBOL_GPL(ktime_get_raw);
421 617
422/** 618/**
423 * ktime_get_clocktai - Returns the TAI time of day in a ktime 619 * ktime_get_ts64 - get the monotonic clock in timespec64 format
620 * @ts: pointer to timespec variable
424 * 621 *
425 * Returns the time of day in a ktime. 622 * The function calculates the monotonic clock from the realtime
623 * clock and the wall_to_monotonic offset and stores the result
624 * in normalized timespec format in the variable pointed to by @ts.
426 */ 625 */
427ktime_t ktime_get_clocktai(void) 626void ktime_get_ts64(struct timespec64 *ts)
428{ 627{
429 struct timespec ts; 628 struct timekeeper *tk = &tk_core.timekeeper;
629 struct timespec64 tomono;
630 s64 nsec;
631 unsigned int seq;
632
633 WARN_ON(timekeeping_suspended);
430 634
431 timekeeping_clocktai(&ts); 635 do {
432 return timespec_to_ktime(ts); 636 seq = read_seqcount_begin(&tk_core.seq);
637 ts->tv_sec = tk->xtime_sec;
638 nsec = timekeeping_get_ns(&tk->tkr);
639 tomono = tk->wall_to_monotonic;
640
641 } while (read_seqcount_retry(&tk_core.seq, seq));
642
643 ts->tv_sec += tomono.tv_sec;
644 ts->tv_nsec = 0;
645 timespec64_add_ns(ts, nsec + tomono.tv_nsec);
433} 646}
434EXPORT_SYMBOL(ktime_get_clocktai); 647EXPORT_SYMBOL_GPL(ktime_get_ts64);
435 648
436#ifdef CONFIG_NTP_PPS 649#ifdef CONFIG_NTP_PPS
437 650
@@ -446,23 +659,23 @@ EXPORT_SYMBOL(ktime_get_clocktai);
446 */ 659 */
447void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) 660void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
448{ 661{
449 struct timekeeper *tk = &timekeeper; 662 struct timekeeper *tk = &tk_core.timekeeper;
450 unsigned long seq; 663 unsigned long seq;
451 s64 nsecs_raw, nsecs_real; 664 s64 nsecs_raw, nsecs_real;
452 665
453 WARN_ON_ONCE(timekeeping_suspended); 666 WARN_ON_ONCE(timekeeping_suspended);
454 667
455 do { 668 do {
456 seq = read_seqcount_begin(&timekeeper_seq); 669 seq = read_seqcount_begin(&tk_core.seq);
457 670
458 *ts_raw = tk->raw_time; 671 *ts_raw = timespec64_to_timespec(tk->raw_time);
459 ts_real->tv_sec = tk->xtime_sec; 672 ts_real->tv_sec = tk->xtime_sec;
460 ts_real->tv_nsec = 0; 673 ts_real->tv_nsec = 0;
461 674
462 nsecs_raw = timekeeping_get_ns_raw(tk); 675 nsecs_raw = timekeeping_get_ns_raw(tk);
463 nsecs_real = timekeeping_get_ns(tk); 676 nsecs_real = timekeeping_get_ns(&tk->tkr);
464 677
465 } while (read_seqcount_retry(&timekeeper_seq, seq)); 678 } while (read_seqcount_retry(&tk_core.seq, seq));
466 679
467 timespec_add_ns(ts_raw, nsecs_raw); 680 timespec_add_ns(ts_raw, nsecs_raw);
468 timespec_add_ns(ts_real, nsecs_real); 681 timespec_add_ns(ts_real, nsecs_real);
@@ -479,9 +692,9 @@ EXPORT_SYMBOL(getnstime_raw_and_real);
479 */ 692 */
480void do_gettimeofday(struct timeval *tv) 693void do_gettimeofday(struct timeval *tv)
481{ 694{
482 struct timespec now; 695 struct timespec64 now;
483 696
484 getnstimeofday(&now); 697 getnstimeofday64(&now);
485 tv->tv_sec = now.tv_sec; 698 tv->tv_sec = now.tv_sec;
486 tv->tv_usec = now.tv_nsec/1000; 699 tv->tv_usec = now.tv_nsec/1000;
487} 700}
@@ -495,15 +708,15 @@ EXPORT_SYMBOL(do_gettimeofday);
495 */ 708 */
496int do_settimeofday(const struct timespec *tv) 709int do_settimeofday(const struct timespec *tv)
497{ 710{
498 struct timekeeper *tk = &timekeeper; 711 struct timekeeper *tk = &tk_core.timekeeper;
499 struct timespec ts_delta, xt; 712 struct timespec64 ts_delta, xt, tmp;
500 unsigned long flags; 713 unsigned long flags;
501 714
502 if (!timespec_valid_strict(tv)) 715 if (!timespec_valid_strict(tv))
503 return -EINVAL; 716 return -EINVAL;
504 717
505 raw_spin_lock_irqsave(&timekeeper_lock, flags); 718 raw_spin_lock_irqsave(&timekeeper_lock, flags);
506 write_seqcount_begin(&timekeeper_seq); 719 write_seqcount_begin(&tk_core.seq);
507 720
508 timekeeping_forward_now(tk); 721 timekeeping_forward_now(tk);
509 722
@@ -511,13 +724,14 @@ int do_settimeofday(const struct timespec *tv)
511 ts_delta.tv_sec = tv->tv_sec - xt.tv_sec; 724 ts_delta.tv_sec = tv->tv_sec - xt.tv_sec;
512 ts_delta.tv_nsec = tv->tv_nsec - xt.tv_nsec; 725 ts_delta.tv_nsec = tv->tv_nsec - xt.tv_nsec;
513 726
514 tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, ts_delta)); 727 tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts_delta));
515 728
516 tk_set_xtime(tk, tv); 729 tmp = timespec_to_timespec64(*tv);
730 tk_set_xtime(tk, &tmp);
517 731
518 timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); 732 timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
519 733
520 write_seqcount_end(&timekeeper_seq); 734 write_seqcount_end(&tk_core.seq);
521 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 735 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
522 736
523 /* signal hrtimers about time change */ 737 /* signal hrtimers about time change */
@@ -535,33 +749,35 @@ EXPORT_SYMBOL(do_settimeofday);
535 */ 749 */
536int timekeeping_inject_offset(struct timespec *ts) 750int timekeeping_inject_offset(struct timespec *ts)
537{ 751{
538 struct timekeeper *tk = &timekeeper; 752 struct timekeeper *tk = &tk_core.timekeeper;
539 unsigned long flags; 753 unsigned long flags;
540 struct timespec tmp; 754 struct timespec64 ts64, tmp;
541 int ret = 0; 755 int ret = 0;
542 756
543 if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) 757 if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
544 return -EINVAL; 758 return -EINVAL;
545 759
760 ts64 = timespec_to_timespec64(*ts);
761
546 raw_spin_lock_irqsave(&timekeeper_lock, flags); 762 raw_spin_lock_irqsave(&timekeeper_lock, flags);
547 write_seqcount_begin(&timekeeper_seq); 763 write_seqcount_begin(&tk_core.seq);
548 764
549 timekeeping_forward_now(tk); 765 timekeeping_forward_now(tk);
550 766
551 /* Make sure the proposed value is valid */ 767 /* Make sure the proposed value is valid */
552 tmp = timespec_add(tk_xtime(tk), *ts); 768 tmp = timespec64_add(tk_xtime(tk), ts64);
553 if (!timespec_valid_strict(&tmp)) { 769 if (!timespec64_valid_strict(&tmp)) {
554 ret = -EINVAL; 770 ret = -EINVAL;
555 goto error; 771 goto error;
556 } 772 }
557 773
558 tk_xtime_add(tk, ts); 774 tk_xtime_add(tk, &ts64);
559 tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts)); 775 tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts64));
560 776
561error: /* even if we error out, we forwarded the time, so call update */ 777error: /* even if we error out, we forwarded the time, so call update */
562 timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); 778 timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
563 779
564 write_seqcount_end(&timekeeper_seq); 780 write_seqcount_end(&tk_core.seq);
565 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 781 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
566 782
567 /* signal hrtimers about time change */ 783 /* signal hrtimers about time change */
@@ -578,14 +794,14 @@ EXPORT_SYMBOL(timekeeping_inject_offset);
578 */ 794 */
579s32 timekeeping_get_tai_offset(void) 795s32 timekeeping_get_tai_offset(void)
580{ 796{
581 struct timekeeper *tk = &timekeeper; 797 struct timekeeper *tk = &tk_core.timekeeper;
582 unsigned int seq; 798 unsigned int seq;
583 s32 ret; 799 s32 ret;
584 800
585 do { 801 do {
586 seq = read_seqcount_begin(&timekeeper_seq); 802 seq = read_seqcount_begin(&tk_core.seq);
587 ret = tk->tai_offset; 803 ret = tk->tai_offset;
588 } while (read_seqcount_retry(&timekeeper_seq, seq)); 804 } while (read_seqcount_retry(&tk_core.seq, seq));
589 805
590 return ret; 806 return ret;
591} 807}
@@ -606,14 +822,14 @@ static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
606 */ 822 */
607void timekeeping_set_tai_offset(s32 tai_offset) 823void timekeeping_set_tai_offset(s32 tai_offset)
608{ 824{
609 struct timekeeper *tk = &timekeeper; 825 struct timekeeper *tk = &tk_core.timekeeper;
610 unsigned long flags; 826 unsigned long flags;
611 827
612 raw_spin_lock_irqsave(&timekeeper_lock, flags); 828 raw_spin_lock_irqsave(&timekeeper_lock, flags);
613 write_seqcount_begin(&timekeeper_seq); 829 write_seqcount_begin(&tk_core.seq);
614 __timekeeping_set_tai_offset(tk, tai_offset); 830 __timekeeping_set_tai_offset(tk, tai_offset);
615 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); 831 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
616 write_seqcount_end(&timekeeper_seq); 832 write_seqcount_end(&tk_core.seq);
617 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 833 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
618 clock_was_set(); 834 clock_was_set();
619} 835}
@@ -625,14 +841,14 @@ void timekeeping_set_tai_offset(s32 tai_offset)
625 */ 841 */
626static int change_clocksource(void *data) 842static int change_clocksource(void *data)
627{ 843{
628 struct timekeeper *tk = &timekeeper; 844 struct timekeeper *tk = &tk_core.timekeeper;
629 struct clocksource *new, *old; 845 struct clocksource *new, *old;
630 unsigned long flags; 846 unsigned long flags;
631 847
632 new = (struct clocksource *) data; 848 new = (struct clocksource *) data;
633 849
634 raw_spin_lock_irqsave(&timekeeper_lock, flags); 850 raw_spin_lock_irqsave(&timekeeper_lock, flags);
635 write_seqcount_begin(&timekeeper_seq); 851 write_seqcount_begin(&tk_core.seq);
636 852
637 timekeeping_forward_now(tk); 853 timekeeping_forward_now(tk);
638 /* 854 /*
@@ -641,7 +857,7 @@ static int change_clocksource(void *data)
641 */ 857 */
642 if (try_module_get(new->owner)) { 858 if (try_module_get(new->owner)) {
643 if (!new->enable || new->enable(new) == 0) { 859 if (!new->enable || new->enable(new) == 0) {
644 old = tk->clock; 860 old = tk->tkr.clock;
645 tk_setup_internals(tk, new); 861 tk_setup_internals(tk, new);
646 if (old->disable) 862 if (old->disable)
647 old->disable(old); 863 old->disable(old);
@@ -652,7 +868,7 @@ static int change_clocksource(void *data)
652 } 868 }
653 timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); 869 timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
654 870
655 write_seqcount_end(&timekeeper_seq); 871 write_seqcount_end(&tk_core.seq);
656 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 872 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
657 873
658 return 0; 874 return 0;
@@ -667,29 +883,14 @@ static int change_clocksource(void *data)
667 */ 883 */
668int timekeeping_notify(struct clocksource *clock) 884int timekeeping_notify(struct clocksource *clock)
669{ 885{
670 struct timekeeper *tk = &timekeeper; 886 struct timekeeper *tk = &tk_core.timekeeper;
671 887
672 if (tk->clock == clock) 888 if (tk->tkr.clock == clock)
673 return 0; 889 return 0;
674 stop_machine(change_clocksource, clock, NULL); 890 stop_machine(change_clocksource, clock, NULL);
675 tick_clock_notify(); 891 tick_clock_notify();
676 return tk->clock == clock ? 0 : -1; 892 return tk->tkr.clock == clock ? 0 : -1;
677}
678
679/**
680 * ktime_get_real - get the real (wall-) time in ktime_t format
681 *
682 * returns the time in ktime_t format
683 */
684ktime_t ktime_get_real(void)
685{
686 struct timespec now;
687
688 getnstimeofday(&now);
689
690 return timespec_to_ktime(now);
691} 893}
692EXPORT_SYMBOL_GPL(ktime_get_real);
693 894
694/** 895/**
695 * getrawmonotonic - Returns the raw monotonic time in a timespec 896 * getrawmonotonic - Returns the raw monotonic time in a timespec
@@ -699,18 +900,20 @@ EXPORT_SYMBOL_GPL(ktime_get_real);
699 */ 900 */
700void getrawmonotonic(struct timespec *ts) 901void getrawmonotonic(struct timespec *ts)
701{ 902{
702 struct timekeeper *tk = &timekeeper; 903 struct timekeeper *tk = &tk_core.timekeeper;
904 struct timespec64 ts64;
703 unsigned long seq; 905 unsigned long seq;
704 s64 nsecs; 906 s64 nsecs;
705 907
706 do { 908 do {
707 seq = read_seqcount_begin(&timekeeper_seq); 909 seq = read_seqcount_begin(&tk_core.seq);
708 nsecs = timekeeping_get_ns_raw(tk); 910 nsecs = timekeeping_get_ns_raw(tk);
709 *ts = tk->raw_time; 911 ts64 = tk->raw_time;
710 912
711 } while (read_seqcount_retry(&timekeeper_seq, seq)); 913 } while (read_seqcount_retry(&tk_core.seq, seq));
712 914
713 timespec_add_ns(ts, nsecs); 915 timespec64_add_ns(&ts64, nsecs);
916 *ts = timespec64_to_timespec(ts64);
714} 917}
715EXPORT_SYMBOL(getrawmonotonic); 918EXPORT_SYMBOL(getrawmonotonic);
716 919
@@ -719,16 +922,16 @@ EXPORT_SYMBOL(getrawmonotonic);
719 */ 922 */
720int timekeeping_valid_for_hres(void) 923int timekeeping_valid_for_hres(void)
721{ 924{
722 struct timekeeper *tk = &timekeeper; 925 struct timekeeper *tk = &tk_core.timekeeper;
723 unsigned long seq; 926 unsigned long seq;
724 int ret; 927 int ret;
725 928
726 do { 929 do {
727 seq = read_seqcount_begin(&timekeeper_seq); 930 seq = read_seqcount_begin(&tk_core.seq);
728 931
729 ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; 932 ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
730 933
731 } while (read_seqcount_retry(&timekeeper_seq, seq)); 934 } while (read_seqcount_retry(&tk_core.seq, seq));
732 935
733 return ret; 936 return ret;
734} 937}
@@ -738,16 +941,16 @@ int timekeeping_valid_for_hres(void)
738 */ 941 */
739u64 timekeeping_max_deferment(void) 942u64 timekeeping_max_deferment(void)
740{ 943{
741 struct timekeeper *tk = &timekeeper; 944 struct timekeeper *tk = &tk_core.timekeeper;
742 unsigned long seq; 945 unsigned long seq;
743 u64 ret; 946 u64 ret;
744 947
745 do { 948 do {
746 seq = read_seqcount_begin(&timekeeper_seq); 949 seq = read_seqcount_begin(&tk_core.seq);
747 950
748 ret = tk->clock->max_idle_ns; 951 ret = tk->tkr.clock->max_idle_ns;
749 952
750 } while (read_seqcount_retry(&timekeeper_seq, seq)); 953 } while (read_seqcount_retry(&tk_core.seq, seq));
751 954
752 return ret; 955 return ret;
753} 956}
@@ -787,14 +990,15 @@ void __weak read_boot_clock(struct timespec *ts)
787 */ 990 */
788void __init timekeeping_init(void) 991void __init timekeeping_init(void)
789{ 992{
790 struct timekeeper *tk = &timekeeper; 993 struct timekeeper *tk = &tk_core.timekeeper;
791 struct clocksource *clock; 994 struct clocksource *clock;
792 unsigned long flags; 995 unsigned long flags;
793 struct timespec now, boot, tmp; 996 struct timespec64 now, boot, tmp;
794 997 struct timespec ts;
795 read_persistent_clock(&now);
796 998
797 if (!timespec_valid_strict(&now)) { 999 read_persistent_clock(&ts);
1000 now = timespec_to_timespec64(ts);
1001 if (!timespec64_valid_strict(&now)) {
798 pr_warn("WARNING: Persistent clock returned invalid value!\n" 1002 pr_warn("WARNING: Persistent clock returned invalid value!\n"
799 " Check your CMOS/BIOS settings.\n"); 1003 " Check your CMOS/BIOS settings.\n");
800 now.tv_sec = 0; 1004 now.tv_sec = 0;
@@ -802,8 +1006,9 @@ void __init timekeeping_init(void)
802 } else if (now.tv_sec || now.tv_nsec) 1006 } else if (now.tv_sec || now.tv_nsec)
803 persistent_clock_exist = true; 1007 persistent_clock_exist = true;
804 1008
805 read_boot_clock(&boot); 1009 read_boot_clock(&ts);
806 if (!timespec_valid_strict(&boot)) { 1010 boot = timespec_to_timespec64(ts);
1011 if (!timespec64_valid_strict(&boot)) {
807 pr_warn("WARNING: Boot clock returned invalid value!\n" 1012 pr_warn("WARNING: Boot clock returned invalid value!\n"
808 " Check your CMOS/BIOS settings.\n"); 1013 " Check your CMOS/BIOS settings.\n");
809 boot.tv_sec = 0; 1014 boot.tv_sec = 0;
@@ -811,7 +1016,7 @@ void __init timekeeping_init(void)
811 } 1016 }
812 1017
813 raw_spin_lock_irqsave(&timekeeper_lock, flags); 1018 raw_spin_lock_irqsave(&timekeeper_lock, flags);
814 write_seqcount_begin(&timekeeper_seq); 1019 write_seqcount_begin(&tk_core.seq);
815 ntp_init(); 1020 ntp_init();
816 1021
817 clock = clocksource_default_clock(); 1022 clock = clocksource_default_clock();
@@ -822,24 +1027,21 @@ void __init timekeeping_init(void)
822 tk_set_xtime(tk, &now); 1027 tk_set_xtime(tk, &now);
823 tk->raw_time.tv_sec = 0; 1028 tk->raw_time.tv_sec = 0;
824 tk->raw_time.tv_nsec = 0; 1029 tk->raw_time.tv_nsec = 0;
1030 tk->base_raw.tv64 = 0;
825 if (boot.tv_sec == 0 && boot.tv_nsec == 0) 1031 if (boot.tv_sec == 0 && boot.tv_nsec == 0)
826 boot = tk_xtime(tk); 1032 boot = tk_xtime(tk);
827 1033
828 set_normalized_timespec(&tmp, -boot.tv_sec, -boot.tv_nsec); 1034 set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec);
829 tk_set_wall_to_mono(tk, tmp); 1035 tk_set_wall_to_mono(tk, tmp);
830 1036
831 tmp.tv_sec = 0; 1037 timekeeping_update(tk, TK_MIRROR);
832 tmp.tv_nsec = 0;
833 tk_set_sleep_time(tk, tmp);
834
835 memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper));
836 1038
837 write_seqcount_end(&timekeeper_seq); 1039 write_seqcount_end(&tk_core.seq);
838 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 1040 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
839} 1041}
840 1042
841/* time in seconds when suspend began */ 1043/* time in seconds when suspend began */
842static struct timespec timekeeping_suspend_time; 1044static struct timespec64 timekeeping_suspend_time;
843 1045
844/** 1046/**
845 * __timekeeping_inject_sleeptime - Internal function to add sleep interval 1047 * __timekeeping_inject_sleeptime - Internal function to add sleep interval
@@ -849,17 +1051,17 @@ static struct timespec timekeeping_suspend_time;
849 * adds the sleep offset to the timekeeping variables. 1051 * adds the sleep offset to the timekeeping variables.
850 */ 1052 */
851static void __timekeeping_inject_sleeptime(struct timekeeper *tk, 1053static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
852 struct timespec *delta) 1054 struct timespec64 *delta)
853{ 1055{
854 if (!timespec_valid_strict(delta)) { 1056 if (!timespec64_valid_strict(delta)) {
855 printk_deferred(KERN_WARNING 1057 printk_deferred(KERN_WARNING
856 "__timekeeping_inject_sleeptime: Invalid " 1058 "__timekeeping_inject_sleeptime: Invalid "
857 "sleep delta value!\n"); 1059 "sleep delta value!\n");
858 return; 1060 return;
859 } 1061 }
860 tk_xtime_add(tk, delta); 1062 tk_xtime_add(tk, delta);
861 tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *delta)); 1063 tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta));
862 tk_set_sleep_time(tk, timespec_add(tk->total_sleep_time, *delta)); 1064 tk_update_sleep_time(tk, timespec64_to_ktime(*delta));
863 tk_debug_account_sleep_time(delta); 1065 tk_debug_account_sleep_time(delta);
864} 1066}
865 1067
@@ -875,7 +1077,8 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
875 */ 1077 */
876void timekeeping_inject_sleeptime(struct timespec *delta) 1078void timekeeping_inject_sleeptime(struct timespec *delta)
877{ 1079{
878 struct timekeeper *tk = &timekeeper; 1080 struct timekeeper *tk = &tk_core.timekeeper;
1081 struct timespec64 tmp;
879 unsigned long flags; 1082 unsigned long flags;
880 1083
881 /* 1084 /*
@@ -886,15 +1089,16 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
886 return; 1089 return;
887 1090
888 raw_spin_lock_irqsave(&timekeeper_lock, flags); 1091 raw_spin_lock_irqsave(&timekeeper_lock, flags);
889 write_seqcount_begin(&timekeeper_seq); 1092 write_seqcount_begin(&tk_core.seq);
890 1093
891 timekeeping_forward_now(tk); 1094 timekeeping_forward_now(tk);
892 1095
893 __timekeeping_inject_sleeptime(tk, delta); 1096 tmp = timespec_to_timespec64(*delta);
1097 __timekeeping_inject_sleeptime(tk, &tmp);
894 1098
895 timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); 1099 timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
896 1100
897 write_seqcount_end(&timekeeper_seq); 1101 write_seqcount_end(&tk_core.seq);
898 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 1102 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
899 1103
900 /* signal hrtimers about time change */ 1104 /* signal hrtimers about time change */
@@ -910,20 +1114,22 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
910 */ 1114 */
911static void timekeeping_resume(void) 1115static void timekeeping_resume(void)
912{ 1116{
913 struct timekeeper *tk = &timekeeper; 1117 struct timekeeper *tk = &tk_core.timekeeper;
914 struct clocksource *clock = tk->clock; 1118 struct clocksource *clock = tk->tkr.clock;
915 unsigned long flags; 1119 unsigned long flags;
916 struct timespec ts_new, ts_delta; 1120 struct timespec64 ts_new, ts_delta;
1121 struct timespec tmp;
917 cycle_t cycle_now, cycle_delta; 1122 cycle_t cycle_now, cycle_delta;
918 bool suspendtime_found = false; 1123 bool suspendtime_found = false;
919 1124
920 read_persistent_clock(&ts_new); 1125 read_persistent_clock(&tmp);
1126 ts_new = timespec_to_timespec64(tmp);
921 1127
922 clockevents_resume(); 1128 clockevents_resume();
923 clocksource_resume(); 1129 clocksource_resume();
924 1130
925 raw_spin_lock_irqsave(&timekeeper_lock, flags); 1131 raw_spin_lock_irqsave(&timekeeper_lock, flags);
926 write_seqcount_begin(&timekeeper_seq); 1132 write_seqcount_begin(&tk_core.seq);
927 1133
928 /* 1134 /*
929 * After system resumes, we need to calculate the suspended time and 1135 * After system resumes, we need to calculate the suspended time and
@@ -937,15 +1143,16 @@ static void timekeeping_resume(void)
937 * The less preferred source will only be tried if there is no better 1143 * The less preferred source will only be tried if there is no better
938 * usable source. The rtc part is handled separately in rtc core code. 1144 * usable source. The rtc part is handled separately in rtc core code.
939 */ 1145 */
940 cycle_now = clock->read(clock); 1146 cycle_now = tk->tkr.read(clock);
941 if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && 1147 if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
942 cycle_now > clock->cycle_last) { 1148 cycle_now > tk->tkr.cycle_last) {
943 u64 num, max = ULLONG_MAX; 1149 u64 num, max = ULLONG_MAX;
944 u32 mult = clock->mult; 1150 u32 mult = clock->mult;
945 u32 shift = clock->shift; 1151 u32 shift = clock->shift;
946 s64 nsec = 0; 1152 s64 nsec = 0;
947 1153
948 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; 1154 cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last,
1155 tk->tkr.mask);
949 1156
950 /* 1157 /*
951 * "cycle_delta * mult" may cause a 64-bit overflow, if the 1158
@@ -960,10 +1167,10 @@ static void timekeeping_resume(void)
960 } 1167 }
961 nsec += ((u64) cycle_delta * mult) >> shift; 1168 nsec += ((u64) cycle_delta * mult) >> shift;
962 1169
963 ts_delta = ns_to_timespec(nsec); 1170 ts_delta = ns_to_timespec64(nsec);
964 suspendtime_found = true; 1171 suspendtime_found = true;
965 } else if (timespec_compare(&ts_new, &timekeeping_suspend_time) > 0) { 1172 } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) {
966 ts_delta = timespec_sub(ts_new, timekeeping_suspend_time); 1173 ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time);
967 suspendtime_found = true; 1174 suspendtime_found = true;
968 } 1175 }
969 1176
@@ -971,11 +1178,11 @@ static void timekeeping_resume(void)
971 __timekeeping_inject_sleeptime(tk, &ts_delta); 1178 __timekeeping_inject_sleeptime(tk, &ts_delta);
972 1179
973 /* Re-base the last cycle value */ 1180 /* Re-base the last cycle value */
974 tk->cycle_last = clock->cycle_last = cycle_now; 1181 tk->tkr.cycle_last = cycle_now;
975 tk->ntp_error = 0; 1182 tk->ntp_error = 0;
976 timekeeping_suspended = 0; 1183 timekeeping_suspended = 0;
977 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); 1184 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
978 write_seqcount_end(&timekeeper_seq); 1185 write_seqcount_end(&tk_core.seq);
979 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 1186 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
980 1187
981 touch_softlockup_watchdog(); 1188 touch_softlockup_watchdog();
@@ -988,12 +1195,14 @@ static void timekeeping_resume(void)
988 1195
989static int timekeeping_suspend(void) 1196static int timekeeping_suspend(void)
990{ 1197{
991 struct timekeeper *tk = &timekeeper; 1198 struct timekeeper *tk = &tk_core.timekeeper;
992 unsigned long flags; 1199 unsigned long flags;
993 struct timespec delta, delta_delta; 1200 struct timespec64 delta, delta_delta;
994 static struct timespec old_delta; 1201 static struct timespec64 old_delta;
1202 struct timespec tmp;
995 1203
996 read_persistent_clock(&timekeeping_suspend_time); 1204 read_persistent_clock(&tmp);
1205 timekeeping_suspend_time = timespec_to_timespec64(tmp);
997 1206
998 /* 1207 /*
999 * On some systems the persistent_clock can not be detected at 1208 * On some systems the persistent_clock can not be detected at
@@ -1004,7 +1213,7 @@ static int timekeeping_suspend(void)
1004 persistent_clock_exist = true; 1213 persistent_clock_exist = true;
1005 1214
1006 raw_spin_lock_irqsave(&timekeeper_lock, flags); 1215 raw_spin_lock_irqsave(&timekeeper_lock, flags);
1007 write_seqcount_begin(&timekeeper_seq); 1216 write_seqcount_begin(&tk_core.seq);
1008 timekeeping_forward_now(tk); 1217 timekeeping_forward_now(tk);
1009 timekeeping_suspended = 1; 1218 timekeeping_suspended = 1;
1010 1219
@@ -1014,8 +1223,8 @@ static int timekeeping_suspend(void)
1014 * try to compensate so the difference in system time 1223 * try to compensate so the difference in system time
1015 * and persistent_clock time stays close to constant. 1224 * and persistent_clock time stays close to constant.
1016 */ 1225 */
1017 delta = timespec_sub(tk_xtime(tk), timekeeping_suspend_time); 1226 delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time);
1018 delta_delta = timespec_sub(delta, old_delta); 1227 delta_delta = timespec64_sub(delta, old_delta);
1019 if (abs(delta_delta.tv_sec) >= 2) { 1228 if (abs(delta_delta.tv_sec) >= 2) {
1020 /* 1229 /*
1021 * if delta_delta is too large, assume time correction 1230 * if delta_delta is too large, assume time correction
@@ -1025,11 +1234,11 @@ static int timekeeping_suspend(void)
1025 } else { 1234 } else {
1026 /* Otherwise try to adjust old_system to compensate */ 1235 /* Otherwise try to adjust old_system to compensate */
1027 timekeeping_suspend_time = 1236 timekeeping_suspend_time =
1028 timespec_add(timekeeping_suspend_time, delta_delta); 1237 timespec64_add(timekeeping_suspend_time, delta_delta);
1029 } 1238 }
1030 1239
1031 timekeeping_update(tk, TK_MIRROR); 1240 timekeeping_update(tk, TK_MIRROR);
1032 write_seqcount_end(&timekeeper_seq); 1241 write_seqcount_end(&tk_core.seq);
1033 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 1242 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1034 1243
1035 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); 1244 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
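
The delta/delta_delta bookkeeping a few lines above is easier to see in isolation. Below is a minimal userspace sketch (whole seconds only, hypothetical names, not kernel code) of the same idea: small read jitter of the persistent clock is folded back into the recorded suspend time, while a jump of two seconds or more is treated as a deliberate clock correction that only resets the baseline.

#include <stdio.h>
#include <stdlib.h>

/*
 * Toy model of the suspend-time compensation: hypothetical names,
 * plain seconds instead of timespec64.
 */
static long old_delta;   /* persists across "suspends", as in the kernel */

static long account_suspend(long xtime_sec, long *suspend_time_sec)
{
        long delta = xtime_sec - *suspend_time_sec;
        long delta_delta = delta - old_delta;

        if (labs(delta_delta) >= 2) {
                /* time was likely stepped: restart tracking */
                old_delta = delta;
        } else {
                /* compensate for sub-2s read jitter of the RTC */
                *suspend_time_sec += delta_delta;
        }
        return *suspend_time_sec;
}

int main(void)
{
        long suspend_time = 1000;

        printf("%ld\n", account_suspend(1500, &suspend_time)); /* 1000: first pass sets the baseline */
        suspend_time = 2000;
        printf("%ld\n", account_suspend(2501, &suspend_time)); /* 2001: +1s jitter folded in */
        suspend_time = 3000;
        printf("%ld\n", account_suspend(3600, &suspend_time)); /* 3000: 100s step resets the baseline */
        return 0;
}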
@@ -1050,125 +1259,34 @@ static int __init timekeeping_init_ops(void)
1050 register_syscore_ops(&timekeeping_syscore_ops); 1259 register_syscore_ops(&timekeeping_syscore_ops);
1051 return 0; 1260 return 0;
1052} 1261}
1053
1054device_initcall(timekeeping_init_ops); 1262device_initcall(timekeeping_init_ops);
1055 1263
1056/* 1264/*
1057 * If the error is already larger, we look ahead even further 1265 * Apply a multiplier adjustment to the timekeeper
1058 * to compensate for late or lost adjustments.
1059 */ 1266 */
1060static __always_inline int timekeeping_bigadjust(struct timekeeper *tk, 1267static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
1061 s64 error, s64 *interval, 1268 s64 offset,
1062 s64 *offset) 1269 bool negative,
1270 int adj_scale)
1063{ 1271{
1064 s64 tick_error, i; 1272 s64 interval = tk->cycle_interval;
1065 u32 look_ahead, adj; 1273 s32 mult_adj = 1;
1066 s32 error2, mult;
1067
1068 /*
1069 * Use the current error value to determine how much to look ahead.
1070 * The larger the error the slower we adjust for it to avoid problems
1071 * with losing too many ticks, otherwise we would overadjust and
1072 * produce an even larger error. The smaller the adjustment the
1073 * faster we try to adjust for it, as lost ticks can do less harm
1074 * here. This is tuned so that an error of about 1 msec is adjusted
1075 * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks).
1076 */
1077 error2 = tk->ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ);
1078 error2 = abs(error2);
1079 for (look_ahead = 0; error2 > 0; look_ahead++)
1080 error2 >>= 2;
1081 1274
1082 /* 1275 if (negative) {
1083 * Now calculate the error in (1 << look_ahead) ticks, but first 1276 mult_adj = -mult_adj;
1084 * remove the single look ahead already included in the error. 1277 interval = -interval;
1085 */ 1278 offset = -offset;
1086 tick_error = ntp_tick_length() >> (tk->ntp_error_shift + 1);
1087 tick_error -= tk->xtime_interval >> 1;
1088 error = ((error - tick_error) >> look_ahead) + tick_error;
1089
1090 /* Finally calculate the adjustment shift value. */
1091 i = *interval;
1092 mult = 1;
1093 if (error < 0) {
1094 error = -error;
1095 *interval = -*interval;
1096 *offset = -*offset;
1097 mult = -1;
1098 } 1279 }
1099 for (adj = 0; error > i; adj++) 1280 mult_adj <<= adj_scale;
1100 error >>= 1; 1281 interval <<= adj_scale;
1101 1282 offset <<= adj_scale;
1102 *interval <<= adj;
1103 *offset <<= adj;
1104 return mult << adj;
1105}
1106
1107/*
1108 * Adjust the multiplier to reduce the error value,
1109 * this is optimized for the most common adjustments of -1,0,1,
1110 * for other values we can do a bit more work.
1111 */
1112static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
1113{
1114 s64 error, interval = tk->cycle_interval;
1115 int adj;
1116 1283
1117 /* 1284 /*
1118 * The point of this is to check if the error is greater than half
1119 * an interval.
1120 *
1121 * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs.
1122 *
1123 * Note we subtract one in the shift, so that error is really error*2.
1124 * This "saves" dividing(shifting) interval twice, but keeps the
1125 * (error > interval) comparison as still measuring if error is
1126 * larger than half an interval.
1127 *
1128 * Note: It does not "save" on aggravation when reading the code.
1129 */
1130 error = tk->ntp_error >> (tk->ntp_error_shift - 1);
1131 if (error > interval) {
1132 /*
1133 * We now divide error by 4(via shift), which checks if
1134 * the error is greater than twice the interval.
 1135 * If it is greater, we need a bigadjust, if it's smaller,
1136 * we can adjust by 1.
1137 */
1138 error >>= 2;
1139 if (likely(error <= interval))
1140 adj = 1;
1141 else
1142 adj = timekeeping_bigadjust(tk, error, &interval, &offset);
1143 } else {
1144 if (error < -interval) {
1145 /* See comment above, this is just switched for the negative */
1146 error >>= 2;
1147 if (likely(error >= -interval)) {
1148 adj = -1;
1149 interval = -interval;
1150 offset = -offset;
1151 } else {
1152 adj = timekeeping_bigadjust(tk, error, &interval, &offset);
1153 }
1154 } else {
1155 goto out_adjust;
1156 }
1157 }
1158
1159 if (unlikely(tk->clock->maxadj &&
1160 (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) {
1161 printk_deferred_once(KERN_WARNING
1162 "Adjusting %s more than 11%% (%ld vs %ld)\n",
1163 tk->clock->name, (long)tk->mult + adj,
1164 (long)tk->clock->mult + tk->clock->maxadj);
1165 }
1166 /*
1167 * So the following can be confusing. 1285 * So the following can be confusing.
1168 * 1286 *
 1169 * To keep things simple, let's assume adj == 1 for now. 1287 * To keep things simple, let's assume mult_adj == 1 for now.
1170 * 1288 *
1171 * When adj != 1, remember that the interval and offset values 1289 * When mult_adj != 1, remember that the interval and offset values
1172 * have been appropriately scaled so the math is the same. 1290 * have been appropriately scaled so the math is the same.
1173 * 1291 *
1174 * The basic idea here is that we're increasing the multiplier 1292 * The basic idea here is that we're increasing the multiplier
@@ -1212,12 +1330,78 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
1212 * 1330 *
1213 * XXX - TODO: Doc ntp_error calculation. 1331 * XXX - TODO: Doc ntp_error calculation.
1214 */ 1332 */
1215 tk->mult += adj; 1333 tk->tkr.mult += mult_adj;
1216 tk->xtime_interval += interval; 1334 tk->xtime_interval += interval;
1217 tk->xtime_nsec -= offset; 1335 tk->tkr.xtime_nsec -= offset;
1218 tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; 1336 tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
1337}
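
The reason interval and offset are shifted together with mult_adj is that scaling all three by 2^adj_scale is arithmetically identical to applying the unit (+/-1) adjustment 2^adj_scale times. The following is a minimal userspace sketch of that identity with simplified stand-in fields (ntp_error omitted); it is illustrative only, not the kernel implementation.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the timekeeper fields touched above. */
struct tk_model {
        int64_t mult;       /* clocksource multiplier            */
        int64_t xtime_it;   /* xtime_interval (shifted ns/tick)  */
        int64_t xtime_nsec; /* shifted nanoseconds               */
};

/* Apply a +/-1 adjustment scaled by 2^adj_scale, as in the patch. */
static void apply_adjustment(struct tk_model *tk, int64_t interval,
                             int64_t offset, int negative, int adj_scale)
{
        int64_t mult_adj = 1;

        if (negative) {
                mult_adj = -mult_adj;
                interval = -interval;
                offset = -offset;
        }
        mult_adj <<= adj_scale;
        interval <<= adj_scale;
        offset <<= adj_scale;

        tk->mult += mult_adj;
        tk->xtime_it += interval;
        tk->xtime_nsec -= offset;
}

int main(void)
{
        struct tk_model a = { 1000, 4096, 1 << 20 };
        struct tk_model b = a;
        int64_t interval = 4096, offset = 12345;

        /* One step scaled by 2^3 ... */
        apply_adjustment(&a, interval, offset, 0, 3);
        /* ... equals eight unit steps. */
        for (int i = 0; i < 8; i++)
                apply_adjustment(&b, interval, offset, 0, 0);

        assert(a.mult == b.mult && a.xtime_it == b.xtime_it &&
               a.xtime_nsec == b.xtime_nsec);
        printf("mult=%lld xtime_interval=%lld xtime_nsec=%lld\n",
               (long long)a.mult, (long long)a.xtime_it,
               (long long)a.xtime_nsec);
        return 0;
}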
1338
1339/*
1340 * Calculate the multiplier adjustment needed to match the frequency
1341 * specified by NTP
1342 */
1343static __always_inline void timekeeping_freqadjust(struct timekeeper *tk,
1344 s64 offset)
1345{
1346 s64 interval = tk->cycle_interval;
1347 s64 xinterval = tk->xtime_interval;
1348 s64 tick_error;
1349 bool negative;
1350 u32 adj;
1351
1352 /* Remove any current error adj from freq calculation */
1353 if (tk->ntp_err_mult)
1354 xinterval -= tk->cycle_interval;
1355
1356 tk->ntp_tick = ntp_tick_length();
1357
1358 /* Calculate current error per tick */
1359 tick_error = ntp_tick_length() >> tk->ntp_error_shift;
1360 tick_error -= (xinterval + tk->xtime_remainder);
1361
 1362 /* Don't worry about correcting it if it's small */
1363 if (likely((tick_error >= 0) && (tick_error <= interval)))
1364 return;
1365
1366 /* preserve the direction of correction */
1367 negative = (tick_error < 0);
1368
1369 /* Sort out the magnitude of the correction */
1370 tick_error = abs(tick_error);
1371 for (adj = 0; tick_error > interval; adj++)
1372 tick_error >>= 1;
1373
1374 /* scale the corrections */
1375 timekeeping_apply_adjustment(tk, offset, negative, adj);
1376}
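
The loop above that halves tick_error until it fits within one cycle_interval is effectively choosing a power-of-two scale for the correction. A small standalone model of just that magnitude search (the function name is illustrative, not kernel API):

#include <stdio.h>
#include <stdlib.h>

/*
 * Find the power-of-two scale so that a correction of interval << adj
 * does not exceed the measured per-tick error, mirroring the loop in
 * timekeeping_freqadjust() above.
 */
static unsigned int pick_adj_scale(long long tick_error, long long interval)
{
        unsigned int adj;

        tick_error = llabs(tick_error);
        for (adj = 0; tick_error > interval; adj++)
                tick_error >>= 1;
        return adj;
}

int main(void)
{
        long long interval = 4096;

        printf("%u\n", pick_adj_scale(5000, interval));    /* 1: 5000>>1 = 2500 <= 4096 */
        printf("%u\n", pick_adj_scale(-40000, interval));  /* 4: 40000>>4 = 2500 <= 4096 */
        printf("%u\n", pick_adj_scale(1000, interval));    /* 0: already small enough */
        return 0;
}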
1377
1378/*
1379 * Adjust the timekeeper's multiplier to the correct frequency
1380 * and also to reduce the accumulated error value.
1381 */
1382static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
1383{
1384 /* Correct for the current frequency error */
1385 timekeeping_freqadjust(tk, offset);
1386
1387 /* Next make a small adjustment to fix any cumulative error */
1388 if (!tk->ntp_err_mult && (tk->ntp_error > 0)) {
1389 tk->ntp_err_mult = 1;
1390 timekeeping_apply_adjustment(tk, offset, 0, 0);
1391 } else if (tk->ntp_err_mult && (tk->ntp_error <= 0)) {
1392 /* Undo any existing error adjustment */
1393 timekeeping_apply_adjustment(tk, offset, 1, 0);
1394 tk->ntp_err_mult = 0;
1395 }
1396
1397 if (unlikely(tk->tkr.clock->maxadj &&
1398 (tk->tkr.mult > tk->tkr.clock->mult + tk->tkr.clock->maxadj))) {
1399 printk_once(KERN_WARNING
1400 "Adjusting %s more than 11%% (%ld vs %ld)\n",
1401 tk->tkr.clock->name, (long)tk->tkr.mult,
1402 (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj);
1403 }
1219 1404
1220out_adjust:
1221 /* 1405 /*
1222 * It may be possible that when we entered this function, xtime_nsec 1406 * It may be possible that when we entered this function, xtime_nsec
1223 * was very small. Further, if we're slightly speeding the clocksource 1407 * was very small. Further, if we're slightly speeding the clocksource
@@ -1232,12 +1416,11 @@ out_adjust:
1232 * We'll correct this error next time through this function, when 1416 * We'll correct this error next time through this function, when
1233 * xtime_nsec is not as small. 1417 * xtime_nsec is not as small.
1234 */ 1418 */
1235 if (unlikely((s64)tk->xtime_nsec < 0)) { 1419 if (unlikely((s64)tk->tkr.xtime_nsec < 0)) {
1236 s64 neg = -(s64)tk->xtime_nsec; 1420 s64 neg = -(s64)tk->tkr.xtime_nsec;
1237 tk->xtime_nsec = 0; 1421 tk->tkr.xtime_nsec = 0;
1238 tk->ntp_error += neg << tk->ntp_error_shift; 1422 tk->ntp_error += neg << tk->ntp_error_shift;
1239 } 1423 }
1240
1241} 1424}
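
The ntp_err_mult handling in the new timekeeping_adjust() acts as a one-bit state machine: an extra multiplier unit is held while the accumulated error stays positive and released once it drops to zero or below. A toy model of that toggle, with hypothetical field names and the scaled-adjustment call reduced to +/-1 on mult:

#include <stdio.h>

struct model {
        long ntp_error;
        int ntp_err_mult;  /* 0 or 1 */
        long mult;
};

static void adjust(struct model *m)
{
        if (!m->ntp_err_mult && m->ntp_error > 0) {
                m->ntp_err_mult = 1;
                m->mult += 1;            /* apply the +1 unit */
        } else if (m->ntp_err_mult && m->ntp_error <= 0) {
                m->mult -= 1;            /* undo it */
                m->ntp_err_mult = 0;
        }
}

int main(void)
{
        struct model m = { .ntp_error = 0, .ntp_err_mult = 0, .mult = 1000 };
        long errs[] = { 5, 3, 1, -2, -1, 4 };

        for (unsigned i = 0; i < sizeof(errs) / sizeof(errs[0]); i++) {
                m.ntp_error = errs[i];
                adjust(&m);
                printf("err=%ld err_mult=%d mult=%ld\n",
                       errs[i], m.ntp_err_mult, m.mult);
        }
        return 0;
}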
1242 1425
1243/** 1426/**
@@ -1250,26 +1433,26 @@ out_adjust:
1250 */ 1433 */
1251static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) 1434static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
1252{ 1435{
1253 u64 nsecps = (u64)NSEC_PER_SEC << tk->shift; 1436 u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift;
1254 unsigned int clock_set = 0; 1437 unsigned int clock_set = 0;
1255 1438
1256 while (tk->xtime_nsec >= nsecps) { 1439 while (tk->tkr.xtime_nsec >= nsecps) {
1257 int leap; 1440 int leap;
1258 1441
1259 tk->xtime_nsec -= nsecps; 1442 tk->tkr.xtime_nsec -= nsecps;
1260 tk->xtime_sec++; 1443 tk->xtime_sec++;
1261 1444
 1262 /* Figure out if it's a leap sec and apply if needed */ 1445 /* Figure out if it's a leap sec and apply if needed */
1263 leap = second_overflow(tk->xtime_sec); 1446 leap = second_overflow(tk->xtime_sec);
1264 if (unlikely(leap)) { 1447 if (unlikely(leap)) {
1265 struct timespec ts; 1448 struct timespec64 ts;
1266 1449
1267 tk->xtime_sec += leap; 1450 tk->xtime_sec += leap;
1268 1451
1269 ts.tv_sec = leap; 1452 ts.tv_sec = leap;
1270 ts.tv_nsec = 0; 1453 ts.tv_nsec = 0;
1271 tk_set_wall_to_mono(tk, 1454 tk_set_wall_to_mono(tk,
1272 timespec_sub(tk->wall_to_monotonic, ts)); 1455 timespec64_sub(tk->wall_to_monotonic, ts));
1273 1456
1274 __timekeeping_set_tai_offset(tk, tk->tai_offset - leap); 1457 __timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
1275 1458
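
Apart from the leap-second handling, accumulate_nsecs_to_secs() is plain rollover arithmetic on shifted nanoseconds (xtime_nsec stores nanoseconds << shift). A self-contained sketch of that rollover, assuming a shift of 8; names are simplified stand-ins:

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

/*
 * Toy version of the shifted-nanosecond rollover above, ignoring the
 * leap-second handling. xtime_nsec holds nanoseconds << shift.
 */
static void accumulate(uint64_t *xtime_sec, uint64_t *xtime_nsec,
                       unsigned int shift)
{
        uint64_t nsecps = NSEC_PER_SEC << shift;

        while (*xtime_nsec >= nsecps) {
                *xtime_nsec -= nsecps;
                (*xtime_sec)++;
        }
}

int main(void)
{
        uint64_t sec = 100, nsec;
        unsigned int shift = 8;

        /* 2.5 seconds worth of shifted nanoseconds */
        nsec = (5 * NSEC_PER_SEC / 2) << shift;
        accumulate(&sec, &nsec, shift);
        printf("sec=%llu remaining_ns=%llu\n",
               (unsigned long long)sec,
               (unsigned long long)(nsec >> shift));   /* 102, 500000000 */
        return 0;
}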
@@ -1301,9 +1484,9 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
1301 1484
1302 /* Accumulate one shifted interval */ 1485 /* Accumulate one shifted interval */
1303 offset -= interval; 1486 offset -= interval;
1304 tk->cycle_last += interval; 1487 tk->tkr.cycle_last += interval;
1305 1488
1306 tk->xtime_nsec += tk->xtime_interval << shift; 1489 tk->tkr.xtime_nsec += tk->xtime_interval << shift;
1307 *clock_set |= accumulate_nsecs_to_secs(tk); 1490 *clock_set |= accumulate_nsecs_to_secs(tk);
1308 1491
1309 /* Accumulate raw time */ 1492 /* Accumulate raw time */
@@ -1317,48 +1500,20 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
1317 tk->raw_time.tv_nsec = raw_nsecs; 1500 tk->raw_time.tv_nsec = raw_nsecs;
1318 1501
1319 /* Accumulate error between NTP and clock interval */ 1502 /* Accumulate error between NTP and clock interval */
1320 tk->ntp_error += ntp_tick_length() << shift; 1503 tk->ntp_error += tk->ntp_tick << shift;
1321 tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) << 1504 tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) <<
1322 (tk->ntp_error_shift + shift); 1505 (tk->ntp_error_shift + shift);
1323 1506
1324 return offset; 1507 return offset;
1325} 1508}
1326 1509
1327#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
1328static inline void old_vsyscall_fixup(struct timekeeper *tk)
1329{
1330 s64 remainder;
1331
1332 /*
1333 * Store only full nanoseconds into xtime_nsec after rounding
1334 * it up and add the remainder to the error difference.
 1335 * XXX - This is necessary to avoid small 1ns inconsistencies caused
1336 * by truncating the remainder in vsyscalls. However, it causes
1337 * additional work to be done in timekeeping_adjust(). Once
1338 * the vsyscall implementations are converted to use xtime_nsec
1339 * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
1340 * users are removed, this can be killed.
1341 */
1342 remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);
1343 tk->xtime_nsec -= remainder;
1344 tk->xtime_nsec += 1ULL << tk->shift;
1345 tk->ntp_error += remainder << tk->ntp_error_shift;
1346 tk->ntp_error -= (1ULL << tk->shift) << tk->ntp_error_shift;
1347}
1348#else
1349#define old_vsyscall_fixup(tk)
1350#endif
1351
1352
1353
1354/** 1510/**
1355 * update_wall_time - Uses the current clocksource to increment the wall time 1511 * update_wall_time - Uses the current clocksource to increment the wall time
1356 * 1512 *
1357 */ 1513 */
1358void update_wall_time(void) 1514void update_wall_time(void)
1359{ 1515{
1360 struct clocksource *clock; 1516 struct timekeeper *real_tk = &tk_core.timekeeper;
1361 struct timekeeper *real_tk = &timekeeper;
1362 struct timekeeper *tk = &shadow_timekeeper; 1517 struct timekeeper *tk = &shadow_timekeeper;
1363 cycle_t offset; 1518 cycle_t offset;
1364 int shift = 0, maxshift; 1519 int shift = 0, maxshift;
@@ -1371,12 +1526,11 @@ void update_wall_time(void)
1371 if (unlikely(timekeeping_suspended)) 1526 if (unlikely(timekeeping_suspended))
1372 goto out; 1527 goto out;
1373 1528
1374 clock = real_tk->clock;
1375
1376#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET 1529#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
1377 offset = real_tk->cycle_interval; 1530 offset = real_tk->cycle_interval;
1378#else 1531#else
1379 offset = (clock->read(clock) - clock->cycle_last) & clock->mask; 1532 offset = clocksource_delta(tk->tkr.read(tk->tkr.clock),
1533 tk->tkr.cycle_last, tk->tkr.mask);
1380#endif 1534#endif
1381 1535
1382 /* Check if there's really nothing to do */ 1536 /* Check if there's really nothing to do */
@@ -1418,9 +1572,7 @@ void update_wall_time(void)
1418 */ 1572 */
1419 clock_set |= accumulate_nsecs_to_secs(tk); 1573 clock_set |= accumulate_nsecs_to_secs(tk);
1420 1574
1421 write_seqcount_begin(&timekeeper_seq); 1575 write_seqcount_begin(&tk_core.seq);
1422 /* Update clock->cycle_last with the new value */
1423 clock->cycle_last = tk->cycle_last;
1424 /* 1576 /*
1425 * Update the real timekeeper. 1577 * Update the real timekeeper.
1426 * 1578 *
@@ -1428,12 +1580,12 @@ void update_wall_time(void)
1428 * requires changes to all other timekeeper usage sites as 1580 * requires changes to all other timekeeper usage sites as
1429 * well, i.e. move the timekeeper pointer getter into the 1581 * well, i.e. move the timekeeper pointer getter into the
1430 * spinlocked/seqcount protected sections. And we trade this 1582 * spinlocked/seqcount protected sections. And we trade this
1431 * memcpy under the timekeeper_seq against one before we start 1583 * memcpy under the tk_core.seq against one before we start
1432 * updating. 1584 * updating.
1433 */ 1585 */
1434 memcpy(real_tk, tk, sizeof(*tk)); 1586 memcpy(real_tk, tk, sizeof(*tk));
1435 timekeeping_update(real_tk, clock_set); 1587 timekeeping_update(real_tk, clock_set);
1436 write_seqcount_end(&timekeeper_seq); 1588 write_seqcount_end(&tk_core.seq);
1437out: 1589out:
1438 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 1590 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1439 if (clock_set) 1591 if (clock_set)
@@ -1454,83 +1606,16 @@ out:
1454 */ 1606 */
1455void getboottime(struct timespec *ts) 1607void getboottime(struct timespec *ts)
1456{ 1608{
1457 struct timekeeper *tk = &timekeeper; 1609 struct timekeeper *tk = &tk_core.timekeeper;
1458 struct timespec boottime = { 1610 ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot);
1459 .tv_sec = tk->wall_to_monotonic.tv_sec +
1460 tk->total_sleep_time.tv_sec,
1461 .tv_nsec = tk->wall_to_monotonic.tv_nsec +
1462 tk->total_sleep_time.tv_nsec
1463 };
1464
1465 set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec);
1466}
1467EXPORT_SYMBOL_GPL(getboottime);
1468
1469/**
1470 * get_monotonic_boottime - Returns monotonic time since boot
1471 * @ts: pointer to the timespec to be set
1472 *
1473 * Returns the monotonic time since boot in a timespec.
1474 *
 1475 * This is similar to CLOCK_MONOTONIC/ktime_get_ts, but also
1476 * includes the time spent in suspend.
1477 */
1478void get_monotonic_boottime(struct timespec *ts)
1479{
1480 struct timekeeper *tk = &timekeeper;
1481 struct timespec tomono, sleep;
1482 s64 nsec;
1483 unsigned int seq;
1484
1485 WARN_ON(timekeeping_suspended);
1486
1487 do {
1488 seq = read_seqcount_begin(&timekeeper_seq);
1489 ts->tv_sec = tk->xtime_sec;
1490 nsec = timekeeping_get_ns(tk);
1491 tomono = tk->wall_to_monotonic;
1492 sleep = tk->total_sleep_time;
1493
1494 } while (read_seqcount_retry(&timekeeper_seq, seq));
1495
1496 ts->tv_sec += tomono.tv_sec + sleep.tv_sec;
1497 ts->tv_nsec = 0;
1498 timespec_add_ns(ts, nsec + tomono.tv_nsec + sleep.tv_nsec);
1499}
1500EXPORT_SYMBOL_GPL(get_monotonic_boottime);
1501
1502/**
1503 * ktime_get_boottime - Returns monotonic time since boot in a ktime
1504 *
1505 * Returns the monotonic time since boot in a ktime
1506 *
 1507 * This is similar to CLOCK_MONOTONIC/ktime_get, but also
1508 * includes the time spent in suspend.
1509 */
1510ktime_t ktime_get_boottime(void)
1511{
1512 struct timespec ts;
1513
1514 get_monotonic_boottime(&ts);
1515 return timespec_to_ktime(ts);
1516}
1517EXPORT_SYMBOL_GPL(ktime_get_boottime);
1518
1519/**
1520 * monotonic_to_bootbased - Convert the monotonic time to boot based.
1521 * @ts: pointer to the timespec to be converted
1522 */
1523void monotonic_to_bootbased(struct timespec *ts)
1524{
1525 struct timekeeper *tk = &timekeeper;
1526 1611
1527 *ts = timespec_add(*ts, tk->total_sleep_time); 1612 *ts = ktime_to_timespec(t);
1528} 1613}
1529EXPORT_SYMBOL_GPL(monotonic_to_bootbased); 1614EXPORT_SYMBOL_GPL(getboottime);
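
The new getboottime() reads as an algebraic simplification rather than a behavioural change: assuming offs_real is the negated wall_to_monotonic offset and offs_boot equals total_sleep_time (as the timekeeper maintains them), offs_real - offs_boot equals the old -(wall_to_monotonic + total_sleep_time). A quick plain-seconds check of that identity:

#include <stdio.h>

int main(void)
{
        long wall_to_monotonic = -1700000000;  /* wall clock runs ahead of monotonic */
        long total_sleep_time = 420;           /* seconds spent suspended */

        long old_way = -(wall_to_monotonic + total_sleep_time);
        long offs_real = -wall_to_monotonic;   /* assumed relationship */
        long offs_boot = total_sleep_time;     /* assumed relationship */
        long new_way = offs_real - offs_boot;

        printf("old=%ld new=%ld\n", old_way, new_way);  /* identical values */
        return 0;
}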
1530 1615
1531unsigned long get_seconds(void) 1616unsigned long get_seconds(void)
1532{ 1617{
1533 struct timekeeper *tk = &timekeeper; 1618 struct timekeeper *tk = &tk_core.timekeeper;
1534 1619
1535 return tk->xtime_sec; 1620 return tk->xtime_sec;
1536} 1621}
@@ -1538,43 +1623,44 @@ EXPORT_SYMBOL(get_seconds);
1538 1623
1539struct timespec __current_kernel_time(void) 1624struct timespec __current_kernel_time(void)
1540{ 1625{
1541 struct timekeeper *tk = &timekeeper; 1626 struct timekeeper *tk = &tk_core.timekeeper;
1542 1627
1543 return tk_xtime(tk); 1628 return timespec64_to_timespec(tk_xtime(tk));
1544} 1629}
1545 1630
1546struct timespec current_kernel_time(void) 1631struct timespec current_kernel_time(void)
1547{ 1632{
1548 struct timekeeper *tk = &timekeeper; 1633 struct timekeeper *tk = &tk_core.timekeeper;
1549 struct timespec now; 1634 struct timespec64 now;
1550 unsigned long seq; 1635 unsigned long seq;
1551 1636
1552 do { 1637 do {
1553 seq = read_seqcount_begin(&timekeeper_seq); 1638 seq = read_seqcount_begin(&tk_core.seq);
1554 1639
1555 now = tk_xtime(tk); 1640 now = tk_xtime(tk);
1556 } while (read_seqcount_retry(&timekeeper_seq, seq)); 1641 } while (read_seqcount_retry(&tk_core.seq, seq));
1557 1642
1558 return now; 1643 return timespec64_to_timespec(now);
1559} 1644}
1560EXPORT_SYMBOL(current_kernel_time); 1645EXPORT_SYMBOL(current_kernel_time);
1561 1646
1562struct timespec get_monotonic_coarse(void) 1647struct timespec get_monotonic_coarse(void)
1563{ 1648{
1564 struct timekeeper *tk = &timekeeper; 1649 struct timekeeper *tk = &tk_core.timekeeper;
1565 struct timespec now, mono; 1650 struct timespec64 now, mono;
1566 unsigned long seq; 1651 unsigned long seq;
1567 1652
1568 do { 1653 do {
1569 seq = read_seqcount_begin(&timekeeper_seq); 1654 seq = read_seqcount_begin(&tk_core.seq);
1570 1655
1571 now = tk_xtime(tk); 1656 now = tk_xtime(tk);
1572 mono = tk->wall_to_monotonic; 1657 mono = tk->wall_to_monotonic;
1573 } while (read_seqcount_retry(&timekeeper_seq, seq)); 1658 } while (read_seqcount_retry(&tk_core.seq, seq));
1574 1659
1575 set_normalized_timespec(&now, now.tv_sec + mono.tv_sec, 1660 set_normalized_timespec64(&now, now.tv_sec + mono.tv_sec,
1576 now.tv_nsec + mono.tv_nsec); 1661 now.tv_nsec + mono.tv_nsec);
1577 return now; 1662
1663 return timespec64_to_timespec(now);
1578} 1664}
1579 1665
1580/* 1666/*
@@ -1587,29 +1673,38 @@ void do_timer(unsigned long ticks)
1587} 1673}
1588 1674
1589/** 1675/**
1590 * get_xtime_and_monotonic_and_sleep_offset() - get xtime, wall_to_monotonic, 1676 * ktime_get_update_offsets_tick - hrtimer helper
1591 * and sleep offsets. 1677 * @offs_real: pointer to storage for monotonic -> realtime offset
1592 * @xtim: pointer to timespec to be set with xtime 1678 * @offs_boot: pointer to storage for monotonic -> boottime offset
1593 * @wtom: pointer to timespec to be set with wall_to_monotonic 1679 * @offs_tai: pointer to storage for monotonic -> clock tai offset
1594 * @sleep: pointer to timespec to be set with time in suspend 1680 *
1681 * Returns monotonic time at last tick and various offsets
1595 */ 1682 */
1596void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, 1683ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot,
1597 struct timespec *wtom, struct timespec *sleep) 1684 ktime_t *offs_tai)
1598{ 1685{
1599 struct timekeeper *tk = &timekeeper; 1686 struct timekeeper *tk = &tk_core.timekeeper;
1600 unsigned long seq; 1687 unsigned int seq;
1688 ktime_t base;
1689 u64 nsecs;
1601 1690
1602 do { 1691 do {
1603 seq = read_seqcount_begin(&timekeeper_seq); 1692 seq = read_seqcount_begin(&tk_core.seq);
1604 *xtim = tk_xtime(tk); 1693
1605 *wtom = tk->wall_to_monotonic; 1694 base = tk->tkr.base_mono;
1606 *sleep = tk->total_sleep_time; 1695 nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift;
1607 } while (read_seqcount_retry(&timekeeper_seq, seq)); 1696
1697 *offs_real = tk->offs_real;
1698 *offs_boot = tk->offs_boot;
1699 *offs_tai = tk->offs_tai;
1700 } while (read_seqcount_retry(&tk_core.seq, seq));
1701
1702 return ktime_add_ns(base, nsecs);
1608} 1703}
1609 1704
1610#ifdef CONFIG_HIGH_RES_TIMERS 1705#ifdef CONFIG_HIGH_RES_TIMERS
1611/** 1706/**
1612 * ktime_get_update_offsets - hrtimer helper 1707 * ktime_get_update_offsets_now - hrtimer helper
1613 * @offs_real: pointer to storage for monotonic -> realtime offset 1708 * @offs_real: pointer to storage for monotonic -> realtime offset
1614 * @offs_boot: pointer to storage for monotonic -> boottime offset 1709 * @offs_boot: pointer to storage for monotonic -> boottime offset
1615 * @offs_tai: pointer to storage for monotonic -> clock tai offset 1710 * @offs_tai: pointer to storage for monotonic -> clock tai offset
@@ -1617,57 +1712,37 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
1617 * Returns current monotonic time and updates the offsets 1712 * Returns current monotonic time and updates the offsets
1618 * Called from hrtimer_interrupt() or retrigger_next_event() 1713 * Called from hrtimer_interrupt() or retrigger_next_event()
1619 */ 1714 */
1620ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot, 1715ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot,
1621 ktime_t *offs_tai) 1716 ktime_t *offs_tai)
1622{ 1717{
1623 struct timekeeper *tk = &timekeeper; 1718 struct timekeeper *tk = &tk_core.timekeeper;
1624 ktime_t now;
1625 unsigned int seq; 1719 unsigned int seq;
1626 u64 secs, nsecs; 1720 ktime_t base;
1721 u64 nsecs;
1627 1722
1628 do { 1723 do {
1629 seq = read_seqcount_begin(&timekeeper_seq); 1724 seq = read_seqcount_begin(&tk_core.seq);
1630 1725
1631 secs = tk->xtime_sec; 1726 base = tk->tkr.base_mono;
1632 nsecs = timekeeping_get_ns(tk); 1727 nsecs = timekeeping_get_ns(&tk->tkr);
1633 1728
1634 *offs_real = tk->offs_real; 1729 *offs_real = tk->offs_real;
1635 *offs_boot = tk->offs_boot; 1730 *offs_boot = tk->offs_boot;
1636 *offs_tai = tk->offs_tai; 1731 *offs_tai = tk->offs_tai;
1637 } while (read_seqcount_retry(&timekeeper_seq, seq)); 1732 } while (read_seqcount_retry(&tk_core.seq, seq));
1638 1733
1639 now = ktime_add_ns(ktime_set(secs, 0), nsecs); 1734 return ktime_add_ns(base, nsecs);
1640 now = ktime_sub(now, *offs_real);
1641 return now;
1642} 1735}
1643#endif 1736#endif
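
All of these accessors share the read_seqcount_begin()/read_seqcount_retry() pattern: copy a consistent snapshot of several fields and retry if the writer bumped the sequence in the meantime. The sketch below is an illustrative single-file model using C11 atomics; it is not the kernel's seqcount (which has stronger barriers and lockdep integration) and the names are made up.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct snapshot {
        atomic_uint seq;          /* even: stable, odd: update in progress */
        uint64_t base_ns;
        uint64_t offs_real_ns;
};

static void writer_update(struct snapshot *s, uint64_t base, uint64_t offs)
{
        atomic_fetch_add_explicit(&s->seq, 1, memory_order_release); /* -> odd */
        s->base_ns = base;
        s->offs_real_ns = offs;
        atomic_fetch_add_explicit(&s->seq, 1, memory_order_release); /* -> even */
}

static uint64_t reader_get_real_ns(struct snapshot *s)
{
        unsigned int seq;
        uint64_t base, offs;

        for (;;) {
                seq = atomic_load_explicit(&s->seq, memory_order_acquire);
                if (seq & 1)
                        continue;               /* writer in progress, retry */
                base = s->base_ns;
                offs = s->offs_real_ns;
                if (atomic_load_explicit(&s->seq, memory_order_acquire) == seq)
                        break;                  /* consistent snapshot */
        }
        return base + offs;
}

int main(void)
{
        struct snapshot s = { .seq = 0 };

        writer_update(&s, 1000, 5);
        printf("%llu\n", (unsigned long long)reader_get_real_ns(&s)); /* 1005 */
        return 0;
}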
1644 1737
1645/** 1738/**
1646 * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format
1647 */
1648ktime_t ktime_get_monotonic_offset(void)
1649{
1650 struct timekeeper *tk = &timekeeper;
1651 unsigned long seq;
1652 struct timespec wtom;
1653
1654 do {
1655 seq = read_seqcount_begin(&timekeeper_seq);
1656 wtom = tk->wall_to_monotonic;
1657 } while (read_seqcount_retry(&timekeeper_seq, seq));
1658
1659 return timespec_to_ktime(wtom);
1660}
1661EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset);
1662
1663/**
1664 * do_adjtimex() - Accessor function to NTP __do_adjtimex function 1739 * do_adjtimex() - Accessor function to NTP __do_adjtimex function
1665 */ 1740 */
1666int do_adjtimex(struct timex *txc) 1741int do_adjtimex(struct timex *txc)
1667{ 1742{
1668 struct timekeeper *tk = &timekeeper; 1743 struct timekeeper *tk = &tk_core.timekeeper;
1669 unsigned long flags; 1744 unsigned long flags;
1670 struct timespec ts; 1745 struct timespec64 ts;
1671 s32 orig_tai, tai; 1746 s32 orig_tai, tai;
1672 int ret; 1747 int ret;
1673 1748
@@ -1687,10 +1762,10 @@ int do_adjtimex(struct timex *txc)
1687 return ret; 1762 return ret;
1688 } 1763 }
1689 1764
1690 getnstimeofday(&ts); 1765 getnstimeofday64(&ts);
1691 1766
1692 raw_spin_lock_irqsave(&timekeeper_lock, flags); 1767 raw_spin_lock_irqsave(&timekeeper_lock, flags);
1693 write_seqcount_begin(&timekeeper_seq); 1768 write_seqcount_begin(&tk_core.seq);
1694 1769
1695 orig_tai = tai = tk->tai_offset; 1770 orig_tai = tai = tk->tai_offset;
1696 ret = __do_adjtimex(txc, &ts, &tai); 1771 ret = __do_adjtimex(txc, &ts, &tai);
@@ -1699,7 +1774,7 @@ int do_adjtimex(struct timex *txc)
1699 __timekeeping_set_tai_offset(tk, tai); 1774 __timekeeping_set_tai_offset(tk, tai);
1700 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); 1775 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
1701 } 1776 }
1702 write_seqcount_end(&timekeeper_seq); 1777 write_seqcount_end(&tk_core.seq);
1703 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 1778 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1704 1779
1705 if (tai != orig_tai) 1780 if (tai != orig_tai)
@@ -1719,11 +1794,11 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
1719 unsigned long flags; 1794 unsigned long flags;
1720 1795
1721 raw_spin_lock_irqsave(&timekeeper_lock, flags); 1796 raw_spin_lock_irqsave(&timekeeper_lock, flags);
1722 write_seqcount_begin(&timekeeper_seq); 1797 write_seqcount_begin(&tk_core.seq);
1723 1798
1724 __hardpps(phase_ts, raw_ts); 1799 __hardpps(phase_ts, raw_ts);
1725 1800
1726 write_seqcount_end(&timekeeper_seq); 1801 write_seqcount_end(&tk_core.seq);
1727 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 1802 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1728} 1803}
1729EXPORT_SYMBOL(hardpps); 1804EXPORT_SYMBOL(hardpps);
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
new file mode 100644
index 000000000000..adc1fc98bde3
--- /dev/null
+++ b/kernel/time/timekeeping.h
@@ -0,0 +1,20 @@
1#ifndef _KERNEL_TIME_TIMEKEEPING_H
2#define _KERNEL_TIME_TIMEKEEPING_H
3/*
4 * Internal interfaces for kernel/time/
5 */
6extern ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real,
7 ktime_t *offs_boot,
8 ktime_t *offs_tai);
9extern ktime_t ktime_get_update_offsets_now(ktime_t *offs_real,
10 ktime_t *offs_boot,
11 ktime_t *offs_tai);
12
13extern int timekeeping_valid_for_hres(void);
14extern u64 timekeeping_max_deferment(void);
15extern int timekeeping_inject_offset(struct timespec *ts);
16extern s32 timekeeping_get_tai_offset(void);
17extern void timekeeping_set_tai_offset(s32 tai_offset);
18extern void timekeeping_clocktai(struct timespec *ts);
19
20#endif
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index 4d54f97558df..f6bd65236712 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -67,7 +67,7 @@ static int __init tk_debug_sleep_time_init(void)
67} 67}
68late_initcall(tk_debug_sleep_time_init); 68late_initcall(tk_debug_sleep_time_init);
69 69
70void tk_debug_account_sleep_time(struct timespec *t) 70void tk_debug_account_sleep_time(struct timespec64 *t)
71{ 71{
72 sleep_time_bin[fls(t->tv_sec)]++; 72 sleep_time_bin[fls(t->tv_sec)]++;
73} 73}
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h
index 13323ea08ffa..4ea005a7f9da 100644
--- a/kernel/time/timekeeping_internal.h
+++ b/kernel/time/timekeeping_internal.h
@@ -3,12 +3,27 @@
3/* 3/*
4 * timekeeping debug functions 4 * timekeeping debug functions
5 */ 5 */
6#include <linux/clocksource.h>
6#include <linux/time.h> 7#include <linux/time.h>
7 8
8#ifdef CONFIG_DEBUG_FS 9#ifdef CONFIG_DEBUG_FS
9extern void tk_debug_account_sleep_time(struct timespec *t); 10extern void tk_debug_account_sleep_time(struct timespec64 *t);
10#else 11#else
11#define tk_debug_account_sleep_time(x) 12#define tk_debug_account_sleep_time(x)
12#endif 13#endif
13 14
15#ifdef CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE
16static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask)
17{
18 cycle_t ret = (now - last) & mask;
19
20 return (s64) ret > 0 ? ret : 0;
21}
22#else
23static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask)
24{
25 return (now - last) & mask;
26}
27#endif
28
14#endif /* _TIMEKEEPING_INTERNAL_H */ 29#endif /* _TIMEKEEPING_INTERNAL_H */
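
The two clocksource_delta() variants differ only in the sign test, which clamps an apparently backwards-moving clocksource to a zero delta. Note the test only has an effect when the mask covers bit 63 (a full 64-bit counter such as the TSC); for narrower masks the masked value can never look negative as an s64, so the clamp changes nothing. A userspace copy to show the difference:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t cycle_t;

static cycle_t delta_plain(cycle_t now, cycle_t last, cycle_t mask)
{
        return (now - last) & mask;
}

static cycle_t delta_validated(cycle_t now, cycle_t last, cycle_t mask)
{
        cycle_t ret = (now - last) & mask;

        return (int64_t)ret > 0 ? ret : 0;
}

int main(void)
{
        cycle_t mask = ~0ULL;        /* 64-bit counter, e.g. the TSC */

        /* normal forward motion across a counter wrap: both give 32 */
        printf("%llu\n", (unsigned long long)
               delta_plain(0x10, 0xfffffffffffffff0ULL, mask));

        /* 'now' slightly behind 'last' (e.g. SMP TSC skew): plain reads huge */
        printf("%llu\n", (unsigned long long)delta_plain(100, 104, mask));
        /* the validated variant clamps it to 0 */
        printf("%llu\n", (unsigned long long)delta_validated(100, 104, mask));
        return 0;
}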
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
new file mode 100644
index 000000000000..aca5dfe2fa3d
--- /dev/null
+++ b/kernel/time/timer.c
@@ -0,0 +1,1736 @@
1/*
2 * linux/kernel/timer.c
3 *
4 * Kernel internal timers
5 *
6 * Copyright (C) 1991, 1992 Linus Torvalds
7 *
8 * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
9 *
10 * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
11 * "A Kernel Model for Precision Timekeeping" by Dave Mills
12 * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
13 * serialize accesses to xtime/lost_ticks).
14 * Copyright (C) 1998 Andrea Arcangeli
15 * 1999-03-10 Improved NTP compatibility by Ulrich Windl
16 * 2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love
17 * 2000-10-05 Implemented scalable SMP per-CPU timer handling.
18 * Copyright (C) 2000, 2001, 2002 Ingo Molnar
19 * Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
20 */
21
22#include <linux/kernel_stat.h>
23#include <linux/export.h>
24#include <linux/interrupt.h>
25#include <linux/percpu.h>
26#include <linux/init.h>
27#include <linux/mm.h>
28#include <linux/swap.h>
29#include <linux/pid_namespace.h>
30#include <linux/notifier.h>
31#include <linux/thread_info.h>
32#include <linux/time.h>
33#include <linux/jiffies.h>
34#include <linux/posix-timers.h>
35#include <linux/cpu.h>
36#include <linux/syscalls.h>
37#include <linux/delay.h>
38#include <linux/tick.h>
39#include <linux/kallsyms.h>
40#include <linux/irq_work.h>
41#include <linux/sched.h>
42#include <linux/sched/sysctl.h>
43#include <linux/slab.h>
44#include <linux/compat.h>
45
46#include <asm/uaccess.h>
47#include <asm/unistd.h>
48#include <asm/div64.h>
49#include <asm/timex.h>
50#include <asm/io.h>
51
52#define CREATE_TRACE_POINTS
53#include <trace/events/timer.h>
54
55__visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
56
57EXPORT_SYMBOL(jiffies_64);
58
59/*
60 * per-CPU timer vector definitions:
61 */
62#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
63#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
64#define TVN_SIZE (1 << TVN_BITS)
65#define TVR_SIZE (1 << TVR_BITS)
66#define TVN_MASK (TVN_SIZE - 1)
67#define TVR_MASK (TVR_SIZE - 1)
68#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1))
69
70struct tvec {
71 struct list_head vec[TVN_SIZE];
72};
73
74struct tvec_root {
75 struct list_head vec[TVR_SIZE];
76};
77
78struct tvec_base {
79 spinlock_t lock;
80 struct timer_list *running_timer;
81 unsigned long timer_jiffies;
82 unsigned long next_timer;
83 unsigned long active_timers;
84 unsigned long all_timers;
85 int cpu;
86 struct tvec_root tv1;
87 struct tvec tv2;
88 struct tvec tv3;
89 struct tvec tv4;
90 struct tvec tv5;
91} ____cacheline_aligned;
92
93struct tvec_base boot_tvec_bases;
94EXPORT_SYMBOL(boot_tvec_bases);
95static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
96
97/* Functions below help us manage 'deferrable' flag */
98static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
99{
100 return ((unsigned int)(unsigned long)base & TIMER_DEFERRABLE);
101}
102
103static inline unsigned int tbase_get_irqsafe(struct tvec_base *base)
104{
105 return ((unsigned int)(unsigned long)base & TIMER_IRQSAFE);
106}
107
108static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
109{
110 return ((struct tvec_base *)((unsigned long)base & ~TIMER_FLAG_MASK));
111}
112
113static inline void
114timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
115{
116 unsigned long flags = (unsigned long)timer->base & TIMER_FLAG_MASK;
117
118 timer->base = (struct tvec_base *)((unsigned long)(new_base) | flags);
119}
120
121static unsigned long round_jiffies_common(unsigned long j, int cpu,
122 bool force_up)
123{
124 int rem;
125 unsigned long original = j;
126
127 /*
128 * We don't want all cpus firing their timers at once hitting the
129 * same lock or cachelines, so we skew each extra cpu with an extra
130 * 3 jiffies. This 3 jiffies came originally from the mm/ code which
131 * already did this.
132 * The skew is done by adding 3*cpunr, then round, then subtract this
133 * extra offset again.
134 */
135 j += cpu * 3;
136
137 rem = j % HZ;
138
139 /*
 140 * If the target jiffy is just after a whole second (which can happen
141 * due to delays of the timer irq, long irq off times etc etc) then
142 * we should round down to the whole second, not up. Use 1/4th second
143 * as cutoff for this rounding as an extreme upper bound for this.
144 * But never round down if @force_up is set.
145 */
146 if (rem < HZ/4 && !force_up) /* round down */
147 j = j - rem;
148 else /* round up */
149 j = j - rem + HZ;
150
151 /* now that we have rounded, subtract the extra skew again */
152 j -= cpu * 3;
153
154 /*
155 * Make sure j is still in the future. Otherwise return the
156 * unmodified value.
157 */
158 return time_is_after_jiffies(j) ? j : original;
159}
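
A standalone model of the rounding above (per-cpu skew, round down only within the first quarter second past a boundary, never down when force_up is set), with HZ assumed to be 1000 and the time_is_after_jiffies() safety check omitted:

#include <stdio.h>

#define HZ 1000   /* assumed tick rate for the example */

static unsigned long round_common(unsigned long j, int cpu, int force_up)
{
        unsigned long rem;

        j += cpu * 3;                  /* per-cpu skew */
        rem = j % HZ;
        if (rem < HZ / 4 && !force_up)
                j = j - rem;           /* just past a second: round down */
        else
                j = j - rem + HZ;      /* otherwise round up */
        return j - cpu * 3;            /* remove the skew again */
}

int main(void)
{
        /* 100 ticks past a second boundary on cpu 0: rounds down */
        printf("%lu\n", round_common(5100, 0, 0));   /* 5000 */
        /* 600 ticks past: rounds up */
        printf("%lu\n", round_common(5600, 0, 0));   /* 6000 */
        /* force_up never rounds down */
        printf("%lu\n", round_common(5100, 0, 1));   /* 6000 */
        /* cpu 2 is skewed by 6 jiffies before rounding */
        printf("%lu\n", round_common(5100, 2, 0));   /* 4994 */
        return 0;
}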
160
161/**
162 * __round_jiffies - function to round jiffies to a full second
163 * @j: the time in (absolute) jiffies that should be rounded
164 * @cpu: the processor number on which the timeout will happen
165 *
166 * __round_jiffies() rounds an absolute time in the future (in jiffies)
167 * up or down to (approximately) full seconds. This is useful for timers
168 * for which the exact time they fire does not matter too much, as long as
169 * they fire approximately every X seconds.
170 *
171 * By rounding these timers to whole seconds, all such timers will fire
172 * at the same time, rather than at various times spread out. The goal
173 * of this is to have the CPU wake up less, which saves power.
174 *
175 * The exact rounding is skewed for each processor to avoid all
176 * processors firing at the exact same time, which could lead
177 * to lock contention or spurious cache line bouncing.
178 *
179 * The return value is the rounded version of the @j parameter.
180 */
181unsigned long __round_jiffies(unsigned long j, int cpu)
182{
183 return round_jiffies_common(j, cpu, false);
184}
185EXPORT_SYMBOL_GPL(__round_jiffies);
186
187/**
188 * __round_jiffies_relative - function to round jiffies to a full second
189 * @j: the time in (relative) jiffies that should be rounded
190 * @cpu: the processor number on which the timeout will happen
191 *
192 * __round_jiffies_relative() rounds a time delta in the future (in jiffies)
193 * up or down to (approximately) full seconds. This is useful for timers
194 * for which the exact time they fire does not matter too much, as long as
195 * they fire approximately every X seconds.
196 *
197 * By rounding these timers to whole seconds, all such timers will fire
198 * at the same time, rather than at various times spread out. The goal
199 * of this is to have the CPU wake up less, which saves power.
200 *
201 * The exact rounding is skewed for each processor to avoid all
202 * processors firing at the exact same time, which could lead
203 * to lock contention or spurious cache line bouncing.
204 *
205 * The return value is the rounded version of the @j parameter.
206 */
207unsigned long __round_jiffies_relative(unsigned long j, int cpu)
208{
209 unsigned long j0 = jiffies;
210
211 /* Use j0 because jiffies might change while we run */
212 return round_jiffies_common(j + j0, cpu, false) - j0;
213}
214EXPORT_SYMBOL_GPL(__round_jiffies_relative);
215
216/**
217 * round_jiffies - function to round jiffies to a full second
218 * @j: the time in (absolute) jiffies that should be rounded
219 *
220 * round_jiffies() rounds an absolute time in the future (in jiffies)
221 * up or down to (approximately) full seconds. This is useful for timers
222 * for which the exact time they fire does not matter too much, as long as
223 * they fire approximately every X seconds.
224 *
225 * By rounding these timers to whole seconds, all such timers will fire
226 * at the same time, rather than at various times spread out. The goal
227 * of this is to have the CPU wake up less, which saves power.
228 *
229 * The return value is the rounded version of the @j parameter.
230 */
231unsigned long round_jiffies(unsigned long j)
232{
233 return round_jiffies_common(j, raw_smp_processor_id(), false);
234}
235EXPORT_SYMBOL_GPL(round_jiffies);
236
237/**
238 * round_jiffies_relative - function to round jiffies to a full second
239 * @j: the time in (relative) jiffies that should be rounded
240 *
241 * round_jiffies_relative() rounds a time delta in the future (in jiffies)
242 * up or down to (approximately) full seconds. This is useful for timers
243 * for which the exact time they fire does not matter too much, as long as
244 * they fire approximately every X seconds.
245 *
246 * By rounding these timers to whole seconds, all such timers will fire
247 * at the same time, rather than at various times spread out. The goal
248 * of this is to have the CPU wake up less, which saves power.
249 *
250 * The return value is the rounded version of the @j parameter.
251 */
252unsigned long round_jiffies_relative(unsigned long j)
253{
254 return __round_jiffies_relative(j, raw_smp_processor_id());
255}
256EXPORT_SYMBOL_GPL(round_jiffies_relative);
257
258/**
259 * __round_jiffies_up - function to round jiffies up to a full second
260 * @j: the time in (absolute) jiffies that should be rounded
261 * @cpu: the processor number on which the timeout will happen
262 *
263 * This is the same as __round_jiffies() except that it will never
264 * round down. This is useful for timeouts for which the exact time
265 * of firing does not matter too much, as long as they don't fire too
266 * early.
267 */
268unsigned long __round_jiffies_up(unsigned long j, int cpu)
269{
270 return round_jiffies_common(j, cpu, true);
271}
272EXPORT_SYMBOL_GPL(__round_jiffies_up);
273
274/**
275 * __round_jiffies_up_relative - function to round jiffies up to a full second
276 * @j: the time in (relative) jiffies that should be rounded
277 * @cpu: the processor number on which the timeout will happen
278 *
279 * This is the same as __round_jiffies_relative() except that it will never
280 * round down. This is useful for timeouts for which the exact time
281 * of firing does not matter too much, as long as they don't fire too
282 * early.
283 */
284unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
285{
286 unsigned long j0 = jiffies;
287
288 /* Use j0 because jiffies might change while we run */
289 return round_jiffies_common(j + j0, cpu, true) - j0;
290}
291EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);
292
293/**
294 * round_jiffies_up - function to round jiffies up to a full second
295 * @j: the time in (absolute) jiffies that should be rounded
296 *
297 * This is the same as round_jiffies() except that it will never
298 * round down. This is useful for timeouts for which the exact time
299 * of firing does not matter too much, as long as they don't fire too
300 * early.
301 */
302unsigned long round_jiffies_up(unsigned long j)
303{
304 return round_jiffies_common(j, raw_smp_processor_id(), true);
305}
306EXPORT_SYMBOL_GPL(round_jiffies_up);
307
308/**
309 * round_jiffies_up_relative - function to round jiffies up to a full second
310 * @j: the time in (relative) jiffies that should be rounded
311 *
312 * This is the same as round_jiffies_relative() except that it will never
313 * round down. This is useful for timeouts for which the exact time
314 * of firing does not matter too much, as long as they don't fire too
315 * early.
316 */
317unsigned long round_jiffies_up_relative(unsigned long j)
318{
319 return __round_jiffies_up_relative(j, raw_smp_processor_id());
320}
321EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
322
323/**
324 * set_timer_slack - set the allowed slack for a timer
325 * @timer: the timer to be modified
326 * @slack_hz: the amount of time (in jiffies) allowed for rounding
327 *
328 * Set the amount of time, in jiffies, that a certain timer has
329 * in terms of slack. By setting this value, the timer subsystem
330 * will schedule the actual timer somewhere between
331 * the time mod_timer() asks for, and that time plus the slack.
332 *
333 * By setting the slack to -1, a percentage of the delay is used
334 * instead.
335 */
336void set_timer_slack(struct timer_list *timer, int slack_hz)
337{
338 timer->slack = slack_hz;
339}
340EXPORT_SYMBOL_GPL(set_timer_slack);
341
342/*
343 * If the list is empty, catch up ->timer_jiffies to the current time.
344 * The caller must hold the tvec_base lock. Returns true if the list
345 * was empty and therefore ->timer_jiffies was updated.
346 */
347static bool catchup_timer_jiffies(struct tvec_base *base)
348{
349 if (!base->all_timers) {
350 base->timer_jiffies = jiffies;
351 return true;
352 }
353 return false;
354}
355
356static void
357__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
358{
359 unsigned long expires = timer->expires;
360 unsigned long idx = expires - base->timer_jiffies;
361 struct list_head *vec;
362
363 if (idx < TVR_SIZE) {
364 int i = expires & TVR_MASK;
365 vec = base->tv1.vec + i;
366 } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
367 int i = (expires >> TVR_BITS) & TVN_MASK;
368 vec = base->tv2.vec + i;
369 } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
370 int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
371 vec = base->tv3.vec + i;
372 } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
373 int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
374 vec = base->tv4.vec + i;
375 } else if ((signed long) idx < 0) {
376 /*
377 * Can happen if you add a timer with expires == jiffies,
378 * or you set a timer to go off in the past
379 */
380 vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
381 } else {
382 int i;
383 /* If the timeout is larger than MAX_TVAL (on 64-bit
384 * architectures or with CONFIG_BASE_SMALL=1) then we
385 * use the maximum timeout.
386 */
387 if (idx > MAX_TVAL) {
388 idx = MAX_TVAL;
389 expires = idx + base->timer_jiffies;
390 }
391 i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
392 vec = base->tv5.vec + i;
393 }
394 /*
395 * Timers are FIFO:
396 */
397 list_add_tail(&timer->entry, vec);
398}
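
The cascade of idx comparisons above maps a relative expiry onto one of five wheel levels of increasing granularity. The sketch below reimplements just that bucket selection for the !CONFIG_BASE_SMALL geometry (TVR_BITS=8, TVN_BITS=6), returning a level and slot instead of a list head and skipping the negative-idx and MAX_TVAL special cases:

#include <stdio.h>

#define TVN_BITS 6
#define TVR_BITS 8
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

static int pick_bucket(unsigned long expires, unsigned long timer_jiffies,
                       int *slot)
{
        unsigned long idx = expires - timer_jiffies;

        if (idx < TVR_SIZE) {
                *slot = expires & TVR_MASK;
                return 1;
        } else if (idx < 1UL << (TVR_BITS + TVN_BITS)) {
                *slot = (expires >> TVR_BITS) & TVN_MASK;
                return 2;
        } else if (idx < 1UL << (TVR_BITS + 2 * TVN_BITS)) {
                *slot = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
                return 3;
        } else if (idx < 1UL << (TVR_BITS + 3 * TVN_BITS)) {
                *slot = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
                return 4;
        }
        *slot = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
        return 5;
}

int main(void)
{
        unsigned long now = 1000000;
        unsigned long deltas[] = { 1, 200, 300, 70000, 5000000 };
        int slot;

        for (unsigned i = 0; i < sizeof(deltas) / sizeof(deltas[0]); i++) {
                int lvl = pick_bucket(now + deltas[i], now, &slot);
                printf("+%lu jiffies -> tv%d slot %d\n", deltas[i], lvl, slot);
        }
        return 0;
}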
399
400static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
401{
402 (void)catchup_timer_jiffies(base);
403 __internal_add_timer(base, timer);
404 /*
405 * Update base->active_timers and base->next_timer
406 */
407 if (!tbase_get_deferrable(timer->base)) {
408 if (!base->active_timers++ ||
409 time_before(timer->expires, base->next_timer))
410 base->next_timer = timer->expires;
411 }
412 base->all_timers++;
413
414 /*
415 * Check whether the other CPU is in dynticks mode and needs
416 * to be triggered to reevaluate the timer wheel.
417 * We are protected against the other CPU fiddling
418 * with the timer by holding the timer base lock. This also
419 * makes sure that a CPU on the way to stop its tick can not
420 * evaluate the timer wheel.
421 *
422 * Spare the IPI for deferrable timers on idle targets though.
423 * The next busy ticks will take care of it. Except full dynticks
 424 * require special care against races with idle_cpu(), let's deal
425 * with that later.
426 */
427 if (!tbase_get_deferrable(base) || tick_nohz_full_cpu(base->cpu))
428 wake_up_nohz_cpu(base->cpu);
429}
430
431#ifdef CONFIG_TIMER_STATS
432void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
433{
434 if (timer->start_site)
435 return;
436
437 timer->start_site = addr;
438 memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
439 timer->start_pid = current->pid;
440}
441
442static void timer_stats_account_timer(struct timer_list *timer)
443{
444 unsigned int flag = 0;
445
446 if (likely(!timer->start_site))
447 return;
448 if (unlikely(tbase_get_deferrable(timer->base)))
449 flag |= TIMER_STATS_FLAG_DEFERRABLE;
450
451 timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
452 timer->function, timer->start_comm, flag);
453}
454
455#else
456static void timer_stats_account_timer(struct timer_list *timer) {}
457#endif
458
459#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
460
461static struct debug_obj_descr timer_debug_descr;
462
463static void *timer_debug_hint(void *addr)
464{
465 return ((struct timer_list *) addr)->function;
466}
467
468/*
469 * fixup_init is called when:
470 * - an active object is initialized
471 */
472static int timer_fixup_init(void *addr, enum debug_obj_state state)
473{
474 struct timer_list *timer = addr;
475
476 switch (state) {
477 case ODEBUG_STATE_ACTIVE:
478 del_timer_sync(timer);
479 debug_object_init(timer, &timer_debug_descr);
480 return 1;
481 default:
482 return 0;
483 }
484}
485
486/* Stub timer callback for improperly used timers. */
487static void stub_timer(unsigned long data)
488{
489 WARN_ON(1);
490}
491
492/*
493 * fixup_activate is called when:
494 * - an active object is activated
495 * - an unknown object is activated (might be a statically initialized object)
496 */
497static int timer_fixup_activate(void *addr, enum debug_obj_state state)
498{
499 struct timer_list *timer = addr;
500
501 switch (state) {
502
503 case ODEBUG_STATE_NOTAVAILABLE:
504 /*
505 * This is not really a fixup. The timer was
506 * statically initialized. We just make sure that it
507 * is tracked in the object tracker.
508 */
509 if (timer->entry.next == NULL &&
510 timer->entry.prev == TIMER_ENTRY_STATIC) {
511 debug_object_init(timer, &timer_debug_descr);
512 debug_object_activate(timer, &timer_debug_descr);
513 return 0;
514 } else {
515 setup_timer(timer, stub_timer, 0);
516 return 1;
517 }
518 return 0;
519
520 case ODEBUG_STATE_ACTIVE:
521 WARN_ON(1);
522
523 default:
524 return 0;
525 }
526}
527
528/*
529 * fixup_free is called when:
530 * - an active object is freed
531 */
532static int timer_fixup_free(void *addr, enum debug_obj_state state)
533{
534 struct timer_list *timer = addr;
535
536 switch (state) {
537 case ODEBUG_STATE_ACTIVE:
538 del_timer_sync(timer);
539 debug_object_free(timer, &timer_debug_descr);
540 return 1;
541 default:
542 return 0;
543 }
544}
545
546/*
547 * fixup_assert_init is called when:
548 * - an untracked/uninit-ed object is found
549 */
550static int timer_fixup_assert_init(void *addr, enum debug_obj_state state)
551{
552 struct timer_list *timer = addr;
553
554 switch (state) {
555 case ODEBUG_STATE_NOTAVAILABLE:
556 if (timer->entry.prev == TIMER_ENTRY_STATIC) {
557 /*
558 * This is not really a fixup. The timer was
559 * statically initialized. We just make sure that it
560 * is tracked in the object tracker.
561 */
562 debug_object_init(timer, &timer_debug_descr);
563 return 0;
564 } else {
565 setup_timer(timer, stub_timer, 0);
566 return 1;
567 }
568 default:
569 return 0;
570 }
571}
572
573static struct debug_obj_descr timer_debug_descr = {
574 .name = "timer_list",
575 .debug_hint = timer_debug_hint,
576 .fixup_init = timer_fixup_init,
577 .fixup_activate = timer_fixup_activate,
578 .fixup_free = timer_fixup_free,
579 .fixup_assert_init = timer_fixup_assert_init,
580};
581
582static inline void debug_timer_init(struct timer_list *timer)
583{
584 debug_object_init(timer, &timer_debug_descr);
585}
586
587static inline void debug_timer_activate(struct timer_list *timer)
588{
589 debug_object_activate(timer, &timer_debug_descr);
590}
591
592static inline void debug_timer_deactivate(struct timer_list *timer)
593{
594 debug_object_deactivate(timer, &timer_debug_descr);
595}
596
597static inline void debug_timer_free(struct timer_list *timer)
598{
599 debug_object_free(timer, &timer_debug_descr);
600}
601
602static inline void debug_timer_assert_init(struct timer_list *timer)
603{
604 debug_object_assert_init(timer, &timer_debug_descr);
605}
606
607static void do_init_timer(struct timer_list *timer, unsigned int flags,
608 const char *name, struct lock_class_key *key);
609
610void init_timer_on_stack_key(struct timer_list *timer, unsigned int flags,
611 const char *name, struct lock_class_key *key)
612{
613 debug_object_init_on_stack(timer, &timer_debug_descr);
614 do_init_timer(timer, flags, name, key);
615}
616EXPORT_SYMBOL_GPL(init_timer_on_stack_key);
617
618void destroy_timer_on_stack(struct timer_list *timer)
619{
620 debug_object_free(timer, &timer_debug_descr);
621}
622EXPORT_SYMBOL_GPL(destroy_timer_on_stack);
623
624#else
625static inline void debug_timer_init(struct timer_list *timer) { }
626static inline void debug_timer_activate(struct timer_list *timer) { }
627static inline void debug_timer_deactivate(struct timer_list *timer) { }
628static inline void debug_timer_assert_init(struct timer_list *timer) { }
629#endif
630
631static inline void debug_init(struct timer_list *timer)
632{
633 debug_timer_init(timer);
634 trace_timer_init(timer);
635}
636
637static inline void
638debug_activate(struct timer_list *timer, unsigned long expires)
639{
640 debug_timer_activate(timer);
641 trace_timer_start(timer, expires);
642}
643
644static inline void debug_deactivate(struct timer_list *timer)
645{
646 debug_timer_deactivate(timer);
647 trace_timer_cancel(timer);
648}
649
650static inline void debug_assert_init(struct timer_list *timer)
651{
652 debug_timer_assert_init(timer);
653}
654
655static void do_init_timer(struct timer_list *timer, unsigned int flags,
656 const char *name, struct lock_class_key *key)
657{
658 struct tvec_base *base = __raw_get_cpu_var(tvec_bases);
659
660 timer->entry.next = NULL;
661 timer->base = (void *)((unsigned long)base | flags);
662 timer->slack = -1;
663#ifdef CONFIG_TIMER_STATS
664 timer->start_site = NULL;
665 timer->start_pid = -1;
666 memset(timer->start_comm, 0, TASK_COMM_LEN);
667#endif
668 lockdep_init_map(&timer->lockdep_map, name, key, 0);
669}
670
671/**
672 * init_timer_key - initialize a timer
673 * @timer: the timer to be initialized
674 * @flags: timer flags
675 * @name: name of the timer
676 * @key: lockdep class key of the fake lock used for tracking timer
677 * sync lock dependencies
678 *
 679 * init_timer_key() must be done to a timer prior to calling *any* of the
680 * other timer functions.
681 */
682void init_timer_key(struct timer_list *timer, unsigned int flags,
683 const char *name, struct lock_class_key *key)
684{
685 debug_init(timer);
686 do_init_timer(timer, flags, name, key);
687}
688EXPORT_SYMBOL(init_timer_key);
689
690static inline void detach_timer(struct timer_list *timer, bool clear_pending)
691{
692 struct list_head *entry = &timer->entry;
693
694 debug_deactivate(timer);
695
696 __list_del(entry->prev, entry->next);
697 if (clear_pending)
698 entry->next = NULL;
699 entry->prev = LIST_POISON2;
700}
701
702static inline void
703detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
704{
705 detach_timer(timer, true);
706 if (!tbase_get_deferrable(timer->base))
707 base->active_timers--;
708 base->all_timers--;
709 (void)catchup_timer_jiffies(base);
710}
711
712static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
713 bool clear_pending)
714{
715 if (!timer_pending(timer))
716 return 0;
717
718 detach_timer(timer, clear_pending);
719 if (!tbase_get_deferrable(timer->base)) {
720 base->active_timers--;
721 if (timer->expires == base->next_timer)
722 base->next_timer = base->timer_jiffies;
723 }
724 base->all_timers--;
725 (void)catchup_timer_jiffies(base);
726 return 1;
727}
728
729/*
730 * We are using hashed locking: holding per_cpu(tvec_bases).lock
731 * means that all timers which are tied to this base via timer->base are
732 * locked, and the base itself is locked too.
733 *
734 * So __run_timers/migrate_timers can safely modify all timers which could
735 * be found on ->tvX lists.
736 *
737 * When the timer's base is locked, and the timer removed from list, it is
738 * possible to set timer->base = NULL and drop the lock: the timer remains
739 * locked.
740 */
741static struct tvec_base *lock_timer_base(struct timer_list *timer,
742 unsigned long *flags)
743 __acquires(timer->base->lock)
744{
745 struct tvec_base *base;
746
747 for (;;) {
748 struct tvec_base *prelock_base = timer->base;
749 base = tbase_get_base(prelock_base);
750 if (likely(base != NULL)) {
751 spin_lock_irqsave(&base->lock, *flags);
752 if (likely(prelock_base == timer->base))
753 return base;
754 /* The timer has migrated to another CPU */
755 spin_unlock_irqrestore(&base->lock, *flags);
756 }
757 cpu_relax();
758 }
759}
760
761static inline int
762__mod_timer(struct timer_list *timer, unsigned long expires,
763 bool pending_only, int pinned)
764{
765 struct tvec_base *base, *new_base;
766 unsigned long flags;
767 int ret = 0 , cpu;
768
769 timer_stats_timer_set_start_info(timer);
770 BUG_ON(!timer->function);
771
772 base = lock_timer_base(timer, &flags);
773
774 ret = detach_if_pending(timer, base, false);
775 if (!ret && pending_only)
776 goto out_unlock;
777
778 debug_activate(timer, expires);
779
780 cpu = get_nohz_timer_target(pinned);
781 new_base = per_cpu(tvec_bases, cpu);
782
783 if (base != new_base) {
784 /*
785 * We are trying to schedule the timer on the local CPU.
786 * However we can't change timer's base while it is running,
787 * otherwise del_timer_sync() can't detect that the timer's
788 * handler yet has not finished. This also guarantees that
789 * the timer is serialized wrt itself.
790 */
791 if (likely(base->running_timer != timer)) {
792 /* See the comment in lock_timer_base() */
793 timer_set_base(timer, NULL);
794 spin_unlock(&base->lock);
795 base = new_base;
796 spin_lock(&base->lock);
797 timer_set_base(timer, base);
798 }
799 }
800
801 timer->expires = expires;
802 internal_add_timer(base, timer);
803
804out_unlock:
805 spin_unlock_irqrestore(&base->lock, flags);
806
807 return ret;
808}
809
810/**
811 * mod_timer_pending - modify a pending timer's timeout
812 * @timer: the pending timer to be modified
813 * @expires: new timeout in jiffies
814 *
815 * mod_timer_pending() is the same for pending timers as mod_timer(),
816 * but will not re-activate and modify already deleted timers.
817 *
818 * It is useful for unserialized use of timers.
819 */
820int mod_timer_pending(struct timer_list *timer, unsigned long expires)
821{
822 return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
823}
824EXPORT_SYMBOL(mod_timer_pending);
825
826/*
827 * Decide where to put the timer while taking the slack into account
828 *
829 * Algorithm:
830 * 1) calculate the maximum (absolute) time
831 * 2) calculate the highest bit where the expires and new max are different
832 * 3) use this bit to make a mask
833 * 4) use the bitmask to round down the maximum time, so that all last
834 * bits are zeros
835 */
836static inline
837unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
838{
839 unsigned long expires_limit, mask;
840 int bit;
841
842 if (timer->slack >= 0) {
843 expires_limit = expires + timer->slack;
844 } else {
845 long delta = expires - jiffies;
846
847 if (delta < 256)
848 return expires;
849
850 expires_limit = expires + delta / 256;
851 }
852 mask = expires ^ expires_limit;
853 if (mask == 0)
854 return expires;
855
856 bit = find_last_bit(&mask, BITS_PER_LONG);
857
858 mask = (1UL << bit) - 1;
859
860 expires_limit = expires_limit & ~(mask);
861
862 return expires_limit;
863}
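/*
 * Worked example (hypothetical numbers): with expires = 0x1009 and an
 * effective slack of 0x10 jiffies, expires_limit = 0x1019.  Then
 * mask = 0x1009 ^ 0x1019 = 0x0010, the highest differing bit is bit 4,
 * and rounding expires_limit down with mask 0x000f yields 0x1010.  The
 * timer is therefore queued for 0x1010 -- still within the allowed slack,
 * but on a boundary that nearby timers will share, so they can expire
 * together.
 */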
864
865/**
866 * mod_timer - modify a timer's timeout
867 * @timer: the timer to be modified
868 * @expires: new timeout in jiffies
869 *
870 * mod_timer() is a more efficient way to update the expire field of an
871 * active timer (if the timer is inactive it will be activated)
872 *
873 * mod_timer(timer, expires) is equivalent to:
874 *
875 * del_timer(timer); timer->expires = expires; add_timer(timer);
876 *
877 * Note that if there are multiple unserialized concurrent users of the
878 * same timer, then mod_timer() is the only safe way to modify the timeout,
879 * since add_timer() cannot modify an already running timer.
880 *
881 * The function returns whether it has modified a pending timer or not.
882 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
883 * active timer returns 1.)
884 */
885int mod_timer(struct timer_list *timer, unsigned long expires)
886{
887 expires = apply_slack(timer, expires);
888
889 /*
890 * This is a common optimization triggered by the
891 * networking code - if the timer is re-modified
892 * to be the same thing then just return:
893 */
894 if (timer_pending(timer) && timer->expires == expires)
895 return 1;
896
897 return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
898}
899EXPORT_SYMBOL(mod_timer);
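/*
 * Illustrative sketch (hypothetical module, 3.16-era timer API): the
 * documented mod_timer() pattern -- arm or re-arm in one call, tear down
 * with del_timer_sync() once nothing else can re-arm the timer.
 */
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list demo_timer;

static void demo_timer_fn(unsigned long data)
{
	pr_info("demo timer fired, data=%lu\n", data);
	/* Re-arm one second from now; activates the timer if it was inactive. */
	mod_timer(&demo_timer, jiffies + HZ);
}

static int __init demo_init(void)
{
	setup_timer(&demo_timer, demo_timer_fn, 0);
	mod_timer(&demo_timer, jiffies + HZ);
	return 0;
}

static void __exit demo_exit(void)
{
	/* Only the callback re-arms the timer, so one sync delete suffices. */
	del_timer_sync(&demo_timer);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");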
900
901/**
902 * mod_timer_pinned - modify a timer's timeout
903 * @timer: the timer to be modified
904 * @expires: new timeout in jiffies
905 *
906 * mod_timer_pinned() is a way to update the expire field of an
907 * active timer (if the timer is inactive it will be activated)
908 * and to ensure that the timer is scheduled on the current CPU.
909 *
910 * Note that this does not prevent the timer from being migrated
911 * when the current CPU goes offline. If this is a problem for
912 * you, use CPU-hotplug notifiers to handle it correctly, for
913 * example, cancelling the timer when the corresponding CPU goes
914 * offline.
915 *
916 * mod_timer_pinned(timer, expires) is equivalent to:
917 *
918 * del_timer(timer); timer->expires = expires; add_timer(timer);
919 */
920int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
921{
922 if (timer->expires == expires && timer_pending(timer))
923 return 1;
924
925 return __mod_timer(timer, expires, false, TIMER_PINNED);
926}
927EXPORT_SYMBOL(mod_timer_pinned);
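/*
 * Illustrative fragment (hypothetical names, usual module boilerplate
 * assumed): a self-rearming poll timer that calls mod_timer_pinned()
 * from its own callback so the next expiry stays on the CPU that is
 * currently servicing it.
 */
static struct timer_list poll_timer;

static void poll_fn(unsigned long data)
{
	/* ... per-CPU polling work ... */
	mod_timer_pinned(&poll_timer, jiffies + HZ / 10);
}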
928
929/**
930 * add_timer - start a timer
931 * @timer: the timer to be added
932 *
933 * The kernel will do a ->function(->data) callback from the
934 * timer interrupt at the ->expires point in the future. The
935 * current time is 'jiffies'.
936 *
937 * The timer's ->expires, ->function (and if the handler uses it, ->data)
938 * fields must be set prior to calling this function.
939 *
940 * Timers with an ->expires field in the past will be executed in the next
941 * timer tick.
942 */
943void add_timer(struct timer_list *timer)
944{
945 BUG_ON(timer_pending(timer));
946 mod_timer(timer, timer->expires);
947}
948EXPORT_SYMBOL(add_timer);
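/*
 * Illustrative fragment (hypothetical names): the fields the comment
 * above requires -- ->expires, ->function and, if the handler uses it,
 * ->data -- filled in before add_timer() queues the timer.
 */
static void one_shot_fn(unsigned long data);

static struct timer_list one_shot;

static void arm_one_shot(void)
{
	init_timer(&one_shot);
	one_shot.function = one_shot_fn;
	one_shot.data = 0;
	one_shot.expires = jiffies + 2 * HZ;
	add_timer(&one_shot);
}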
949
950/**
951 * add_timer_on - start a timer on a particular CPU
952 * @timer: the timer to be added
953 * @cpu: the CPU to start it on
954 *
955 * This is not very scalable on SMP. Double adds are not possible.
956 */
957void add_timer_on(struct timer_list *timer, int cpu)
958{
959 struct tvec_base *base = per_cpu(tvec_bases, cpu);
960 unsigned long flags;
961
962 timer_stats_timer_set_start_info(timer);
963 BUG_ON(timer_pending(timer) || !timer->function);
964 spin_lock_irqsave(&base->lock, flags);
965 timer_set_base(timer, base);
966 debug_activate(timer, timer->expires);
967 internal_add_timer(base, timer);
968 spin_unlock_irqrestore(&base->lock, flags);
969}
970EXPORT_SYMBOL_GPL(add_timer_on);
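/*
 * Illustrative fragment (hypothetical names): queueing a housekeeping
 * timer on a specific CPU.  The timer's ->function (and ->data) must
 * already be set up, e.g. via setup_timer(), and @cpu is assumed to be
 * online.
 */
static void start_housekeeping_on(struct timer_list *t, int cpu)
{
	t->expires = jiffies + HZ;
	add_timer_on(t, cpu);
}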
971
972/**
973 * del_timer - deactivate a timer.
974 * @timer: the timer to be deactivated
975 *
976 * del_timer() deactivates a timer - this works on both active and inactive
977 * timers.
978 *
979 * The function returns whether it has deactivated a pending timer or not.
980 * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
981 * active timer returns 1.)
982 */
983int del_timer(struct timer_list *timer)
984{
985 struct tvec_base *base;
986 unsigned long flags;
987 int ret = 0;
988
989 debug_assert_init(timer);
990
991 timer_stats_timer_clear_start_info(timer);
992 if (timer_pending(timer)) {
993 base = lock_timer_base(timer, &flags);
994 ret = detach_if_pending(timer, base, true);
995 spin_unlock_irqrestore(&base->lock, flags);
996 }
997
998 return ret;
999}
1000EXPORT_SYMBOL(del_timer);
1001
1002/**
1003 * try_to_del_timer_sync - Try to deactivate a timer
1004 * @timer: timer to deactivate
1005 *
1006 * This function tries to deactivate a timer. Upon successful (ret >= 0)
1007 * exit the timer is not queued and the handler is not running on any CPU.
1008 */
1009int try_to_del_timer_sync(struct timer_list *timer)
1010{
1011 struct tvec_base *base;
1012 unsigned long flags;
1013 int ret = -1;
1014
1015 debug_assert_init(timer);
1016
1017 base = lock_timer_base(timer, &flags);
1018
1019 if (base->running_timer != timer) {
1020 timer_stats_timer_clear_start_info(timer);
1021 ret = detach_if_pending(timer, base, true);
1022 }
1023 spin_unlock_irqrestore(&base->lock, flags);
1024
1025 return ret;
1026}
1027EXPORT_SYMBOL(try_to_del_timer_sync);
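/*
 * Illustrative fragment (hypothetical names): a teardown path that holds
 * a lock the timer callback also takes, so del_timer_sync() would
 * deadlock.  Instead it retries try_to_del_timer_sync() with the lock
 * dropped between attempts.
 */
struct foo {
	spinlock_t lock;
	bool shutting_down;		/* checked by the timer callback */
	struct timer_list watchdog;
};

static void foo_stop_watchdog(struct foo *f)
{
	spin_lock_bh(&f->lock);
	f->shutting_down = true;
	while (try_to_del_timer_sync(&f->watchdog) < 0) {
		spin_unlock_bh(&f->lock);
		cpu_relax();
		spin_lock_bh(&f->lock);
	}
	spin_unlock_bh(&f->lock);
}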
1028
1029#ifdef CONFIG_SMP
1030/**
1031 * del_timer_sync - deactivate a timer and wait for the handler to finish.
1032 * @timer: the timer to be deactivated
1033 *
1034 * This function only differs from del_timer() on SMP: besides deactivating
1035 * the timer it also makes sure the handler has finished executing on other
1036 * CPUs.
1037 *
1038 * Synchronization rules: Callers must prevent restarting of the timer,
1039 * otherwise this function is meaningless. It must not be called from
1040 * interrupt contexts unless the timer is an irqsafe one. The caller must
1041 * not hold locks which would prevent completion of the timer's
1042 * handler. The timer's handler must not call add_timer_on(). Upon exit the
1043 * timer is not queued and the handler is not running on any CPU.
1044 *
1045 * Note: For !irqsafe timers, you must not hold locks that are held in
1046 * interrupt context while calling this function. Even if the lock has
1047 * nothing to do with the timer in question. Here's why:
1048 *
1049 * CPU0 CPU1
1050 * ---- ----
1051 * <SOFTIRQ>
1052 * call_timer_fn();
1053 * base->running_timer = mytimer;
1054 * spin_lock_irq(somelock);
1055 * <IRQ>
1056 * spin_lock(somelock);
1057 * del_timer_sync(mytimer);
1058 * while (base->running_timer == mytimer);
1059 *
1060 * Now del_timer_sync() will never return and never release somelock.
1061 * The interrupt on the other CPU is waiting to grab somelock but
1062 * it has interrupted the softirq that CPU0 is waiting to finish.
1063 *
1064 * The function returns whether it has deactivated a pending timer or not.
1065 */
1066int del_timer_sync(struct timer_list *timer)
1067{
1068#ifdef CONFIG_LOCKDEP
1069 unsigned long flags;
1070
1071 /*
1072 * If lockdep gives a backtrace here, please reference
1073 * the synchronization rules above.
1074 */
1075 local_irq_save(flags);
1076 lock_map_acquire(&timer->lockdep_map);
1077 lock_map_release(&timer->lockdep_map);
1078 local_irq_restore(flags);
1079#endif
1080 /*
1081 * don't use it in hardirq context, because it
1082 * could lead to deadlock.
1083 */
1084 WARN_ON(in_irq() && !tbase_get_irqsafe(timer->base));
1085 for (;;) {
1086 int ret = try_to_del_timer_sync(timer);
1087 if (ret >= 0)
1088 return ret;
1089 cpu_relax();
1090 }
1091}
1092EXPORT_SYMBOL(del_timer_sync);
1093#endif
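/*
 * Illustrative fragment (reusing the hypothetical "somelock"/"mytimer"
 * names from the scenario above): the safe ordering -- drop any lock an
 * interrupt handler might also take before waiting for the timer.
 */
static DEFINE_SPINLOCK(somelock);
static struct timer_list mytimer;

static void safe_teardown(void)
{
	spin_lock_irq(&somelock);
	/* ... mark the object dead so nothing re-arms mytimer ... */
	spin_unlock_irq(&somelock);

	del_timer_sync(&mytimer);	/* may now spin for the handler without deadlocking */
}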
1094
1095static int cascade(struct tvec_base *base, struct tvec *tv, int index)
1096{
1097 /* cascade all the timers from tv up one level */
1098 struct timer_list *timer, *tmp;
1099 struct list_head tv_list;
1100
1101 list_replace_init(tv->vec + index, &tv_list);
1102
1103 /*
1104 * We are removing _all_ timers from the list, so we
1105 * don't have to detach them individually.
1106 */
1107 list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
1108 BUG_ON(tbase_get_base(timer->base) != base);
1109 /* No accounting, while moving them */
1110 __internal_add_timer(base, timer);
1111 }
1112
1113 return index;
1114}
1115
1116static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
1117 unsigned long data)
1118{
1119 int count = preempt_count();
1120
1121#ifdef CONFIG_LOCKDEP
1122 /*
1123 * It is permissible to free the timer from inside the
1124 * function that is called from it, this we need to take into
1125 * account for lockdep too. To avoid bogus "held lock freed"
1126 * warnings as well as problems when looking into
1127 * timer->lockdep_map, make a copy and use that here.
1128 */
1129 struct lockdep_map lockdep_map;
1130
1131 lockdep_copy_map(&lockdep_map, &timer->lockdep_map);
1132#endif
1133 /*
1134 * Couple the lock chain with the lock chain at
1135 * del_timer_sync() by acquiring the lock_map around the fn()
1136 * call here and in del_timer_sync().
1137 */
1138 lock_map_acquire(&lockdep_map);
1139
1140 trace_timer_expire_entry(timer);
1141 fn(data);
1142 trace_timer_expire_exit(timer);
1143
1144 lock_map_release(&lockdep_map);
1145
1146 if (count != preempt_count()) {
1147 WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
1148 fn, count, preempt_count());
1149 /*
1150 * Restore the preempt count. That gives us a decent
1151 * chance to survive and extract information. If the
1152 * callback kept a lock held, bad luck, but not worse
1153 * than the BUG() we had.
1154 */
1155 preempt_count_set(count);
1156 }
1157}
1158
1159#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
1160
1161/**
1162 * __run_timers - run all expired timers (if any) on this CPU.
1163 * @base: the timer vector to be processed.
1164 *
1165 * This function cascades all vectors and executes all expired timer
1166 * vectors.
1167 */
1168static inline void __run_timers(struct tvec_base *base)
1169{
1170 struct timer_list *timer;
1171
1172 spin_lock_irq(&base->lock);
1173 if (catchup_timer_jiffies(base)) {
1174 spin_unlock_irq(&base->lock);
1175 return;
1176 }
1177 while (time_after_eq(jiffies, base->timer_jiffies)) {
1178 struct list_head work_list;
1179 struct list_head *head = &work_list;
1180 int index = base->timer_jiffies & TVR_MASK;
1181
1182 /*
1183 * Cascade timers:
1184 */
1185 if (!index &&
1186 (!cascade(base, &base->tv2, INDEX(0))) &&
1187 (!cascade(base, &base->tv3, INDEX(1))) &&
1188 !cascade(base, &base->tv4, INDEX(2)))
1189 cascade(base, &base->tv5, INDEX(3));
1190 ++base->timer_jiffies;
1191 list_replace_init(base->tv1.vec + index, head);
1192 while (!list_empty(head)) {
1193 void (*fn)(unsigned long);
1194 unsigned long data;
1195 bool irqsafe;
1196
1197			timer = list_first_entry(head, struct timer_list, entry);
1198 fn = timer->function;
1199 data = timer->data;
1200 irqsafe = tbase_get_irqsafe(timer->base);
1201
1202 timer_stats_account_timer(timer);
1203
1204 base->running_timer = timer;
1205 detach_expired_timer(timer, base);
1206
1207 if (irqsafe) {
1208 spin_unlock(&base->lock);
1209 call_timer_fn(timer, fn, data);
1210 spin_lock(&base->lock);
1211 } else {
1212 spin_unlock_irq(&base->lock);
1213 call_timer_fn(timer, fn, data);
1214 spin_lock_irq(&base->lock);
1215 }
1216 }
1217 }
1218 base->running_timer = NULL;
1219 spin_unlock_irq(&base->lock);
1220}
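/*
 * Worked example of the wheel arithmetic above (assuming the common
 * TVR_BITS = 8, TVN_BITS = 6 configuration, which is defined outside
 * this excerpt): tv1 has 256 one-jiffy slots, so
 * "index = base->timer_jiffies & TVR_MASK" wraps every 256 jiffies.
 * Each time it wraps to 0, INDEX(0) = (timer_jiffies >> 8) & 63 names
 * the tv2 bucket whose timers now fall within the next 256 jiffies, and
 * cascade() re-hashes them into tv1.  Only when that tv2 index is also 0
 * does the cascade continue into tv3, and so on up to tv5.
 */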
1221
1222#ifdef CONFIG_NO_HZ_COMMON
1223/*
1224 * Find out when the next timer event is due to happen. This
1225 * is used on S/390 to stop all activity when a CPU is idle.
1226 * This function needs to be called with interrupts disabled.
1227 */
1228static unsigned long __next_timer_interrupt(struct tvec_base *base)
1229{
1230 unsigned long timer_jiffies = base->timer_jiffies;
1231 unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
1232 int index, slot, array, found = 0;
1233 struct timer_list *nte;
1234 struct tvec *varray[4];
1235
1236 /* Look for timer events in tv1. */
1237 index = slot = timer_jiffies & TVR_MASK;
1238 do {
1239 list_for_each_entry(nte, base->tv1.vec + slot, entry) {
1240 if (tbase_get_deferrable(nte->base))
1241 continue;
1242
1243 found = 1;
1244 expires = nte->expires;
1245 /* Look at the cascade bucket(s)? */
1246 if (!index || slot < index)
1247 goto cascade;
1248 return expires;
1249 }
1250 slot = (slot + 1) & TVR_MASK;
1251 } while (slot != index);
1252
1253cascade:
1254 /* Calculate the next cascade event */
1255 if (index)
1256 timer_jiffies += TVR_SIZE - index;
1257 timer_jiffies >>= TVR_BITS;
1258
1259 /* Check tv2-tv5. */
1260 varray[0] = &base->tv2;
1261 varray[1] = &base->tv3;
1262 varray[2] = &base->tv4;
1263 varray[3] = &base->tv5;
1264
1265 for (array = 0; array < 4; array++) {
1266 struct tvec *varp = varray[array];
1267
1268 index = slot = timer_jiffies & TVN_MASK;
1269 do {
1270 list_for_each_entry(nte, varp->vec + slot, entry) {
1271 if (tbase_get_deferrable(nte->base))
1272 continue;
1273
1274 found = 1;
1275 if (time_before(nte->expires, expires))
1276 expires = nte->expires;
1277 }
1278 /*
1279 * Do we still search for the first timer or are
1280		 * we looking up the cascade buckets?
1281 */
1282 if (found) {
1283 /* Look at the cascade bucket(s)? */
1284 if (!index || slot < index)
1285 break;
1286 return expires;
1287 }
1288 slot = (slot + 1) & TVN_MASK;
1289 } while (slot != index);
1290
1291 if (index)
1292 timer_jiffies += TVN_SIZE - index;
1293 timer_jiffies >>= TVN_BITS;
1294 }
1295 return expires;
1296}
1297
1298/*
1299 * Check, if the next hrtimer event is before the next timer wheel
1300 * event:
1301 */
1302static unsigned long cmp_next_hrtimer_event(unsigned long now,
1303 unsigned long expires)
1304{
1305 ktime_t hr_delta = hrtimer_get_next_event();
1306 struct timespec tsdelta;
1307 unsigned long delta;
1308
1309 if (hr_delta.tv64 == KTIME_MAX)
1310 return expires;
1311
1312 /*
1313 * Expired timer available, let it expire in the next tick
1314 */
1315 if (hr_delta.tv64 <= 0)
1316 return now + 1;
1317
1318 tsdelta = ktime_to_timespec(hr_delta);
1319 delta = timespec_to_jiffies(&tsdelta);
1320
1321 /*
1322 * Limit the delta to the max value, which is checked in
1323 * tick_nohz_stop_sched_tick():
1324 */
1325 if (delta > NEXT_TIMER_MAX_DELTA)
1326 delta = NEXT_TIMER_MAX_DELTA;
1327
1328 /*
1329	 * Take rounding errors into account and make sure that it
1330 * expires in the next tick. Otherwise we go into an endless
1331 * ping pong due to tick_nohz_stop_sched_tick() retriggering
1332 * the timer softirq
1333 */
1334 if (delta < 1)
1335 delta = 1;
1336 now += delta;
1337 if (time_before(now, expires))
1338 return now;
1339 return expires;
1340}
1341
1342/**
1343 * get_next_timer_interrupt - return the jiffy of the next pending timer
1344 * @now: current time (in jiffies)
1345 */
1346unsigned long get_next_timer_interrupt(unsigned long now)
1347{
1348 struct tvec_base *base = __this_cpu_read(tvec_bases);
1349 unsigned long expires = now + NEXT_TIMER_MAX_DELTA;
1350
1351 /*
1352 * Pretend that there is no timer pending if the cpu is offline.
1353 * Possible pending timers will be migrated later to an active cpu.
1354 */
1355 if (cpu_is_offline(smp_processor_id()))
1356 return expires;
1357
1358 spin_lock(&base->lock);
1359 if (base->active_timers) {
1360 if (time_before_eq(base->next_timer, base->timer_jiffies))
1361 base->next_timer = __next_timer_interrupt(base);
1362 expires = base->next_timer;
1363 }
1364 spin_unlock(&base->lock);
1365
1366 if (time_before_eq(expires, now))
1367 return now;
1368
1369 return cmp_next_hrtimer_event(now, expires);
1370}
1371#endif
1372
1373/*
1374 * Called from the timer interrupt handler to charge one tick to the current
1375 * process. user_tick is 1 if the tick is user time, 0 for system.
1376 */
1377void update_process_times(int user_tick)
1378{
1379 struct task_struct *p = current;
1380 int cpu = smp_processor_id();
1381
1382 /* Note: this timer irq context must be accounted for as well. */
1383 account_process_tick(p, user_tick);
1384 run_local_timers();
1385 rcu_check_callbacks(cpu, user_tick);
1386#ifdef CONFIG_IRQ_WORK
1387 if (in_irq())
1388 irq_work_run();
1389#endif
1390 scheduler_tick();
1391 run_posix_cpu_timers(p);
1392}
1393
1394/*
1395 * This function runs timers and the timer-tq in bottom half context.
1396 */
1397static void run_timer_softirq(struct softirq_action *h)
1398{
1399 struct tvec_base *base = __this_cpu_read(tvec_bases);
1400
1401 hrtimer_run_pending();
1402
1403 if (time_after_eq(jiffies, base->timer_jiffies))
1404 __run_timers(base);
1405}
1406
1407/*
1408 * Called by the local, per-CPU timer interrupt on SMP.
1409 */
1410void run_local_timers(void)
1411{
1412 hrtimer_run_queues();
1413 raise_softirq(TIMER_SOFTIRQ);
1414}
1415
1416#ifdef __ARCH_WANT_SYS_ALARM
1417
1418/*
1419 * For backwards compatibility? This can be done in libc so Alpha
1420 * and all newer ports shouldn't need it.
1421 */
1422SYSCALL_DEFINE1(alarm, unsigned int, seconds)
1423{
1424 return alarm_setitimer(seconds);
1425}
1426
1427#endif
1428
1429static void process_timeout(unsigned long __data)
1430{
1431 wake_up_process((struct task_struct *)__data);
1432}
1433
1434/**
1435 * schedule_timeout - sleep until timeout
1436 * @timeout: timeout value in jiffies
1437 *
1438 * Make the current task sleep until @timeout jiffies have
1439 * elapsed. The routine will return immediately unless
1440 * the current task state has been set (see set_current_state()).
1441 *
1442 * You can set the task state as follows -
1443 *
1444 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
1445 * pass before the routine returns. The routine will return 0
1446 *
1447 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1448 * delivered to the current task. In this case the remaining time
1449 * in jiffies will be returned, or 0 if the timer expired in time
1450 *
1451 * The current task state is guaranteed to be TASK_RUNNING when this
1452 * routine returns.
1453 *
1454 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
1455 * the CPU away without a bound on the timeout. In this case the return
1456 * value will be %MAX_SCHEDULE_TIMEOUT.
1457 *
1458 * In all cases the return value is guaranteed to be non-negative.
1459 */
1460signed long __sched schedule_timeout(signed long timeout)
1461{
1462 struct timer_list timer;
1463 unsigned long expire;
1464
1465 switch (timeout)
1466 {
1467 case MAX_SCHEDULE_TIMEOUT:
1468 /*
1469		 * These two special cases exist purely for the caller's
1470		 * convenience. Nothing more. We could take
1471		 * MAX_SCHEDULE_TIMEOUT from one of the negative values,
1472		 * but I'd like to return a valid offset (>= 0) to allow
1473		 * the caller to do everything it wants with the retval.
1474 */
1475 schedule();
1476 goto out;
1477 default:
1478 /*
1479		 * Another bit of paranoia. Note that the retval will be
1480		 * 0 since no piece of the kernel is supposed to check
1481		 * for a negative retval of schedule_timeout() (since it
1482		 * should never happen anyway). You just have the printk()
1483		 * that will tell you if something has gone wrong and where.
1484 */
1485 if (timeout < 0) {
1486 printk(KERN_ERR "schedule_timeout: wrong timeout "
1487 "value %lx\n", timeout);
1488 dump_stack();
1489 current->state = TASK_RUNNING;
1490 goto out;
1491 }
1492 }
1493
1494 expire = timeout + jiffies;
1495
1496 setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
1497 __mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
1498 schedule();
1499 del_singleshot_timer_sync(&timer);
1500
1501 /* Remove the timer from the object tracker */
1502 destroy_timer_on_stack(&timer);
1503
1504 timeout = expire - jiffies;
1505
1506 out:
1507 return timeout < 0 ? 0 : timeout;
1508}
1509EXPORT_SYMBOL(schedule_timeout);
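/*
 * Illustrative fragment: the documented calling convention -- set the
 * task state first, then let schedule_timeout() put the task to sleep.
 * Sleeps for up to two seconds, returning early (with the remaining
 * jiffies) if the task is woken or, in TASK_INTERRUPTIBLE, receives a
 * signal.
 */
static signed long nap_up_to_two_seconds(void)
{
	set_current_state(TASK_INTERRUPTIBLE);
	return schedule_timeout(2 * HZ);
}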
1510
1511/*
1512 * We can use __set_current_state() here because schedule_timeout() calls
1513 * schedule() unconditionally.
1514 */
1515signed long __sched schedule_timeout_interruptible(signed long timeout)
1516{
1517 __set_current_state(TASK_INTERRUPTIBLE);
1518 return schedule_timeout(timeout);
1519}
1520EXPORT_SYMBOL(schedule_timeout_interruptible);
1521
1522signed long __sched schedule_timeout_killable(signed long timeout)
1523{
1524 __set_current_state(TASK_KILLABLE);
1525 return schedule_timeout(timeout);
1526}
1527EXPORT_SYMBOL(schedule_timeout_killable);
1528
1529signed long __sched schedule_timeout_uninterruptible(signed long timeout)
1530{
1531 __set_current_state(TASK_UNINTERRUPTIBLE);
1532 return schedule_timeout(timeout);
1533}
1534EXPORT_SYMBOL(schedule_timeout_uninterruptible);
1535
1536static int init_timers_cpu(int cpu)
1537{
1538 int j;
1539 struct tvec_base *base;
1540 static char tvec_base_done[NR_CPUS];
1541
1542 if (!tvec_base_done[cpu]) {
1543 static char boot_done;
1544
1545 if (boot_done) {
1546 /*
1547 * The APs use this path later in boot
1548 */
1549 base = kzalloc_node(sizeof(*base), GFP_KERNEL,
1550 cpu_to_node(cpu));
1551 if (!base)
1552 return -ENOMEM;
1553
1554 /* Make sure tvec_base has TIMER_FLAG_MASK bits free */
1555 if (WARN_ON(base != tbase_get_base(base))) {
1556 kfree(base);
1557 return -ENOMEM;
1558 }
1559 per_cpu(tvec_bases, cpu) = base;
1560 } else {
1561 /*
1562 * This is for the boot CPU - we use compile-time
1563 * static initialisation because per-cpu memory isn't
1564 * ready yet and because the memory allocators are not
1565 * initialised either.
1566 */
1567 boot_done = 1;
1568 base = &boot_tvec_bases;
1569 }
1570 spin_lock_init(&base->lock);
1571 tvec_base_done[cpu] = 1;
1572 base->cpu = cpu;
1573 } else {
1574 base = per_cpu(tvec_bases, cpu);
1575 }
1576
1577
1578 for (j = 0; j < TVN_SIZE; j++) {
1579 INIT_LIST_HEAD(base->tv5.vec + j);
1580 INIT_LIST_HEAD(base->tv4.vec + j);
1581 INIT_LIST_HEAD(base->tv3.vec + j);
1582 INIT_LIST_HEAD(base->tv2.vec + j);
1583 }
1584 for (j = 0; j < TVR_SIZE; j++)
1585 INIT_LIST_HEAD(base->tv1.vec + j);
1586
1587 base->timer_jiffies = jiffies;
1588 base->next_timer = base->timer_jiffies;
1589 base->active_timers = 0;
1590 base->all_timers = 0;
1591 return 0;
1592}
1593
1594#ifdef CONFIG_HOTPLUG_CPU
1595static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
1596{
1597 struct timer_list *timer;
1598
1599 while (!list_empty(head)) {
1600 timer = list_first_entry(head, struct timer_list, entry);
1601 /* We ignore the accounting on the dying cpu */
1602 detach_timer(timer, false);
1603 timer_set_base(timer, new_base);
1604 internal_add_timer(new_base, timer);
1605 }
1606}
1607
1608static void migrate_timers(int cpu)
1609{
1610 struct tvec_base *old_base;
1611 struct tvec_base *new_base;
1612 int i;
1613
1614 BUG_ON(cpu_online(cpu));
1615 old_base = per_cpu(tvec_bases, cpu);
1616 new_base = get_cpu_var(tvec_bases);
1617 /*
1618 * The caller is globally serialized and nobody else
1619 * takes two locks at once, deadlock is not possible.
1620 */
1621 spin_lock_irq(&new_base->lock);
1622 spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
1623
1624 BUG_ON(old_base->running_timer);
1625
1626 for (i = 0; i < TVR_SIZE; i++)
1627 migrate_timer_list(new_base, old_base->tv1.vec + i);
1628 for (i = 0; i < TVN_SIZE; i++) {
1629 migrate_timer_list(new_base, old_base->tv2.vec + i);
1630 migrate_timer_list(new_base, old_base->tv3.vec + i);
1631 migrate_timer_list(new_base, old_base->tv4.vec + i);
1632 migrate_timer_list(new_base, old_base->tv5.vec + i);
1633 }
1634
1635 spin_unlock(&old_base->lock);
1636 spin_unlock_irq(&new_base->lock);
1637 put_cpu_var(tvec_bases);
1638}
1639#endif /* CONFIG_HOTPLUG_CPU */
1640
1641static int timer_cpu_notify(struct notifier_block *self,
1642 unsigned long action, void *hcpu)
1643{
1644 long cpu = (long)hcpu;
1645 int err;
1646
1647 switch(action) {
1648 case CPU_UP_PREPARE:
1649 case CPU_UP_PREPARE_FROZEN:
1650 err = init_timers_cpu(cpu);
1651 if (err < 0)
1652 return notifier_from_errno(err);
1653 break;
1654#ifdef CONFIG_HOTPLUG_CPU
1655 case CPU_DEAD:
1656 case CPU_DEAD_FROZEN:
1657 migrate_timers(cpu);
1658 break;
1659#endif
1660 default:
1661 break;
1662 }
1663 return NOTIFY_OK;
1664}
1665
1666static struct notifier_block timers_nb = {
1667 .notifier_call = timer_cpu_notify,
1668};
1669
1670
1671void __init init_timers(void)
1672{
1673 int err;
1674
1675 /* ensure there are enough low bits for flags in timer->base pointer */
1676 BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK);
1677
1678 err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
1679 (void *)(long)smp_processor_id());
1680 BUG_ON(err != NOTIFY_OK);
1681
1682 init_timer_stats();
1683 register_cpu_notifier(&timers_nb);
1684 open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
1685}
1686
1687/**
1688 * msleep - sleep safely even with waitqueue interruptions
1689 * @msecs: Time in milliseconds to sleep for
1690 */
1691void msleep(unsigned int msecs)
1692{
1693 unsigned long timeout = msecs_to_jiffies(msecs) + 1;
1694
1695 while (timeout)
1696 timeout = schedule_timeout_uninterruptible(timeout);
1697}
1698
1699EXPORT_SYMBOL(msleep);
1700
1701/**
1702 * msleep_interruptible - sleep waiting for signals
1703 * @msecs: Time in milliseconds to sleep for
1704 */
1705unsigned long msleep_interruptible(unsigned int msecs)
1706{
1707 unsigned long timeout = msecs_to_jiffies(msecs) + 1;
1708
1709 while (timeout && !signal_pending(current))
1710 timeout = schedule_timeout_interruptible(timeout);
1711 return jiffies_to_msecs(timeout);
1712}
1713
1714EXPORT_SYMBOL(msleep_interruptible);
1715
1716static int __sched do_usleep_range(unsigned long min, unsigned long max)
1717{
1718 ktime_t kmin;
1719 unsigned long delta;
1720
1721 kmin = ktime_set(0, min * NSEC_PER_USEC);
1722 delta = (max - min) * NSEC_PER_USEC;
1723 return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
1724}
1725
1726/**
1727 * usleep_range - Drop in replacement for udelay where wakeup is flexible
1728 * @min: Minimum time in usecs to sleep
1729 * @max: Maximum time in usecs to sleep
1730 */
1731void usleep_range(unsigned long min, unsigned long max)
1732{
1733 __set_current_state(TASK_UNINTERRUPTIBLE);
1734 do_usleep_range(min, max);
1735}
1736EXPORT_SYMBOL(usleep_range);
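/*
 * Illustrative fragment (hypothetical delays): picking between the
 * sleeping primitives above -- usleep_range() as the documented flexible
 * replacement for udelay() in sleepable context, msleep() once the wait
 * is long enough that jiffy resolution is fine.
 */
static void wait_for_chip_ready(void)
{
	usleep_range(200, 300);		/* ~200us needed; allow coalescing up to 300us */
}

static void wait_for_chip_reset(void)
{
	msleep(20);			/* multi-millisecond wait */
}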
diff --git a/kernel/time/udelay_test.c b/kernel/time/udelay_test.c
new file mode 100644
index 000000000000..e622ba365a13
--- /dev/null
+++ b/kernel/time/udelay_test.c
@@ -0,0 +1,168 @@
1/*
2 * udelay() test kernel module
3 *
4 * Test is executed by writing and reading to /sys/kernel/debug/udelay_test
5 * Tests are configured by writing: USECS ITERATIONS
6 * Tests are executed by reading from the same file.
7 * Specifying usecs of 0 or negative values will run multiple tests.
8 *
9 * Copyright (C) 2014 Google, Inc.
10 *
11 * This software is licensed under the terms of the GNU General Public
12 * License version 2, as published by the Free Software Foundation, and
13 * may be copied, distributed, and modified under those terms.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 */
20
21#include <linux/debugfs.h>
22#include <linux/delay.h>
23#include <linux/ktime.h>
24#include <linux/module.h>
25#include <linux/uaccess.h>
26
27#define DEFAULT_ITERATIONS 100
28
29#define DEBUGFS_FILENAME "udelay_test"
30
31static DEFINE_MUTEX(udelay_test_lock);
32static struct dentry *udelay_test_debugfs_file;
33static int udelay_test_usecs;
34static int udelay_test_iterations = DEFAULT_ITERATIONS;
35
36static int udelay_test_single(struct seq_file *s, int usecs, uint32_t iters)
37{
38 int min = 0, max = 0, fail_count = 0;
39 uint64_t sum = 0;
40 uint64_t avg;
41 int i;
42 /* Allow udelay to be up to 0.5% fast */
43 int allowed_error_ns = usecs * 5;
44
45 for (i = 0; i < iters; ++i) {
46 struct timespec ts1, ts2;
47 int time_passed;
48
49 ktime_get_ts(&ts1);
50 udelay(usecs);
51 ktime_get_ts(&ts2);
52 time_passed = timespec_to_ns(&ts2) - timespec_to_ns(&ts1);
53
54 if (i == 0 || time_passed < min)
55 min = time_passed;
56 if (i == 0 || time_passed > max)
57 max = time_passed;
58 if ((time_passed + allowed_error_ns) / 1000 < usecs)
59 ++fail_count;
60 WARN_ON(time_passed < 0);
61 sum += time_passed;
62 }
63
64 avg = sum;
65 do_div(avg, iters);
66 seq_printf(s, "%d usecs x %d: exp=%d allowed=%d min=%d avg=%lld max=%d",
67 usecs, iters, usecs * 1000,
68 (usecs * 1000) - allowed_error_ns, min, avg, max);
69 if (fail_count)
70 seq_printf(s, " FAIL=%d", fail_count);
71 seq_puts(s, "\n");
72
73 return 0;
74}
75
76static int udelay_test_show(struct seq_file *s, void *v)
77{
78 int usecs;
79 int iters;
80 int ret = 0;
81
82 mutex_lock(&udelay_test_lock);
83 usecs = udelay_test_usecs;
84 iters = udelay_test_iterations;
85 mutex_unlock(&udelay_test_lock);
86
87 if (usecs > 0 && iters > 0) {
88 return udelay_test_single(s, usecs, iters);
89 } else if (usecs == 0) {
90 struct timespec ts;
91
92 ktime_get_ts(&ts);
93 seq_printf(s, "udelay() test (lpj=%ld kt=%ld.%09ld)\n",
94 loops_per_jiffy, ts.tv_sec, ts.tv_nsec);
95 seq_puts(s, "usage:\n");
96 seq_puts(s, "echo USECS [ITERS] > " DEBUGFS_FILENAME "\n");
97 seq_puts(s, "cat " DEBUGFS_FILENAME "\n");
98 }
99
100 return ret;
101}
102
103static int udelay_test_open(struct inode *inode, struct file *file)
104{
105 return single_open(file, udelay_test_show, inode->i_private);
106}
107
108static ssize_t udelay_test_write(struct file *file, const char __user *buf,
109 size_t count, loff_t *pos)
110{
111 char lbuf[32];
112 int ret;
113 int usecs;
114 int iters;
115
116 if (count >= sizeof(lbuf))
117 return -EINVAL;
118
119 if (copy_from_user(lbuf, buf, count))
120 return -EFAULT;
121 lbuf[count] = '\0';
122
123 ret = sscanf(lbuf, "%d %d", &usecs, &iters);
124 if (ret < 1)
125 return -EINVAL;
126 else if (ret < 2)
127 iters = DEFAULT_ITERATIONS;
128
129 mutex_lock(&udelay_test_lock);
130 udelay_test_usecs = usecs;
131 udelay_test_iterations = iters;
132 mutex_unlock(&udelay_test_lock);
133
134 return count;
135}
136
137static const struct file_operations udelay_test_debugfs_ops = {
138 .owner = THIS_MODULE,
139 .open = udelay_test_open,
140 .read = seq_read,
141 .write = udelay_test_write,
142 .llseek = seq_lseek,
143 .release = single_release,
144};
145
146static int __init udelay_test_init(void)
147{
148 mutex_lock(&udelay_test_lock);
149 udelay_test_debugfs_file = debugfs_create_file(DEBUGFS_FILENAME,
150 S_IRUSR, NULL, NULL, &udelay_test_debugfs_ops);
151 mutex_unlock(&udelay_test_lock);
152
153 return 0;
154}
155
156module_init(udelay_test_init);
157
158static void __exit udelay_test_exit(void)
159{
160 mutex_lock(&udelay_test_lock);
161 debugfs_remove(udelay_test_debugfs_file);
162 mutex_unlock(&udelay_test_lock);
163}
164
165module_exit(udelay_test_exit);
166
167MODULE_AUTHOR("David Riley <davidriley@chromium.org>");
168MODULE_LICENSE("GPL");