author    Linus Torvalds <torvalds@linux-foundation.org> 2013-04-30 11:15:40 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2013-04-30 11:15:40 -0400
commit    ab86e974f04b1cd827a9c7c35273834ebcd9ab38 (patch)
tree      41df33732d2700d6d57d1e7ab3f430942f09ffcc
parent    8700c95adb033843fc163d112b9d21d4fda78018 (diff)
parent    6f7a05d7018de222e40ca003721037a530979974 (diff)
Merge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull core timer updates from Ingo Molnar:
 "The main changes in this cycle's merge are:

   - Implement shadow timekeeper to shorten in-kernel reader side
     blocking, by Thomas Gleixner.

   - Posix timers enhancements by Pavel Emelyanov:

       - allocate timer ID per process, so that exact timer ID
         allocations can be re-created by checkpoint/restore code.

       - debuggability and tooling (/proc/PID/timers, etc.) improvements.

   - suspend/resume enhancements by Feng Tang: on certain new Intel Atom
     processors (Penwell and Cloverview), there is a feature that the
     TSC won't stop in S3 state, so the TSC value won't be reset to 0
     after resume. This can be taken advantage of by the generic code
     via the CLOCK_SOURCE_SUSPEND_NONSTOP flag: instead of using the
     RTC to recover/approximate sleep time, the main (and precise)
     clocksource can be used.

   - Fix /proc/timer_list for 4096 CPUs by Nathan Zimmer: on so many
     CPUs the file goes beyond 4MB in size and the current simplistic
     seqfile approach fails. Convert /proc/timer_list to a proper
     seq_file with its own iterator.

   - Cleanups and refactorings of the core timekeeping code by John
     Stultz.

   - International Atomic Time (TAI) is currently managed internally by
     the NTP code but not exposed externally. Separate the TAI code out
     and add CLOCK_TAI support and TAI support to the hrtimer and
     posix-timer code, by John Stultz.

   - Add a deep idle support enhancement to the broadcast clockevents
     core timer code, by Daniel Lezcano: add an opt-in
     CLOCK_EVT_FEAT_DYNIRQ clockevents feature (which will be utilized
     by future clockevents driver updates), which allows the use of IRQ
     affinities to avoid spurious wakeups of idle CPUs - the right CPU
     with an expiring timer will be woken.

   - Add a new ARM bcm281xx clocksource driver, by Christian Daudt.

   - ... various other fixes and cleanups"

* 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (52 commits)
  clockevents: Set dummy handler on CPU_DEAD shutdown
  timekeeping: Update tk->cycle_last in resume
  posix-timers: Remove unused variable
  clockevents: Switch into oneshot mode even if broadcast registered late
  timer_list: Convert timer list to be a proper seq_file
  timer_list: Split timer_list_show_tickdevices
  posix-timers: Show sigevent info in proc file
  posix-timers: Introduce /proc/PID/timers file
  posix timers: Allocate timer id per process (v2)
  timekeeping: Make sure to notify hrtimers when TAI offset changes
  hrtimer: Fix ktime_add_ns() overflow on 32bit architectures
  hrtimer: Add expiry time overflow check in hrtimer_interrupt
  timekeeping: Shorten seq_count region
  timekeeping: Implement a shadow timekeeper
  timekeeping: Delay update of clock->cycle_last
  timekeeping: Store cycle_last value in timekeeper struct as well
  ntp: Remove ntp_lock, using the timekeeping locks to protect ntp state
  timekeeping: Simplify tai updating from do_adjtimex
  timekeeping: Hold timekeepering locks in do_adjtimex and hardpps
  timekeeping: Move ADJ_SETOFFSET to top level do_adjtimex()
  ...
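As a quick orientation before the diffs: CLOCK_TAI is exposed to userspace as clockid 11 (see the include/uapi/linux/time.h hunk below). A minimal, hedged userspace sketch that reads it next to CLOCK_REALTIME; the difference is the TAI offset that adjtimex(ADJ_TAI) installs (the fallback define is needed on a libc that does not yet know CLOCK_TAI):

#include <stdio.h>
#include <time.h>

#ifndef CLOCK_TAI
#define CLOCK_TAI 11	/* value added by this merge in include/uapi/linux/time.h */
#endif

int main(void)
{
	struct timespec tai, utc;

	/* Both reads go through the new CLOCK_TAI/HRTIMER_BASE_TAI plumbing. */
	clock_gettime(CLOCK_TAI, &tai);
	clock_gettime(CLOCK_REALTIME, &utc);

	/* Prints the current TAI-UTC offset once something (e.g. ntpd via
	 * adjtimex ADJ_TAI) has set it; 0 on a freshly booted box. */
	printf("TAI-UTC: %ld s\n", (long)(tai.tv_sec - utc.tv_sec));
	return 0;
}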
-rw-r--r--  arch/arm/mach-bcm/Kconfig            |    1
-rw-r--r--  arch/arm/mach-bcm/board_bcm.c        |    7
-rw-r--r--  arch/x86/Kconfig                     |    1
-rw-r--r--  arch/x86/include/asm/cpufeature.h    |    1
-rw-r--r--  arch/x86/kernel/cpu/intel.c          |   12
-rw-r--r--  arch/x86/kernel/rtc.c                |   69
-rw-r--r--  arch/x86/kernel/tsc.c                |    6
-rw-r--r--  arch/x86/platform/efi/efi.c          |   24
-rw-r--r--  arch/x86/platform/mrst/vrtc.c        |   44
-rw-r--r--  drivers/clocksource/Makefile         |    1
-rw-r--r--  drivers/clocksource/bcm_kona_timer.c |  211
-rw-r--r--  fs/proc/base.c                       |  100
-rw-r--r--  include/linux/clockchips.h           |   12
-rw-r--r--  include/linux/clocksource.h          |    1
-rw-r--r--  include/linux/hrtimer.h              |    5
-rw-r--r--  include/linux/jiffies.h              |    1
-rw-r--r--  include/linux/posix-timers.h         |    1
-rw-r--r--  include/linux/sched.h                |    3
-rw-r--r--  include/linux/time.h                 |    3
-rw-r--r--  include/linux/timekeeper_internal.h  |    9
-rw-r--r--  include/linux/timex.h                |    7
-rw-r--r--  include/uapi/linux/time.h            |    6
-rw-r--r--  init/main.c                          |    2
-rw-r--r--  kernel/cpu/idle.c                    |   11
-rw-r--r--  kernel/hrtimer.c                     |   26
-rw-r--r--  kernel/posix-timers.c                |  121
-rw-r--r--  kernel/time.c                        |   11
-rw-r--r--  kernel/time/ntp.c                    |  105
-rw-r--r--  kernel/time/ntp_internal.h           |   12
-rw-r--r--  kernel/time/tick-broadcast.c         |  239
-rw-r--r--  kernel/time/tick-common.c            |    2
-rw-r--r--  kernel/time/tick-internal.h          |    5
-rw-r--r--  kernel/time/tick-sched.c             |    4
-rw-r--r--  kernel/time/timekeeping.c            |  396
-rw-r--r--  kernel/time/timer_list.c             |  104
35 files changed, 1178 insertions, 385 deletions
diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig
index bf02471d7e7c..f11289519c39 100644
--- a/arch/arm/mach-bcm/Kconfig
+++ b/arch/arm/mach-bcm/Kconfig
@@ -6,6 +6,7 @@ config ARCH_BCM
 	select ARM_ERRATA_764369 if SMP
 	select ARM_GIC
 	select CPU_V7
+	select CLKSRC_OF
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_TIME
 	select GPIO_BCM
diff --git a/arch/arm/mach-bcm/board_bcm.c b/arch/arm/mach-bcm/board_bcm.c
index f0f9abafad29..259593540477 100644
--- a/arch/arm/mach-bcm/board_bcm.c
+++ b/arch/arm/mach-bcm/board_bcm.c
@@ -16,14 +16,11 @@
 #include <linux/device.h>
 #include <linux/platform_device.h>
 #include <linux/irqchip.h>
+#include <linux/clocksource.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/time.h>
 
-static void timer_init(void)
-{
-}
-
 
 static void __init board_init(void)
 {
@@ -35,7 +32,7 @@ static const char * const bcm11351_dt_compat[] = { "bcm,bcm11351", NULL, };
 
 DT_MACHINE_START(BCM11351_DT, "Broadcom Application Processor")
 	.init_irq = irqchip_init,
-	.init_time = timer_init,
+	.init_time = clocksource_of_init,
 	.init_machine = board_init,
 	.dt_compat = bcm11351_dt_compat,
 MACHINE_END
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d75b48c11be5..e93ccb9b1cc1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -120,6 +120,7 @@ config X86
 	select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
 	select OLD_SIGACTION if X86_32
 	select COMPAT_OLD_SIGACTION if IA32_EMULATION
+	select RTC_LIB
 
 config INSTRUCTION_DECODER
 	def_bool y
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index ac10df72925b..6ce479800258 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -100,6 +100,7 @@
 #define X86_FEATURE_AMD_DCM	(3*32+27) /* multi-node processor */
 #define X86_FEATURE_APERFMPERF	(3*32+28) /* APERFMPERF */
 #define X86_FEATURE_EAGER_FPU	(3*32+29) /* "eagerfpu" Non lazy FPU restore */
+#define X86_FEATURE_NONSTOP_TSC_S3 (3*32+30) /* TSC doesn't stop in S3 state */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 1905ce98bee0..e7ae0d89e7e0 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -96,6 +96,18 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 		sched_clock_stable = 1;
 	}
 
+	/* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
+	if (c->x86 == 6) {
+		switch (c->x86_model) {
+		case 0x27:	/* Penwell */
+		case 0x35:	/* Cloverview */
+			set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3);
+			break;
+		default:
+			break;
+		}
+	}
+
 	/*
 	 * There is a known erratum on Pentium III and Core Solo
 	 * and Core Duo CPUs.
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 2e8f3d3b5641..198eb201ed3b 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -13,6 +13,7 @@
 #include <asm/x86_init.h>
 #include <asm/time.h>
 #include <asm/mrst.h>
+#include <asm/rtc.h>
 
 #ifdef CONFIG_X86_32
 /*
@@ -36,70 +37,24 @@ EXPORT_SYMBOL(rtc_lock);
  * nowtime is written into the registers of the CMOS clock, it will
  * jump to the next second precisely 500 ms later. Check the Motorola
  * MC146818A or Dallas DS12887 data sheet for details.
- *
- * BUG: This routine does not handle hour overflow properly; it just
- * sets the minutes. Usually you'll only notice that after reboot!
  */
 int mach_set_rtc_mmss(unsigned long nowtime)
 {
-	int real_seconds, real_minutes, cmos_minutes;
-	unsigned char save_control, save_freq_select;
-	unsigned long flags;
+	struct rtc_time tm;
 	int retval = 0;
 
-	spin_lock_irqsave(&rtc_lock, flags);
-
-	/* tell the clock it's being set */
-	save_control = CMOS_READ(RTC_CONTROL);
-	CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
-
-	/* stop and reset prescaler */
-	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
-	CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
-
-	cmos_minutes = CMOS_READ(RTC_MINUTES);
-	if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-		cmos_minutes = bcd2bin(cmos_minutes);
-
-	/*
-	 * since we're only adjusting minutes and seconds,
-	 * don't interfere with hour overflow. This avoids
-	 * messing with unknown time zones but requires your
-	 * RTC not to be off by more than 15 minutes
-	 */
-	real_seconds = nowtime % 60;
-	real_minutes = nowtime / 60;
-	/* correct for half hour time zone */
-	if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1)
-		real_minutes += 30;
-	real_minutes %= 60;
-
-	if (abs(real_minutes - cmos_minutes) < 30) {
-		if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-			real_seconds = bin2bcd(real_seconds);
-			real_minutes = bin2bcd(real_minutes);
-		}
-		CMOS_WRITE(real_seconds, RTC_SECONDS);
-		CMOS_WRITE(real_minutes, RTC_MINUTES);
+	rtc_time_to_tm(nowtime, &tm);
+	if (!rtc_valid_tm(&tm)) {
+		retval = set_rtc_time(&tm);
+		if (retval)
+			printk(KERN_ERR "%s: RTC write failed with error %d\n",
+			       __FUNCTION__, retval);
 	} else {
-		printk_once(KERN_NOTICE
-		       "set_rtc_mmss: can't update from %d to %d\n",
-		       cmos_minutes, real_minutes);
-		retval = -1;
+		printk(KERN_ERR
+		       "%s: Invalid RTC value: write of %lx to RTC failed\n",
+		       __FUNCTION__, nowtime);
+		retval = -EINVAL;
 	}
-
-	/* The following flags have to be released exactly in this order,
-	 * otherwise the DS12887 (popular MC146818A clone with integrated
-	 * battery and quartz) will not reset the oscillator and will not
-	 * update precisely 500 ms later. You won't find this mentioned in
-	 * the Dallas Semiconductor data sheets, but who believes data
-	 * sheets anyway ... -- Markus Kuhn
-	 */
-	CMOS_WRITE(save_control, RTC_CONTROL);
-	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
-
-	spin_unlock_irqrestore(&rtc_lock, flags);
-
 	return retval;
 }
 
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 4b9ea101fe3b..098b3cfda72e 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -768,7 +768,8 @@ static cycle_t read_tsc(struct clocksource *cs)
 
 static void resume_tsc(struct clocksource *cs)
 {
-	clocksource_tsc.cycle_last = 0;
+	if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
+		clocksource_tsc.cycle_last = 0;
 }
 
 static struct clocksource clocksource_tsc = {
@@ -939,6 +940,9 @@ static int __init init_tsc_clocksource(void)
 		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
 	}
 
+	if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
+		clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
+
 	/*
 	 * Trust the results of the earlier calibration on systems
 	 * exporting a reliable TSC.
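For context, the merge message notes that a clocksource flagged CLOCK_SOURCE_SUSPEND_NONSTOP keeps counting through S3, so the timekeeping core can derive the sleep interval from the clocksource delta instead of the coarse RTC. A minimal illustrative sketch of that idea, not the kernel's actual resume path; cs_state, read-side names, mult and shift are placeholders:

#include <stdint.h>

/* Hypothetical clocksource state captured at suspend time. */
struct cs_state {
	uint64_t cycle_last;	/* counter value saved on suspend */
	uint32_t mult;		/* cycles -> ns multiplier */
	uint32_t shift;		/* cycles -> ns shift */
};

/* Convert a cycle delta to nanoseconds, the usual clocksource math. */
static uint64_t cyc2ns(const struct cs_state *cs, uint64_t delta)
{
	return (delta * cs->mult) >> cs->shift;
}

/*
 * On resume from S3: if the counter never stopped, the precise sleep
 * time is just the cycle delta; otherwise fall back to an RTC estimate.
 */
uint64_t sleep_time_ns(const struct cs_state *cs, uint64_t cycle_now,
		       int nonstop, uint64_t rtc_estimate_ns)
{
	if (nonstop)
		return cyc2ns(cs, cycle_now - cs->cycle_last);
	return rtc_estimate_ns;	/* RTC only has ~1 s resolution */
}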
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index e4a86a677ce1..b55d174e5034 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -49,6 +49,7 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/x86_init.h>
+#include <asm/rtc.h>
 
 #define EFI_DEBUG	1
 
@@ -352,10 +353,10 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
 
 int efi_set_rtc_mmss(unsigned long nowtime)
 {
-	int real_seconds, real_minutes;
 	efi_status_t	status;
 	efi_time_t	eft;
 	efi_time_cap_t	cap;
+	struct rtc_time	tm;
 
 	status = efi.get_time(&eft, &cap);
 	if (status != EFI_SUCCESS) {
@@ -363,13 +364,20 @@ int efi_set_rtc_mmss(unsigned long nowtime)
 		return -1;
 	}
 
-	real_seconds = nowtime % 60;
-	real_minutes = nowtime / 60;
-	if (((abs(real_minutes - eft.minute) + 15)/30) & 1)
-		real_minutes += 30;
-	real_minutes %= 60;
-	eft.minute = real_minutes;
-	eft.second = real_seconds;
+	rtc_time_to_tm(nowtime, &tm);
+	if (!rtc_valid_tm(&tm)) {
+		eft.year = tm.tm_year + 1900;
+		eft.month = tm.tm_mon + 1;
+		eft.day = tm.tm_mday;
+		eft.minute = tm.tm_min;
+		eft.second = tm.tm_sec;
+		eft.nanosecond = 0;
+	} else {
+		printk(KERN_ERR
+		       "%s: Invalid EFI RTC value: write of %lx to EFI RTC failed\n",
+		       __FUNCTION__, nowtime);
+		return -1;
+	}
 
 	status = efi.set_time(&eft);
 	if (status != EFI_SUCCESS) {
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c
index 225bd0f0f675..d62b0a3b5c14 100644
--- a/arch/x86/platform/mrst/vrtc.c
+++ b/arch/x86/platform/mrst/vrtc.c
@@ -85,27 +85,35 @@ unsigned long vrtc_get_time(void)
 	return mktime(year, mon, mday, hour, min, sec);
 }
 
-/* Only care about the minutes and seconds */
 int vrtc_set_mmss(unsigned long nowtime)
 {
-	int real_sec, real_min;
 	unsigned long flags;
-	int vrtc_min;
-
-	spin_lock_irqsave(&rtc_lock, flags);
-	vrtc_min = vrtc_cmos_read(RTC_MINUTES);
-
-	real_sec = nowtime % 60;
-	real_min = nowtime / 60;
-	if (((abs(real_min - vrtc_min) + 15)/30) & 1)
-		real_min += 30;
-	real_min %= 60;
-
-	vrtc_cmos_write(real_sec, RTC_SECONDS);
-	vrtc_cmos_write(real_min, RTC_MINUTES);
-	spin_unlock_irqrestore(&rtc_lock, flags);
-
-	return 0;
+	struct rtc_time tm;
+	int year;
+	int retval = 0;
+
+	rtc_time_to_tm(nowtime, &tm);
+	if (!rtc_valid_tm(&tm) && tm.tm_year >= 72) {
+		/*
+		 * tm.year is the number of years since 1900, and the
+		 * vrtc need the years since 1972.
+		 */
+		year = tm.tm_year - 72;
+		spin_lock_irqsave(&rtc_lock, flags);
+		vrtc_cmos_write(year, RTC_YEAR);
+		vrtc_cmos_write(tm.tm_mon, RTC_MONTH);
+		vrtc_cmos_write(tm.tm_mday, RTC_DAY_OF_MONTH);
+		vrtc_cmos_write(tm.tm_hour, RTC_HOURS);
+		vrtc_cmos_write(tm.tm_min, RTC_MINUTES);
+		vrtc_cmos_write(tm.tm_sec, RTC_SECONDS);
+		spin_unlock_irqrestore(&rtc_lock, flags);
+	} else {
+		printk(KERN_ERR
+		       "%s: Invalid vRTC value: write of %lx to vRTC failed\n",
+		       __FUNCTION__, nowtime);
+		retval = -EINVAL;
+	}
+	return retval;
 }
 
 void __init mrst_rtc_init(void)
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 4d8283aec5b5..96e25319659b 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_ARCH_BCM2835) += bcm2835_timer.o
 obj-$(CONFIG_SUNXI_TIMER)	+= sunxi_timer.o
 obj-$(CONFIG_ARCH_TEGRA)	+= tegra20_timer.o
 obj-$(CONFIG_VT8500_TIMER)	+= vt8500_timer.o
+obj-$(CONFIG_ARCH_BCM)		+= bcm_kona_timer.o
 
 obj-$(CONFIG_ARM_ARCH_TIMER)		+= arm_arch_timer.o
 obj-$(CONFIG_CLKSRC_METAG_GENERIC)	+= metag_generic.o
diff --git a/drivers/clocksource/bcm_kona_timer.c b/drivers/clocksource/bcm_kona_timer.c
new file mode 100644
index 000000000000..350f49356458
--- /dev/null
+++ b/drivers/clocksource/bcm_kona_timer.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (C) 2012 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/jiffies.h>
+#include <linux/clockchips.h>
+#include <linux/types.h>
+
+#include <linux/io.h>
+#include <asm/mach/time.h>
+
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+
+#define KONA_GPTIMER_STCS_OFFSET			0x00000000
+#define KONA_GPTIMER_STCLO_OFFSET			0x00000004
+#define KONA_GPTIMER_STCHI_OFFSET			0x00000008
+#define KONA_GPTIMER_STCM0_OFFSET			0x0000000C
+
+#define KONA_GPTIMER_STCS_TIMER_MATCH_SHIFT		0
+#define KONA_GPTIMER_STCS_COMPARE_ENABLE_SHIFT		4
+
+struct kona_bcm_timers {
+	int tmr_irq;
+	void __iomem *tmr_regs;
+};
+
+static struct kona_bcm_timers timers;
+
+static u32 arch_timer_rate;
+
+/*
+ * We use the peripheral timers for system tick, the cpu global timer for
+ * profile tick
+ */
+static void kona_timer_disable_and_clear(void __iomem *base)
+{
+	uint32_t reg;
+
+	/*
+	 * clear and disable interrupts
+	 * We are using compare/match register 0 for our system interrupts
+	 */
+	reg = readl(base + KONA_GPTIMER_STCS_OFFSET);
+
+	/* Clear compare (0) interrupt */
+	reg |= 1 << KONA_GPTIMER_STCS_TIMER_MATCH_SHIFT;
+	/* disable compare */
+	reg &= ~(1 << KONA_GPTIMER_STCS_COMPARE_ENABLE_SHIFT);
+
+	writel(reg, base + KONA_GPTIMER_STCS_OFFSET);
+
+}
+
+static void
+kona_timer_get_counter(void *timer_base, uint32_t *msw, uint32_t *lsw)
+{
+	void __iomem *base = IOMEM(timer_base);
+	int loop_limit = 4;
+
+	/*
+	 * Read 64-bit free running counter
+	 * 1. Read hi-word
+	 * 2. Read low-word
+	 * 3. Read hi-word again
+	 * 4.1
+	 *      if new hi-word is not equal to previously read hi-word, then
+	 *      start from #1
+	 * 4.2
+	 *      if new hi-word is equal to previously read hi-word then stop.
+	 */
+
+	while (--loop_limit) {
+		*msw = readl(base + KONA_GPTIMER_STCHI_OFFSET);
+		*lsw = readl(base + KONA_GPTIMER_STCLO_OFFSET);
+		if (*msw == readl(base + KONA_GPTIMER_STCHI_OFFSET))
+			break;
+	}
+	if (!loop_limit) {
+		pr_err("bcm_kona_timer: getting counter failed.\n");
+		pr_err(" Timer will be impacted\n");
+	}
+
+	return;
+}
+
+static const struct of_device_id bcm_timer_ids[] __initconst = {
+	{.compatible = "bcm,kona-timer"},
+	{},
+};
+
+static void __init kona_timers_init(void)
+{
+	struct device_node *node;
+	u32 freq;
+
+	node = of_find_matching_node(NULL, bcm_timer_ids);
+
+	if (!node)
+		panic("No timer");
+
+	if (!of_property_read_u32(node, "clock-frequency", &freq))
+		arch_timer_rate = freq;
+	else
+		panic("clock-frequency not set in the .dts file");
+
+	/* Setup IRQ numbers */
+	timers.tmr_irq = irq_of_parse_and_map(node, 0);
+
+	/* Setup IO addresses */
+	timers.tmr_regs = of_iomap(node, 0);
+
+	kona_timer_disable_and_clear(timers.tmr_regs);
+}
+
+static int kona_timer_set_next_event(unsigned long clc,
+				  struct clock_event_device *unused)
+{
+	/*
+	 * timer (0) is disabled by the timer interrupt already
+	 * so, here we reload the next event value and re-enable
+	 * the timer.
+	 *
+	 * This way, we are potentially losing the time between
+	 * timer-interrupt->set_next_event. CPU local timers, when
+	 * they come in should get rid of skew.
+	 */
+
+	uint32_t lsw, msw;
+	uint32_t reg;
+
+	kona_timer_get_counter(timers.tmr_regs, &msw, &lsw);
+
+	/* Load the "next" event tick value */
+	writel(lsw + clc, timers.tmr_regs + KONA_GPTIMER_STCM0_OFFSET);
+
+	/* Enable compare */
+	reg = readl(timers.tmr_regs + KONA_GPTIMER_STCS_OFFSET);
+	reg |= (1 << KONA_GPTIMER_STCS_COMPARE_ENABLE_SHIFT);
+	writel(reg, timers.tmr_regs + KONA_GPTIMER_STCS_OFFSET);
+
+	return 0;
+}
+
+static void kona_timer_set_mode(enum clock_event_mode mode,
+			     struct clock_event_device *unused)
+{
+	switch (mode) {
+	case CLOCK_EVT_MODE_ONESHOT:
+		/* by default mode is one shot don't do any thing */
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	default:
+		kona_timer_disable_and_clear(timers.tmr_regs);
+	}
+}
+
+static struct clock_event_device kona_clockevent_timer = {
+	.name = "timer 1",
+	.features = CLOCK_EVT_FEAT_ONESHOT,
+	.set_next_event = kona_timer_set_next_event,
+	.set_mode = kona_timer_set_mode
+};
+
+static void __init kona_timer_clockevents_init(void)
+{
+	kona_clockevent_timer.cpumask = cpumask_of(0);
+	clockevents_config_and_register(&kona_clockevent_timer,
+		arch_timer_rate, 6, 0xffffffff);
+}
+
+static irqreturn_t kona_timer_interrupt(int irq, void *dev_id)
+{
+	struct clock_event_device *evt = &kona_clockevent_timer;
+
+	kona_timer_disable_and_clear(timers.tmr_regs);
+	evt->event_handler(evt);
+	return IRQ_HANDLED;
+}
+
+static struct irqaction kona_timer_irq = {
+	.name = "Kona Timer Tick",
+	.flags = IRQF_TIMER,
+	.handler = kona_timer_interrupt,
+};
+
+static void __init kona_timer_init(void)
+{
+	kona_timers_init();
+	kona_timer_clockevents_init();
+	setup_irq(timers.tmr_irq, &kona_timer_irq);
+	kona_timer_set_next_event((arch_timer_rate / HZ), NULL);
+}
+
+CLOCKSOURCE_OF_DECLARE(bcm_kona, "bcm,kona-timer",
+	kona_timer_init);
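The hi/lo/hi read loop in kona_timer_get_counter() above is the standard technique for sampling a free-running 64-bit counter exposed as two 32-bit registers. A self-contained sketch of the same idea; read_hi/read_lo and the volatile globals are stand-ins for the driver's readl() calls on STCHI/STCLO:

#include <stdint.h>

/* Stand-ins for the two 32-bit MMIO counter registers. */
static volatile uint32_t counter_hi, counter_lo;

static uint32_t read_hi(void) { return counter_hi; }
static uint32_t read_lo(void) { return counter_lo; }

/*
 * Re-read the high word until it is stable across the low-word read,
 * so a carry from lo into hi cannot tear the 64-bit sample.
 */
static uint64_t read_counter64(void)
{
	uint32_t hi, lo;

	do {
		hi = read_hi();
		lo = read_lo();
	} while (hi != read_hi());

	return ((uint64_t)hi << 32) | lo;
}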
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 69078c7cef1f..a19308604145 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -86,6 +86,7 @@
 #include <linux/fs_struct.h>
 #include <linux/slab.h>
 #include <linux/flex_array.h>
+#include <linux/posix-timers.h>
 #ifdef CONFIG_HARDWALL
 #include <asm/hardwall.h>
 #endif
@@ -2013,6 +2014,102 @@ static const struct file_operations proc_map_files_operations = {
 	.llseek		= default_llseek,
 };
 
+struct timers_private {
+	struct pid *pid;
+	struct task_struct *task;
+	struct sighand_struct *sighand;
+	struct pid_namespace *ns;
+	unsigned long flags;
+};
+
+static void *timers_start(struct seq_file *m, loff_t *pos)
+{
+	struct timers_private *tp = m->private;
+
+	tp->task = get_pid_task(tp->pid, PIDTYPE_PID);
+	if (!tp->task)
+		return ERR_PTR(-ESRCH);
+
+	tp->sighand = lock_task_sighand(tp->task, &tp->flags);
+	if (!tp->sighand)
+		return ERR_PTR(-ESRCH);
+
+	return seq_list_start(&tp->task->signal->posix_timers, *pos);
+}
+
+static void *timers_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct timers_private *tp = m->private;
+	return seq_list_next(v, &tp->task->signal->posix_timers, pos);
+}
+
+static void timers_stop(struct seq_file *m, void *v)
+{
+	struct timers_private *tp = m->private;
+
+	if (tp->sighand) {
+		unlock_task_sighand(tp->task, &tp->flags);
+		tp->sighand = NULL;
+	}
+
+	if (tp->task) {
+		put_task_struct(tp->task);
+		tp->task = NULL;
+	}
+}
+
+static int show_timer(struct seq_file *m, void *v)
+{
+	struct k_itimer *timer;
+	struct timers_private *tp = m->private;
+	int notify;
+	static char *nstr[] = {
+		[SIGEV_SIGNAL] = "signal",
+		[SIGEV_NONE] = "none",
+		[SIGEV_THREAD] = "thread",
+	};
+
+	timer = list_entry((struct list_head *)v, struct k_itimer, list);
+	notify = timer->it_sigev_notify;
+
+	seq_printf(m, "ID: %d\n", timer->it_id);
+	seq_printf(m, "signal: %d/%p\n", timer->sigq->info.si_signo,
+			timer->sigq->info.si_value.sival_ptr);
+	seq_printf(m, "notify: %s/%s.%d\n",
+		nstr[notify & ~SIGEV_THREAD_ID],
+		(notify & SIGEV_THREAD_ID) ? "tid" : "pid",
+		pid_nr_ns(timer->it_pid, tp->ns));
+
+	return 0;
+}
+
+static const struct seq_operations proc_timers_seq_ops = {
+	.start	= timers_start,
+	.next	= timers_next,
+	.stop	= timers_stop,
+	.show	= show_timer,
+};
+
+static int proc_timers_open(struct inode *inode, struct file *file)
+{
+	struct timers_private *tp;
+
+	tp = __seq_open_private(file, &proc_timers_seq_ops,
+			sizeof(struct timers_private));
+	if (!tp)
+		return -ENOMEM;
+
+	tp->pid = proc_pid(inode);
+	tp->ns = inode->i_sb->s_fs_info;
+	return 0;
+}
+
+static const struct file_operations proc_timers_operations = {
+	.open		= proc_timers_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_private,
+};
+
 #endif /* CONFIG_CHECKPOINT_RESTORE */
 
 static struct dentry *proc_pident_instantiate(struct inode *dir,
@@ -2583,6 +2680,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 	REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
 	REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
 #endif
+#ifdef CONFIG_CHECKPOINT_RESTORE
+	REG("timers",	  S_IRUGO, proc_timers_operations),
+#endif
 };
 
 static int proc_tgid_base_readdir(struct file * filp,
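For reference, each timer that show_timer() above emits is a three-line record in the new /proc/PID/timers file. An illustrative (not captured) rendering for one SIGEV_SIGNAL timer using SIGALRM:

ID: 0
signal: 14/0x401234
notify: signal/pid.2739

"ID" is the per-process timer id, "signal" is si_signo plus the sival_ptr cookie, and "notify" encodes the SIGEV_* mode and whether delivery targets a pid or a tid.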
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 66346521cb65..464e229e7d84 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -55,6 +55,11 @@ enum clock_event_nofitiers {
 #define CLOCK_EVT_FEAT_C3STOP		0x000008
 #define CLOCK_EVT_FEAT_DUMMY		0x000010
 
+/*
+ * Core shall set the interrupt affinity dynamically in broadcast mode
+ */
+#define CLOCK_EVT_FEAT_DYNIRQ		0x000020
+
 /**
  * struct clock_event_device - clock event device descriptor
  * @event_handler:	Assigned by the framework to be called by the low
@@ -170,6 +175,12 @@ extern void tick_broadcast(const struct cpumask *mask);
 extern int tick_receive_broadcast(void);
 #endif
 
+#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
+extern int tick_check_broadcast_expired(void);
+#else
+static inline int tick_check_broadcast_expired(void) { return 0; }
+#endif
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 extern void clockevents_notify(unsigned long reason, void *arg);
 #else
@@ -182,6 +193,7 @@ static inline void clockevents_suspend(void) {}
 static inline void clockevents_resume(void) {}
 
 #define clockevents_notify(reason, arg) do { } while (0)
+static inline int tick_check_broadcast_expired(void) { return 0; }
 
 #endif
 
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 27cfda427dd9..aa7032c7238f 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -206,6 +206,7 @@ struct clocksource {
 #define CLOCK_SOURCE_WATCHDOG			0x10
 #define CLOCK_SOURCE_VALID_FOR_HRES		0x20
 #define CLOCK_SOURCE_UNSTABLE			0x40
+#define CLOCK_SOURCE_SUSPEND_NONSTOP		0x80
 
 /* simplify initialization of mask field */
 #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1)
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index cc07d2777bbe..d19a5c2d2270 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -157,6 +157,7 @@ enum hrtimer_base_type {
 	HRTIMER_BASE_MONOTONIC,
 	HRTIMER_BASE_REALTIME,
 	HRTIMER_BASE_BOOTTIME,
+	HRTIMER_BASE_TAI,
 	HRTIMER_MAX_CLOCK_BASES,
 };
 
@@ -327,7 +328,9 @@ extern ktime_t ktime_get(void);
 extern ktime_t ktime_get_real(void);
 extern ktime_t ktime_get_boottime(void);
 extern ktime_t ktime_get_monotonic_offset(void);
-extern ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot);
+extern ktime_t ktime_get_clocktai(void);
+extern ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot,
+					ktime_t *offs_tai);
 
 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 82ed068b1ebe..8fb8edf12417 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -75,7 +75,6 @@ extern int register_refined_jiffies(long clock_tick_rate);
  */
 extern u64 __jiffy_data jiffies_64;
 extern unsigned long volatile __jiffy_data jiffies;
-extern seqlock_t jiffies_lock;
 
 #if (BITS_PER_LONG < 64)
 u64 get_jiffies_64(void);
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 042058fdb0af..60bac697a91b 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -55,6 +55,7 @@ struct cpu_timer_list {
 /* POSIX.1b interval timer structure. */
 struct k_itimer {
 	struct list_head list;		/* free/ allocate list */
+	struct hlist_node t_hash;
 	spinlock_t it_lock;
 	clockid_t it_clock;		/* which timer type */
 	timer_t it_id;			/* timer id */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 981ab6887259..54ddcb82cddf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -514,7 +514,8 @@ struct signal_struct {
 	unsigned int		has_child_subreaper:1;
 
 	/* POSIX.1b Interval Timers */
+	int			posix_timer_id;
 	struct list_head posix_timers;
 
 	/* ITIMER_REAL timer for the process */
 	struct hrtimer real_timer;
diff --git a/include/linux/time.h b/include/linux/time.h
index d4835dfdf25e..22d81b3c955b 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -181,6 +181,9 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
 extern int timekeeping_valid_for_hres(void);
 extern u64 timekeeping_max_deferment(void);
 extern int timekeeping_inject_offset(struct timespec *ts);
+extern s32 timekeeping_get_tai_offset(void);
+extern void timekeeping_set_tai_offset(s32 tai_offset);
+extern void timekeeping_clocktai(struct timespec *ts);
 
 struct tms;
 extern void do_sys_times(struct tms *);
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index e1d558e237ec..c1825eb436ed 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -20,6 +20,8 @@ struct timekeeper {
 	u32			shift;
 	/* Number of clock cycles in one NTP interval. */
 	cycle_t			cycle_interval;
+	/* Last cycle value (also stored in clock->cycle_last) */
+	cycle_t			cycle_last;
 	/* Number of clock shifted nano seconds in one NTP interval. */
 	u64			xtime_interval;
 	/* shifted nano seconds left over when rounding cycle_interval */
@@ -62,8 +64,11 @@ struct timekeeper {
 	ktime_t			offs_boot;
 	/* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
 	struct timespec		raw_time;
-	/* Seqlock for all timekeeper values */
-	seqlock_t		lock;
+	/* The current UTC to TAI offset in seconds */
+	s32			tai_offset;
+	/* Offset clock monotonic -> clock tai */
+	ktime_t			offs_tai;
+
 };
 
 static inline struct timespec tk_xtime(struct timekeeper *tk)
diff --git a/include/linux/timex.h b/include/linux/timex.h
index 5ec87c60b97c..b3726e61368e 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -125,9 +125,6 @@
 extern unsigned long tick_usec;		/* USER_HZ period (usec) */
 extern unsigned long tick_nsec;		/* SHIFTED_HZ period (nsec) */
 
-extern void ntp_init(void);
-extern void ntp_clear(void);
-
 /* Required to safely shift negative values */
 #define shift_right(x, s) ({	\
 	__typeof__(x) __x = (x);	\
@@ -140,10 +137,6 @@ extern void ntp_clear(void);
 #define NTP_INTERVAL_FREQ  (HZ)
 #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ)
 
-/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
-extern u64 ntp_tick_length(void);
-
-extern int second_overflow(unsigned long secs);
 extern int do_adjtimex(struct timex *);
 extern void hardpps(const struct timespec *, const struct timespec *);
 
diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h
index 0d3c0edc3eda..e75e1b6ff27f 100644
--- a/include/uapi/linux/time.h
+++ b/include/uapi/linux/time.h
@@ -54,11 +54,9 @@ struct itimerval {
 #define CLOCK_BOOTTIME			7
 #define CLOCK_REALTIME_ALARM		8
 #define CLOCK_BOOTTIME_ALARM		9
+#define CLOCK_SGI_CYCLE			10	/* Hardware specific */
+#define CLOCK_TAI			11
 
-/*
- * The IDs of various hardware clocks:
- */
-#define CLOCK_SGI_CYCLE			10
 #define MAX_CLOCKS			16
 #define CLOCKS_MASK			(CLOCK_REALTIME | CLOCK_MONOTONIC)
 #define CLOCKS_MONO			CLOCK_MONOTONIC
diff --git a/init/main.c b/init/main.c
index 12c366944dbd..bea1287aecdc 100644
--- a/init/main.c
+++ b/init/main.c
@@ -495,7 +495,6 @@ asmlinkage void __init start_kernel(void)
 	 * Interrupts are still disabled. Do necessary setups, then
 	 * enable them
 	 */
-	tick_init();
 	boot_cpu_init();
 	page_address_init();
 	pr_notice("%s", linux_banner);
@@ -549,6 +548,7 @@ asmlinkage void __init start_kernel(void)
 	/* init some links before init_ISA_irqs() */
 	early_irq_init();
 	init_IRQ();
+	tick_init();
 	init_timers();
 	hrtimers_init();
 	softirq_init();
diff --git a/kernel/cpu/idle.c b/kernel/cpu/idle.c
index 168cf407a254..8b86c0c68edf 100644
--- a/kernel/cpu/idle.c
+++ b/kernel/cpu/idle.c
@@ -76,7 +76,16 @@ static void cpu_idle_loop(void)
 		local_irq_disable();
 		arch_cpu_idle_enter();
 
-		if (cpu_idle_force_poll) {
+		/*
+		 * In poll mode we reenable interrupts and spin.
+		 *
+		 * Also if we detected in the wakeup from idle
+		 * path that the tick broadcast device expired
+		 * for us, we don't want to go deep idle as we
+		 * know that the IPI is going to arrive right
+		 * away
+		 */
+		if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
 			cpu_idle_poll();
 		} else {
 			current_clr_polling();
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 14be27feda49..609d8ff38b74 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -84,6 +84,12 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
 		.get_time = &ktime_get_boottime,
 		.resolution = KTIME_LOW_RES,
 		},
+		{
+		.index = HRTIMER_BASE_TAI,
+		.clockid = CLOCK_TAI,
+		.get_time = &ktime_get_clocktai,
+		.resolution = KTIME_LOW_RES,
+		},
 	}
 };
 
@@ -91,6 +97,7 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
 	[CLOCK_REALTIME]	= HRTIMER_BASE_REALTIME,
 	[CLOCK_MONOTONIC]	= HRTIMER_BASE_MONOTONIC,
 	[CLOCK_BOOTTIME]	= HRTIMER_BASE_BOOTTIME,
+	[CLOCK_TAI]		= HRTIMER_BASE_TAI,
 };
 
 static inline int hrtimer_clockid_to_base(clockid_t clock_id)
@@ -107,8 +114,10 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
 {
 	ktime_t xtim, mono, boot;
 	struct timespec xts, tom, slp;
+	s32 tai_offset;
 
 	get_xtime_and_monotonic_and_sleep_offset(&xts, &tom, &slp);
+	tai_offset = timekeeping_get_tai_offset();
 
 	xtim = timespec_to_ktime(xts);
 	mono = ktime_add(xtim, timespec_to_ktime(tom));
@@ -116,6 +125,8 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
 	base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
 	base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
 	base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
+	base->clock_base[HRTIMER_BASE_TAI].softirq_time =
+				ktime_add(xtim, ktime_set(tai_offset, 0));
 }
 
 /*
@@ -276,6 +287,10 @@ ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
 	} else {
 		unsigned long rem = do_div(nsec, NSEC_PER_SEC);
 
+		/* Make sure nsec fits into long */
+		if (unlikely(nsec > KTIME_SEC_MAX))
+			return (ktime_t){ .tv64 = KTIME_MAX };
+
 		tmp = ktime_set((long)nsec, rem);
 	}
 
@@ -652,8 +667,9 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
 {
 	ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
 	ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
+	ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
 
-	return ktime_get_update_offsets(offs_real, offs_boot);
+	return ktime_get_update_offsets(offs_real, offs_boot, offs_tai);
 }
 
 /*
@@ -1011,7 +1027,8 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
  * @timer:	the timer to be added
  * @tim:	expiry time
  * @delta_ns:	"slack" range for the timer
- * @mode:	expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
+ * @mode:	expiry mode: absolute (HRTIMER_MODE_ABS) or
+ *		relative (HRTIMER_MODE_REL)
  *
  * Returns:
  *  0 on success
@@ -1028,7 +1045,8 @@ EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
  * hrtimer_start - (re)start an hrtimer on the current CPU
  * @timer:	the timer to be added
  * @tim:	expiry time
- * @mode:	expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
+ * @mode:	expiry mode: absolute (HRTIMER_MODE_ABS) or
+ *		relative (HRTIMER_MODE_REL)
  *
  * Returns:
  *  0 on success
@@ -1310,6 +1328,8 @@ retry:
 
 			expires = ktime_sub(hrtimer_get_expires(timer),
 					    base->offset);
+			if (expires.tv64 < 0)
+				expires.tv64 = KTIME_MAX;
 			if (expires.tv64 < expires_next.tv64)
 				expires_next = expires;
 			break;
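The ktime_add_ns() hunk above guards the 32-bit path, where the seconds count is narrowed to a long before ktime_set(). A small standalone sketch of the hazard and the clamp; the KTIME_SEC_MAX/KTIME_MAX values mirror the kernel's 64-bit ktime limits, and the (int32_t) cast stands in for the 32-bit long:

#include <stdint.h>
#include <stdio.h>

#define KTIME_MAX     INT64_MAX
#define KTIME_SEC_MAX (KTIME_MAX / 1000000000LL)

/* Simplified 32-bit-style ktime addition: seconds become a 32-bit value. */
static int64_t add_ns(int64_t kt_ns, uint64_t nsec)
{
	uint64_t sec = nsec / 1000000000ULL;
	uint64_t rem = nsec % 1000000000ULL;

	/* Without this check, the (int32_t)sec below silently truncates. */
	if (sec > KTIME_SEC_MAX)
		return KTIME_MAX;

	return kt_ns + (int64_t)(int32_t)sec * 1000000000LL + (int64_t)rem;
}

int main(void)
{
	/* Far more than 2^31 seconds of nanoseconds: clamps to KTIME_MAX. */
	printf("%lld\n", (long long)add_ns(0, UINT64_MAX));
	return 0;
}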
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 6edbb2c55c22..424c2d4265c9 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -40,38 +40,31 @@
 #include <linux/list.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
-#include <linux/idr.h>
+#include <linux/hash.h>
 #include <linux/posix-clock.h>
 #include <linux/posix-timers.h>
 #include <linux/syscalls.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
 #include <linux/export.h>
+#include <linux/hashtable.h>
 
 /*
- * Management arrays for POSIX timers. Timers are kept in slab memory
- * Timer ids are allocated by an external routine that keeps track of the
- * id and the timer. The external interface is:
- *
- * void *idr_find(struct idr *idp, int id);        to find timer_id <id>
- * int idr_get_new(struct idr *idp, void *ptr);    to get a new id and
- *                                                 related it to <ptr>
- * void idr_remove(struct idr *idp, int id);       to release <id>
- * void idr_init(struct idr *idp);                 to initialize <idp>
- *                                                 which we supply.
- * The idr_get_new *may* call slab for more memory so it must not be
- * called under a spin lock. Likewise idr_remore may release memory
- * (but it may be ok to do this under a lock...).
- * idr_find is just a memory look up and is quite fast. A -1 return
- * indicates that the requested id does not exist.
+ * Management arrays for POSIX timers. Timers are now kept in static hash table
+ * with 512 entries.
+ * Timer ids are allocated by local routine, which selects proper hash head by
+ * key, constructed from current->signal address and per signal struct counter.
+ * This keeps timer ids unique per process, but now they can intersect between
+ * processes.
  */
 
 /*
  * Lets keep our timers in a slab cache :-)
  */
 static struct kmem_cache *posix_timers_cache;
-static struct idr posix_timers_id;
-static DEFINE_SPINLOCK(idr_lock);
+
+static DEFINE_HASHTABLE(posix_timers_hashtable, 9);
+static DEFINE_SPINLOCK(hash_lock);
 
 /*
  * we assume that the new SIGEV_THREAD_ID shares no bits with the other
@@ -152,6 +145,56 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags);
 	__timr;								\
 })
 
+static int hash(struct signal_struct *sig, unsigned int nr)
+{
+	return hash_32(hash32_ptr(sig) ^ nr, HASH_BITS(posix_timers_hashtable));
+}
+
+static struct k_itimer *__posix_timers_find(struct hlist_head *head,
+					    struct signal_struct *sig,
+					    timer_t id)
+{
+	struct k_itimer *timer;
+
+	hlist_for_each_entry_rcu(timer, head, t_hash) {
+		if ((timer->it_signal == sig) && (timer->it_id == id))
+			return timer;
+	}
+	return NULL;
+}
+
+static struct k_itimer *posix_timer_by_id(timer_t id)
+{
+	struct signal_struct *sig = current->signal;
+	struct hlist_head *head = &posix_timers_hashtable[hash(sig, id)];
+
+	return __posix_timers_find(head, sig, id);
+}
+
+static int posix_timer_add(struct k_itimer *timer)
+{
+	struct signal_struct *sig = current->signal;
+	int first_free_id = sig->posix_timer_id;
+	struct hlist_head *head;
+	int ret = -ENOENT;
+
+	do {
+		spin_lock(&hash_lock);
+		head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)];
+		if (!__posix_timers_find(head, sig, sig->posix_timer_id)) {
+			hlist_add_head_rcu(&timer->t_hash, head);
+			ret = sig->posix_timer_id;
+		}
+		if (++sig->posix_timer_id < 0)
+			sig->posix_timer_id = 0;
+		if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT))
+			/* Loop over all possible ids completed */
+			ret = -EAGAIN;
+		spin_unlock(&hash_lock);
+	} while (ret == -ENOENT);
+	return ret;
+}
+
 static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
 {
 	spin_unlock_irqrestore(&timr->it_lock, flags);
@@ -221,6 +264,11 @@ static int posix_get_boottime(const clockid_t which_clock, struct timespec *tp)
 	return 0;
 }
 
+static int posix_get_tai(clockid_t which_clock, struct timespec *tp)
+{
+	timekeeping_clocktai(tp);
+	return 0;
+}
 
 /*
  * Initialize everything, well, just everything in Posix clocks/timers ;)
@@ -261,6 +309,16 @@ static __init int init_posix_timers(void)
 		.clock_getres	= posix_get_coarse_res,
 		.clock_get	= posix_get_monotonic_coarse,
 	};
+	struct k_clock clock_tai = {
+		.clock_getres	= hrtimer_get_res,
+		.clock_get	= posix_get_tai,
+		.nsleep		= common_nsleep,
+		.nsleep_restart	= hrtimer_nanosleep_restart,
+		.timer_create	= common_timer_create,
+		.timer_set	= common_timer_set,
+		.timer_get	= common_timer_get,
+		.timer_del	= common_timer_del,
+	};
 	struct k_clock clock_boottime = {
 		.clock_getres	= hrtimer_get_res,
 		.clock_get	= posix_get_boottime,
@@ -278,11 +336,11 @@ static __init int init_posix_timers(void)
 	posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
 	posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
 	posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime);
+	posix_timers_register_clock(CLOCK_TAI, &clock_tai);
 
 	posix_timers_cache = kmem_cache_create("posix_timers_cache",
 					sizeof (struct k_itimer), 0, SLAB_PANIC,
 					NULL);
-	idr_init(&posix_timers_id);
 	return 0;
 }
 
@@ -504,9 +562,9 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
 {
 	if (it_id_set) {
 		unsigned long flags;
-		spin_lock_irqsave(&idr_lock, flags);
-		idr_remove(&posix_timers_id, tmr->it_id);
-		spin_unlock_irqrestore(&idr_lock, flags);
+		spin_lock_irqsave(&hash_lock, flags);
+		hlist_del_rcu(&tmr->t_hash);
+		spin_unlock_irqrestore(&hash_lock, flags);
 	}
 	put_pid(tmr->it_pid);
 	sigqueue_free(tmr->sigq);
@@ -552,22 +610,11 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
 		return -EAGAIN;
 
 	spin_lock_init(&new_timer->it_lock);
-
-	idr_preload(GFP_KERNEL);
-	spin_lock_irq(&idr_lock);
-	error = idr_alloc(&posix_timers_id, new_timer, 0, 0, GFP_NOWAIT);
-	spin_unlock_irq(&idr_lock);
-	idr_preload_end();
-	if (error < 0) {
-		/*
-		 * Weird looking, but we return EAGAIN if the IDR is
-		 * full (proper POSIX return value for this)
-		 */
-		if (error == -ENOSPC)
-			error = -EAGAIN;
+	new_timer_id = posix_timer_add(new_timer);
+	if (new_timer_id < 0) {
+		error = new_timer_id;
 		goto out;
 	}
-	new_timer_id = error;
 
 	it_id_set = IT_ID_SET;
 	new_timer->it_id = (timer_t) new_timer_id;
@@ -645,7 +692,7 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
 		return NULL;
 
 	rcu_read_lock();
-	timr = idr_find(&posix_timers_id, (int)timer_id);
+	timr = posix_timer_by_id(timer_id);
 	if (timr) {
 		spin_lock_irqsave(&timr->it_lock, *flags);
 		if (timr->it_signal == current->signal) {
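With posix_timer_add() above, timer IDs count up from 0 within each process (rather than being globally unique), which is what lets checkpoint/restore re-create exact IDs. A hedged userspace sketch (Linux with these patches, link with -lrt) that creates a timer on the new CLOCK_TAI clock and prints its ID; the fallback define mirrors the uapi hunk earlier:

#include <signal.h>
#include <stdio.h>
#include <time.h>

#ifndef CLOCK_TAI
#define CLOCK_TAI 11	/* from include/uapi/linux/time.h in this merge */
#endif

int main(void)
{
	struct sigevent sev = {
		.sigev_notify = SIGEV_SIGNAL,
		.sigev_signo  = SIGALRM,
	};
	timer_t id;

	/* With the hash-based allocator, the first timer in a fresh
	 * process should get ID 0, the next ID 1, and so on. */
	if (timer_create(CLOCK_TAI, &sev, &id) != 0) {
		perror("timer_create");
		return 1;
	}
	printf("timer id: %ld\n", (long)id);
	return 0;
}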
diff --git a/kernel/time.c b/kernel/time.c
index f8342a41efa6..d3617dbd3dca 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -138,13 +138,14 @@ int persistent_clock_is_local;
  */
 static inline void warp_clock(void)
 {
-	struct timespec adjust;
+	if (sys_tz.tz_minuteswest != 0) {
+		struct timespec adjust;
 
-	adjust = current_kernel_time();
-	if (sys_tz.tz_minuteswest != 0)
 		persistent_clock_is_local = 1;
-	adjust.tv_sec += sys_tz.tz_minuteswest * 60;
-	do_settimeofday(&adjust);
+		adjust.tv_sec = sys_tz.tz_minuteswest * 60;
+		adjust.tv_nsec = 0;
+		timekeeping_inject_offset(&adjust);
+	}
 }
 
 /*
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 072bb066bb7d..12ff13a838c6 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -18,13 +18,14 @@
 #include <linux/rtc.h>
 
 #include "tick-internal.h"
+#include "ntp_internal.h"
 
 /*
  * NTP timekeeping variables:
+ *
+ * Note: All of the NTP state is protected by the timekeeping locks.
  */
 
-DEFINE_RAW_SPINLOCK(ntp_lock);
-
 
 /* USER_HZ period (usecs): */
 unsigned long tick_usec = TICK_USEC;
@@ -53,9 +54,6 @@ static int time_state = TIME_OK;
 /* clock status bits:							*/
 static int time_status = STA_UNSYNC;
 
-/* TAI offset (secs):							*/
-static long time_tai;
-
 /* time adjustment (nsecs):						*/
 static s64 time_offset;
 
@@ -134,8 +132,6 @@ static inline void pps_reset_freq_interval(void)
 
 /**
  * pps_clear - Clears the PPS state variables
- *
- * Must be called while holding a write on the ntp_lock
  */
 static inline void pps_clear(void)
 {
@@ -150,8 +146,6 @@ static inline void pps_clear(void)
 /* Decrease pps_valid to indicate that another second has passed since
  * the last PPS signal. When it reaches 0, indicate that PPS signal is
  * missing.
- *
- * Must be called while holding a write on the ntp_lock
  */
 static inline void pps_dec_valid(void)
 {
@@ -346,10 +340,6 @@ static void ntp_update_offset(long offset)
  */
 void ntp_clear(void)
 {
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&ntp_lock, flags);
-
 	time_adjust	= 0;		/* stop active adjtime() */
 	time_status	|= STA_UNSYNC;
 	time_maxerror	= NTP_PHASE_LIMIT;
@@ -362,20 +352,12 @@ void ntp_clear(void)
 
 	/* Clear PPS state variables */
 	pps_clear();
-	raw_spin_unlock_irqrestore(&ntp_lock, flags);
-
 }
 
 
 u64 ntp_tick_length(void)
 {
-	unsigned long flags;
-	s64 ret;
-
-	raw_spin_lock_irqsave(&ntp_lock, flags);
-	ret = tick_length;
-	raw_spin_unlock_irqrestore(&ntp_lock, flags);
-	return ret;
+	return tick_length;
 }
 
 
@@ -393,9 +375,6 @@ int second_overflow(unsigned long secs)
 {
 	s64 delta;
 	int leap = 0;
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&ntp_lock, flags);
 
 	/*
 	 * Leap second processing. If in leap-insert state at the end of the
@@ -415,7 +394,6 @@ int second_overflow(unsigned long secs)
 	else if (secs % 86400 == 0) {
 		leap = -1;
 		time_state = TIME_OOP;
-		time_tai++;
 		printk(KERN_NOTICE
 			"Clock: inserting leap second 23:59:60 UTC\n");
 	}
@@ -425,7 +403,6 @@ int second_overflow(unsigned long secs)
 		time_state = TIME_OK;
 	else if ((secs + 1) % 86400 == 0) {
 		leap = 1;
-		time_tai--;
 		time_state = TIME_WAIT;
 		printk(KERN_NOTICE
 			"Clock: deleting leap second 23:59:59 UTC\n");
@@ -479,8 +456,6 @@ int second_overflow(unsigned long secs)
 		time_adjust = 0;
 
 out:
-	raw_spin_unlock_irqrestore(&ntp_lock, flags);
-
 	return leap;
 }
 
@@ -575,11 +550,10 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
 	time_status |= txc->status & ~STA_RONLY;
 }
 
-/*
- * Called with ntp_lock held, so we can access and modify
- * all the global NTP state:
- */
-static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts)
+
+static inline void process_adjtimex_modes(struct timex *txc,
+						struct timespec *ts,
+						s32 *time_tai)
 {
 	if (txc->modes & ADJ_STATUS)
 		process_adj_status(txc, ts);
@@ -613,7 +587,7 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts
 	}
 
 	if (txc->modes & ADJ_TAI && txc->constant > 0)
-		time_tai = txc->constant;
+		*time_tai = txc->constant;
 
 	if (txc->modes & ADJ_OFFSET)
 		ntp_update_offset(txc->offset);
@@ -625,16 +599,13 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts
 		ntp_update_frequency();
 }
 
-/*
- * adjtimex mainly allows reading (and writing, if superuser) of
- * kernel time-keeping variables. used by xntpd.
+
+
+/**
+ * ntp_validate_timex - Ensures the timex is ok for use in do_adjtimex
  */
-int do_adjtimex(struct timex *txc)
+int ntp_validate_timex(struct timex *txc)
 {
-	struct timespec ts;
-	int result;
-
-	/* Validate the data before disabling interrupts */
 	if (txc->modes & ADJ_ADJTIME) {
 		/* singleshot must not be used with any other mode bits */
 		if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
@@ -646,7 +617,6 @@ int do_adjtimex(struct timex *txc)
 	/* In order to modify anything, you gotta be super-user! */
 	if (txc->modes && !capable(CAP_SYS_TIME))
 		return -EPERM;
-
 	/*
 	 * if the quartz is off by more than 10% then
 	 * something is VERY wrong!
@@ -657,22 +627,20 @@ int do_adjtimex(struct timex *txc)
 			return -EINVAL;
 	}
 
-	if (txc->modes & ADJ_SETOFFSET) {
-		struct timespec delta;
-		delta.tv_sec = txc->time.tv_sec;
-		delta.tv_nsec = txc->time.tv_usec;
-		if (!capable(CAP_SYS_TIME))
-			return -EPERM;
-		if (!(txc->modes & ADJ_NANO))
-			delta.tv_nsec *= 1000;
-		result = timekeeping_inject_offset(&delta);
-		if (result)
-			return result;
-	}
+	if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME)))
+		return -EPERM;
 
-	getnstimeofday(&ts);
+	return 0;
+}
 
-	raw_spin_lock_irq(&ntp_lock);
+
+/*
+ * adjtimex mainly allows reading (and writing, if superuser) of
+ * kernel time-keeping variables. used by xntpd.
+ */
+int __do_adjtimex(struct timex *txc, struct timespec *ts, s32 *time_tai)
+{
+	int result;
 
 	if (txc->modes & ADJ_ADJTIME) {
 		long save_adjust = time_adjust;
@@ -687,7 +655,7 @@ int do_adjtimex(struct timex *txc)
 
 	/* If there are input parameters, then process them: */
 	if (txc->modes)
-		process_adjtimex_modes(txc, &ts);
+		process_adjtimex_modes(txc, ts, time_tai);
 
 	txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
 				  NTP_SCALE_SHIFT);
@@ -709,15 +677,13 @@ int do_adjtimex(struct timex *txc)
 	txc->precision = 1;
 	txc->tolerance = MAXFREQ_SCALED / PPM_SCALE;
 	txc->tick = tick_usec;
-	txc->tai = time_tai;
+	txc->tai = *time_tai;
 
 	/* fill PPS status fields */
 	pps_fill_timex(txc);
 
-	raw_spin_unlock_irq(&ntp_lock);
-
-	txc->time.tv_sec = ts.tv_sec;
-	txc->time.tv_usec = ts.tv_nsec;
+	txc->time.tv_sec = ts->tv_sec;
+	txc->time.tv_usec = ts->tv_nsec;
 	if (!(time_status & STA_NANO))
 		txc->time.tv_usec /= NSEC_PER_USEC;
 
@@ -894,7 +860,7 @@ static void hardpps_update_phase(long error)
 }
 
 /*
- * hardpps() - discipline CPU clock oscillator to external PPS signal
+ * __hardpps() - discipline CPU clock oscillator to external PPS signal
  *
  * This routine is called at each PPS signal arrival in order to
  * discipline the CPU clock oscillator to the PPS signal. It takes two
@@ -905,15 +871,13 @@ static void hardpps_update_phase(long error)
  * This code is based on David Mills's reference nanokernel
  * implementation. It was mostly rewritten but keeps the same idea.
  */
-void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
+void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
 {
 	struct pps_normtime pts_norm, freq_norm;
 	unsigned long flags;
 
 	pts_norm = pps_normalize_ts(*phase_ts);
 
-	raw_spin_lock_irqsave(&ntp_lock, flags);
-
 	/* clear the error bits, they will be set again if needed */
 	time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
 
@@ -925,7 +889,6 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
 	 * just start the frequency interval */
 	if (unlikely(pps_fbase.tv_sec == 0)) {
 		pps_fbase = *raw_ts;
-		raw_spin_unlock_irqrestore(&ntp_lock, flags);
 		return;
 	}
 
@@ -940,7 +903,6 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
 		time_status |= STA_PPSJITTER;
 		/* restart the frequency calibration interval */
 		pps_fbase = *raw_ts;
-		raw_spin_unlock_irqrestore(&ntp_lock, flags);
 		pr_err("hardpps: PPSJITTER: bad pulse\n");
 		return;
 	}
@@ -957,10 +919,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
 
 	hardpps_update_phase(pts_norm.nsec);
 
-	raw_spin_unlock_irqrestore(&ntp_lock, flags);
 }
-EXPORT_SYMBOL(hardpps);
-
 
 #endif	/* CONFIG_NTP_PPS */
 
 static int __init ntp_tick_adj_setup(char *str)
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
new file mode 100644
index 000000000000..1950cb4ca2a4
--- /dev/null
+++ b/kernel/time/ntp_internal.h
@@ -0,0 +1,12 @@
+#ifndef _LINUX_NTP_INTERNAL_H
+#define _LINUX_NTP_INTERNAL_H
+
+extern void ntp_init(void);
+extern void ntp_clear(void);
+/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
+extern u64 ntp_tick_length(void);
+extern int second_overflow(unsigned long secs);
+extern int ntp_validate_timex(struct timex *);
+extern int __do_adjtimex(struct timex *, struct timespec *, s32 *);
+extern void __hardpps(const struct timespec *, const struct timespec *);
+#endif /* _LINUX_NTP_INTERNAL_H */
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 7f32fe0e52cd..61d00a8cdf2f 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -28,9 +28,8 @@
  */
 
 static struct tick_device tick_broadcast_device;
-/* FIXME: Use cpumask_var_t. */
-static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
-static DECLARE_BITMAP(tmpmask, NR_CPUS);
+static cpumask_var_t tick_broadcast_mask;
+static cpumask_var_t tmpmask;
 static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
 static int tick_broadcast_force;
 
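DECLARE_BITMAP(..., NR_CPUS) reserves NR_CPUS bits of static storage even on small machines; cpumask_var_t is allocated at runtime when CONFIG_CPUMASK_OFFSTACK=y and degenerates to an embedded bitmap otherwise (tick_broadcast_init() at the end of this file does the allocations with GFP_NOWAIT, since it runs early in boot). A minimal sketch of the general pattern — the mask name is hypothetical:

static cpumask_var_t example_mask;

static int __init example_init(void)
{
	/* Can only fail when CONFIG_CPUMASK_OFFSTACK=y */
	if (!zalloc_cpumask_var(&example_mask, GFP_KERNEL))
		return -ENOMEM;
	return 0;
}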
@@ -50,7 +49,7 @@ struct tick_device *tick_get_broadcast_device(void)
 
 struct cpumask *tick_get_broadcast_mask(void)
 {
-	return to_cpumask(tick_broadcast_mask);
+	return tick_broadcast_mask;
 }
 
 /*
@@ -67,6 +66,8 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc)
  */
 int tick_check_broadcast_device(struct clock_event_device *dev)
 {
+	struct clock_event_device *cur = tick_broadcast_device.evtdev;
+
 	if ((dev->features & CLOCK_EVT_FEAT_DUMMY) ||
 	    (tick_broadcast_device.evtdev &&
 	     tick_broadcast_device.evtdev->rating >= dev->rating) ||
@@ -74,9 +75,21 @@ int tick_check_broadcast_device(struct clock_event_device *dev)
 		return 0;
 
 	clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
+	if (cur)
+		cur->event_handler = clockevents_handle_noop;
 	tick_broadcast_device.evtdev = dev;
-	if (!cpumask_empty(tick_get_broadcast_mask()))
+	if (!cpumask_empty(tick_broadcast_mask))
 		tick_broadcast_start_periodic(dev);
+	/*
+	 * Inform all cpus about this. We might be in a situation
+	 * where we did not switch to oneshot mode because the per cpu
+	 * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
+	 * of a oneshot capable broadcast device. Without that
+	 * notification the system stays stuck in periodic mode
+	 * forever.
+	 */
+	if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
+		tick_clock_notify();
 	return 1;
 }
 
@@ -124,7 +137,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 	if (!tick_device_is_functional(dev)) {
 		dev->event_handler = tick_handle_periodic;
 		tick_device_setup_broadcast_func(dev);
-		cpumask_set_cpu(cpu, tick_get_broadcast_mask());
+		cpumask_set_cpu(cpu, tick_broadcast_mask);
 		tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
 		ret = 1;
 	} else {
@@ -135,7 +148,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 		 */
 		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
 			int cpu = smp_processor_id();
-			cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
+			cpumask_clear_cpu(cpu, tick_broadcast_mask);
 			tick_broadcast_clear_oneshot(cpu);
 		} else {
 			tick_device_setup_broadcast_func(dev);
@@ -199,9 +212,8 @@ static void tick_do_periodic_broadcast(void)
 {
 	raw_spin_lock(&tick_broadcast_lock);
 
-	cpumask_and(to_cpumask(tmpmask),
-		    cpu_online_mask, tick_get_broadcast_mask());
-	tick_do_broadcast(to_cpumask(tmpmask));
+	cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
+	tick_do_broadcast(tmpmask);
 
 	raw_spin_unlock(&tick_broadcast_lock);
 }
@@ -264,13 +276,12 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
 	if (!tick_device_is_functional(dev))
 		goto out;
 
-	bc_stopped = cpumask_empty(tick_get_broadcast_mask());
+	bc_stopped = cpumask_empty(tick_broadcast_mask);
 
 	switch (*reason) {
 	case CLOCK_EVT_NOTIFY_BROADCAST_ON:
 	case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
-		if (!cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
-			cpumask_set_cpu(cpu, tick_get_broadcast_mask());
+		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
 			if (tick_broadcast_device.mode ==
 					TICKDEV_MODE_PERIODIC)
 				clockevents_shutdown(dev);
@@ -280,8 +291,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
 		break;
 	case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
 		if (!tick_broadcast_force &&
-		    cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
-			cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
+		    cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
 			if (tick_broadcast_device.mode ==
 					TICKDEV_MODE_PERIODIC)
 				tick_setup_periodic(dev, 0);
@@ -289,7 +299,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
 		break;
 	}
 
-	if (cpumask_empty(tick_get_broadcast_mask())) {
+	if (cpumask_empty(tick_broadcast_mask)) {
 		if (!bc_stopped)
 			clockevents_shutdown(bc);
 	} else if (bc_stopped) {
@@ -338,10 +348,10 @@ void tick_shutdown_broadcast(unsigned int *cpup)
 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 
 	bc = tick_broadcast_device.evtdev;
-	cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
+	cpumask_clear_cpu(cpu, tick_broadcast_mask);
 
 	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
-		if (bc && cpumask_empty(tick_get_broadcast_mask()))
+		if (bc && cpumask_empty(tick_broadcast_mask))
 			clockevents_shutdown(bc);
 	}
 
@@ -377,13 +387,13 @@ int tick_resume_broadcast(void)
 
 	switch (tick_broadcast_device.mode) {
 	case TICKDEV_MODE_PERIODIC:
-		if (!cpumask_empty(tick_get_broadcast_mask()))
+		if (!cpumask_empty(tick_broadcast_mask))
 			tick_broadcast_start_periodic(bc);
 		broadcast = cpumask_test_cpu(smp_processor_id(),
-					     tick_get_broadcast_mask());
+					     tick_broadcast_mask);
 		break;
 	case TICKDEV_MODE_ONESHOT:
-		if (!cpumask_empty(tick_get_broadcast_mask()))
+		if (!cpumask_empty(tick_broadcast_mask))
 			broadcast = tick_resume_broadcast_oneshot(bc);
 		break;
 	}
@@ -396,25 +406,58 @@ int tick_resume_broadcast(void)
 
 #ifdef CONFIG_TICK_ONESHOT
 
-/* FIXME: use cpumask_var_t. */
-static DECLARE_BITMAP(tick_broadcast_oneshot_mask, NR_CPUS);
+static cpumask_var_t tick_broadcast_oneshot_mask;
+static cpumask_var_t tick_broadcast_pending_mask;
+static cpumask_var_t tick_broadcast_force_mask;
 
 /*
  * Exposed for debugging: see timer_list.c
  */
 struct cpumask *tick_get_broadcast_oneshot_mask(void)
 {
-	return to_cpumask(tick_broadcast_oneshot_mask);
+	return tick_broadcast_oneshot_mask;
 }
 
-static int tick_broadcast_set_event(ktime_t expires, int force)
+/*
+ * Called before going idle with interrupts disabled. Checks whether a
+ * broadcast event from the other core is about to happen. We detected
+ * that in tick_broadcast_oneshot_control(). The callsite can use this
+ * to avoid a deep idle transition as we are about to get the
+ * broadcast IPI right away.
+ */
+int tick_check_broadcast_expired(void)
 {
-	struct clock_event_device *bc = tick_broadcast_device.evtdev;
+	return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
+}
+
+/*
+ * Set broadcast interrupt affinity
+ */
+static void tick_broadcast_set_affinity(struct clock_event_device *bc,
+					const struct cpumask *cpumask)
+{
+	if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
+		return;
+
+	if (cpumask_equal(bc->cpumask, cpumask))
+		return;
+
+	bc->cpumask = cpumask;
+	irq_set_affinity(bc->irq, bc->cpumask);
+}
+
+static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
+				    ktime_t expires, int force)
+{
+	int ret;
 
 	if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
 		clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
 
-	return clockevents_program_event(bc, expires, force);
+	ret = clockevents_program_event(bc, expires, force);
+	if (!ret)
+		tick_broadcast_set_affinity(bc, cpumask_of(cpu));
+	return ret;
 }
 
 int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
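tick_broadcast_set_affinity() only acts when the broadcast device advertises CLOCK_EVT_FEAT_DYNIRQ and carries a usable .irq. A hypothetical driver opting in might look like the sketch below; the names, stubs, and IRQ number are illustrative, not from this diff.

static int example_set_next_event(unsigned long delta,
				  struct clock_event_device *evt)
{
	/* program the hardware comparator for "delta" clock cycles */
	return 0;
}

static void example_set_mode(enum clock_event_mode mode,
			     struct clock_event_device *evt)
{
	/* switch the hardware between periodic/oneshot/shutdown */
}

static struct clock_event_device example_bc_dev = {
	.name		= "example-bc",
	.features	= CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_DYNIRQ,
	.rating		= 300,
	.irq		= 42,	/* hypothetical broadcast interrupt line */
	.set_next_event	= example_set_next_event,
	.set_mode	= example_set_mode,
};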
@@ -429,7 +472,7 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
  */
 void tick_check_oneshot_broadcast(int cpu)
 {
-	if (cpumask_test_cpu(cpu, to_cpumask(tick_broadcast_oneshot_mask))) {
+	if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
 		struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
 
 		clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
@@ -443,27 +486,39 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
 {
 	struct tick_device *td;
 	ktime_t now, next_event;
-	int cpu;
+	int cpu, next_cpu = 0;
 
 	raw_spin_lock(&tick_broadcast_lock);
 again:
 	dev->next_event.tv64 = KTIME_MAX;
 	next_event.tv64 = KTIME_MAX;
-	cpumask_clear(to_cpumask(tmpmask));
+	cpumask_clear(tmpmask);
 	now = ktime_get();
 	/* Find all expired events */
-	for_each_cpu(cpu, tick_get_broadcast_oneshot_mask()) {
+	for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
 		td = &per_cpu(tick_cpu_device, cpu);
-		if (td->evtdev->next_event.tv64 <= now.tv64)
-			cpumask_set_cpu(cpu, to_cpumask(tmpmask));
-		else if (td->evtdev->next_event.tv64 < next_event.tv64)
+		if (td->evtdev->next_event.tv64 <= now.tv64) {
+			cpumask_set_cpu(cpu, tmpmask);
+			/*
+			 * Mark the remote cpu in the pending mask, so
+			 * it can avoid reprogramming the cpu local
+			 * timer in tick_broadcast_oneshot_control().
+			 */
+			cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
+		} else if (td->evtdev->next_event.tv64 < next_event.tv64) {
 			next_event.tv64 = td->evtdev->next_event.tv64;
+			next_cpu = cpu;
+		}
 	}
 
+	/* Take care of enforced broadcast requests */
+	cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
+	cpumask_clear(tick_broadcast_force_mask);
+
 	/*
 	 * Wakeup the cpus which have an expired event.
 	 */
-	tick_do_broadcast(to_cpumask(tmpmask));
+	tick_do_broadcast(tmpmask);
 
 	/*
 	 * Two reasons for reprogram:
@@ -480,7 +535,7 @@ again:
 		 * Rearm the broadcast device. If event expired,
 		 * repeat the above
 		 */
-		if (tick_broadcast_set_event(next_event, 0))
+		if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
 			goto again;
 	}
 	raw_spin_unlock(&tick_broadcast_lock);
@@ -495,6 +550,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
 	struct clock_event_device *bc, *dev;
 	struct tick_device *td;
 	unsigned long flags;
+	ktime_t now;
 	int cpu;
 
 	/*
@@ -519,21 +575,84 @@ void tick_broadcast_oneshot_control(unsigned long reason)
 
 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 	if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
-		if (!cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
-			cpumask_set_cpu(cpu, tick_get_broadcast_oneshot_mask());
+		WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
+		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
 			clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
-			if (dev->next_event.tv64 < bc->next_event.tv64)
-				tick_broadcast_set_event(dev->next_event, 1);
+			/*
+			 * We only reprogram the broadcast timer if we
+			 * did not mark ourselves in the force mask and
+			 * if the cpu local event is earlier than the
+			 * broadcast event. If the current CPU is in
+			 * the force mask, then we are going to be
+			 * woken by the IPI right away.
+			 */
+			if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
+			    dev->next_event.tv64 < bc->next_event.tv64)
+				tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
 		}
 	} else {
-		if (cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
-			cpumask_clear_cpu(cpu,
-					  tick_get_broadcast_oneshot_mask());
+		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
 			clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
-			if (dev->next_event.tv64 != KTIME_MAX)
-				tick_program_event(dev->next_event, 1);
+			if (dev->next_event.tv64 == KTIME_MAX)
+				goto out;
+			/*
+			 * The cpu which was handling the broadcast
+			 * timer marked this cpu in the broadcast
+			 * pending mask and fired the broadcast
+			 * IPI. So we are going to handle the expired
+			 * event anyway via the broadcast IPI
+			 * handler. No need to reprogram the timer
+			 * with an already expired event.
+			 */
+			if (cpumask_test_and_clear_cpu(cpu,
+				       tick_broadcast_pending_mask))
+				goto out;
+
+			/*
+			 * If the pending bit is not set, then we are
+			 * either the CPU handling the broadcast
+			 * interrupt or we got woken by something else.
+			 *
+			 * We are no longer in the broadcast mask, so
+			 * if the cpu local expiry time is already
+			 * reached, we would reprogram the cpu local
+			 * timer with an already expired event.
+			 *
+			 * This can lead to a ping-pong when we return
+			 * to idle and therefore rearm the broadcast
+			 * timer before the cpu local timer was able
+			 * to fire. This happens because the forced
+			 * reprogramming makes sure that the event
+			 * will happen in the future and depending on
+			 * the min_delta setting this might be far
+			 * enough out that the ping-pong starts.
+			 *
+			 * If the cpu local next_event has expired
+			 * then we know that the broadcast timer
+			 * next_event has expired as well and
+			 * broadcast is about to be handled. So we
+			 * avoid reprogramming and enforce that the
+			 * broadcast handler, which did not run yet,
+			 * will invoke the cpu local handler.
+			 *
+			 * We cannot call the handler directly from
+			 * here, because we might be in a NOHZ phase
+			 * and we did not go through the irq_enter()
+			 * nohz fixups.
+			 */
+			now = ktime_get();
+			if (dev->next_event.tv64 <= now.tv64) {
+				cpumask_set_cpu(cpu, tick_broadcast_force_mask);
+				goto out;
+			}
+			/*
+			 * We got woken by something else. Reprogram
+			 * the cpu local timer device.
+			 */
+			tick_program_event(dev->next_event, 1);
 		}
 	}
+out:
 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
 
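The force mask set in the exit path above is what tick_check_broadcast_expired() (added earlier in this file) reports to the idle code. A rough sketch of an idle-path caller, under the assumption that the platform checks it between the ENTER notification and committing to a deep C-state; enter_deep_idle_state() is a hypothetical helper:

	int cpu = smp_processor_id();

	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
	if (tick_check_broadcast_expired())
		cpu_relax();		/* broadcast IPI imminent, stay shallow */
	else
		enter_deep_idle_state();
	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);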
@@ -544,7 +663,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
  */
 static void tick_broadcast_clear_oneshot(int cpu)
 {
-	cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
+	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
 }
 
 static void tick_broadcast_init_next_event(struct cpumask *mask,
@@ -582,17 +701,16 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 		 * oneshot_mask bits for those and program the
 		 * broadcast device to fire.
 		 */
-		cpumask_copy(to_cpumask(tmpmask), tick_get_broadcast_mask());
-		cpumask_clear_cpu(cpu, to_cpumask(tmpmask));
-		cpumask_or(tick_get_broadcast_oneshot_mask(),
-			   tick_get_broadcast_oneshot_mask(),
-			   to_cpumask(tmpmask));
+		cpumask_copy(tmpmask, tick_broadcast_mask);
+		cpumask_clear_cpu(cpu, tmpmask);
+		cpumask_or(tick_broadcast_oneshot_mask,
+			   tick_broadcast_oneshot_mask, tmpmask);
 
-		if (was_periodic && !cpumask_empty(to_cpumask(tmpmask))) {
+		if (was_periodic && !cpumask_empty(tmpmask)) {
 			clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
-			tick_broadcast_init_next_event(to_cpumask(tmpmask),
+			tick_broadcast_init_next_event(tmpmask,
 						tick_next_period);
-			tick_broadcast_set_event(tick_next_period, 1);
+			tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
 		} else
 			bc->next_event.tv64 = KTIME_MAX;
 	} else {
@@ -640,7 +758,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
 	 * Clear the broadcast mask flag for the dead cpu, but do not
 	 * stop the broadcast device!
 	 */
-	cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
+	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
 
 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
@@ -664,3 +782,14 @@ bool tick_broadcast_oneshot_available(void)
 }
 
 #endif
+
+void __init tick_broadcast_init(void)
+{
+	alloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
+	alloc_cpumask_var(&tmpmask, GFP_NOWAIT);
+#ifdef CONFIG_TICK_ONESHOT
+	alloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
+	alloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
+	alloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
+#endif
+}
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index b1600a6973f4..6176a3e45709 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -323,6 +323,7 @@ static void tick_shutdown(unsigned int *cpup)
 	 */
 		dev->mode = CLOCK_EVT_MODE_UNUSED;
 		clockevents_exchange_device(dev, NULL);
+		dev->event_handler = clockevents_handle_noop;
 		td->evtdev = NULL;
 	}
 	raw_spin_unlock_irqrestore(&tick_device_lock, flags);
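Pointing the dead CPU's event_handler at clockevents_handle_noop means a late interrupt from the shut-down device lands in an empty function instead of stale tick code. The handler is, paraphrasing its definition in kernel/time/clockevents.c, simply:

void clockevents_handle_noop(struct clock_event_device *dev)
{
}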
@@ -416,4 +417,5 @@ static struct notifier_block tick_notifier = {
 void __init tick_init(void)
 {
 	clockevents_register_notifier(&tick_notifier);
+	tick_broadcast_init();
 }
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index cf3e59ed6dc0..f0299eae4602 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -4,6 +4,8 @@
 #include <linux/hrtimer.h>
 #include <linux/tick.h>
 
+extern seqlock_t jiffies_lock;
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
 
 #define TICK_DO_TIMER_NONE	-1
@@ -94,7 +96,7 @@ extern void tick_broadcast_on_off(unsigned long reason, int *oncpu);
 extern void tick_shutdown_broadcast(unsigned int *cpup);
 extern void tick_suspend_broadcast(void);
 extern int tick_resume_broadcast(void);
-
+extern void tick_broadcast_init(void);
 extern void
 tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
 
@@ -119,6 +121,7 @@ static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { }
 static inline void tick_shutdown_broadcast(unsigned int *cpup) { }
 static inline void tick_suspend_broadcast(void) { }
 static inline int tick_resume_broadcast(void) { return 0; }
+static inline void tick_broadcast_init(void) { }
 
 /*
  * Set the periodic handler in non broadcast mode
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index a19a39952c1b..225f8bf19095 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -482,8 +482,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 
 	if (ratelimit < 10 &&
 	    (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
-		printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
-		       (unsigned int) local_softirq_pending());
+		pr_warn("NOHZ: local_softirq_pending %02x\n",
+			(unsigned int) local_softirq_pending());
 		ratelimit++;
 	}
 	return false;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 9a0bc98fbe1d..98cd470bbe49 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -23,8 +23,13 @@
 #include <linux/stop_machine.h>
 #include <linux/pvclock_gtod.h>
 
+#include "tick-internal.h"
+#include "ntp_internal.h"
 
 static struct timekeeper timekeeper;
+static DEFINE_RAW_SPINLOCK(timekeeper_lock);
+static seqcount_t timekeeper_seq;
+static struct timekeeper shadow_timekeeper;
 
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
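The per-timekeeper seqlock is replaced here by a global raw spinlock for updaters plus a bare seqcount for readers, and every updater converted below follows the same bracketing. The canonical writer pattern, as used throughout this file after this change:

	unsigned long flags;

	raw_spin_lock_irqsave(&timekeeper_lock, flags);
	write_seqcount_begin(&timekeeper_seq);

	/* ... modify timekeeper state ... */

	write_seqcount_end(&timekeeper_seq);
	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);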
@@ -67,6 +72,7 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm)
 	tk->wall_to_monotonic = wtm;
 	set_normalized_timespec(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
 	tk->offs_real = timespec_to_ktime(tmp);
+	tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tk->tai_offset, 0));
 }
 
 static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t)
@@ -96,7 +102,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 
 	old_clock = tk->clock;
 	tk->clock = clock;
-	clock->cycle_last = clock->read(clock);
+	tk->cycle_last = clock->cycle_last = clock->read(clock);
 
 	/* Do the ns -> cycle conversion first, using original mult */
 	tmp = NTP_INTERVAL_LENGTH;
@@ -201,8 +207,6 @@ static void update_pvclock_gtod(struct timekeeper *tk)
 
 /**
  * pvclock_gtod_register_notifier - register a pvclock timedata update listener
- *
- * Must hold write on timekeeper.lock
  */
 int pvclock_gtod_register_notifier(struct notifier_block *nb)
 {
@@ -210,11 +214,10 @@ int pvclock_gtod_register_notifier(struct notifier_block *nb)
 	unsigned long flags;
 	int ret;
 
-	write_seqlock_irqsave(&tk->lock, flags);
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
 	ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
-	/* update timekeeping data */
 	update_pvclock_gtod(tk);
-	write_sequnlock_irqrestore(&tk->lock, flags);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 	return ret;
 }
@@ -223,25 +226,22 @@ EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier);
 /**
  * pvclock_gtod_unregister_notifier - unregister a pvclock
  * timedata update listener
- *
- * Must hold write on timekeeper.lock
  */
 int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
 {
-	struct timekeeper *tk = &timekeeper;
 	unsigned long flags;
 	int ret;
 
-	write_seqlock_irqsave(&tk->lock, flags);
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
 	ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
-	write_sequnlock_irqrestore(&tk->lock, flags);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 	return ret;
 }
 EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
 
-/* must hold write on timekeeper.lock */
-static void timekeeping_update(struct timekeeper *tk, bool clearntp)
+/* must hold timekeeper_lock */
+static void timekeeping_update(struct timekeeper *tk, bool clearntp, bool mirror)
 {
 	if (clearntp) {
 		tk->ntp_error = 0;
@@ -249,6 +249,9 @@ static void timekeeping_update(struct timekeeper *tk, bool clearntp)
 	}
 	update_vsyscall(tk);
 	update_pvclock_gtod(tk);
+
+	if (mirror)
+		memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper));
 }
 
 /**
@@ -267,7 +270,7 @@ static void timekeeping_forward_now(struct timekeeper *tk)
 	clock = tk->clock;
 	cycle_now = clock->read(clock);
 	cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
-	clock->cycle_last = cycle_now;
+	tk->cycle_last = clock->cycle_last = cycle_now;
 
 	tk->xtime_nsec += cycle_delta * tk->mult;
 
@@ -294,12 +297,12 @@ int __getnstimeofday(struct timespec *ts)
 	s64 nsecs = 0;
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 
 		ts->tv_sec = tk->xtime_sec;
 		nsecs = timekeeping_get_ns(tk);
 
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 
 	ts->tv_nsec = 0;
 	timespec_add_ns(ts, nsecs);
@@ -335,11 +338,11 @@ ktime_t ktime_get(void)
 	WARN_ON(timekeeping_suspended);
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 		secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
 		nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec;
 
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 	/*
 	 * Use ktime_set/ktime_add_ns to create a proper ktime on
 	 * 32-bit architectures without CONFIG_KTIME_SCALAR.
@@ -366,12 +369,12 @@ void ktime_get_ts(struct timespec *ts)
 	WARN_ON(timekeeping_suspended);
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 		ts->tv_sec = tk->xtime_sec;
 		nsec = timekeeping_get_ns(tk);
 		tomono = tk->wall_to_monotonic;
 
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 
 	ts->tv_sec += tomono.tv_sec;
 	ts->tv_nsec = 0;
@@ -379,6 +382,50 @@ void ktime_get_ts(struct timespec *ts)
 }
 EXPORT_SYMBOL_GPL(ktime_get_ts);
 
+
+/**
+ * timekeeping_clocktai - Returns the TAI time of day in a timespec
+ * @ts: pointer to the timespec to be set
+ *
+ * Returns the time of day in a timespec.
+ */
+void timekeeping_clocktai(struct timespec *ts)
+{
+	struct timekeeper *tk = &timekeeper;
+	unsigned long seq;
+	u64 nsecs;
+
+	WARN_ON(timekeeping_suspended);
+
+	do {
+		seq = read_seqcount_begin(&timekeeper_seq);
+
+		ts->tv_sec = tk->xtime_sec + tk->tai_offset;
+		nsecs = timekeeping_get_ns(tk);
+
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
+
+	ts->tv_nsec = 0;
+	timespec_add_ns(ts, nsecs);
+
+}
+EXPORT_SYMBOL(timekeeping_clocktai);
+
+
+/**
+ * ktime_get_clocktai - Returns the TAI time of day in a ktime
+ *
+ * Returns the time of day in a ktime.
+ */
+ktime_t ktime_get_clocktai(void)
+{
+	struct timespec ts;
+
+	timekeeping_clocktai(&ts);
+	return timespec_to_ktime(ts);
+}
+EXPORT_SYMBOL(ktime_get_clocktai);
+
 #ifdef CONFIG_NTP_PPS
 
 /**
@@ -399,7 +446,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
 	WARN_ON_ONCE(timekeeping_suspended);
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 
 		*ts_raw = tk->raw_time;
 		ts_real->tv_sec = tk->xtime_sec;
@@ -408,7 +455,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
 		nsecs_raw = timekeeping_get_ns_raw(tk);
 		nsecs_real = timekeeping_get_ns(tk);
 
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 
 	timespec_add_ns(ts_raw, nsecs_raw);
 	timespec_add_ns(ts_real, nsecs_real);
@@ -448,7 +495,8 @@ int do_settimeofday(const struct timespec *tv)
 	if (!timespec_valid_strict(tv))
 		return -EINVAL;
 
-	write_seqlock_irqsave(&tk->lock, flags);
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
+	write_seqcount_begin(&timekeeper_seq);
 
 	timekeeping_forward_now(tk);
 
@@ -460,9 +508,10 @@ int do_settimeofday(const struct timespec *tv)
 
 	tk_set_xtime(tk, tv);
 
-	timekeeping_update(tk, true);
+	timekeeping_update(tk, true, true);
 
-	write_sequnlock_irqrestore(&tk->lock, flags);
+	write_seqcount_end(&timekeeper_seq);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 	/* signal hrtimers about time change */
 	clock_was_set();
@@ -487,7 +536,8 @@ int timekeeping_inject_offset(struct timespec *ts)
 	if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
 		return -EINVAL;
 
-	write_seqlock_irqsave(&tk->lock, flags);
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
+	write_seqcount_begin(&timekeeper_seq);
 
 	timekeeping_forward_now(tk);
 
@@ -502,9 +552,10 @@ int timekeeping_inject_offset(struct timespec *ts)
 	tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
 
 error: /* even if we error out, we forwarded the time, so call update */
-	timekeeping_update(tk, true);
+	timekeeping_update(tk, true, true);
 
-	write_sequnlock_irqrestore(&tk->lock, flags);
+	write_seqcount_end(&timekeeper_seq);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 	/* signal hrtimers about time change */
 	clock_was_set();
@@ -513,6 +564,52 @@ error: /* even if we error out, we forwarded the time, so call update */
 }
 EXPORT_SYMBOL(timekeeping_inject_offset);
 
+
+/**
+ * timekeeping_get_tai_offset - Returns current TAI offset from UTC
+ *
+ */
+s32 timekeeping_get_tai_offset(void)
+{
+	struct timekeeper *tk = &timekeeper;
+	unsigned int seq;
+	s32 ret;
+
+	do {
+		seq = read_seqcount_begin(&timekeeper_seq);
+		ret = tk->tai_offset;
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
+
+	return ret;
+}
+
+/**
+ * __timekeeping_set_tai_offset - Lock free worker function
+ *
+ */
+static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
+{
+	tk->tai_offset = tai_offset;
+	tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tai_offset, 0));
+}
+
+/**
+ * timekeeping_set_tai_offset - Sets the current TAI offset from UTC
+ *
+ */
+void timekeeping_set_tai_offset(s32 tai_offset)
+{
+	struct timekeeper *tk = &timekeeper;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
+	write_seqcount_begin(&timekeeper_seq);
+	__timekeeping_set_tai_offset(tk, tai_offset);
+	write_seqcount_end(&timekeeper_seq);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+	clock_was_set();
+}
+
 /**
  * change_clocksource - Swaps clocksources if a new one is available
  *
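With the tai_offset maintained here and CLOCK_TAI wired into the posix-timer code elsewhere in this series, user space can read TAI directly. A small demonstration program; CLOCK_TAI is clockid 11 in this series, and the fallback define below is only an assumption for older userspace headers:

#include <stdio.h>
#include <time.h>

#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

int main(void)
{
	struct timespec utc, tai;

	clock_gettime(CLOCK_REALTIME, &utc);
	clock_gettime(CLOCK_TAI, &tai);
	/* roughly the leap-second offset, modulo the race between reads */
	printf("TAI-UTC: %ld s\n", (long)(tai.tv_sec - utc.tv_sec));
	return 0;
}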
@@ -526,7 +623,8 @@ static int change_clocksource(void *data)
 
 	new = (struct clocksource *) data;
 
-	write_seqlock_irqsave(&tk->lock, flags);
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
+	write_seqcount_begin(&timekeeper_seq);
 
 	timekeeping_forward_now(tk);
 	if (!new->enable || new->enable(new) == 0) {
@@ -535,9 +633,10 @@ static int change_clocksource(void *data)
 		if (old->disable)
 			old->disable(old);
 	}
-	timekeeping_update(tk, true);
+	timekeeping_update(tk, true, true);
 
-	write_sequnlock_irqrestore(&tk->lock, flags);
+	write_seqcount_end(&timekeeper_seq);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 	return 0;
 }
@@ -587,11 +686,11 @@ void getrawmonotonic(struct timespec *ts)
 	s64 nsecs;
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 		nsecs = timekeeping_get_ns_raw(tk);
 		*ts = tk->raw_time;
 
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 
 	timespec_add_ns(ts, nsecs);
 }
@@ -607,11 +706,11 @@ int timekeeping_valid_for_hres(void)
 	int ret;
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 
 		ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
 
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 
 	return ret;
 }
@@ -626,11 +725,11 @@ u64 timekeeping_max_deferment(void)
 	u64 ret;
 
 	do {
-		seq = read_seqbegin(&tk->lock);
+		seq = read_seqcount_begin(&timekeeper_seq);
 
 		ret = tk->clock->max_idle_ns;
 
-	} while (read_seqretry(&tk->lock, seq));
+	} while (read_seqcount_retry(&timekeeper_seq, seq));
 
 	return ret;
 }
@@ -693,11 +792,10 @@ void __init timekeeping_init(void)
 		boot.tv_nsec = 0;
 	}
 
-	seqlock_init(&tk->lock);
-
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
+	write_seqcount_begin(&timekeeper_seq);
 	ntp_init();
 
-	write_seqlock_irqsave(&tk->lock, flags);
 	clock = clocksource_default_clock();
 	if (clock->enable)
 		clock->enable(clock);
@@ -716,7 +814,10 @@ void __init timekeeping_init(void)
 	tmp.tv_nsec = 0;
 	tk_set_sleep_time(tk, tmp);
 
-	write_sequnlock_irqrestore(&tk->lock, flags);
+	memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper));
+
+	write_seqcount_end(&timekeeper_seq);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 }
 
 /* time in seconds when suspend began */
@@ -764,15 +865,17 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
 	if (has_persistent_clock())
 		return;
 
-	write_seqlock_irqsave(&tk->lock, flags);
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
+	write_seqcount_begin(&timekeeper_seq);
 
 	timekeeping_forward_now(tk);
 
 	__timekeeping_inject_sleeptime(tk, delta);
 
-	timekeeping_update(tk, true);
+	timekeeping_update(tk, true, true);
 
-	write_sequnlock_irqrestore(&tk->lock, flags);
+	write_seqcount_end(&timekeeper_seq);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 	/* signal hrtimers about time change */
 	clock_was_set();
@@ -788,26 +891,72 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
 static void timekeeping_resume(void)
 {
 	struct timekeeper *tk = &timekeeper;
+	struct clocksource *clock = tk->clock;
 	unsigned long flags;
-	struct timespec ts;
+	struct timespec ts_new, ts_delta;
+	cycle_t cycle_now, cycle_delta;
+	bool suspendtime_found = false;
 
-	read_persistent_clock(&ts);
+	read_persistent_clock(&ts_new);
 
 	clockevents_resume();
 	clocksource_resume();
 
-	write_seqlock_irqsave(&tk->lock, flags);
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
+	write_seqcount_begin(&timekeeper_seq);
+
+	/*
+	 * After system resumes, we need to calculate the suspended time and
+	 * compensate it for the OS time. There are 3 sources that could be
+	 * used: Nonstop clocksource during suspend, persistent clock and rtc
+	 * device.
+	 *
+	 * One specific platform may have 1 or 2 or all of them, and the
+	 * preference will be:
+	 *	suspend-nonstop clocksource -> persistent clock -> rtc
+	 * The less preferred source will only be tried if there is no better
+	 * usable source. The rtc part is handled separately in rtc core code.
+	 */
+	cycle_now = clock->read(clock);
+	if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
+		cycle_now > clock->cycle_last) {
+		u64 num, max = ULLONG_MAX;
+		u32 mult = clock->mult;
+		u32 shift = clock->shift;
+		s64 nsec = 0;
+
+		cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
 
-	if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) {
-		ts = timespec_sub(ts, timekeeping_suspend_time);
-		__timekeeping_inject_sleeptime(tk, &ts);
+		/*
+		 * "cycle_delta * mult" may cause 64-bit overflow if the
+		 * suspended time is too long. In that case we need to do
+		 * the 64-bit math carefully.
+		 */
+		do_div(max, mult);
+		if (cycle_delta > max) {
+			num = div64_u64(cycle_delta, max);
+			nsec = (((u64) max * mult) >> shift) * num;
+			cycle_delta -= num * max;
+		}
+		nsec += ((u64) cycle_delta * mult) >> shift;
+
+		ts_delta = ns_to_timespec(nsec);
+		suspendtime_found = true;
+	} else if (timespec_compare(&ts_new, &timekeeping_suspend_time) > 0) {
+		ts_delta = timespec_sub(ts_new, timekeeping_suspend_time);
+		suspendtime_found = true;
 	}
-	/* re-base the last cycle value */
-	tk->clock->cycle_last = tk->clock->read(tk->clock);
+
+	if (suspendtime_found)
+		__timekeeping_inject_sleeptime(tk, &ts_delta);
+
+	/* Re-base the last cycle value */
+	tk->cycle_last = clock->cycle_last = cycle_now;
 	tk->ntp_error = 0;
 	timekeeping_suspended = 0;
-	timekeeping_update(tk, false);
-	write_sequnlock_irqrestore(&tk->lock, flags);
+	timekeeping_update(tk, false, true);
+	write_seqcount_end(&timekeeper_seq);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 	touch_softlockup_watchdog();
 
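A worked sketch of the overflow-safe cycles-to-nanoseconds conversion above, runnable in userspace. The mult/shift pair 1398101/22 approximates a 3 GHz TSC-like clocksource (an assumed, illustrative value), where cycle_delta * mult would overflow a u64 after roughly 2^43 cycles, so the delta is processed in chunks of at most ULLONG_MAX / mult cycles:

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t cyc_to_ns(uint64_t cycle_delta, uint32_t mult, uint32_t shift)
{
	uint64_t max = ULLONG_MAX / mult;	/* largest safe chunk */
	uint64_t nsec = 0;

	if (cycle_delta > max) {
		uint64_t num = cycle_delta / max;

		nsec = ((max * mult) >> shift) * num;
		cycle_delta -= num * max;
	}
	return nsec + ((cycle_delta * mult) >> shift);
}

int main(void)
{
	/* ~19 hours of suspend at 3 GHz: forces the chunked path */
	printf("%llu ns\n",
	       (unsigned long long)cyc_to_ns(210000000000000ULL, 1398101, 22));
	return 0;
}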
@@ -826,7 +975,8 @@ static int timekeeping_suspend(void)
 
 	read_persistent_clock(&timekeeping_suspend_time);
 
-	write_seqlock_irqsave(&tk->lock, flags);
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
+	write_seqcount_begin(&timekeeper_seq);
 	timekeeping_forward_now(tk);
 	timekeeping_suspended = 1;
 
@@ -849,7 +999,8 @@ static int timekeeping_suspend(void)
 		timekeeping_suspend_time =
 			timespec_add(timekeeping_suspend_time, delta_delta);
 	}
-	write_sequnlock_irqrestore(&tk->lock, flags);
+	write_seqcount_end(&timekeeper_seq);
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
 	clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
 	clocksource_suspend();
@@ -1099,6 +1250,8 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
 			tk_set_wall_to_mono(tk,
 				timespec_sub(tk->wall_to_monotonic, ts));
 
+			__timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
+
 			clock_was_set_delayed();
 		}
 	}
@@ -1116,15 +1269,16 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
 static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
 						u32 shift)
 {
+	cycle_t interval = tk->cycle_interval << shift;
 	u64 raw_nsecs;
 
 	/* If the offset is smaller than a shifted interval, do nothing */
-	if (offset < tk->cycle_interval<<shift)
+	if (offset < interval)
 		return offset;
 
 	/* Accumulate one shifted interval */
-	offset -= tk->cycle_interval << shift;
-	tk->clock->cycle_last += tk->cycle_interval << shift;
+	offset -= interval;
+	tk->cycle_last += interval;
 
 	tk->xtime_nsec += tk->xtime_interval << shift;
 	accumulate_nsecs_to_secs(tk);
@@ -1181,27 +1335,28 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk)
 static void update_wall_time(void)
 {
 	struct clocksource *clock;
-	struct timekeeper *tk = &timekeeper;
+	struct timekeeper *real_tk = &timekeeper;
+	struct timekeeper *tk = &shadow_timekeeper;
 	cycle_t offset;
 	int shift = 0, maxshift;
 	unsigned long flags;
 
-	write_seqlock_irqsave(&tk->lock, flags);
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
 
 	/* Make sure we're fully resumed: */
 	if (unlikely(timekeeping_suspended))
 		goto out;
 
-	clock = tk->clock;
+	clock = real_tk->clock;
 
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
-	offset = tk->cycle_interval;
+	offset = real_tk->cycle_interval;
 #else
 	offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
 #endif
 
 	/* Check if there's really nothing to do */
-	if (offset < tk->cycle_interval)
+	if (offset < real_tk->cycle_interval)
 		goto out;
 
 	/*
@@ -1238,11 +1393,24 @@ static void update_wall_time(void)
1238 */ 1393 */
1239 accumulate_nsecs_to_secs(tk); 1394 accumulate_nsecs_to_secs(tk);
1240 1395
1241 timekeeping_update(tk, false); 1396 write_seqcount_begin(&timekeeper_seq);
1242 1397 /* Update clock->cycle_last with the new value */
1398 clock->cycle_last = tk->cycle_last;
1399 /*
1400 * Update the real timekeeper.
1401 *
1402 * We could avoid this memcpy by switching pointers, but that
1403 * requires changes to all other timekeeper usage sites as
1404 * well, i.e. move the timekeeper pointer getter into the
1405 * spinlocked/seqcount protected sections. And we trade this
1406 * memcpy under the timekeeper_seq against one before we start
1407 * updating.
1408 */
1409 memcpy(real_tk, tk, sizeof(*tk));
1410 timekeeping_update(real_tk, false, false);
1411 write_seqcount_end(&timekeeper_seq);
1243out: 1412out:
1244 write_sequnlock_irqrestore(&tk->lock, flags); 1413 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1245
1246} 1414}
1247 1415
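The memcpy comment above is the heart of the shadow-timekeeper change: all the slow accumulation now happens on a private shadow copy while readers keep sampling the old, consistent live copy, and only a single memcpy sits inside the seqcount-protected window. A reduced userspace model of the pattern, assuming a toy two-field state rather than the real struct timekeeper:

#include <stdio.h>
#include <string.h>

struct tk_state { long long xtime_sec, xtime_nsec; };

static struct tk_state timekeeper_live;   /* what readers sample */
static struct tk_state timekeeper_shadow; /* writer's scratch copy */

static void update_wall_time_model(void)
{
	struct tk_state *tk = &timekeeper_shadow;

	/* Slow part: accumulate on the shadow. Readers still see the
	 * old, consistent live copy and are never blocked by this. */
	tk->xtime_nsec += 999999999LL;
	if (tk->xtime_nsec >= 1000000000LL) {
		tk->xtime_nsec -= 1000000000LL;
		tk->xtime_sec++;
	}

	/* Fast part: publish with one memcpy. In the kernel this is
	 * essentially the only work inside write_seqcount_begin/end(),
	 * so readers retry for the duration of a copy, not of the
	 * whole accumulation. */
	memcpy(&timekeeper_live, tk, sizeof(*tk));
}

int main(void)
{
	for (int i = 0; i < 3; i++)
		update_wall_time_model();
	printf("%lld.%09lld\n", timekeeper_live.xtime_sec,
	       timekeeper_live.xtime_nsec);
	return 0;
}

In the real code the reverse copy (refreshing the shadow from the live timekeeper before the next update) is handled by timekeeping_update()'s new mirror argument, which is why its signature grows a third flag throughout this diff.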
1248/** 1416/**
@@ -1289,13 +1457,13 @@ void get_monotonic_boottime(struct timespec *ts)
1289 WARN_ON(timekeeping_suspended); 1457 WARN_ON(timekeeping_suspended);
1290 1458
1291 do { 1459 do {
1292 seq = read_seqbegin(&tk->lock); 1460 seq = read_seqcount_begin(&timekeeper_seq);
1293 ts->tv_sec = tk->xtime_sec; 1461 ts->tv_sec = tk->xtime_sec;
1294 nsec = timekeeping_get_ns(tk); 1462 nsec = timekeeping_get_ns(tk);
1295 tomono = tk->wall_to_monotonic; 1463 tomono = tk->wall_to_monotonic;
1296 sleep = tk->total_sleep_time; 1464 sleep = tk->total_sleep_time;
1297 1465
1298 } while (read_seqretry(&tk->lock, seq)); 1466 } while (read_seqcount_retry(&timekeeper_seq, seq));
1299 1467
1300 ts->tv_sec += tomono.tv_sec + sleep.tv_sec; 1468 ts->tv_sec += tomono.tv_sec + sleep.tv_sec;
1301 ts->tv_nsec = 0; 1469 ts->tv_nsec = 0;
@@ -1354,10 +1522,10 @@ struct timespec current_kernel_time(void)
1354 unsigned long seq; 1522 unsigned long seq;
1355 1523
1356 do { 1524 do {
1357 seq = read_seqbegin(&tk->lock); 1525 seq = read_seqcount_begin(&timekeeper_seq);
1358 1526
1359 now = tk_xtime(tk); 1527 now = tk_xtime(tk);
1360 } while (read_seqretry(&tk->lock, seq)); 1528 } while (read_seqcount_retry(&timekeeper_seq, seq));
1361 1529
1362 return now; 1530 return now;
1363} 1531}
@@ -1370,11 +1538,11 @@ struct timespec get_monotonic_coarse(void)
1370 unsigned long seq; 1538 unsigned long seq;
1371 1539
1372 do { 1540 do {
1373 seq = read_seqbegin(&tk->lock); 1541 seq = read_seqcount_begin(&timekeeper_seq);
1374 1542
1375 now = tk_xtime(tk); 1543 now = tk_xtime(tk);
1376 mono = tk->wall_to_monotonic; 1544 mono = tk->wall_to_monotonic;
1377 } while (read_seqretry(&tk->lock, seq)); 1545 } while (read_seqcount_retry(&timekeeper_seq, seq));
1378 1546
1379 set_normalized_timespec(&now, now.tv_sec + mono.tv_sec, 1547 set_normalized_timespec(&now, now.tv_sec + mono.tv_sec,
1380 now.tv_nsec + mono.tv_nsec); 1548 now.tv_nsec + mono.tv_nsec);
@@ -1405,11 +1573,11 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
1405 unsigned long seq; 1573 unsigned long seq;
1406 1574
1407 do { 1575 do {
1408 seq = read_seqbegin(&tk->lock); 1576 seq = read_seqcount_begin(&timekeeper_seq);
1409 *xtim = tk_xtime(tk); 1577 *xtim = tk_xtime(tk);
1410 *wtom = tk->wall_to_monotonic; 1578 *wtom = tk->wall_to_monotonic;
1411 *sleep = tk->total_sleep_time; 1579 *sleep = tk->total_sleep_time;
1412 } while (read_seqretry(&tk->lock, seq)); 1580 } while (read_seqcount_retry(&timekeeper_seq, seq));
1413} 1581}
1414 1582
1415#ifdef CONFIG_HIGH_RES_TIMERS 1583#ifdef CONFIG_HIGH_RES_TIMERS
@@ -1421,7 +1589,8 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
1421 * Returns current monotonic time and updates the offsets 1589 * Returns current monotonic time and updates the offsets
1422 * Called from hrtimer_interrupt() or retrigger_next_event() 1590 * Called from hrtimer_interrupt() or retrigger_next_event()
1423 */ 1591 */
1424ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot) 1592ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot,
1593 ktime_t *offs_tai)
1425{ 1594{
1426 struct timekeeper *tk = &timekeeper; 1595 struct timekeeper *tk = &timekeeper;
1427 ktime_t now; 1596 ktime_t now;
@@ -1429,14 +1598,15 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot)
1429 u64 secs, nsecs; 1598 u64 secs, nsecs;
1430 1599
1431 do { 1600 do {
1432 seq = read_seqbegin(&tk->lock); 1601 seq = read_seqcount_begin(&timekeeper_seq);
1433 1602
1434 secs = tk->xtime_sec; 1603 secs = tk->xtime_sec;
1435 nsecs = timekeeping_get_ns(tk); 1604 nsecs = timekeeping_get_ns(tk);
1436 1605
1437 *offs_real = tk->offs_real; 1606 *offs_real = tk->offs_real;
1438 *offs_boot = tk->offs_boot; 1607 *offs_boot = tk->offs_boot;
1439 } while (read_seqretry(&tk->lock, seq)); 1608 *offs_tai = tk->offs_tai;
1609 } while (read_seqcount_retry(&timekeeper_seq, seq));
1440 1610
1441 now = ktime_add_ns(ktime_set(secs, 0), nsecs); 1611 now = ktime_add_ns(ktime_set(secs, 0), nsecs);
1442 now = ktime_sub(now, *offs_real); 1612 now = ktime_sub(now, *offs_real);
@@ -1454,15 +1624,79 @@ ktime_t ktime_get_monotonic_offset(void)
1454 struct timespec wtom; 1624 struct timespec wtom;
1455 1625
1456 do { 1626 do {
1457 seq = read_seqbegin(&tk->lock); 1627 seq = read_seqcount_begin(&timekeeper_seq);
1458 wtom = tk->wall_to_monotonic; 1628 wtom = tk->wall_to_monotonic;
1459 } while (read_seqretry(&tk->lock, seq)); 1629 } while (read_seqcount_retry(&timekeeper_seq, seq));
1460 1630
1461 return timespec_to_ktime(wtom); 1631 return timespec_to_ktime(wtom);
1462} 1632}
1463EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset); 1633EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset);
1464 1634
1465/** 1635/**
1636 * do_adjtimex() - Accessor function to NTP __do_adjtimex function
1637 */
1638int do_adjtimex(struct timex *txc)
1639{
1640 struct timekeeper *tk = &timekeeper;
1641 unsigned long flags;
1642 struct timespec ts;
1643 s32 orig_tai, tai;
1644 int ret;
1645
1646 /* Validate the data before disabling interrupts */
1647 ret = ntp_validate_timex(txc);
1648 if (ret)
1649 return ret;
1650
1651 if (txc->modes & ADJ_SETOFFSET) {
1652 struct timespec delta;
1653 delta.tv_sec = txc->time.tv_sec;
1654 delta.tv_nsec = txc->time.tv_usec;
1655 if (!(txc->modes & ADJ_NANO))
1656 delta.tv_nsec *= 1000;
1657 ret = timekeeping_inject_offset(&delta);
1658 if (ret)
1659 return ret;
1660 }
1661
1662 getnstimeofday(&ts);
1663
1664 raw_spin_lock_irqsave(&timekeeper_lock, flags);
1665 write_seqcount_begin(&timekeeper_seq);
1666
1667 orig_tai = tai = tk->tai_offset;
1668 ret = __do_adjtimex(txc, &ts, &tai);
1669
1670 if (tai != orig_tai) {
1671 __timekeeping_set_tai_offset(tk, tai);
1672 clock_was_set_delayed();
1673 }
1674 write_seqcount_end(&timekeeper_seq);
1675 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1676
1677 return ret;
1678}
1679
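do_adjtimex() is the kernel half of the adjtimex(2) syscall; with the ADJ_SETOFFSET handling pulled up here, the NTP core below it no longer touches the timekeeper directly. For orientation, a minimal userspace caller of the interface this function backs; with modes == 0 the call only reads state, and the tai field reports the offset that __timekeeping_set_tai_offset() maintains (0 if it was never set):

#include <stdio.h>
#include <sys/timex.h>

int main(void)
{
	struct timex tx = { .modes = 0 };   /* read-only query */
	int state = adjtimex(&tx);

	if (state == -1) {
		perror("adjtimex");
		return 1;
	}
	printf("clock state: %d\n", state); /* TIME_OK, TIME_INS, ... */
	printf("freq offset: %ld\n", tx.freq); /* scaled ppm */
	printf("TAI offset : %d s\n", tx.tai);
	return 0;
}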
1680#ifdef CONFIG_NTP_PPS
1681/**
1682 * hardpps() - Accessor function to NTP __hardpps function
1683 */
1684void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
1685{
1686 unsigned long flags;
1687
1688 raw_spin_lock_irqsave(&timekeeper_lock, flags);
1689 write_seqcount_begin(&timekeeper_seq);
1690
1691 __hardpps(phase_ts, raw_ts);
1692
1693 write_seqcount_end(&timekeeper_seq);
1694 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1695}
1696EXPORT_SYMBOL(hardpps);
1697#endif
1698
1699/**
1466 * xtime_update() - advances the timekeeping infrastructure 1700 * xtime_update() - advances the timekeeping infrastructure
1467 * @ticks: number of ticks, that have elapsed since the last call. 1701 * @ticks: number of ticks, that have elapsed since the last call.
1468 * 1702 *
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index af5a7e9f164b..3bdf28323012 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -20,6 +20,13 @@
20 20
21#include <asm/uaccess.h> 21#include <asm/uaccess.h>
22 22
23
24struct timer_list_iter {
25 int cpu;
26 bool second_pass;
27 u64 now;
28};
29
23typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes); 30typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes);
24 31
25DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); 32DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
@@ -133,7 +140,6 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
133 struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); 140 struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
134 int i; 141 int i;
135 142
136 SEQ_printf(m, "\n");
137 SEQ_printf(m, "cpu: %d\n", cpu); 143 SEQ_printf(m, "cpu: %d\n", cpu);
138 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { 144 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
139 SEQ_printf(m, " clock %d:\n", i); 145 SEQ_printf(m, " clock %d:\n", i);
@@ -187,6 +193,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
187 193
188#undef P 194#undef P
189#undef P_ns 195#undef P_ns
196 SEQ_printf(m, "\n");
190} 197}
191 198
192#ifdef CONFIG_GENERIC_CLOCKEVENTS 199#ifdef CONFIG_GENERIC_CLOCKEVENTS
@@ -195,7 +202,6 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
195{ 202{
196 struct clock_event_device *dev = td->evtdev; 203 struct clock_event_device *dev = td->evtdev;
197 204
198 SEQ_printf(m, "\n");
199 SEQ_printf(m, "Tick Device: mode: %d\n", td->mode); 205 SEQ_printf(m, "Tick Device: mode: %d\n", td->mode);
200 if (cpu < 0) 206 if (cpu < 0)
201 SEQ_printf(m, "Broadcast device\n"); 207 SEQ_printf(m, "Broadcast device\n");
@@ -230,12 +236,11 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
230 print_name_offset(m, dev->event_handler); 236 print_name_offset(m, dev->event_handler);
231 SEQ_printf(m, "\n"); 237 SEQ_printf(m, "\n");
232 SEQ_printf(m, " retries: %lu\n", dev->retries); 238 SEQ_printf(m, " retries: %lu\n", dev->retries);
239 SEQ_printf(m, "\n");
233} 240}
234 241
235static void timer_list_show_tickdevices(struct seq_file *m) 242static void timer_list_show_tickdevices_header(struct seq_file *m)
236{ 243{
237 int cpu;
238
239#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST 244#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
240 print_tickdevice(m, tick_get_broadcast_device(), -1); 245 print_tickdevice(m, tick_get_broadcast_device(), -1);
241 SEQ_printf(m, "tick_broadcast_mask: %08lx\n", 246 SEQ_printf(m, "tick_broadcast_mask: %08lx\n",
@@ -246,47 +251,104 @@ static void timer_list_show_tickdevices(struct seq_file *m)
246#endif 251#endif
247 SEQ_printf(m, "\n"); 252 SEQ_printf(m, "\n");
248#endif 253#endif
249 for_each_online_cpu(cpu)
250 print_tickdevice(m, tick_get_device(cpu), cpu);
251 SEQ_printf(m, "\n");
252} 254}
253#else
254static void timer_list_show_tickdevices(struct seq_file *m) { }
255#endif 255#endif
256 256
257static inline void timer_list_header(struct seq_file *m, u64 now)
258{
259 SEQ_printf(m, "Timer List Version: v0.7\n");
260 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
261 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
262 SEQ_printf(m, "\n");
263}
264
257static int timer_list_show(struct seq_file *m, void *v) 265static int timer_list_show(struct seq_file *m, void *v)
258{ 266{
267 struct timer_list_iter *iter = v;
268 u64 now = ktime_to_ns(ktime_get());
269
270 if (iter->cpu == -1 && !iter->second_pass)
271 timer_list_header(m, now);
272 else if (!iter->second_pass)
273 print_cpu(m, iter->cpu, iter->now);
274#ifdef CONFIG_GENERIC_CLOCKEVENTS
275 else if (iter->cpu == -1 && iter->second_pass)
276 timer_list_show_tickdevices_header(m);
277 else
278 print_tickdevice(m, tick_get_device(iter->cpu), iter->cpu);
279#endif
280 return 0;
281}
282
283void sysrq_timer_list_show(void)
284{
259 u64 now = ktime_to_ns(ktime_get()); 285 u64 now = ktime_to_ns(ktime_get());
260 int cpu; 286 int cpu;
261 287
262 SEQ_printf(m, "Timer List Version: v0.7\n"); 288 timer_list_header(NULL, now);
263 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
264 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
265 289
266 for_each_online_cpu(cpu) 290 for_each_online_cpu(cpu)
267 print_cpu(m, cpu, now); 291 print_cpu(NULL, cpu, now);
268 292
269 SEQ_printf(m, "\n"); 293#ifdef CONFIG_GENERIC_CLOCKEVENTS
270 timer_list_show_tickdevices(m); 294 timer_list_show_tickdevices_header(NULL);
295 for_each_online_cpu(cpu)
296 print_tickdevice(NULL, tick_get_device(cpu), cpu);
297#endif
298 return;
299}
271 300
272 return 0; 301static void *timer_list_start(struct seq_file *file, loff_t *offset)
302{
303 struct timer_list_iter *iter = file->private;
304
305 if (!*offset) {
306 iter->cpu = -1;
307 iter->now = ktime_to_ns(ktime_get());
308 } else if (iter->cpu >= nr_cpu_ids) {
309#ifdef CONFIG_GENERIC_CLOCKEVENTS
310 if (!iter->second_pass) {
311 iter->cpu = -1;
312 iter->second_pass = true;
313 } else
314 return NULL;
315#else
316 return NULL;
317#endif
318 }
319 return iter;
273} 320}
274 321
275void sysrq_timer_list_show(void) 322static void *timer_list_next(struct seq_file *file, void *v, loff_t *offset)
323{
324 struct timer_list_iter *iter = file->private;
325 iter->cpu = cpumask_next(iter->cpu, cpu_online_mask);
326 ++*offset;
327 return timer_list_start(file, offset);
328}
329
330static void timer_list_stop(struct seq_file *seq, void *v)
276{ 331{
277 timer_list_show(NULL, NULL);
278} 332}
279 333
334static const struct seq_operations timer_list_sops = {
335 .start = timer_list_start,
336 .next = timer_list_next,
337 .stop = timer_list_stop,
338 .show = timer_list_show,
339};
340
280static int timer_list_open(struct inode *inode, struct file *filp) 341static int timer_list_open(struct inode *inode, struct file *filp)
281{ 342{
282 return single_open(filp, timer_list_show, NULL); 343 return seq_open_private(filp, &timer_list_sops,
344 sizeof(struct timer_list_iter));
283} 345}
284 346
285static const struct file_operations timer_list_fops = { 347static const struct file_operations timer_list_fops = {
286 .open = timer_list_open, 348 .open = timer_list_open,
287 .read = seq_read, 349 .read = seq_read,
288 .llseek = seq_lseek, 350 .llseek = seq_lseek,
289 .release = single_release, 351 .release = seq_release_private,
290}; 352};
291 353
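The conversion above replaces single_open(), which renders the whole file into one buffer and thus breaks past 4MB on 4096-CPU systems, with a proper seq_file iterator: seq_open_private() allocates a per-open struct timer_list_iter, and start/next/stop/show emit one record per ->show() call. The diff's iterator additionally makes two passes (cpu == -1 acts as a header sentinel, then second_pass re-walks the CPUs for tick devices). A single-pass skeleton of the same shape, as a hypothetical out-of-tree module using the 3.x-era proc_create()/file_operations interface seen elsewhere in this diff:

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

struct demo_iter { int pos; };

static void *demo_start(struct seq_file *m, loff_t *off)
{
	struct demo_iter *iter = m->private;

	iter->pos = *off;
	return iter->pos < 4 ? iter : NULL;   /* four records total */
}

static void *demo_next(struct seq_file *m, void *v, loff_t *off)
{
	struct demo_iter *iter = v;

	++*off;
	iter->pos = *off;
	return iter->pos < 4 ? iter : NULL;
}

static void demo_stop(struct seq_file *m, void *v) { }

static int demo_show(struct seq_file *m, void *v)
{
	struct demo_iter *iter = v;

	seq_printf(m, "record %d\n", iter->pos);
	return 0;
}

static const struct seq_operations demo_sops = {
	.start = demo_start,
	.next  = demo_next,
	.stop  = demo_stop,
	.show  = demo_show,
};

static int demo_open(struct inode *inode, struct file *filp)
{
	return seq_open_private(filp, &demo_sops, sizeof(struct demo_iter));
}

static const struct file_operations demo_fops = {
	.open    = demo_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};

static int __init demo_init(void)
{
	proc_create("seqfile_demo", 0444, NULL, &demo_fops);
	return 0;
}

static void __exit demo_exit(void)
{
	remove_proc_entry("seqfile_demo", NULL);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");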
292static int __init init_timer_list_procfs(void) 354static int __init init_timer_list_procfs(void)