-rw-r--r-- Documentation/devicetree/bindings/arm/armv7m_systick.txt | 26
-rw-r--r-- Documentation/devicetree/bindings/timer/nxp,lpc3220-timer.txt | 26
-rw-r--r-- Documentation/devicetree/bindings/timer/st,stm32-timer.txt | 22
-rw-r--r-- Kbuild | 34
-rw-r--r-- arch/s390/include/asm/timex.h | 5
-rw-r--r-- arch/s390/kernel/debug.c | 11
-rw-r--r-- arch/s390/kernel/time.c | 6
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_rapl.c | 5
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 5
-rw-r--r-- drivers/clocksource/Kconfig | 17
-rw-r--r-- drivers/clocksource/Makefile | 3
-rw-r--r-- drivers/clocksource/armv7m_systick.c | 79
-rw-r--r-- drivers/clocksource/asm9260_timer.c | 2
-rw-r--r-- drivers/clocksource/exynos_mct.c | 22
-rw-r--r-- drivers/clocksource/qcom-timer.c | 59
-rw-r--r-- drivers/clocksource/time-lpc32xx.c | 272
-rw-r--r-- drivers/clocksource/timer-integrator-ap.c | 2
-rw-r--r-- drivers/clocksource/timer-stm32.c | 184
-rw-r--r-- drivers/clocksource/timer-sun5i.c | 2
-rw-r--r-- drivers/power/reset/ltc2952-poweroff.c | 1
-rw-r--r-- fs/dcache.c | 16
-rw-r--r-- include/linux/alarmtimer.h | 4
-rw-r--r-- include/linux/clockchips.h | 37
-rw-r--r-- include/linux/clocksource.h | 1
-rw-r--r-- include/linux/hrtimer.h | 167
-rw-r--r-- include/linux/interrupt.h | 9
-rw-r--r-- include/linux/jiffies.h | 130
-rw-r--r-- include/linux/perf_event.h | 4
-rw-r--r-- include/linux/rcupdate.h | 6
-rw-r--r-- include/linux/rcutree.h | 2
-rw-r--r-- include/linux/sched.h | 6
-rw-r--r-- include/linux/sched/sysctl.h | 12
-rw-r--r-- include/linux/seqlock.h | 47
-rw-r--r-- include/linux/time64.h | 2
-rw-r--r-- include/linux/timekeeper_internal.h | 19
-rw-r--r-- include/linux/timekeeping.h | 2
-rw-r--r-- include/linux/timer.h | 63
-rw-r--r-- include/linux/timerqueue.h | 8
-rw-r--r-- include/trace/events/timer.h | 12
-rw-r--r-- kernel/events/core.c | 117
-rw-r--r-- kernel/futex.c | 5
-rw-r--r-- kernel/locking/rtmutex.c | 5
-rw-r--r-- kernel/rcu/tree_plugin.h | 18
-rw-r--r-- kernel/sched/core.c | 44
-rw-r--r-- kernel/sched/deadline.c | 12
-rw-r--r-- kernel/sched/debug.c | 2
-rw-r--r-- kernel/sched/fair.c | 76
-rw-r--r-- kernel/sched/rt.c | 22
-rw-r--r-- kernel/sched/sched.h | 7
-rw-r--r-- kernel/sysctl.c | 18
-rw-r--r-- kernel/time/Makefile | 17
-rw-r--r-- kernel/time/alarmtimer.c | 17
-rw-r--r-- kernel/time/clockevents.c | 67
-rw-r--r-- kernel/time/clocksource.c | 24
-rw-r--r-- kernel/time/hrtimer.c | 699
-rw-r--r-- kernel/time/ntp.c | 61
-rw-r--r-- kernel/time/ntp_internal.h | 1
-rw-r--r-- kernel/time/posix-timers.c | 17
-rw-r--r-- kernel/time/tick-broadcast-hrtimer.c | 18
-rw-r--r-- kernel/time/tick-broadcast.c | 97
-rw-r--r-- kernel/time/tick-common.c | 18
-rw-r--r-- kernel/time/tick-internal.h | 31
-rw-r--r-- kernel/time/tick-oneshot.c | 22
-rw-r--r-- kernel/time/tick-sched.c | 320
-rw-r--r-- kernel/time/tick-sched.h | 2
-rw-r--r-- kernel/time/time.c | 78
-rw-r--r-- kernel/time/timeconst.bc | 3
-rw-r--r-- kernel/time/timekeeping.c | 153
-rw-r--r-- kernel/time/timekeeping.h | 11
-rw-r--r-- kernel/time/timer.c | 362
-rw-r--r-- kernel/time/timer_list.c | 51
-rw-r--r-- kernel/time/timer_stats.c | 10
-rw-r--r-- lib/timerqueue.c | 10
-rw-r--r-- net/core/pktgen.c | 2
-rw-r--r-- net/sched/sch_api.c | 5
-rw-r--r-- sound/core/hrtimer.c | 9
-rw-r--r-- sound/drivers/pcsp/pcsp.c | 17
-rw-r--r-- tools/testing/selftests/timers/leap-a-day.c | 77
78 files changed, 2292 insertions, 1563 deletions
diff --git a/Documentation/devicetree/bindings/arm/armv7m_systick.txt b/Documentation/devicetree/bindings/arm/armv7m_systick.txt
new file mode 100644
index 000000000000..7cf4a24601eb
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/armv7m_systick.txt
@@ -0,0 +1,26 @@
1* ARMv7M System Timer
2
3ARMv7-M includes a system timer, known as SysTick. The current driver only
4implements the clocksource feature.
5
6Required properties:
7- compatible : Should be "arm,armv7m-systick"
8- reg : The address range of the timer
9
10Required clocking property, must be one of:
11- clocks : The input clock of the timer
12- clock-frequency : The rate in Hz of the ARM SysTick input clock
13
14Examples:
15
16systick: timer@e000e010 {
17 compatible = "arm,armv7m-systick";
18 reg = <0xe000e010 0x10>;
19 clocks = <&clk_systick>;
20};
21
22systick: timer@e000e010 {
23 compatible = "arm,armv7m-systick";
24 reg = <0xe000e010 0x10>;
25 clock-frequency = <90000000>;
26};
diff --git a/Documentation/devicetree/bindings/timer/nxp,lpc3220-timer.txt b/Documentation/devicetree/bindings/timer/nxp,lpc3220-timer.txt
new file mode 100644
index 000000000000..51b05a0e70d1
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/nxp,lpc3220-timer.txt
@@ -0,0 +1,26 @@
1* NXP LPC3220 timer
2
3The NXP LPC3220 timer is used on a wide range of NXP SoCs. This
4includes LPC32xx, LPC178x, LPC18xx and LPC43xx parts.
5
6Required properties:
7- compatible:
8 Should be "nxp,lpc3220-timer".
9- reg:
10 Address and length of the register set.
11- interrupts:
12 Reference to the timer interrupt
13- clocks:
14 Should contain a reference to timer clock.
15- clock-names:
16 Should contain "timerclk".
17
18Example:
19
20timer1: timer@40085000 {
21 compatible = "nxp,lpc3220-timer";
22 reg = <0x40085000 0x1000>;
23 interrupts = <13>;
24 clocks = <&ccu1 CLK_CPU_TIMER1>;
25 clock-names = "timerclk";
26};
diff --git a/Documentation/devicetree/bindings/timer/st,stm32-timer.txt b/Documentation/devicetree/bindings/timer/st,stm32-timer.txt
new file mode 100644
index 000000000000..8ef28e70d6e8
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/st,stm32-timer.txt
@@ -0,0 +1,22 @@
1* STMicroelectronics STM32 timer
2
3The STM32 MCU family has several general-purpose 16-bit and 32-bit timers.
4
5Required properties:
6- compatible : Should be "st,stm32-timer"
7- reg : Address and length of the register set
8- clocks : Reference to the timer input clock
9- interrupts : Reference to the timer interrupt
10
11Optional properties:
12- resets: Reference to a reset controller asserting the timer
13
14Example:
15
16timer5: timer@40000c00 {
17 compatible = "st,stm32-timer";
18 reg = <0x40000c00 0x400>;
19 interrupts = <50>;
20 resets = <&rrc 259>;
21 clocks = <&clk_pmtr1>;
22};
diff --git a/Kbuild b/Kbuild
index 6f0d82a9245d..df99a5f53beb 100644
--- a/Kbuild
+++ b/Kbuild
@@ -2,8 +2,9 @@
2# Kbuild for top-level directory of the kernel 2# Kbuild for top-level directory of the kernel
3# This file takes care of the following: 3# This file takes care of the following:
4# 1) Generate bounds.h 4# 1) Generate bounds.h
5# 2) Generate asm-offsets.h (may need bounds.h) 5# 2) Generate timeconst.h
6# 3) Check for missing system calls 6# 3) Generate asm-offsets.h (may need bounds.h and timeconst.h)
7# 4) Check for missing system calls
7 8
8# Default sed regexp - multiline due to syntax constraints 9# Default sed regexp - multiline due to syntax constraints
9define sed-y 10define sed-y
@@ -47,7 +48,26 @@ $(obj)/$(bounds-file): kernel/bounds.s FORCE
47 $(call filechk,offsets,__LINUX_BOUNDS_H__) 48 $(call filechk,offsets,__LINUX_BOUNDS_H__)
48 49
49##### 50#####
50# 2) Generate asm-offsets.h 51# 2) Generate timeconst.h
52
53timeconst-file := include/generated/timeconst.h
54
55#always += $(timeconst-file)
56targets += $(timeconst-file)
57
58quiet_cmd_gentimeconst = GEN $@
59define cmd_gentimeconst
60 (echo $(CONFIG_HZ) | bc -q $< ) > $@
61endef
62define filechk_gentimeconst
63 (echo $(CONFIG_HZ) | bc -q $< )
64endef
65
66$(obj)/$(timeconst-file): kernel/time/timeconst.bc FORCE
67 $(call filechk,gentimeconst)
68
69#####
70# 3) Generate asm-offsets.h
51# 71#
52 72
53offsets-file := include/generated/asm-offsets.h 73offsets-file := include/generated/asm-offsets.h
@@ -57,7 +77,7 @@ targets += arch/$(SRCARCH)/kernel/asm-offsets.s
57 77
58# We use internal kbuild rules to avoid the "is up to date" message from make 78# We use internal kbuild rules to avoid the "is up to date" message from make
59arch/$(SRCARCH)/kernel/asm-offsets.s: arch/$(SRCARCH)/kernel/asm-offsets.c \ 79arch/$(SRCARCH)/kernel/asm-offsets.s: arch/$(SRCARCH)/kernel/asm-offsets.c \
60 $(obj)/$(bounds-file) FORCE 80 $(obj)/$(timeconst-file) $(obj)/$(bounds-file) FORCE
61 $(Q)mkdir -p $(dir $@) 81 $(Q)mkdir -p $(dir $@)
62 $(call if_changed_dep,cc_s_c) 82 $(call if_changed_dep,cc_s_c)
63 83
@@ -65,7 +85,7 @@ $(obj)/$(offsets-file): arch/$(SRCARCH)/kernel/asm-offsets.s FORCE
65 $(call filechk,offsets,__ASM_OFFSETS_H__) 85 $(call filechk,offsets,__ASM_OFFSETS_H__)
66 86
67##### 87#####
68# 3) Check for missing system calls 88# 4) Check for missing system calls
69# 89#
70 90
71always += missing-syscalls 91always += missing-syscalls
@@ -77,5 +97,5 @@ quiet_cmd_syscalls = CALL $<
77missing-syscalls: scripts/checksyscalls.sh $(offsets-file) FORCE 97missing-syscalls: scripts/checksyscalls.sh $(offsets-file) FORCE
78 $(call cmd,syscalls) 98 $(call cmd,syscalls)
79 99
80# Keep these two files during make clean 100# Keep these three files during make clean
81no-clean-files := $(bounds-file) $(offsets-file) 101no-clean-files := $(bounds-file) $(offsets-file) $(timeconst-file)
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 98eb2a579223..dcb6312a0b91 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -10,6 +10,7 @@
10#define _ASM_S390_TIMEX_H 10#define _ASM_S390_TIMEX_H
11 11
12#include <asm/lowcore.h> 12#include <asm/lowcore.h>
13#include <linux/time64.h>
13 14
14/* The value of the TOD clock for 1.1.1970. */ 15/* The value of the TOD clock for 1.1.1970. */
15#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL 16#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL
@@ -108,10 +109,10 @@ int get_sync_clock(unsigned long long *clock);
108void init_cpu_timer(void); 109void init_cpu_timer(void);
109unsigned long long monotonic_clock(void); 110unsigned long long monotonic_clock(void);
110 111
111void tod_to_timeval(__u64, struct timespec *); 112void tod_to_timeval(__u64 todval, struct timespec64 *xt);
112 113
113static inline 114static inline
114void stck_to_timespec(unsigned long long stck, struct timespec *ts) 115void stck_to_timespec64(unsigned long long stck, struct timespec64 *ts)
115{ 116{
116 tod_to_timeval(stck - TOD_UNIX_EPOCH, ts); 117 tod_to_timeval(stck - TOD_UNIX_EPOCH, ts);
117} 118}
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index c1f21aca76e7..6fca0e46464e 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -1457,23 +1457,24 @@ int
1457debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, 1457debug_dflt_header_fn(debug_info_t * id, struct debug_view *view,
1458 int area, debug_entry_t * entry, char *out_buf) 1458 int area, debug_entry_t * entry, char *out_buf)
1459{ 1459{
1460 struct timespec time_spec; 1460 struct timespec64 time_spec;
1461 char *except_str; 1461 char *except_str;
1462 unsigned long caller; 1462 unsigned long caller;
1463 int rc = 0; 1463 int rc = 0;
1464 unsigned int level; 1464 unsigned int level;
1465 1465
1466 level = entry->id.fields.level; 1466 level = entry->id.fields.level;
1467 stck_to_timespec(entry->id.stck, &time_spec); 1467 stck_to_timespec64(entry->id.stck, &time_spec);
1468 1468
1469 if (entry->id.fields.exception) 1469 if (entry->id.fields.exception)
1470 except_str = "*"; 1470 except_str = "*";
1471 else 1471 else
1472 except_str = "-"; 1472 except_str = "-";
1473 caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN; 1473 caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN;
1474 rc += sprintf(out_buf, "%02i %011lu:%06lu %1u %1s %02i %p ", 1474 rc += sprintf(out_buf, "%02i %011lld:%06lu %1u %1s %02i %p ",
1475 area, time_spec.tv_sec, time_spec.tv_nsec / 1000, level, 1475 area, (long long)time_spec.tv_sec,
1476 except_str, entry->id.fields.cpuid, (void *) caller); 1476 time_spec.tv_nsec / 1000, level, except_str,
1477 entry->id.fields.cpuid, (void *)caller);
1477 return rc; 1478 return rc;
1478} 1479}
1479EXPORT_SYMBOL(debug_dflt_header_fn); 1480EXPORT_SYMBOL(debug_dflt_header_fn);
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 170ddd2018b3..9e733d965e08 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -76,7 +76,7 @@ unsigned long long monotonic_clock(void)
76} 76}
77EXPORT_SYMBOL(monotonic_clock); 77EXPORT_SYMBOL(monotonic_clock);
78 78
79void tod_to_timeval(__u64 todval, struct timespec *xt) 79void tod_to_timeval(__u64 todval, struct timespec64 *xt)
80{ 80{
81 unsigned long long sec; 81 unsigned long long sec;
82 82
@@ -181,12 +181,12 @@ static void timing_alert_interrupt(struct ext_code ext_code,
181static void etr_reset(void); 181static void etr_reset(void);
182static void stp_reset(void); 182static void stp_reset(void);
183 183
184void read_persistent_clock(struct timespec *ts) 184void read_persistent_clock64(struct timespec64 *ts)
185{ 185{
186 tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts); 186 tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts);
187} 187}
188 188
189void read_boot_clock(struct timespec *ts) 189void read_boot_clock64(struct timespec64 *ts)
190{ 190{
191 tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts); 191 tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts);
192} 192}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 358c54ad20d4..5cbd4e64feb5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -204,9 +204,8 @@ again:
204 204
205static void rapl_start_hrtimer(struct rapl_pmu *pmu) 205static void rapl_start_hrtimer(struct rapl_pmu *pmu)
206{ 206{
207 __hrtimer_start_range_ns(&pmu->hrtimer, 207 hrtimer_start(&pmu->hrtimer, pmu->timer_interval,
208 pmu->timer_interval, 0, 208 HRTIMER_MODE_REL_PINNED);
209 HRTIMER_MODE_REL_PINNED, 0);
210} 209}
211 210
212static void rapl_stop_hrtimer(struct rapl_pmu *pmu) 211static void rapl_stop_hrtimer(struct rapl_pmu *pmu)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 7c1de1610178..21b5e38c921b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -233,9 +233,8 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
233 233
234void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) 234void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
235{ 235{
236 __hrtimer_start_range_ns(&box->hrtimer, 236 hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
237 ns_to_ktime(box->hrtimer_duration), 0, 237 HRTIMER_MODE_REL_PINNED);
238 HRTIMER_MODE_REL_PINNED, 0);
239} 238}
240 239
241void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) 240void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
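Both conversions above move to the simplified hrtimer_start() API (see the include/linux/hrtimer.h hunk later in this patch), which takes only the timer, the expiry and the mode, and no longer returns a value or accepts a delta/wakeup argument. A minimal sketch of the same pattern, not taken from the patch; the poll_timer/poll_handler names and the 10 ms interval are illustrative only:

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer poll_timer;
static ktime_t poll_interval;

/* Self-rearming callback, similar in spirit to the PMU polling timers above. */
static enum hrtimer_restart poll_handler(struct hrtimer *hrtimer)
{
	/* ... read hardware counters here ... */
	hrtimer_forward_now(hrtimer, poll_interval);
	return HRTIMER_RESTART;
}

static void poll_start(void)
{
	poll_interval = ms_to_ktime(10);
	hrtimer_init(&poll_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	poll_timer.function = poll_handler;
	/* New-style call: timer, expiry, mode; void return. */
	hrtimer_start(&poll_timer, poll_interval, HRTIMER_MODE_REL_PINNED);
}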
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 51d7865fdddb..32164ba3d36a 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -106,6 +106,16 @@ config CLKSRC_EFM32
106 Support to use the timers of EFM32 SoCs as clock source and clock 106 Support to use the timers of EFM32 SoCs as clock source and clock
107 event device. 107 event device.
108 108
109config CLKSRC_LPC32XX
110 bool
111 select CLKSRC_MMIO
112 select CLKSRC_OF
113
114config CLKSRC_STM32
115 bool "Clocksource for STM32 SoCs" if !ARCH_STM32
116 depends on OF && ARM && (ARCH_STM32 || COMPILE_TEST)
117 select CLKSRC_MMIO
118
109config ARM_ARCH_TIMER 119config ARM_ARCH_TIMER
110 bool 120 bool
111 select CLKSRC_OF if OF 121 select CLKSRC_OF if OF
@@ -139,6 +149,13 @@ config CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK
139 help 149 help
140 Use ARM global timer clock source as sched_clock 150 Use ARM global timer clock source as sched_clock
141 151
152config ARMV7M_SYSTICK
153 bool
154 select CLKSRC_OF if OF
155 select CLKSRC_MMIO
156 help
 157 This option enables support for the ARMv7M system timer unit
158
142config ATMEL_PIT 159config ATMEL_PIT
143 select CLKSRC_OF if OF 160 select CLKSRC_OF if OF
144 def_bool SOC_AT91SAM9 || SOC_SAMA5 161 def_bool SOC_AT91SAM9 || SOC_SAMA5
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 5b85f6adb258..1831a588b988 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -36,7 +36,9 @@ obj-$(CONFIG_ARCH_NSPIRE) += zevio-timer.o
36obj-$(CONFIG_ARCH_BCM_MOBILE) += bcm_kona_timer.o 36obj-$(CONFIG_ARCH_BCM_MOBILE) += bcm_kona_timer.o
37obj-$(CONFIG_CADENCE_TTC_TIMER) += cadence_ttc_timer.o 37obj-$(CONFIG_CADENCE_TTC_TIMER) += cadence_ttc_timer.o
38obj-$(CONFIG_CLKSRC_EFM32) += time-efm32.o 38obj-$(CONFIG_CLKSRC_EFM32) += time-efm32.o
39obj-$(CONFIG_CLKSRC_STM32) += timer-stm32.o
39obj-$(CONFIG_CLKSRC_EXYNOS_MCT) += exynos_mct.o 40obj-$(CONFIG_CLKSRC_EXYNOS_MCT) += exynos_mct.o
41obj-$(CONFIG_CLKSRC_LPC32XX) += time-lpc32xx.o
40obj-$(CONFIG_CLKSRC_SAMSUNG_PWM) += samsung_pwm_timer.o 42obj-$(CONFIG_CLKSRC_SAMSUNG_PWM) += samsung_pwm_timer.o
41obj-$(CONFIG_FSL_FTM_TIMER) += fsl_ftm_timer.o 43obj-$(CONFIG_FSL_FTM_TIMER) += fsl_ftm_timer.o
42obj-$(CONFIG_VF_PIT_TIMER) += vf_pit_timer.o 44obj-$(CONFIG_VF_PIT_TIMER) += vf_pit_timer.o
@@ -45,6 +47,7 @@ obj-$(CONFIG_MTK_TIMER) += mtk_timer.o
45 47
46obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o 48obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o
47obj-$(CONFIG_ARM_GLOBAL_TIMER) += arm_global_timer.o 49obj-$(CONFIG_ARM_GLOBAL_TIMER) += arm_global_timer.o
50obj-$(CONFIG_ARMV7M_SYSTICK) += armv7m_systick.o
48obj-$(CONFIG_CLKSRC_METAG_GENERIC) += metag_generic.o 51obj-$(CONFIG_CLKSRC_METAG_GENERIC) += metag_generic.o
49obj-$(CONFIG_ARCH_HAS_TICK_BROADCAST) += dummy_timer.o 52obj-$(CONFIG_ARCH_HAS_TICK_BROADCAST) += dummy_timer.o
50obj-$(CONFIG_ARCH_KEYSTONE) += timer-keystone.o 53obj-$(CONFIG_ARCH_KEYSTONE) += timer-keystone.o
diff --git a/drivers/clocksource/armv7m_systick.c b/drivers/clocksource/armv7m_systick.c
new file mode 100644
index 000000000000..addfd2c64f54
--- /dev/null
+++ b/drivers/clocksource/armv7m_systick.c
@@ -0,0 +1,79 @@
1/*
2 * Copyright (C) Maxime Coquelin 2015
3 * Author: Maxime Coquelin <mcoquelin.stm32@gmail.com>
4 * License terms: GNU General Public License (GPL), version 2
5 */
6
7#include <linux/kernel.h>
8#include <linux/clocksource.h>
9#include <linux/clockchips.h>
10#include <linux/of.h>
11#include <linux/of_address.h>
12#include <linux/clk.h>
13#include <linux/bitops.h>
14
15#define SYST_CSR 0x00
16#define SYST_RVR 0x04
17#define SYST_CVR 0x08
18#define SYST_CALIB 0x0c
19
20#define SYST_CSR_ENABLE BIT(0)
21
22#define SYSTICK_LOAD_RELOAD_MASK 0x00FFFFFF
23
24static void __init system_timer_of_register(struct device_node *np)
25{
26 struct clk *clk = NULL;
27 void __iomem *base;
28 u32 rate;
29 int ret;
30
31 base = of_iomap(np, 0);
32 if (!base) {
33 pr_warn("system-timer: invalid base address\n");
34 return;
35 }
36
37 ret = of_property_read_u32(np, "clock-frequency", &rate);
38 if (ret) {
39 clk = of_clk_get(np, 0);
40 if (IS_ERR(clk))
41 goto out_unmap;
42
43 ret = clk_prepare_enable(clk);
44 if (ret)
45 goto out_clk_put;
46
47 rate = clk_get_rate(clk);
48 if (!rate)
49 goto out_clk_disable;
50 }
51
52 writel_relaxed(SYSTICK_LOAD_RELOAD_MASK, base + SYST_RVR);
53 writel_relaxed(SYST_CSR_ENABLE, base + SYST_CSR);
54
55 ret = clocksource_mmio_init(base + SYST_CVR, "arm_system_timer", rate,
56 200, 24, clocksource_mmio_readl_down);
57 if (ret) {
58 pr_err("failed to init clocksource (%d)\n", ret);
59 if (clk)
60 goto out_clk_disable;
61 else
62 goto out_unmap;
63 }
64
65 pr_info("ARM System timer initialized as clocksource\n");
66
67 return;
68
69out_clk_disable:
70 clk_disable_unprepare(clk);
71out_clk_put:
72 clk_put(clk);
73out_unmap:
74 iounmap(base);
75 pr_warn("ARM System timer register failed (%d)\n", ret);
76}
77
78CLOCKSOURCE_OF_DECLARE(arm_systick, "arm,armv7m-systick",
79 system_timer_of_register);
diff --git a/drivers/clocksource/asm9260_timer.c b/drivers/clocksource/asm9260_timer.c
index 2c9c993727c8..4c2ba59897e8 100644
--- a/drivers/clocksource/asm9260_timer.c
+++ b/drivers/clocksource/asm9260_timer.c
@@ -178,7 +178,7 @@ static void __init asm9260_timer_init(struct device_node *np)
178 unsigned long rate; 178 unsigned long rate;
179 179
180 priv.base = of_io_request_and_map(np, 0, np->name); 180 priv.base = of_io_request_and_map(np, 0, np->name);
181 if (!priv.base) 181 if (IS_ERR(priv.base))
182 panic("%s: unable to map resource", np->name); 182 panic("%s: unable to map resource", np->name);
183 183
184 clk = of_clk_get(np, 0); 184 clk = of_clk_get(np, 0);
diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c
index 83564c9cfdbe..935b05936dbd 100644
--- a/drivers/clocksource/exynos_mct.c
+++ b/drivers/clocksource/exynos_mct.c
@@ -209,7 +209,7 @@ static void exynos4_frc_resume(struct clocksource *cs)
209 exynos4_mct_frc_start(); 209 exynos4_mct_frc_start();
210} 210}
211 211
212struct clocksource mct_frc = { 212static struct clocksource mct_frc = {
213 .name = "mct-frc", 213 .name = "mct-frc",
214 .rating = 400, 214 .rating = 400,
215 .read = exynos4_frc_read, 215 .read = exynos4_frc_read,
@@ -413,7 +413,7 @@ static inline void exynos4_tick_set_mode(enum clock_event_mode mode,
413 } 413 }
414} 414}
415 415
416static int exynos4_mct_tick_clear(struct mct_clock_event_device *mevt) 416static void exynos4_mct_tick_clear(struct mct_clock_event_device *mevt)
417{ 417{
418 struct clock_event_device *evt = &mevt->evt; 418 struct clock_event_device *evt = &mevt->evt;
419 419
@@ -426,12 +426,8 @@ static int exynos4_mct_tick_clear(struct mct_clock_event_device *mevt)
426 exynos4_mct_tick_stop(mevt); 426 exynos4_mct_tick_stop(mevt);
427 427
428 /* Clear the MCT tick interrupt */ 428 /* Clear the MCT tick interrupt */
429 if (readl_relaxed(reg_base + mevt->base + MCT_L_INT_CSTAT_OFFSET) & 1) { 429 if (readl_relaxed(reg_base + mevt->base + MCT_L_INT_CSTAT_OFFSET) & 1)
430 exynos4_mct_write(0x1, mevt->base + MCT_L_INT_CSTAT_OFFSET); 430 exynos4_mct_write(0x1, mevt->base + MCT_L_INT_CSTAT_OFFSET);
431 return 1;
432 } else {
433 return 0;
434 }
435} 431}
436 432
437static irqreturn_t exynos4_mct_tick_isr(int irq, void *dev_id) 433static irqreturn_t exynos4_mct_tick_isr(int irq, void *dev_id)
@@ -564,18 +560,6 @@ out_irq:
564 free_percpu_irq(mct_irqs[MCT_L0_IRQ], &percpu_mct_tick); 560 free_percpu_irq(mct_irqs[MCT_L0_IRQ], &percpu_mct_tick);
565} 561}
566 562
567void __init mct_init(void __iomem *base, int irq_g0, int irq_l0, int irq_l1)
568{
569 mct_irqs[MCT_G0_IRQ] = irq_g0;
570 mct_irqs[MCT_L0_IRQ] = irq_l0;
571 mct_irqs[MCT_L1_IRQ] = irq_l1;
572 mct_int_type = MCT_INT_SPI;
573
574 exynos4_timer_resources(NULL, base);
575 exynos4_clocksource_init();
576 exynos4_clockevent_init();
577}
578
579static void __init mct_init_dt(struct device_node *np, unsigned int int_type) 563static void __init mct_init_dt(struct device_node *np, unsigned int int_type)
580{ 564{
581 u32 nr_irqs, i; 565 u32 nr_irqs, i;
diff --git a/drivers/clocksource/qcom-timer.c b/drivers/clocksource/qcom-timer.c
index 098c542e5c53..cba2d015564c 100644
--- a/drivers/clocksource/qcom-timer.c
+++ b/drivers/clocksource/qcom-timer.c
@@ -40,8 +40,6 @@
40 40
41#define GPT_HZ 32768 41#define GPT_HZ 32768
42 42
43#define MSM_DGT_SHIFT 5
44
45static void __iomem *event_base; 43static void __iomem *event_base;
46static void __iomem *sts_base; 44static void __iomem *sts_base;
47 45
@@ -232,7 +230,6 @@ err:
232 register_current_timer_delay(&msm_delay_timer); 230 register_current_timer_delay(&msm_delay_timer);
233} 231}
234 232
235#ifdef CONFIG_ARCH_QCOM
236static void __init msm_dt_timer_init(struct device_node *np) 233static void __init msm_dt_timer_init(struct device_node *np)
237{ 234{
238 u32 freq; 235 u32 freq;
@@ -285,59 +282,3 @@ static void __init msm_dt_timer_init(struct device_node *np)
285} 282}
286CLOCKSOURCE_OF_DECLARE(kpss_timer, "qcom,kpss-timer", msm_dt_timer_init); 283CLOCKSOURCE_OF_DECLARE(kpss_timer, "qcom,kpss-timer", msm_dt_timer_init);
287CLOCKSOURCE_OF_DECLARE(scss_timer, "qcom,scss-timer", msm_dt_timer_init); 284CLOCKSOURCE_OF_DECLARE(scss_timer, "qcom,scss-timer", msm_dt_timer_init);
288#else
289
290static int __init msm_timer_map(phys_addr_t addr, u32 event, u32 source,
291 u32 sts)
292{
293 void __iomem *base;
294
295 base = ioremap(addr, SZ_256);
296 if (!base) {
297 pr_err("Failed to map timer base\n");
298 return -ENOMEM;
299 }
300 event_base = base + event;
301 source_base = base + source;
302 if (sts)
303 sts_base = base + sts;
304
305 return 0;
306}
307
308static notrace cycle_t msm_read_timer_count_shift(struct clocksource *cs)
309{
310 /*
311 * Shift timer count down by a constant due to unreliable lower bits
312 * on some targets.
313 */
314 return msm_read_timer_count(cs) >> MSM_DGT_SHIFT;
315}
316
317void __init msm7x01_timer_init(void)
318{
319 struct clocksource *cs = &msm_clocksource;
320
321 if (msm_timer_map(0xc0100000, 0x0, 0x10, 0x0))
322 return;
323 cs->read = msm_read_timer_count_shift;
324 cs->mask = CLOCKSOURCE_MASK((32 - MSM_DGT_SHIFT));
325 /* 600 KHz */
326 msm_timer_init(19200000 >> MSM_DGT_SHIFT, 32 - MSM_DGT_SHIFT, 7,
327 false);
328}
329
330void __init msm7x30_timer_init(void)
331{
332 if (msm_timer_map(0xc0100000, 0x4, 0x24, 0x80))
333 return;
334 msm_timer_init(24576000 / 4, 32, 1, false);
335}
336
337void __init qsd8x50_timer_init(void)
338{
339 if (msm_timer_map(0xAC100000, 0x0, 0x10, 0x34))
340 return;
341 msm_timer_init(19200000 / 4, 32, 7, false);
342}
343#endif
diff --git a/drivers/clocksource/time-lpc32xx.c b/drivers/clocksource/time-lpc32xx.c
new file mode 100644
index 000000000000..a1c06a2bc77c
--- /dev/null
+++ b/drivers/clocksource/time-lpc32xx.c
@@ -0,0 +1,272 @@
1/*
2 * Clocksource driver for NXP LPC32xx/18xx/43xx timer
3 *
4 * Copyright (C) 2015 Joachim Eastwood <manabian@gmail.com>
5 *
6 * Based on:
7 * time-efm32 Copyright (C) 2013 Pengutronix
8 * mach-lpc32xx/timer.c Copyright (C) 2009 - 2010 NXP Semiconductors
9 *
10 * This file is licensed under the terms of the GNU General Public
11 * License version 2. This program is licensed "as is" without any
12 * warranty of any kind, whether express or implied.
13 *
14 */
15
16#define pr_fmt(fmt) "%s: " fmt, __func__
17
18#include <linux/clk.h>
19#include <linux/clockchips.h>
20#include <linux/clocksource.h>
21#include <linux/interrupt.h>
22#include <linux/irq.h>
23#include <linux/kernel.h>
24#include <linux/of.h>
25#include <linux/of_address.h>
26#include <linux/of_irq.h>
27#include <linux/sched_clock.h>
28
29#define LPC32XX_TIMER_IR 0x000
30#define LPC32XX_TIMER_IR_MR0INT BIT(0)
31#define LPC32XX_TIMER_TCR 0x004
32#define LPC32XX_TIMER_TCR_CEN BIT(0)
33#define LPC32XX_TIMER_TCR_CRST BIT(1)
34#define LPC32XX_TIMER_TC 0x008
35#define LPC32XX_TIMER_PR 0x00c
36#define LPC32XX_TIMER_MCR 0x014
37#define LPC32XX_TIMER_MCR_MR0I BIT(0)
38#define LPC32XX_TIMER_MCR_MR0R BIT(1)
39#define LPC32XX_TIMER_MCR_MR0S BIT(2)
40#define LPC32XX_TIMER_MR0 0x018
41#define LPC32XX_TIMER_CTCR 0x070
42
43struct lpc32xx_clock_event_ddata {
44 struct clock_event_device evtdev;
45 void __iomem *base;
46};
47
48/* Needed for the sched clock */
49static void __iomem *clocksource_timer_counter;
50
51static u64 notrace lpc32xx_read_sched_clock(void)
52{
53 return readl(clocksource_timer_counter);
54}
55
56static int lpc32xx_clkevt_next_event(unsigned long delta,
57 struct clock_event_device *evtdev)
58{
59 struct lpc32xx_clock_event_ddata *ddata =
60 container_of(evtdev, struct lpc32xx_clock_event_ddata, evtdev);
61
62 /*
63 * Place timer in reset and program the delta in the prescale
64 * register (PR). When the prescale counter matches the value
65 * in PR the counter register is incremented and the compare
66 * match will trigger. After setup the timer is released from
67 * reset and enabled.
68 */
69 writel_relaxed(LPC32XX_TIMER_TCR_CRST, ddata->base + LPC32XX_TIMER_TCR);
70 writel_relaxed(delta, ddata->base + LPC32XX_TIMER_PR);
71 writel_relaxed(LPC32XX_TIMER_TCR_CEN, ddata->base + LPC32XX_TIMER_TCR);
72
73 return 0;
74}
75
76static int lpc32xx_clkevt_shutdown(struct clock_event_device *evtdev)
77{
78 struct lpc32xx_clock_event_ddata *ddata =
79 container_of(evtdev, struct lpc32xx_clock_event_ddata, evtdev);
80
81 /* Disable the timer */
82 writel_relaxed(0, ddata->base + LPC32XX_TIMER_TCR);
83
84 return 0;
85}
86
87static int lpc32xx_clkevt_oneshot(struct clock_event_device *evtdev)
88{
89 /*
90 * When using oneshot, we must also disable the timer
91 * to wait for the first call to set_next_event().
92 */
93 return lpc32xx_clkevt_shutdown(evtdev);
94}
95
96static irqreturn_t lpc32xx_clock_event_handler(int irq, void *dev_id)
97{
98 struct lpc32xx_clock_event_ddata *ddata = dev_id;
99
100 /* Clear match on channel 0 */
101 writel_relaxed(LPC32XX_TIMER_IR_MR0INT, ddata->base + LPC32XX_TIMER_IR);
102
103 ddata->evtdev.event_handler(&ddata->evtdev);
104
105 return IRQ_HANDLED;
106}
107
108static struct lpc32xx_clock_event_ddata lpc32xx_clk_event_ddata = {
109 .evtdev = {
110 .name = "lpc3220 clockevent",
111 .features = CLOCK_EVT_FEAT_ONESHOT,
112 .rating = 300,
113 .set_next_event = lpc32xx_clkevt_next_event,
114 .set_state_shutdown = lpc32xx_clkevt_shutdown,
115 .set_state_oneshot = lpc32xx_clkevt_oneshot,
116 },
117};
118
119static int __init lpc32xx_clocksource_init(struct device_node *np)
120{
121 void __iomem *base;
122 unsigned long rate;
123 struct clk *clk;
124 int ret;
125
126 clk = of_clk_get_by_name(np, "timerclk");
127 if (IS_ERR(clk)) {
128 pr_err("clock get failed (%lu)\n", PTR_ERR(clk));
129 return PTR_ERR(clk);
130 }
131
132 ret = clk_prepare_enable(clk);
133 if (ret) {
134 pr_err("clock enable failed (%d)\n", ret);
135 goto err_clk_enable;
136 }
137
138 base = of_iomap(np, 0);
139 if (!base) {
140 pr_err("unable to map registers\n");
141 ret = -EADDRNOTAVAIL;
142 goto err_iomap;
143 }
144
145 /*
146 * Disable and reset timer then set it to free running timer
147 * mode (CTCR) with no prescaler (PR) or match operations (MCR).
148 * After setup the timer is released from reset and enabled.
149 */
150 writel_relaxed(LPC32XX_TIMER_TCR_CRST, base + LPC32XX_TIMER_TCR);
151 writel_relaxed(0, base + LPC32XX_TIMER_PR);
152 writel_relaxed(0, base + LPC32XX_TIMER_MCR);
153 writel_relaxed(0, base + LPC32XX_TIMER_CTCR);
154 writel_relaxed(LPC32XX_TIMER_TCR_CEN, base + LPC32XX_TIMER_TCR);
155
156 rate = clk_get_rate(clk);
157 ret = clocksource_mmio_init(base + LPC32XX_TIMER_TC, "lpc3220 timer",
158 rate, 300, 32, clocksource_mmio_readl_up);
159 if (ret) {
160 pr_err("failed to init clocksource (%d)\n", ret);
161 goto err_clocksource_init;
162 }
163
164 clocksource_timer_counter = base + LPC32XX_TIMER_TC;
165 sched_clock_register(lpc32xx_read_sched_clock, 32, rate);
166
167 return 0;
168
169err_clocksource_init:
170 iounmap(base);
171err_iomap:
172 clk_disable_unprepare(clk);
173err_clk_enable:
174 clk_put(clk);
175 return ret;
176}
177
178static int __init lpc32xx_clockevent_init(struct device_node *np)
179{
180 void __iomem *base;
181 unsigned long rate;
182 struct clk *clk;
183 int ret, irq;
184
185 clk = of_clk_get_by_name(np, "timerclk");
186 if (IS_ERR(clk)) {
187 pr_err("clock get failed (%lu)\n", PTR_ERR(clk));
188 return PTR_ERR(clk);
189 }
190
191 ret = clk_prepare_enable(clk);
192 if (ret) {
193 pr_err("clock enable failed (%d)\n", ret);
194 goto err_clk_enable;
195 }
196
197 base = of_iomap(np, 0);
198 if (!base) {
199 pr_err("unable to map registers\n");
200 ret = -EADDRNOTAVAIL;
201 goto err_iomap;
202 }
203
204 irq = irq_of_parse_and_map(np, 0);
205 if (!irq) {
206 pr_err("get irq failed\n");
207 ret = -ENOENT;
208 goto err_irq;
209 }
210
211 /*
212 * Disable timer and clear any pending interrupt (IR) on match
213 * channel 0 (MR0). Configure a compare match value of 1 on MR0
214 * and enable interrupt, reset on match and stop on match (MCR).
215 */
216 writel_relaxed(0, base + LPC32XX_TIMER_TCR);
217 writel_relaxed(0, base + LPC32XX_TIMER_CTCR);
218 writel_relaxed(LPC32XX_TIMER_IR_MR0INT, base + LPC32XX_TIMER_IR);
219 writel_relaxed(1, base + LPC32XX_TIMER_MR0);
220 writel_relaxed(LPC32XX_TIMER_MCR_MR0I | LPC32XX_TIMER_MCR_MR0R |
221 LPC32XX_TIMER_MCR_MR0S, base + LPC32XX_TIMER_MCR);
222
223 rate = clk_get_rate(clk);
224 lpc32xx_clk_event_ddata.base = base;
225 clockevents_config_and_register(&lpc32xx_clk_event_ddata.evtdev,
226 rate, 1, -1);
227
228 ret = request_irq(irq, lpc32xx_clock_event_handler,
229 IRQF_TIMER | IRQF_IRQPOLL, "lpc3220 clockevent",
230 &lpc32xx_clk_event_ddata);
231 if (ret) {
232 pr_err("request irq failed\n");
233 goto err_irq;
234 }
235
236 return 0;
237
238err_irq:
239 iounmap(base);
240err_iomap:
241 clk_disable_unprepare(clk);
242err_clk_enable:
243 clk_put(clk);
244 return ret;
245}
246
247/*
 248 * This function ensures that we end up with exactly one clocksource and one
 249 * clock_event_device.
250 */
251static void __init lpc32xx_timer_init(struct device_node *np)
252{
253 static int has_clocksource, has_clockevent;
254 int ret;
255
256 if (!has_clocksource) {
257 ret = lpc32xx_clocksource_init(np);
258 if (!ret) {
259 has_clocksource = 1;
260 return;
261 }
262 }
263
264 if (!has_clockevent) {
265 ret = lpc32xx_clockevent_init(np);
266 if (!ret) {
267 has_clockevent = 1;
268 return;
269 }
270 }
271}
272CLOCKSOURCE_OF_DECLARE(lpc32xx_timer, "nxp,lpc3220-timer", lpc32xx_timer_init);
diff --git a/drivers/clocksource/timer-integrator-ap.c b/drivers/clocksource/timer-integrator-ap.c
index b9efd30513d5..c97d1980c0f8 100644
--- a/drivers/clocksource/timer-integrator-ap.c
+++ b/drivers/clocksource/timer-integrator-ap.c
@@ -166,7 +166,7 @@ static void __init integrator_ap_timer_init_of(struct device_node *node)
166 struct device_node *sec_node; 166 struct device_node *sec_node;
167 167
168 base = of_io_request_and_map(node, 0, "integrator-timer"); 168 base = of_io_request_and_map(node, 0, "integrator-timer");
169 if (!base) 169 if (IS_ERR(base))
170 return; 170 return;
171 171
172 clk = of_clk_get(node, 0); 172 clk = of_clk_get(node, 0);
diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c
new file mode 100644
index 000000000000..a97e8b50701c
--- /dev/null
+++ b/drivers/clocksource/timer-stm32.c
@@ -0,0 +1,184 @@
1/*
2 * Copyright (C) Maxime Coquelin 2015
3 * Author: Maxime Coquelin <mcoquelin.stm32@gmail.com>
4 * License terms: GNU General Public License (GPL), version 2
5 *
6 * Inspired by time-efm32.c from Uwe Kleine-Koenig
7 */
8
9#include <linux/kernel.h>
10#include <linux/clocksource.h>
11#include <linux/clockchips.h>
12#include <linux/irq.h>
13#include <linux/interrupt.h>
14#include <linux/of.h>
15#include <linux/of_address.h>
16#include <linux/of_irq.h>
17#include <linux/clk.h>
18#include <linux/reset.h>
19
20#define TIM_CR1 0x00
21#define TIM_DIER 0x0c
22#define TIM_SR 0x10
23#define TIM_EGR 0x14
24#define TIM_PSC 0x28
25#define TIM_ARR 0x2c
26
27#define TIM_CR1_CEN BIT(0)
28#define TIM_CR1_OPM BIT(3)
29#define TIM_CR1_ARPE BIT(7)
30
31#define TIM_DIER_UIE BIT(0)
32
33#define TIM_SR_UIF BIT(0)
34
35#define TIM_EGR_UG BIT(0)
36
37struct stm32_clock_event_ddata {
38 struct clock_event_device evtdev;
39 unsigned periodic_top;
40 void __iomem *base;
41};
42
43static void stm32_clock_event_set_mode(enum clock_event_mode mode,
44 struct clock_event_device *evtdev)
45{
46 struct stm32_clock_event_ddata *data =
47 container_of(evtdev, struct stm32_clock_event_ddata, evtdev);
48 void *base = data->base;
49
50 switch (mode) {
51 case CLOCK_EVT_MODE_PERIODIC:
52 writel_relaxed(data->periodic_top, base + TIM_ARR);
53 writel_relaxed(TIM_CR1_ARPE | TIM_CR1_CEN, base + TIM_CR1);
54 break;
55
56 case CLOCK_EVT_MODE_ONESHOT:
57 default:
58 writel_relaxed(0, base + TIM_CR1);
59 break;
60 }
61}
62
63static int stm32_clock_event_set_next_event(unsigned long evt,
64 struct clock_event_device *evtdev)
65{
66 struct stm32_clock_event_ddata *data =
67 container_of(evtdev, struct stm32_clock_event_ddata, evtdev);
68
69 writel_relaxed(evt, data->base + TIM_ARR);
70 writel_relaxed(TIM_CR1_ARPE | TIM_CR1_OPM | TIM_CR1_CEN,
71 data->base + TIM_CR1);
72
73 return 0;
74}
75
76static irqreturn_t stm32_clock_event_handler(int irq, void *dev_id)
77{
78 struct stm32_clock_event_ddata *data = dev_id;
79
80 writel_relaxed(0, data->base + TIM_SR);
81
82 data->evtdev.event_handler(&data->evtdev);
83
84 return IRQ_HANDLED;
85}
86
87static struct stm32_clock_event_ddata clock_event_ddata = {
88 .evtdev = {
89 .name = "stm32 clockevent",
90 .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
91 .set_mode = stm32_clock_event_set_mode,
92 .set_next_event = stm32_clock_event_set_next_event,
93 .rating = 200,
94 },
95};
96
97static void __init stm32_clockevent_init(struct device_node *np)
98{
99 struct stm32_clock_event_ddata *data = &clock_event_ddata;
100 struct clk *clk;
101 struct reset_control *rstc;
102 unsigned long rate, max_delta;
103 int irq, ret, bits, prescaler = 1;
104
105 clk = of_clk_get(np, 0);
106 if (IS_ERR(clk)) {
107 ret = PTR_ERR(clk);
108 pr_err("failed to get clock for clockevent (%d)\n", ret);
109 goto err_clk_get;
110 }
111
112 ret = clk_prepare_enable(clk);
113 if (ret) {
114 pr_err("failed to enable timer clock for clockevent (%d)\n",
115 ret);
116 goto err_clk_enable;
117 }
118
119 rate = clk_get_rate(clk);
120
121 rstc = of_reset_control_get(np, NULL);
122 if (!IS_ERR(rstc)) {
123 reset_control_assert(rstc);
124 reset_control_deassert(rstc);
125 }
126
127 data->base = of_iomap(np, 0);
128 if (!data->base) {
129 pr_err("failed to map registers for clockevent\n");
130 goto err_iomap;
131 }
132
133 irq = irq_of_parse_and_map(np, 0);
134 if (!irq) {
135 pr_err("%s: failed to get irq.\n", np->full_name);
136 goto err_get_irq;
137 }
138
139 /* Detect whether the timer is 16 or 32 bits */
140 writel_relaxed(~0U, data->base + TIM_ARR);
141 max_delta = readl_relaxed(data->base + TIM_ARR);
142 if (max_delta == ~0U) {
143 prescaler = 1;
144 bits = 32;
145 } else {
146 prescaler = 1024;
147 bits = 16;
148 }
149 writel_relaxed(0, data->base + TIM_ARR);
150
151 writel_relaxed(prescaler - 1, data->base + TIM_PSC);
152 writel_relaxed(TIM_EGR_UG, data->base + TIM_EGR);
153 writel_relaxed(TIM_DIER_UIE, data->base + TIM_DIER);
154 writel_relaxed(0, data->base + TIM_SR);
155
156 data->periodic_top = DIV_ROUND_CLOSEST(rate, prescaler * HZ);
157
158 clockevents_config_and_register(&data->evtdev,
159 DIV_ROUND_CLOSEST(rate, prescaler),
160 0x1, max_delta);
161
162 ret = request_irq(irq, stm32_clock_event_handler, IRQF_TIMER,
163 "stm32 clockevent", data);
164 if (ret) {
165 pr_err("%s: failed to request irq.\n", np->full_name);
166 goto err_get_irq;
167 }
168
169 pr_info("%s: STM32 clockevent driver initialized (%d bits)\n",
170 np->full_name, bits);
171
172 return;
173
174err_get_irq:
175 iounmap(data->base);
176err_iomap:
177 clk_disable_unprepare(clk);
178err_clk_enable:
179 clk_put(clk);
180err_clk_get:
181 return;
182}
183
184CLOCKSOURCE_OF_DECLARE(stm32, "st,stm32-timer", stm32_clockevent_init);
diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c
index 28aa4b7bb602..0ffb4ea7c925 100644
--- a/drivers/clocksource/timer-sun5i.c
+++ b/drivers/clocksource/timer-sun5i.c
@@ -324,7 +324,7 @@ static void __init sun5i_timer_init(struct device_node *node)
324 int irq; 324 int irq;
325 325
326 timer_base = of_io_request_and_map(node, 0, of_node_full_name(node)); 326 timer_base = of_io_request_and_map(node, 0, of_node_full_name(node));
327 if (!timer_base) 327 if (IS_ERR(timer_base))
328 panic("Can't map registers"); 328 panic("Can't map registers");
329 329
330 irq = irq_of_parse_and_map(node, 0); 330 irq = irq_of_parse_and_map(node, 0);
diff --git a/drivers/power/reset/ltc2952-poweroff.c b/drivers/power/reset/ltc2952-poweroff.c
index 1e08195551fe..5f855f99bdfc 100644
--- a/drivers/power/reset/ltc2952-poweroff.c
+++ b/drivers/power/reset/ltc2952-poweroff.c
@@ -158,7 +158,6 @@ static irqreturn_t ltc2952_poweroff_handler(int irq, void *dev_id)
158 HRTIMER_MODE_REL); 158 HRTIMER_MODE_REL);
159 } else { 159 } else {
160 hrtimer_cancel(&data->timer_trigger); 160 hrtimer_cancel(&data->timer_trigger);
161 /* omitting return value check, timer should have been valid */
162 } 161 }
163 return IRQ_HANDLED; 162 return IRQ_HANDLED;
164} 163}
diff --git a/fs/dcache.c b/fs/dcache.c
index 37b5afdaf698..592c4b582495 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -322,17 +322,17 @@ static void dentry_free(struct dentry *dentry)
322} 322}
323 323
324/** 324/**
325 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups 325 * dentry_rcuwalk_invalidate - invalidate in-progress rcu-walk lookups
326 * @dentry: the target dentry 326 * @dentry: the target dentry
327 * After this call, in-progress rcu-walk path lookup will fail. This 327 * After this call, in-progress rcu-walk path lookup will fail. This
328 * should be called after unhashing, and after changing d_inode (if 328 * should be called after unhashing, and after changing d_inode (if
329 * the dentry has not already been unhashed). 329 * the dentry has not already been unhashed).
330 */ 330 */
331static inline void dentry_rcuwalk_barrier(struct dentry *dentry) 331static inline void dentry_rcuwalk_invalidate(struct dentry *dentry)
332{ 332{
333 assert_spin_locked(&dentry->d_lock); 333 lockdep_assert_held(&dentry->d_lock);
 334 /* Go through a barrier */ 334 /* Go through an invalidation barrier */
335 write_seqcount_barrier(&dentry->d_seq); 335 write_seqcount_invalidate(&dentry->d_seq);
336} 336}
337 337
338/* 338/*
@@ -372,7 +372,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
372 struct inode *inode = dentry->d_inode; 372 struct inode *inode = dentry->d_inode;
373 __d_clear_type_and_inode(dentry); 373 __d_clear_type_and_inode(dentry);
374 hlist_del_init(&dentry->d_u.d_alias); 374 hlist_del_init(&dentry->d_u.d_alias);
375 dentry_rcuwalk_barrier(dentry); 375 dentry_rcuwalk_invalidate(dentry);
376 spin_unlock(&dentry->d_lock); 376 spin_unlock(&dentry->d_lock);
377 spin_unlock(&inode->i_lock); 377 spin_unlock(&inode->i_lock);
378 if (!inode->i_nlink) 378 if (!inode->i_nlink)
@@ -494,7 +494,7 @@ void __d_drop(struct dentry *dentry)
494 __hlist_bl_del(&dentry->d_hash); 494 __hlist_bl_del(&dentry->d_hash);
495 dentry->d_hash.pprev = NULL; 495 dentry->d_hash.pprev = NULL;
496 hlist_bl_unlock(b); 496 hlist_bl_unlock(b);
497 dentry_rcuwalk_barrier(dentry); 497 dentry_rcuwalk_invalidate(dentry);
498 } 498 }
499} 499}
500EXPORT_SYMBOL(__d_drop); 500EXPORT_SYMBOL(__d_drop);
@@ -1752,7 +1752,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1752 if (inode) 1752 if (inode)
1753 hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); 1753 hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
1754 __d_set_inode_and_type(dentry, inode, add_flags); 1754 __d_set_inode_and_type(dentry, inode, add_flags);
1755 dentry_rcuwalk_barrier(dentry); 1755 dentry_rcuwalk_invalidate(dentry);
1756 spin_unlock(&dentry->d_lock); 1756 spin_unlock(&dentry->d_lock);
1757 fsnotify_d_instantiate(dentry, inode); 1757 fsnotify_d_instantiate(dentry, inode);
1758} 1758}
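The rename from dentry_rcuwalk_barrier() to dentry_rcuwalk_invalidate() (and from write_seqcount_barrier() to write_seqcount_invalidate(), per the include/linux/seqlock.h entry in the diffstat) makes the intent explicit: the writer bumps the dentry's d_seq so that lockless RCU-walk readers notice the change and retry. A toy sketch of that reader/writer pattern using the generic seqcount API, assuming the object is otherwise protected by a lock on the write side; the struct and function names are made up for illustration and are not the fs/namei.c code:

#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/seqlock.h>

struct obj {
	seqcount_t seq;
	int payload;
};

/* Lockless reader: sample the payload, then verify it was not invalidated. */
static int obj_read_lockless(struct obj *o, int *out)
{
	unsigned int seq = raw_seqcount_begin(&o->seq);
	int val = READ_ONCE(o->payload);

	if (read_seqcount_retry(&o->seq, seq))
		return -EAGAIN;	/* writer invalidated us; fall back to locking */
	*out = val;
	return 0;
}

/* Writer, called with the object lock held: force concurrent readers to retry. */
static void obj_invalidate(struct obj *o)
{
	write_seqcount_invalidate(&o->seq);
}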
diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h
index a899402a5a0e..52f3b7da4f2d 100644
--- a/include/linux/alarmtimer.h
+++ b/include/linux/alarmtimer.h
@@ -43,8 +43,8 @@ struct alarm {
43 43
44void alarm_init(struct alarm *alarm, enum alarmtimer_type type, 44void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
45 enum alarmtimer_restart (*function)(struct alarm *, ktime_t)); 45 enum alarmtimer_restart (*function)(struct alarm *, ktime_t));
46int alarm_start(struct alarm *alarm, ktime_t start); 46void alarm_start(struct alarm *alarm, ktime_t start);
47int alarm_start_relative(struct alarm *alarm, ktime_t start); 47void alarm_start_relative(struct alarm *alarm, ktime_t start);
48void alarm_restart(struct alarm *alarm); 48void alarm_restart(struct alarm *alarm);
49int alarm_try_to_cancel(struct alarm *alarm); 49int alarm_try_to_cancel(struct alarm *alarm);
50int alarm_cancel(struct alarm *alarm); 50int alarm_cancel(struct alarm *alarm);
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 96c280b2c263..597a1e836f22 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -37,12 +37,15 @@ enum clock_event_mode {
37 * reached from DETACHED or SHUTDOWN. 37 * reached from DETACHED or SHUTDOWN.
38 * ONESHOT: Device is programmed to generate event only once. Can be reached 38 * ONESHOT: Device is programmed to generate event only once. Can be reached
39 * from DETACHED or SHUTDOWN. 39 * from DETACHED or SHUTDOWN.
40 * ONESHOT_STOPPED: Device was programmed in ONESHOT mode and is temporarily
41 * stopped.
40 */ 42 */
41enum clock_event_state { 43enum clock_event_state {
42 CLOCK_EVT_STATE_DETACHED, 44 CLOCK_EVT_STATE_DETACHED,
43 CLOCK_EVT_STATE_SHUTDOWN, 45 CLOCK_EVT_STATE_SHUTDOWN,
44 CLOCK_EVT_STATE_PERIODIC, 46 CLOCK_EVT_STATE_PERIODIC,
45 CLOCK_EVT_STATE_ONESHOT, 47 CLOCK_EVT_STATE_ONESHOT,
48 CLOCK_EVT_STATE_ONESHOT_STOPPED,
46}; 49};
47 50
48/* 51/*
@@ -84,12 +87,13 @@ enum clock_event_state {
84 * @mult: nanosecond to cycles multiplier 87 * @mult: nanosecond to cycles multiplier
85 * @shift: nanoseconds to cycles divisor (power of two) 88 * @shift: nanoseconds to cycles divisor (power of two)
86 * @mode: operating mode, relevant only to ->set_mode(), OBSOLETE 89 * @mode: operating mode, relevant only to ->set_mode(), OBSOLETE
87 * @state: current state of the device, assigned by the core code 90 * @state_use_accessors:current state of the device, assigned by the core code
88 * @features: features 91 * @features: features
89 * @retries: number of forced programming retries 92 * @retries: number of forced programming retries
90 * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME. 93 * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME.
91 * @set_state_periodic: switch state to periodic, if !set_mode 94 * @set_state_periodic: switch state to periodic, if !set_mode
92 * @set_state_oneshot: switch state to oneshot, if !set_mode 95 * @set_state_oneshot: switch state to oneshot, if !set_mode
96 * @set_state_oneshot_stopped: switch state to oneshot_stopped, if !set_mode
93 * @set_state_shutdown: switch state to shutdown, if !set_mode 97 * @set_state_shutdown: switch state to shutdown, if !set_mode
94 * @tick_resume: resume clkevt device, if !set_mode 98 * @tick_resume: resume clkevt device, if !set_mode
95 * @broadcast: function to broadcast events 99 * @broadcast: function to broadcast events
@@ -113,7 +117,7 @@ struct clock_event_device {
113 u32 mult; 117 u32 mult;
114 u32 shift; 118 u32 shift;
115 enum clock_event_mode mode; 119 enum clock_event_mode mode;
116 enum clock_event_state state; 120 enum clock_event_state state_use_accessors;
117 unsigned int features; 121 unsigned int features;
118 unsigned long retries; 122 unsigned long retries;
119 123
@@ -121,11 +125,12 @@ struct clock_event_device {
121 * State transition callback(s): Only one of the two groups should be 125 * State transition callback(s): Only one of the two groups should be
122 * defined: 126 * defined:
123 * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME. 127 * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME.
124 * - set_state_{shutdown|periodic|oneshot}(), tick_resume(). 128 * - set_state_{shutdown|periodic|oneshot|oneshot_stopped}(), tick_resume().
125 */ 129 */
126 void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); 130 void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *);
127 int (*set_state_periodic)(struct clock_event_device *); 131 int (*set_state_periodic)(struct clock_event_device *);
128 int (*set_state_oneshot)(struct clock_event_device *); 132 int (*set_state_oneshot)(struct clock_event_device *);
133 int (*set_state_oneshot_stopped)(struct clock_event_device *);
129 int (*set_state_shutdown)(struct clock_event_device *); 134 int (*set_state_shutdown)(struct clock_event_device *);
130 int (*tick_resume)(struct clock_event_device *); 135 int (*tick_resume)(struct clock_event_device *);
131 136
@@ -144,6 +149,32 @@ struct clock_event_device {
144 struct module *owner; 149 struct module *owner;
145} ____cacheline_aligned; 150} ____cacheline_aligned;
146 151
152/* Helpers to verify state of a clockevent device */
153static inline bool clockevent_state_detached(struct clock_event_device *dev)
154{
155 return dev->state_use_accessors == CLOCK_EVT_STATE_DETACHED;
156}
157
158static inline bool clockevent_state_shutdown(struct clock_event_device *dev)
159{
160 return dev->state_use_accessors == CLOCK_EVT_STATE_SHUTDOWN;
161}
162
163static inline bool clockevent_state_periodic(struct clock_event_device *dev)
164{
165 return dev->state_use_accessors == CLOCK_EVT_STATE_PERIODIC;
166}
167
168static inline bool clockevent_state_oneshot(struct clock_event_device *dev)
169{
170 return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT;
171}
172
173static inline bool clockevent_state_oneshot_stopped(struct clock_event_device *dev)
174{
175 return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT_STOPPED;
176}
177
147/* 178/*
148 * Calculate a multiplication factor for scaled math, which is used to convert 179 * Calculate a multiplication factor for scaled math, which is used to convert
149 * nanoseconds based values to clock ticks: 180 * nanoseconds based values to clock ticks:
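The new ONESHOT_STOPPED state and the clockevent_state_*() accessors let the core park an already-programmed oneshot device without fully shutting it down, and let code query the state without touching the renamed state_use_accessors field directly. A hedged sketch of how a driver might wire up the optional set_state_oneshot_stopped() callback alongside the other state callbacks; the foo_* names are hypothetical and the hardware accesses are elided:

#include <linux/clockchips.h>

static int foo_timer_shutdown(struct clock_event_device *evt)
{
	/* stop the counter and mask its interrupt */
	return 0;
}

static int foo_timer_set_oneshot(struct clock_event_device *evt)
{
	/* leave the timer idle until set_next_event() programs it */
	return 0;
}

static int foo_timer_set_next_event(unsigned long delta,
				    struct clock_event_device *evt)
{
	/* program one expiry 'delta' clock ticks from now */
	return 0;
}

static struct clock_event_device foo_clockevent = {
	.name			= "foo-timer",
	.features		= CLOCK_EVT_FEAT_ONESHOT,
	.rating			= 300,
	.set_next_event		= foo_timer_set_next_event,
	.set_state_oneshot	= foo_timer_set_oneshot,
	/* New optional callback: stop an already-programmed oneshot device. */
	.set_state_oneshot_stopped = foo_timer_shutdown,
	.set_state_shutdown	= foo_timer_shutdown,
};

Core code can then test the device with, for example, clockevent_state_oneshot_stopped(&foo_clockevent) instead of reading the state field directly.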
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index d27d0152271f..278dd279a7a8 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -181,7 +181,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
181 181
182extern int clocksource_unregister(struct clocksource*); 182extern int clocksource_unregister(struct clocksource*);
183extern void clocksource_touch_watchdog(void); 183extern void clocksource_touch_watchdog(void);
184extern struct clocksource* clocksource_get_next(void);
185extern void clocksource_change_rating(struct clocksource *cs, int rating); 184extern void clocksource_change_rating(struct clocksource *cs, int rating);
186extern void clocksource_suspend(void); 185extern void clocksource_suspend(void);
187extern void clocksource_resume(void); 186extern void clocksource_resume(void);
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 05f6df1fdf5b..76dd4f0da5ca 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -53,34 +53,25 @@ enum hrtimer_restart {
53 * 53 *
54 * 0x00 inactive 54 * 0x00 inactive
55 * 0x01 enqueued into rbtree 55 * 0x01 enqueued into rbtree
56 * 0x02 callback function running
57 * 0x04 timer is migrated to another cpu
58 * 56 *
59 * Special cases: 57 * The callback state is not part of the timer->state because clearing it would
 60 * 0x03 callback function running and enqueued 58 * mean touching the timer after the callback, which makes it impossible to free
61 * (was requeued on another CPU) 59 * the timer from the callback function.
62 * 0x05 timer was migrated on CPU hotunplug
63 * 60 *
64 * The "callback function running and enqueued" status is only possible on 61 * Therefore we track the callback state in:
65 * SMP. It happens for example when a posix timer expired and the callback 62 *
63 * timer->base->cpu_base->running == timer
64 *
65 * On SMP it is possible to have a "callback function running and enqueued"
66 * status. It happens for example when a posix timer expired and the callback
66 * queued a signal. Between dropping the lock which protects the posix timer 67 * queued a signal. Between dropping the lock which protects the posix timer
67 * and reacquiring the base lock of the hrtimer, another CPU can deliver the 68 * and reacquiring the base lock of the hrtimer, another CPU can deliver the
68 * signal and rearm the timer. We have to preserve the callback running state, 69 * signal and rearm the timer.
69 * as otherwise the timer could be removed before the softirq code finishes the
70 * the handling of the timer.
71 *
72 * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state
73 * to preserve the HRTIMER_STATE_CALLBACK in the above scenario. This
74 * also affects HRTIMER_STATE_MIGRATE where the preservation is not
75 * necessary. HRTIMER_STATE_MIGRATE is cleared after the timer is
76 * enqueued on the new cpu.
77 * 70 *
78 * All state transitions are protected by cpu_base->lock. 71 * All state transitions are protected by cpu_base->lock.
79 */ 72 */
80#define HRTIMER_STATE_INACTIVE 0x00 73#define HRTIMER_STATE_INACTIVE 0x00
81#define HRTIMER_STATE_ENQUEUED 0x01 74#define HRTIMER_STATE_ENQUEUED 0x01
82#define HRTIMER_STATE_CALLBACK 0x02
83#define HRTIMER_STATE_MIGRATE 0x04
84 75
85/** 76/**
86 * struct hrtimer - the basic hrtimer structure 77 * struct hrtimer - the basic hrtimer structure
@@ -130,6 +121,12 @@ struct hrtimer_sleeper {
130 struct task_struct *task; 121 struct task_struct *task;
131}; 122};
132 123
124#ifdef CONFIG_64BIT
125# define HRTIMER_CLOCK_BASE_ALIGN 64
126#else
127# define HRTIMER_CLOCK_BASE_ALIGN 32
128#endif
129
133/** 130/**
134 * struct hrtimer_clock_base - the timer base for a specific clock 131 * struct hrtimer_clock_base - the timer base for a specific clock
135 * @cpu_base: per cpu clock base 132 * @cpu_base: per cpu clock base
@@ -137,9 +134,7 @@ struct hrtimer_sleeper {
137 * timer to a base on another cpu. 134 * timer to a base on another cpu.
138 * @clockid: clock id for per_cpu support 135 * @clockid: clock id for per_cpu support
139 * @active: red black tree root node for the active timers 136 * @active: red black tree root node for the active timers
140 * @resolution: the resolution of the clock, in nanoseconds
141 * @get_time: function to retrieve the current time of the clock 137 * @get_time: function to retrieve the current time of the clock
142 * @softirq_time: the time when running the hrtimer queue in the softirq
143 * @offset: offset of this clock to the monotonic base 138 * @offset: offset of this clock to the monotonic base
144 */ 139 */
145struct hrtimer_clock_base { 140struct hrtimer_clock_base {
@@ -147,11 +142,9 @@ struct hrtimer_clock_base {
147 int index; 142 int index;
148 clockid_t clockid; 143 clockid_t clockid;
149 struct timerqueue_head active; 144 struct timerqueue_head active;
150 ktime_t resolution;
151 ktime_t (*get_time)(void); 145 ktime_t (*get_time)(void);
152 ktime_t softirq_time;
153 ktime_t offset; 146 ktime_t offset;
154}; 147} __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN)));
155 148
156enum hrtimer_base_type { 149enum hrtimer_base_type {
157 HRTIMER_BASE_MONOTONIC, 150 HRTIMER_BASE_MONOTONIC,
@@ -165,11 +158,16 @@ enum hrtimer_base_type {
165 * struct hrtimer_cpu_base - the per cpu clock bases 158 * struct hrtimer_cpu_base - the per cpu clock bases
166 * @lock: lock protecting the base and associated clock bases 159 * @lock: lock protecting the base and associated clock bases
167 * and timers 160 * and timers
161 * @seq: seqcount around __run_hrtimer
162 * @running: pointer to the currently running hrtimer
168 * @cpu: cpu number 163 * @cpu: cpu number
169 * @active_bases: Bitfield to mark bases with active timers 164 * @active_bases: Bitfield to mark bases with active timers
170 * @clock_was_set: Indicates that clock was set from irq context. 165 * @clock_was_set_seq: Sequence counter of clock was set events
166 * @migration_enabled: The migration of hrtimers to other cpus is enabled
167 * @nohz_active: The nohz functionality is enabled
171 * @expires_next: absolute time of the next event which was scheduled 168 * @expires_next: absolute time of the next event which was scheduled
172 * via clock_set_next_event() 169 * via clock_set_next_event()
170 * @next_timer: Pointer to the first expiring timer
173 * @in_hrtirq: hrtimer_interrupt() is currently executing 171 * @in_hrtirq: hrtimer_interrupt() is currently executing
174 * @hres_active: State of high resolution mode 172 * @hres_active: State of high resolution mode
175 * @hang_detected: The last hrtimer interrupt detected a hang 173 * @hang_detected: The last hrtimer interrupt detected a hang
@@ -178,27 +176,38 @@ enum hrtimer_base_type {
178 * @nr_hangs: Total number of hrtimer interrupt hangs 176 * @nr_hangs: Total number of hrtimer interrupt hangs
179 * @max_hang_time: Maximum time spent in hrtimer_interrupt 177 * @max_hang_time: Maximum time spent in hrtimer_interrupt
180 * @clock_base: array of clock bases for this cpu 178 * @clock_base: array of clock bases for this cpu
179 *
180 * Note: next_timer is just an optimization for __remove_hrtimer().
181 * Do not dereference the pointer because it is not reliable on
182 * cross cpu removals.
181 */ 183 */
182struct hrtimer_cpu_base { 184struct hrtimer_cpu_base {
183 raw_spinlock_t lock; 185 raw_spinlock_t lock;
186 seqcount_t seq;
187 struct hrtimer *running;
184 unsigned int cpu; 188 unsigned int cpu;
185 unsigned int active_bases; 189 unsigned int active_bases;
186 unsigned int clock_was_set; 190 unsigned int clock_was_set_seq;
191 bool migration_enabled;
192 bool nohz_active;
187#ifdef CONFIG_HIGH_RES_TIMERS 193#ifdef CONFIG_HIGH_RES_TIMERS
194 unsigned int in_hrtirq : 1,
195 hres_active : 1,
196 hang_detected : 1;
188 ktime_t expires_next; 197 ktime_t expires_next;
189 int in_hrtirq; 198 struct hrtimer *next_timer;
190 int hres_active; 199 unsigned int nr_events;
191 int hang_detected; 200 unsigned int nr_retries;
192 unsigned long nr_events; 201 unsigned int nr_hangs;
193 unsigned long nr_retries; 202 unsigned int max_hang_time;
194 unsigned long nr_hangs;
195 ktime_t max_hang_time;
196#endif 203#endif
197 struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; 204 struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
198}; 205} ____cacheline_aligned;
199 206
200static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) 207static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
201{ 208{
209 BUILD_BUG_ON(sizeof(struct hrtimer_clock_base) > HRTIMER_CLOCK_BASE_ALIGN);
210
202 timer->node.expires = time; 211 timer->node.expires = time;
203 timer->_softexpires = time; 212 timer->_softexpires = time;
204} 213}
@@ -262,19 +271,16 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
262 return ktime_sub(timer->node.expires, timer->base->get_time()); 271 return ktime_sub(timer->node.expires, timer->base->get_time());
263} 272}
264 273
265#ifdef CONFIG_HIGH_RES_TIMERS
266struct clock_event_device;
267
268extern void hrtimer_interrupt(struct clock_event_device *dev);
269
270/*
 271 * In high resolution mode the time reference must be read accurately
272 */
273static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) 274static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer)
274{ 275{
275 return timer->base->get_time(); 276 return timer->base->get_time();
276} 277}
277 278
279#ifdef CONFIG_HIGH_RES_TIMERS
280struct clock_event_device;
281
282extern void hrtimer_interrupt(struct clock_event_device *dev);
283
278static inline int hrtimer_is_hres_active(struct hrtimer *timer) 284static inline int hrtimer_is_hres_active(struct hrtimer *timer)
279{ 285{
280 return timer->base->cpu_base->hres_active; 286 return timer->base->cpu_base->hres_active;
@@ -295,21 +301,16 @@ extern void hrtimer_peek_ahead_timers(void);
295 301
296extern void clock_was_set_delayed(void); 302extern void clock_was_set_delayed(void);
297 303
304extern unsigned int hrtimer_resolution;
305
298#else 306#else
299 307
300# define MONOTONIC_RES_NSEC LOW_RES_NSEC 308# define MONOTONIC_RES_NSEC LOW_RES_NSEC
301# define KTIME_MONOTONIC_RES KTIME_LOW_RES 309# define KTIME_MONOTONIC_RES KTIME_LOW_RES
302 310
303static inline void hrtimer_peek_ahead_timers(void) { } 311#define hrtimer_resolution (unsigned int)LOW_RES_NSEC
304 312
305/* 313static inline void hrtimer_peek_ahead_timers(void) { }
306 * In non high resolution mode the time reference is taken from
307 * the base softirq time variable.
308 */
309static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer)
310{
311 return timer->base->softirq_time;
312}
313 314
314static inline int hrtimer_is_hres_active(struct hrtimer *timer) 315static inline int hrtimer_is_hres_active(struct hrtimer *timer)
315{ 316{
@@ -353,49 +354,47 @@ static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { }
353#endif 354#endif
354 355
355/* Basic timer operations: */ 356/* Basic timer operations: */
356extern int hrtimer_start(struct hrtimer *timer, ktime_t tim, 357extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
357 const enum hrtimer_mode mode);
358extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
359 unsigned long range_ns, const enum hrtimer_mode mode); 358 unsigned long range_ns, const enum hrtimer_mode mode);
360extern int 359
361__hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, 360/**
362 unsigned long delta_ns, 361 * hrtimer_start - (re)start an hrtimer on the current CPU
363 const enum hrtimer_mode mode, int wakeup); 362 * @timer: the timer to be added
363 * @tim: expiry time
364 * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or
365 * relative (HRTIMER_MODE_REL)
366 */
367static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim,
368 const enum hrtimer_mode mode)
369{
370 hrtimer_start_range_ns(timer, tim, 0, mode);
371}
364 372
365extern int hrtimer_cancel(struct hrtimer *timer); 373extern int hrtimer_cancel(struct hrtimer *timer);
366extern int hrtimer_try_to_cancel(struct hrtimer *timer); 374extern int hrtimer_try_to_cancel(struct hrtimer *timer);
367 375
368static inline int hrtimer_start_expires(struct hrtimer *timer, 376static inline void hrtimer_start_expires(struct hrtimer *timer,
369 enum hrtimer_mode mode) 377 enum hrtimer_mode mode)
370{ 378{
371 unsigned long delta; 379 unsigned long delta;
372 ktime_t soft, hard; 380 ktime_t soft, hard;
373 soft = hrtimer_get_softexpires(timer); 381 soft = hrtimer_get_softexpires(timer);
374 hard = hrtimer_get_expires(timer); 382 hard = hrtimer_get_expires(timer);
375 delta = ktime_to_ns(ktime_sub(hard, soft)); 383 delta = ktime_to_ns(ktime_sub(hard, soft));
376 return hrtimer_start_range_ns(timer, soft, delta, mode); 384 hrtimer_start_range_ns(timer, soft, delta, mode);
377} 385}
378 386
379static inline int hrtimer_restart(struct hrtimer *timer) 387static inline void hrtimer_restart(struct hrtimer *timer)
380{ 388{
381 return hrtimer_start_expires(timer, HRTIMER_MODE_ABS); 389 hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
382} 390}
383 391
384/* Query timers: */ 392/* Query timers: */
385extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); 393extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
386extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
387 394
388extern ktime_t hrtimer_get_next_event(void); 395extern u64 hrtimer_get_next_event(void);
389 396
390/* 397extern bool hrtimer_active(const struct hrtimer *timer);
391 * A timer is active, when it is enqueued into the rbtree or the
392 * callback function is running or it's in the state of being migrated
393 * to another cpu.
394 */
395static inline int hrtimer_active(const struct hrtimer *timer)
396{
397 return timer->state != HRTIMER_STATE_INACTIVE;
398}
399 398
400/* 399/*
401 * Helper function to check, whether the timer is on one of the queues 400 * Helper function to check, whether the timer is on one of the queues
@@ -411,14 +410,29 @@ static inline int hrtimer_is_queued(struct hrtimer *timer)
411 */ 410 */
412static inline int hrtimer_callback_running(struct hrtimer *timer) 411static inline int hrtimer_callback_running(struct hrtimer *timer)
413{ 412{
414 return timer->state & HRTIMER_STATE_CALLBACK; 413 return timer->base->cpu_base->running == timer;
415} 414}
416 415
417/* Forward a hrtimer so it expires after now: */ 416/* Forward a hrtimer so it expires after now: */
418extern u64 417extern u64
419hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval); 418hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval);
420 419
421/* Forward a hrtimer so it expires after the hrtimer's current now */ 420/**
421 * hrtimer_forward_now - forward the timer expiry so it expires after now
422 * @timer: hrtimer to forward
423 * @interval: the interval to forward
424 *
425 * Forward the timer expiry so it will expire after the current time
426 * of the hrtimer clock base. Returns the number of overruns.
427 *
428 * Can be safely called from the callback function of @timer. If
429 * called from other contexts @timer must neither be enqueued nor
430 * running the callback and the caller needs to take care of
431 * serialization.
432 *
433 * Note: This only updates the timer expiry value and does not requeue
434 * the timer.
435 */
422static inline u64 hrtimer_forward_now(struct hrtimer *timer, 436static inline u64 hrtimer_forward_now(struct hrtimer *timer,
423 ktime_t interval) 437 ktime_t interval)
424{ 438{
@@ -443,7 +457,6 @@ extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode);
443 457
444/* Soft interrupt function to run the hrtimer queues: */ 458/* Soft interrupt function to run the hrtimer queues: */
445extern void hrtimer_run_queues(void); 459extern void hrtimer_run_queues(void);
446extern void hrtimer_run_pending(void);
447 460
448/* Bootup initialization: */ 461/* Bootup initialization: */
449extern void __init hrtimers_init(void); 462extern void __init hrtimers_init(void);
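
The net effect of the hrtimer.h changes above on callers: hrtimer_start() is now a void inline wrapper around hrtimer_start_range_ns(), hrtimer_active() becomes an out-of-line predicate, and hrtimer_forward_now() keeps its documented "update the expiry only, do not requeue" semantics. A minimal periodic-timer sketch against the updated API (my_timer, my_period_ns and my_timer_fn are invented names, not part of this patch):

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer my_timer;                         /* invented example */
static const u64 my_period_ns = 10 * NSEC_PER_MSEC;     /* 10 ms period */

static enum hrtimer_restart my_timer_fn(struct hrtimer *t)
{
        /* Only pushes the expiry forward relative to the timer's clock
         * base; requeueing happens because we return HRTIMER_RESTART. */
        hrtimer_forward_now(t, ns_to_ktime(my_period_ns));
        return HRTIMER_RESTART;
}

static void my_timer_setup(void)
{
        hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        my_timer.function = my_timer_fn;
        /* hrtimer_start() is void now; there is no failure value to check. */
        hrtimer_start(&my_timer, ns_to_ktime(my_period_ns), HRTIMER_MODE_REL);
}
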
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 950ae4501826..be7e75c945e9 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -413,7 +413,8 @@ enum
413 BLOCK_IOPOLL_SOFTIRQ, 413 BLOCK_IOPOLL_SOFTIRQ,
414 TASKLET_SOFTIRQ, 414 TASKLET_SOFTIRQ,
415 SCHED_SOFTIRQ, 415 SCHED_SOFTIRQ,
416 HRTIMER_SOFTIRQ, 416 HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the
417 numbering. Sigh! */
417 RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ 418 RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */
418 419
419 NR_SOFTIRQS 420 NR_SOFTIRQS
@@ -592,10 +593,10 @@ tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
592 clockid_t which_clock, enum hrtimer_mode mode); 593 clockid_t which_clock, enum hrtimer_mode mode);
593 594
594static inline 595static inline
595int tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time, 596void tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time,
596 const enum hrtimer_mode mode) 597 const enum hrtimer_mode mode)
597{ 598{
598 return hrtimer_start(&ttimer->timer, time, mode); 599 hrtimer_start(&ttimer->timer, time, mode);
599} 600}
600 601
601static inline 602static inline
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index c367cbdf73ab..535fd3bb1ba8 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -7,6 +7,7 @@
7#include <linux/time.h> 7#include <linux/time.h>
8#include <linux/timex.h> 8#include <linux/timex.h>
9#include <asm/param.h> /* for HZ */ 9#include <asm/param.h> /* for HZ */
10#include <generated/timeconst.h>
10 11
11/* 12/*
12 * The following defines establish the engineering parameters of the PLL 13 * The following defines establish the engineering parameters of the PLL
@@ -288,8 +289,133 @@ static inline u64 jiffies_to_nsecs(const unsigned long j)
288 return (u64)jiffies_to_usecs(j) * NSEC_PER_USEC; 289 return (u64)jiffies_to_usecs(j) * NSEC_PER_USEC;
289} 290}
290 291
291extern unsigned long msecs_to_jiffies(const unsigned int m); 292extern unsigned long __msecs_to_jiffies(const unsigned int m);
292extern unsigned long usecs_to_jiffies(const unsigned int u); 293#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
294/*
295 * HZ is equal to or smaller than 1000, and 1000 is a nice round
296 * multiple of HZ, divide with the factor between them, but round
297 * upwards:
298 */
299static inline unsigned long _msecs_to_jiffies(const unsigned int m)
300{
301 return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
302}
303#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
304/*
305 * HZ is larger than 1000, and HZ is a nice round multiple of 1000 -
306 * simply multiply with the factor between them.
307 *
308 * But first make sure the multiplication result cannot overflow:
309 */
310static inline unsigned long _msecs_to_jiffies(const unsigned int m)
311{
312 if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
313 return MAX_JIFFY_OFFSET;
314 return m * (HZ / MSEC_PER_SEC);
315}
316#else
317/*
318 * Generic case - multiply, round and divide. But first check that if
319 * we are doing a net multiplication, that we wouldn't overflow:
320 */
321static inline unsigned long _msecs_to_jiffies(const unsigned int m)
322{
323 if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
324 return MAX_JIFFY_OFFSET;
325
326 return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32) >> MSEC_TO_HZ_SHR32;
327}
328#endif
329/**
330 * msecs_to_jiffies: - convert milliseconds to jiffies
331 * @m: time in milliseconds
332 *
333 * conversion is done as follows:
334 *
335 * - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET)
336 *
337 * - 'too large' values [that would result in larger than
338 * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too.
339 *
340 * - all other values are converted to jiffies by either multiplying
341 * the input value by a factor or dividing it with a factor and
342 * handling any 32-bit overflows.
343 * for the details see __msecs_to_jiffies()
344 *
345 * msecs_to_jiffies() checks for the passed in value being a constant
346 * via __builtin_constant_p() allowing gcc to eliminate most of the
347 * code, __msecs_to_jiffies() is called if the value passed does not
348 * allow constant folding and the actual conversion must be done at
349 * runtime.
350 * the HZ range specific helpers _msecs_to_jiffies() are called both
351 * directly here and from __msecs_to_jiffies() in the case where
352 * constant folding is not possible.
353 */
354static inline unsigned long msecs_to_jiffies(const unsigned int m)
355{
356 if (__builtin_constant_p(m)) {
357 if ((int)m < 0)
358 return MAX_JIFFY_OFFSET;
359 return _msecs_to_jiffies(m);
360 } else {
361 return __msecs_to_jiffies(m);
362 }
363}
364
365extern unsigned long __usecs_to_jiffies(const unsigned int u);
366#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ)
367static inline unsigned long _usecs_to_jiffies(const unsigned int u)
368{
369 return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ);
370}
371#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC)
372static inline unsigned long _usecs_to_jiffies(const unsigned int u)
373{
374 return u * (HZ / USEC_PER_SEC);
375}
378#else
379static inline unsigned long _usecs_to_jiffies(const unsigned int u)
380{
381 return (USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32)
382 >> USEC_TO_HZ_SHR32;
383}
384#endif
385
386/**
387 * usecs_to_jiffies: - convert microseconds to jiffies
388 * @u: time in microseconds
389 *
390 * conversion is done as follows:
391 *
392 * - 'too large' values [that would result in larger than
393 * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too.
394 *
395 * - all other values are converted to jiffies by either multiplying
396 * the input value by a factor or dividing it with a factor and
397 * handling any 32-bit overflows as for msecs_to_jiffies.
398 *
399 * usecs_to_jiffies() checks for the passed in value being a constant
400 * via __builtin_constant_p() allowing gcc to eliminate most of the
401 * code, __usecs_to_jiffies() is called if the value passed does not
402 * allow constant folding and the actual conversion must be done at
403 * runtime.
404 * the HZ range specific helpers _usecs_to_jiffies() are called both
405 * directly here and from __msecs_to_jiffies() in the case where
406 * constant folding is not possible.
407 */
408static inline unsigned long usecs_to_jiffies(const unsigned int u)
409{
410 if (__builtin_constant_p(u)) {
411 if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET))
412 return MAX_JIFFY_OFFSET;
413 return _usecs_to_jiffies(u);
414 } else {
415 return __usecs_to_jiffies(u);
416 }
417}
418
293extern unsigned long timespec_to_jiffies(const struct timespec *value); 419extern unsigned long timespec_to_jiffies(const struct timespec *value);
294extern void jiffies_to_timespec(const unsigned long jiffies, 420extern void jiffies_to_timespec(const unsigned long jiffies,
295 struct timespec *value); 421 struct timespec *value);
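
The msecs_to_jiffies()/usecs_to_jiffies() rework above splits each conversion into a constant-foldable inline plus an out-of-line __msecs_to_jiffies()/__usecs_to_jiffies() fallback. A worked example of the round-up arithmetic, assuming HZ=250 purely for illustration (example_sleep() is an invented caller, not from this patch):

#include <linux/jiffies.h>
#include <linux/sched.h>

/* Assuming HZ == 250, so MSEC_PER_SEC / HZ == 4:
 *   _msecs_to_jiffies(1) == (1 + 3) / 4 == 1 jiffy   (1 ms rounds up to 4 ms)
 *   _msecs_to_jiffies(7) == (7 + 3) / 4 == 2 jiffies (7 ms rounds up to 8 ms)
 *   _msecs_to_jiffies(8) == (8 + 3) / 4 == 2 jiffies (exact)
 * A compile-time constant argument lets msecs_to_jiffies() fold to the same
 * value at build time; a runtime value goes through __msecs_to_jiffies(),
 * which uses the identical _msecs_to_jiffies() helper. usecs_to_jiffies()
 * mirrors this with USEC_PER_SEC in place of MSEC_PER_SEC.
 */
static void example_sleep(void)
{
        /* ~100 ms, i.e. 25 jiffies at HZ == 250 */
        schedule_timeout_interruptible(msecs_to_jiffies(100));
}
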
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a204d5266f5f..1b82d44b0a02 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -562,8 +562,12 @@ struct perf_cpu_context {
562 struct perf_event_context *task_ctx; 562 struct perf_event_context *task_ctx;
563 int active_oncpu; 563 int active_oncpu;
564 int exclusive; 564 int exclusive;
565
566 raw_spinlock_t hrtimer_lock;
565 struct hrtimer hrtimer; 567 struct hrtimer hrtimer;
566 ktime_t hrtimer_interval; 568 ktime_t hrtimer_interval;
569 unsigned int hrtimer_active;
570
567 struct pmu *unique_pmu; 571 struct pmu *unique_pmu;
568 struct perf_cgroup *cgrp; 572 struct perf_cgroup *cgrp;
569}; 573};
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 03a899aabd17..33a056bb886f 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -44,6 +44,8 @@
44#include <linux/debugobjects.h> 44#include <linux/debugobjects.h>
45#include <linux/bug.h> 45#include <linux/bug.h>
46#include <linux/compiler.h> 46#include <linux/compiler.h>
47#include <linux/ktime.h>
48
47#include <asm/barrier.h> 49#include <asm/barrier.h>
48 50
49extern int rcu_expedited; /* for sysctl */ 51extern int rcu_expedited; /* for sysctl */
@@ -1100,9 +1102,9 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
1100 __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) 1102 __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
1101 1103
1102#ifdef CONFIG_TINY_RCU 1104#ifdef CONFIG_TINY_RCU
1103static inline int rcu_needs_cpu(unsigned long *delta_jiffies) 1105static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
1104{ 1106{
1105 *delta_jiffies = ULONG_MAX; 1107 *nextevt = KTIME_MAX;
1106 return 0; 1108 return 0;
1107} 1109}
1108#endif /* #ifdef CONFIG_TINY_RCU */ 1110#endif /* #ifdef CONFIG_TINY_RCU */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 3fa4a43ab415..456879143f89 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -31,7 +31,7 @@
31#define __LINUX_RCUTREE_H 31#define __LINUX_RCUTREE_H
32 32
33void rcu_note_context_switch(void); 33void rcu_note_context_switch(void);
34int rcu_needs_cpu(unsigned long *delta_jiffies); 34int rcu_needs_cpu(u64 basem, u64 *nextevt);
35void rcu_cpu_stall_reset(void); 35void rcu_cpu_stall_reset(void);
36 36
37/* 37/*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d4193d5613cf..30364cb58b1f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -345,14 +345,10 @@ extern int runqueue_is_locked(int cpu);
345#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) 345#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
346extern void nohz_balance_enter_idle(int cpu); 346extern void nohz_balance_enter_idle(int cpu);
347extern void set_cpu_sd_state_idle(void); 347extern void set_cpu_sd_state_idle(void);
348extern int get_nohz_timer_target(int pinned); 348extern int get_nohz_timer_target(void);
349#else 349#else
350static inline void nohz_balance_enter_idle(int cpu) { } 350static inline void nohz_balance_enter_idle(int cpu) { }
351static inline void set_cpu_sd_state_idle(void) { } 351static inline void set_cpu_sd_state_idle(void) { }
352static inline int get_nohz_timer_target(int pinned)
353{
354 return smp_processor_id();
355}
356#endif 352#endif
357 353
358/* 354/*
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 596a0e007c62..c9e4731cf10b 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -57,24 +57,12 @@ extern unsigned int sysctl_numa_balancing_scan_size;
57extern unsigned int sysctl_sched_migration_cost; 57extern unsigned int sysctl_sched_migration_cost;
58extern unsigned int sysctl_sched_nr_migrate; 58extern unsigned int sysctl_sched_nr_migrate;
59extern unsigned int sysctl_sched_time_avg; 59extern unsigned int sysctl_sched_time_avg;
60extern unsigned int sysctl_timer_migration;
61extern unsigned int sysctl_sched_shares_window; 60extern unsigned int sysctl_sched_shares_window;
62 61
63int sched_proc_update_handler(struct ctl_table *table, int write, 62int sched_proc_update_handler(struct ctl_table *table, int write,
64 void __user *buffer, size_t *length, 63 void __user *buffer, size_t *length,
65 loff_t *ppos); 64 loff_t *ppos);
66#endif 65#endif
67#ifdef CONFIG_SCHED_DEBUG
68static inline unsigned int get_sysctl_timer_migration(void)
69{
70 return sysctl_timer_migration;
71}
72#else
73static inline unsigned int get_sysctl_timer_migration(void)
74{
75 return 1;
76}
77#endif
78 66
79/* 67/*
80 * control realtime throttling: 68 * control realtime throttling:
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 5f68d0a391ce..486e685a226a 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -233,6 +233,47 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
233 s->sequence++; 233 s->sequence++;
234} 234}
235 235
236/**
237 * raw_write_seqcount_barrier - do a seq write barrier
238 * @s: pointer to seqcount_t
239 *
240 * This can be used to provide an ordering guarantee instead of the
241 * usual consistency guarantee. It is one wmb cheaper, because we can
242 * collapse the two back-to-back wmb()s.
243 *
244 * seqcount_t seq;
245 * bool X = true, Y = false;
246 *
247 * void read(void)
248 * {
249 * bool x, y;
250 *
251 * do {
252 * int s = read_seqcount_begin(&seq);
253 *
254 * x = X; y = Y;
255 *
256 * } while (read_seqcount_retry(&seq, s));
257 *
258 * BUG_ON(!x && !y);
259 * }
260 *
261 * void write(void)
262 * {
263 * Y = true;
264 *
265 * raw_write_seqcount_barrier(seq);
266 *
267 * X = false;
268 * }
269 */
270static inline void raw_write_seqcount_barrier(seqcount_t *s)
271{
272 s->sequence++;
273 smp_wmb();
274 s->sequence++;
275}
276
236/* 277/*
237 * raw_write_seqcount_latch - redirect readers to even/odd copy 278 * raw_write_seqcount_latch - redirect readers to even/odd copy
238 * @s: pointer to seqcount_t 279 * @s: pointer to seqcount_t
@@ -266,13 +307,13 @@ static inline void write_seqcount_end(seqcount_t *s)
266} 307}
267 308
268/** 309/**
269 * write_seqcount_barrier - invalidate in-progress read-side seq operations 310 * write_seqcount_invalidate - invalidate in-progress read-side seq operations
270 * @s: pointer to seqcount_t 311 * @s: pointer to seqcount_t
271 * 312 *
272 * After write_seqcount_barrier, no read-side seq operations will complete 313 * After write_seqcount_invalidate, no read-side seq operations will complete
273 * successfully and see data older than this. 314 * successfully and see data older than this.
274 */ 315 */
275static inline void write_seqcount_barrier(seqcount_t *s) 316static inline void write_seqcount_invalidate(seqcount_t *s)
276{ 317{
277 smp_wmb(); 318 smp_wmb();
278 s->sequence+=2; 319 s->sequence+=2;
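
A rough usage sketch for the renamed write_seqcount_invalidate() (my_seq, cached_value, read_cached() and drop_cache() are invented; real users typically hold their write-side lock around the invalidation):

#include <linux/seqlock.h>

static seqcount_t my_seq = SEQCNT_ZERO(my_seq);         /* invented example */
static int cached_value;

static int read_cached(void)
{
        unsigned int seq;
        int v;

        do {
                seq = read_seqcount_begin(&my_seq);
                v = cached_value;
        } while (read_seqcount_retry(&my_seq, seq));

        return v;
}

static void drop_cache(void)
{
        /* Any reader that sampled my_seq before this point is forced to
         * retry, so it cannot return the soon-to-be-stale cached_value. */
        write_seqcount_invalidate(&my_seq);
        cached_value = 0;
}
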
diff --git a/include/linux/time64.h b/include/linux/time64.h
index a3831478d9cf..77b5df2acd2a 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -2,6 +2,7 @@
2#define _LINUX_TIME64_H 2#define _LINUX_TIME64_H
3 3
4#include <uapi/linux/time.h> 4#include <uapi/linux/time.h>
5#include <linux/math64.h>
5 6
6typedef __s64 time64_t; 7typedef __s64 time64_t;
7 8
@@ -28,6 +29,7 @@ struct timespec64 {
28#define FSEC_PER_SEC 1000000000000000LL 29#define FSEC_PER_SEC 1000000000000000LL
29 30
30/* Located here for timespec[64]_valid_strict */ 31/* Located here for timespec[64]_valid_strict */
32#define TIME64_MAX ((s64)~((u64)1 << 63))
31#define KTIME_MAX ((s64)~((u64)1 << 63)) 33#define KTIME_MAX ((s64)~((u64)1 << 63))
32#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) 34#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC)
33 35
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index fb86963859c7..25247220b4b7 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -49,6 +49,8 @@ struct tk_read_base {
49 * @offs_boot: Offset clock monotonic -> clock boottime 49 * @offs_boot: Offset clock monotonic -> clock boottime
50 * @offs_tai: Offset clock monotonic -> clock tai 50 * @offs_tai: Offset clock monotonic -> clock tai
51 * @tai_offset: The current UTC to TAI offset in seconds 51 * @tai_offset: The current UTC to TAI offset in seconds
52 * @clock_was_set_seq: The sequence number of clock was set events
53 * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
52 * @raw_time: Monotonic raw base time in timespec64 format 54 * @raw_time: Monotonic raw base time in timespec64 format
53 * @cycle_interval: Number of clock cycles in one NTP interval 55 * @cycle_interval: Number of clock cycles in one NTP interval
54 * @xtime_interval: Number of clock shifted nano seconds in one NTP 56 * @xtime_interval: Number of clock shifted nano seconds in one NTP
@@ -60,6 +62,9 @@ struct tk_read_base {
60 * shifted nano seconds. 62 * shifted nano seconds.
61 * @ntp_error_shift: Shift conversion between clock shifted nano seconds and 63 * @ntp_error_shift: Shift conversion between clock shifted nano seconds and
62 * ntp shifted nano seconds. 64 * ntp shifted nano seconds.
65 * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING)
66 * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING)
67 * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING)
63 * 68 *
64 * Note: For timespec(64) based interfaces wall_to_monotonic is what 69 * Note: For timespec(64) based interfaces wall_to_monotonic is what
65 * we need to add to xtime (or xtime corrected for sub jiffie times) 70 * we need to add to xtime (or xtime corrected for sub jiffie times)
@@ -85,6 +90,8 @@ struct timekeeper {
85 ktime_t offs_boot; 90 ktime_t offs_boot;
86 ktime_t offs_tai; 91 ktime_t offs_tai;
87 s32 tai_offset; 92 s32 tai_offset;
93 unsigned int clock_was_set_seq;
94 ktime_t next_leap_ktime;
88 struct timespec64 raw_time; 95 struct timespec64 raw_time;
89 96
90 /* The following members are for timekeeping internal use */ 97 /* The following members are for timekeeping internal use */
@@ -104,6 +111,18 @@ struct timekeeper {
104 s64 ntp_error; 111 s64 ntp_error;
105 u32 ntp_error_shift; 112 u32 ntp_error_shift;
106 u32 ntp_err_mult; 113 u32 ntp_err_mult;
114#ifdef CONFIG_DEBUG_TIMEKEEPING
115 long last_warning;
116 /*
117 * These simple flag variables are managed
118 * without locks, which is racy, but they are
119 * ok since we don't really care about being
120 * super precise about how many events were
121 * seen, just that a problem was observed.
122 */
123 int underflow_seen;
124 int overflow_seen;
125#endif
107}; 126};
108 127
109#ifdef CONFIG_GENERIC_TIME_VSYSCALL 128#ifdef CONFIG_GENERIC_TIME_VSYSCALL
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 99176af216af..3aa72e648650 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -163,6 +163,7 @@ extern ktime_t ktime_get(void);
163extern ktime_t ktime_get_with_offset(enum tk_offsets offs); 163extern ktime_t ktime_get_with_offset(enum tk_offsets offs);
164extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs); 164extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs);
165extern ktime_t ktime_get_raw(void); 165extern ktime_t ktime_get_raw(void);
166extern u32 ktime_get_resolution_ns(void);
166 167
167/** 168/**
168 * ktime_get_real - get the real (wall-) time in ktime_t format 169 * ktime_get_real - get the real (wall-) time in ktime_t format
@@ -266,7 +267,6 @@ extern int persistent_clock_is_local;
266 267
267extern void read_persistent_clock(struct timespec *ts); 268extern void read_persistent_clock(struct timespec *ts);
268extern void read_persistent_clock64(struct timespec64 *ts); 269extern void read_persistent_clock64(struct timespec64 *ts);
269extern void read_boot_clock(struct timespec *ts);
270extern void read_boot_clock64(struct timespec64 *ts); 270extern void read_boot_clock64(struct timespec64 *ts);
271extern int update_persistent_clock(struct timespec now); 271extern int update_persistent_clock(struct timespec now);
272extern int update_persistent_clock64(struct timespec64 now); 272extern int update_persistent_clock64(struct timespec64 now);
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 8c5a197e1587..61aa61dc410c 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -14,27 +14,23 @@ struct timer_list {
14 * All fields that change during normal runtime grouped to the 14 * All fields that change during normal runtime grouped to the
15 * same cacheline 15 * same cacheline
16 */ 16 */
17 struct list_head entry; 17 struct hlist_node entry;
18 unsigned long expires; 18 unsigned long expires;
19 struct tvec_base *base; 19 void (*function)(unsigned long);
20 20 unsigned long data;
21 void (*function)(unsigned long); 21 u32 flags;
22 unsigned long data; 22 int slack;
23
24 int slack;
25 23
26#ifdef CONFIG_TIMER_STATS 24#ifdef CONFIG_TIMER_STATS
27 int start_pid; 25 int start_pid;
28 void *start_site; 26 void *start_site;
29 char start_comm[16]; 27 char start_comm[16];
30#endif 28#endif
31#ifdef CONFIG_LOCKDEP 29#ifdef CONFIG_LOCKDEP
32 struct lockdep_map lockdep_map; 30 struct lockdep_map lockdep_map;
33#endif 31#endif
34}; 32};
35 33
36extern struct tvec_base boot_tvec_bases;
37
38#ifdef CONFIG_LOCKDEP 34#ifdef CONFIG_LOCKDEP
39/* 35/*
40 * NB: because we have to copy the lockdep_map, setting the lockdep_map key 36 * NB: because we have to copy the lockdep_map, setting the lockdep_map key
@@ -49,9 +45,6 @@ extern struct tvec_base boot_tvec_bases;
49#endif 45#endif
50 46
51/* 47/*
52 * Note that all tvec_bases are at least 4 byte aligned and lower two bits
53 * of base in timer_list is guaranteed to be zero. Use them for flags.
54 *
55 * A deferrable timer will work normally when the system is busy, but 48 * A deferrable timer will work normally when the system is busy, but
56 * will not cause a CPU to come out of idle just to service it; instead, 49 * will not cause a CPU to come out of idle just to service it; instead,
57 * the timer will be serviced when the CPU eventually wakes up with a 50 * the timer will be serviced when the CPU eventually wakes up with a
@@ -65,17 +58,18 @@ extern struct tvec_base boot_tvec_bases;
65 * workqueue locking issues. It's not meant for executing random crap 58 * workqueue locking issues. It's not meant for executing random crap
66 * with interrupts disabled. Abuse is monitored! 59 * with interrupts disabled. Abuse is monitored!
67 */ 60 */
68#define TIMER_DEFERRABLE 0x1LU 61#define TIMER_CPUMASK 0x0007FFFF
69#define TIMER_IRQSAFE 0x2LU 62#define TIMER_MIGRATING 0x00080000
70 63#define TIMER_BASEMASK (TIMER_CPUMASK | TIMER_MIGRATING)
71#define TIMER_FLAG_MASK 0x3LU 64#define TIMER_DEFERRABLE 0x00100000
65#define TIMER_IRQSAFE 0x00200000
72 66
73#define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \ 67#define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \
74 .entry = { .prev = TIMER_ENTRY_STATIC }, \ 68 .entry = { .next = TIMER_ENTRY_STATIC }, \
75 .function = (_function), \ 69 .function = (_function), \
76 .expires = (_expires), \ 70 .expires = (_expires), \
77 .data = (_data), \ 71 .data = (_data), \
78 .base = (void *)((unsigned long)&boot_tvec_bases + (_flags)), \ 72 .flags = (_flags), \
79 .slack = -1, \ 73 .slack = -1, \
80 __TIMER_LOCKDEP_MAP_INITIALIZER( \ 74 __TIMER_LOCKDEP_MAP_INITIALIZER( \
81 __FILE__ ":" __stringify(__LINE__)) \ 75 __FILE__ ":" __stringify(__LINE__)) \
@@ -168,7 +162,7 @@ static inline void init_timer_on_stack_key(struct timer_list *timer,
168 */ 162 */
169static inline int timer_pending(const struct timer_list * timer) 163static inline int timer_pending(const struct timer_list * timer)
170{ 164{
171 return timer->entry.next != NULL; 165 return timer->entry.pprev != NULL;
172} 166}
173 167
174extern void add_timer_on(struct timer_list *timer, int cpu); 168extern void add_timer_on(struct timer_list *timer, int cpu);
@@ -188,26 +182,16 @@ extern void set_timer_slack(struct timer_list *time, int slack_hz);
188#define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) 182#define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1)
189 183
190/* 184/*
191 * Return when the next timer-wheel timeout occurs (in absolute jiffies),
192 * locks the timer base and does the comparison against the given
193 * jiffie.
194 */
195extern unsigned long get_next_timer_interrupt(unsigned long now);
196
197/*
198 * Timer-statistics info: 185 * Timer-statistics info:
199 */ 186 */
200#ifdef CONFIG_TIMER_STATS 187#ifdef CONFIG_TIMER_STATS
201 188
202extern int timer_stats_active; 189extern int timer_stats_active;
203 190
204#define TIMER_STATS_FLAG_DEFERRABLE 0x1
205
206extern void init_timer_stats(void); 191extern void init_timer_stats(void);
207 192
208extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf, 193extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
209 void *timerf, char *comm, 194 void *timerf, char *comm, u32 flags);
210 unsigned int timer_flag);
211 195
212extern void __timer_stats_timer_set_start_info(struct timer_list *timer, 196extern void __timer_stats_timer_set_start_info(struct timer_list *timer,
213 void *addr); 197 void *addr);
@@ -254,6 +238,15 @@ extern void run_local_timers(void);
254struct hrtimer; 238struct hrtimer;
255extern enum hrtimer_restart it_real_fn(struct hrtimer *); 239extern enum hrtimer_restart it_real_fn(struct hrtimer *);
256 240
241#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
242#include <linux/sysctl.h>
243
244extern unsigned int sysctl_timer_migration;
245int timer_migration_handler(struct ctl_table *table, int write,
246 void __user *buffer, size_t *lenp,
247 loff_t *ppos);
248#endif
249
257unsigned long __round_jiffies(unsigned long j, int cpu); 250unsigned long __round_jiffies(unsigned long j, int cpu);
258unsigned long __round_jiffies_relative(unsigned long j, int cpu); 251unsigned long __round_jiffies_relative(unsigned long j, int cpu);
259unsigned long round_jiffies(unsigned long j); 252unsigned long round_jiffies(unsigned long j);
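
With struct timer_list carrying a flags word instead of a flag-encoded base pointer, per-timer properties are read directly from timer->flags. An illustrative helper, not part of this patch:

#include <linux/timer.h>
#include <linux/printk.h>

static void describe_timer(struct timer_list *timer)
{
        unsigned int cpu = timer->flags & TIMER_CPUMASK;  /* CPU of its base */

        if (timer->flags & TIMER_MIGRATING)
                pr_debug("timer %p is being moved off CPU %u\n", timer, cpu);
        if (timer->flags & TIMER_DEFERRABLE)
                pr_debug("timer %p (CPU %u) may be deferred while idle\n",
                         timer, cpu);
        if (timer->flags & TIMER_IRQSAFE)
                pr_debug("timer %p runs its callback with irqs disabled\n",
                         timer);
}
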
diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h
index a520fd70a59f..7eec17ad7fa1 100644
--- a/include/linux/timerqueue.h
+++ b/include/linux/timerqueue.h
@@ -16,10 +16,10 @@ struct timerqueue_head {
16}; 16};
17 17
18 18
19extern void timerqueue_add(struct timerqueue_head *head, 19extern bool timerqueue_add(struct timerqueue_head *head,
20 struct timerqueue_node *node); 20 struct timerqueue_node *node);
21extern void timerqueue_del(struct timerqueue_head *head, 21extern bool timerqueue_del(struct timerqueue_head *head,
22 struct timerqueue_node *node); 22 struct timerqueue_node *node);
23extern struct timerqueue_node *timerqueue_iterate_next( 23extern struct timerqueue_node *timerqueue_iterate_next(
24 struct timerqueue_node *node); 24 struct timerqueue_node *node);
25 25
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 68c2c2000f02..073b9ac245ba 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -43,15 +43,18 @@ DEFINE_EVENT(timer_class, timer_init,
43 */ 43 */
44TRACE_EVENT(timer_start, 44TRACE_EVENT(timer_start,
45 45
46 TP_PROTO(struct timer_list *timer, unsigned long expires), 46 TP_PROTO(struct timer_list *timer,
47 unsigned long expires,
48 unsigned int flags),
47 49
48 TP_ARGS(timer, expires), 50 TP_ARGS(timer, expires, flags),
49 51
50 TP_STRUCT__entry( 52 TP_STRUCT__entry(
51 __field( void *, timer ) 53 __field( void *, timer )
52 __field( void *, function ) 54 __field( void *, function )
53 __field( unsigned long, expires ) 55 __field( unsigned long, expires )
54 __field( unsigned long, now ) 56 __field( unsigned long, now )
57 __field( unsigned int, flags )
55 ), 58 ),
56 59
57 TP_fast_assign( 60 TP_fast_assign(
@@ -59,11 +62,12 @@ TRACE_EVENT(timer_start,
59 __entry->function = timer->function; 62 __entry->function = timer->function;
60 __entry->expires = expires; 63 __entry->expires = expires;
61 __entry->now = jiffies; 64 __entry->now = jiffies;
65 __entry->flags = flags;
62 ), 66 ),
63 67
64 TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld]", 68 TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld] flags=0x%08x",
65 __entry->timer, __entry->function, __entry->expires, 69 __entry->timer, __entry->function, __entry->expires,
66 (long)__entry->expires - __entry->now) 70 (long)__entry->expires - __entry->now, __entry->flags)
67); 71);
68 72
69/** 73/**
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f2003b97ddc9..8e13f3e54ec3 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -51,9 +51,11 @@
51 51
52static struct workqueue_struct *perf_wq; 52static struct workqueue_struct *perf_wq;
53 53
54typedef int (*remote_function_f)(void *);
55
54struct remote_function_call { 56struct remote_function_call {
55 struct task_struct *p; 57 struct task_struct *p;
56 int (*func)(void *info); 58 remote_function_f func;
57 void *info; 59 void *info;
58 int ret; 60 int ret;
59}; 61};
@@ -86,7 +88,7 @@ static void remote_function(void *data)
86 * -EAGAIN - when the process moved away 88 * -EAGAIN - when the process moved away
87 */ 89 */
88static int 90static int
89task_function_call(struct task_struct *p, int (*func) (void *info), void *info) 91task_function_call(struct task_struct *p, remote_function_f func, void *info)
90{ 92{
91 struct remote_function_call data = { 93 struct remote_function_call data = {
92 .p = p, 94 .p = p,
@@ -110,7 +112,7 @@ task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
110 * 112 *
111 * returns: @func return value or -ENXIO when the cpu is offline 113 * returns: @func return value or -ENXIO when the cpu is offline
112 */ 114 */
113static int cpu_function_call(int cpu, int (*func) (void *info), void *info) 115static int cpu_function_call(int cpu, remote_function_f func, void *info)
114{ 116{
115 struct remote_function_call data = { 117 struct remote_function_call data = {
116 .p = NULL, 118 .p = NULL,
@@ -747,62 +749,31 @@ perf_cgroup_mark_enabled(struct perf_event *event,
747/* 749/*
 748 * function must be called with interrupts disabled 750 * function must be called with interrupts disabled
749 */ 751 */
750static enum hrtimer_restart perf_cpu_hrtimer_handler(struct hrtimer *hr) 752static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr)
751{ 753{
752 struct perf_cpu_context *cpuctx; 754 struct perf_cpu_context *cpuctx;
753 enum hrtimer_restart ret = HRTIMER_NORESTART;
754 int rotations = 0; 755 int rotations = 0;
755 756
756 WARN_ON(!irqs_disabled()); 757 WARN_ON(!irqs_disabled());
757 758
758 cpuctx = container_of(hr, struct perf_cpu_context, hrtimer); 759 cpuctx = container_of(hr, struct perf_cpu_context, hrtimer);
759
760 rotations = perf_rotate_context(cpuctx); 760 rotations = perf_rotate_context(cpuctx);
761 761
762 /* 762 raw_spin_lock(&cpuctx->hrtimer_lock);
763 * arm timer if needed 763 if (rotations)
764 */
765 if (rotations) {
766 hrtimer_forward_now(hr, cpuctx->hrtimer_interval); 764 hrtimer_forward_now(hr, cpuctx->hrtimer_interval);
767 ret = HRTIMER_RESTART; 765 else
768 } 766 cpuctx->hrtimer_active = 0;
769 767 raw_spin_unlock(&cpuctx->hrtimer_lock);
770 return ret;
771}
772
773/* CPU is going down */
774void perf_cpu_hrtimer_cancel(int cpu)
775{
776 struct perf_cpu_context *cpuctx;
777 struct pmu *pmu;
778 unsigned long flags;
779
780 if (WARN_ON(cpu != smp_processor_id()))
781 return;
782
783 local_irq_save(flags);
784
785 rcu_read_lock();
786
787 list_for_each_entry_rcu(pmu, &pmus, entry) {
788 cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
789
790 if (pmu->task_ctx_nr == perf_sw_context)
791 continue;
792
793 hrtimer_cancel(&cpuctx->hrtimer);
794 }
795
796 rcu_read_unlock();
797 768
798 local_irq_restore(flags); 769 return rotations ? HRTIMER_RESTART : HRTIMER_NORESTART;
799} 770}
800 771
801static void __perf_cpu_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu) 772static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
802{ 773{
803 struct hrtimer *hr = &cpuctx->hrtimer; 774 struct hrtimer *timer = &cpuctx->hrtimer;
804 struct pmu *pmu = cpuctx->ctx.pmu; 775 struct pmu *pmu = cpuctx->ctx.pmu;
805 int timer; 776 u64 interval;
806 777
807 /* no multiplexing needed for SW PMU */ 778 /* no multiplexing needed for SW PMU */
808 if (pmu->task_ctx_nr == perf_sw_context) 779 if (pmu->task_ctx_nr == perf_sw_context)
@@ -812,31 +783,36 @@ static void __perf_cpu_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
812 * check default is sane, if not set then force to 783 * check default is sane, if not set then force to
813 * default interval (1/tick) 784 * default interval (1/tick)
814 */ 785 */
815 timer = pmu->hrtimer_interval_ms; 786 interval = pmu->hrtimer_interval_ms;
816 if (timer < 1) 787 if (interval < 1)
817 timer = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER; 788 interval = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER;
818 789
819 cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer); 790 cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
820 791
821 hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); 792 raw_spin_lock_init(&cpuctx->hrtimer_lock);
822 hr->function = perf_cpu_hrtimer_handler; 793 hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
794 timer->function = perf_mux_hrtimer_handler;
823} 795}
824 796
825static void perf_cpu_hrtimer_restart(struct perf_cpu_context *cpuctx) 797static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
826{ 798{
827 struct hrtimer *hr = &cpuctx->hrtimer; 799 struct hrtimer *timer = &cpuctx->hrtimer;
828 struct pmu *pmu = cpuctx->ctx.pmu; 800 struct pmu *pmu = cpuctx->ctx.pmu;
801 unsigned long flags;
829 802
830 /* not for SW PMU */ 803 /* not for SW PMU */
831 if (pmu->task_ctx_nr == perf_sw_context) 804 if (pmu->task_ctx_nr == perf_sw_context)
832 return; 805 return 0;
833 806
834 if (hrtimer_active(hr)) 807 raw_spin_lock_irqsave(&cpuctx->hrtimer_lock, flags);
835 return; 808 if (!cpuctx->hrtimer_active) {
809 cpuctx->hrtimer_active = 1;
810 hrtimer_forward_now(timer, cpuctx->hrtimer_interval);
811 hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
812 }
813 raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags);
836 814
837 if (!hrtimer_callback_running(hr)) 815 return 0;
838 __hrtimer_start_range_ns(hr, cpuctx->hrtimer_interval,
839 0, HRTIMER_MODE_REL_PINNED, 0);
840} 816}
841 817
842void perf_pmu_disable(struct pmu *pmu) 818void perf_pmu_disable(struct pmu *pmu)
@@ -1935,7 +1911,7 @@ group_sched_in(struct perf_event *group_event,
1935 1911
1936 if (event_sched_in(group_event, cpuctx, ctx)) { 1912 if (event_sched_in(group_event, cpuctx, ctx)) {
1937 pmu->cancel_txn(pmu); 1913 pmu->cancel_txn(pmu);
1938 perf_cpu_hrtimer_restart(cpuctx); 1914 perf_mux_hrtimer_restart(cpuctx);
1939 return -EAGAIN; 1915 return -EAGAIN;
1940 } 1916 }
1941 1917
@@ -1982,7 +1958,7 @@ group_error:
1982 1958
1983 pmu->cancel_txn(pmu); 1959 pmu->cancel_txn(pmu);
1984 1960
1985 perf_cpu_hrtimer_restart(cpuctx); 1961 perf_mux_hrtimer_restart(cpuctx);
1986 1962
1987 return -EAGAIN; 1963 return -EAGAIN;
1988} 1964}
@@ -2255,7 +2231,7 @@ static int __perf_event_enable(void *info)
2255 */ 2231 */
2256 if (leader != event) { 2232 if (leader != event) {
2257 group_sched_out(leader, cpuctx, ctx); 2233 group_sched_out(leader, cpuctx, ctx);
2258 perf_cpu_hrtimer_restart(cpuctx); 2234 perf_mux_hrtimer_restart(cpuctx);
2259 } 2235 }
2260 if (leader->attr.pinned) { 2236 if (leader->attr.pinned) {
2261 update_group_times(leader); 2237 update_group_times(leader);
@@ -6897,9 +6873,8 @@ static void perf_swevent_start_hrtimer(struct perf_event *event)
6897 } else { 6873 } else {
6898 period = max_t(u64, 10000, hwc->sample_period); 6874 period = max_t(u64, 10000, hwc->sample_period);
6899 } 6875 }
6900 __hrtimer_start_range_ns(&hwc->hrtimer, 6876 hrtimer_start(&hwc->hrtimer, ns_to_ktime(period),
6901 ns_to_ktime(period), 0, 6877 HRTIMER_MODE_REL_PINNED);
6902 HRTIMER_MODE_REL_PINNED, 0);
6903} 6878}
6904 6879
6905static void perf_swevent_cancel_hrtimer(struct perf_event *event) 6880static void perf_swevent_cancel_hrtimer(struct perf_event *event)
@@ -7200,6 +7175,8 @@ perf_event_mux_interval_ms_show(struct device *dev,
7200 return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->hrtimer_interval_ms); 7175 return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->hrtimer_interval_ms);
7201} 7176}
7202 7177
7178static DEFINE_MUTEX(mux_interval_mutex);
7179
7203static ssize_t 7180static ssize_t
7204perf_event_mux_interval_ms_store(struct device *dev, 7181perf_event_mux_interval_ms_store(struct device *dev,
7205 struct device_attribute *attr, 7182 struct device_attribute *attr,
@@ -7219,17 +7196,21 @@ perf_event_mux_interval_ms_store(struct device *dev,
7219 if (timer == pmu->hrtimer_interval_ms) 7196 if (timer == pmu->hrtimer_interval_ms)
7220 return count; 7197 return count;
7221 7198
7199 mutex_lock(&mux_interval_mutex);
7222 pmu->hrtimer_interval_ms = timer; 7200 pmu->hrtimer_interval_ms = timer;
7223 7201
7224 /* update all cpuctx for this PMU */ 7202 /* update all cpuctx for this PMU */
7225 for_each_possible_cpu(cpu) { 7203 get_online_cpus();
7204 for_each_online_cpu(cpu) {
7226 struct perf_cpu_context *cpuctx; 7205 struct perf_cpu_context *cpuctx;
7227 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); 7206 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
7228 cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer); 7207 cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
7229 7208
7230 if (hrtimer_active(&cpuctx->hrtimer)) 7209 cpu_function_call(cpu,
7231 hrtimer_forward_now(&cpuctx->hrtimer, cpuctx->hrtimer_interval); 7210 (remote_function_f)perf_mux_hrtimer_restart, cpuctx);
7232 } 7211 }
7212 put_online_cpus();
7213 mutex_unlock(&mux_interval_mutex);
7233 7214
7234 return count; 7215 return count;
7235} 7216}
@@ -7334,7 +7315,7 @@ skip_type:
7334 lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock); 7315 lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
7335 cpuctx->ctx.pmu = pmu; 7316 cpuctx->ctx.pmu = pmu;
7336 7317
7337 __perf_cpu_hrtimer_init(cpuctx, cpu); 7318 __perf_mux_hrtimer_init(cpuctx, cpu);
7338 7319
7339 cpuctx->unique_pmu = pmu; 7320 cpuctx->unique_pmu = pmu;
7340 } 7321 }
diff --git a/kernel/futex.c b/kernel/futex.c
index aacc706f85fc..ea6ca0bca525 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2064,11 +2064,8 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
2064 queue_me(q, hb); 2064 queue_me(q, hb);
2065 2065
2066 /* Arm the timer */ 2066 /* Arm the timer */
2067 if (timeout) { 2067 if (timeout)
2068 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); 2068 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
2069 if (!hrtimer_active(&timeout->timer))
2070 timeout->task = NULL;
2071 }
2072 2069
2073 /* 2070 /*
2074 * If we have been removed from the hash list, then another task 2071 * If we have been removed from the hash list, then another task
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 30ec5b46cd8c..36573e96a477 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1182,11 +1182,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
1182 set_current_state(state); 1182 set_current_state(state);
1183 1183
1184 /* Setup the timer, when timeout != NULL */ 1184 /* Setup the timer, when timeout != NULL */
1185 if (unlikely(timeout)) { 1185 if (unlikely(timeout))
1186 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); 1186 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
1187 if (!hrtimer_active(&timeout->timer))
1188 timeout->task = NULL;
1189 }
1190 1187
1191 ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk); 1188 ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);
1192 1189
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 32664347091a..013485fb2b06 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1375,9 +1375,9 @@ static void rcu_prepare_kthreads(int cpu)
 1375 * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs 1375 * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs
1376 * any flavor of RCU. 1376 * any flavor of RCU.
1377 */ 1377 */
1378int rcu_needs_cpu(unsigned long *delta_jiffies) 1378int rcu_needs_cpu(u64 basemono, u64 *nextevt)
1379{ 1379{
1380 *delta_jiffies = ULONG_MAX; 1380 *nextevt = KTIME_MAX;
1381 return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) 1381 return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)
1382 ? 0 : rcu_cpu_has_callbacks(NULL); 1382 ? 0 : rcu_cpu_has_callbacks(NULL);
1383} 1383}
@@ -1439,8 +1439,6 @@ module_param(rcu_idle_gp_delay, int, 0644);
1439static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY; 1439static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
1440module_param(rcu_idle_lazy_gp_delay, int, 0644); 1440module_param(rcu_idle_lazy_gp_delay, int, 0644);
1441 1441
1442extern int tick_nohz_active;
1443
1444/* 1442/*
1445 * Try to advance callbacks for all flavors of RCU on the current CPU, but 1443 * Try to advance callbacks for all flavors of RCU on the current CPU, but
1446 * only if it has been awhile since the last time we did so. Afterwards, 1444 * only if it has been awhile since the last time we did so. Afterwards,
@@ -1487,12 +1485,13 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
1487 * 1485 *
1488 * The caller must have disabled interrupts. 1486 * The caller must have disabled interrupts.
1489 */ 1487 */
1490int rcu_needs_cpu(unsigned long *dj) 1488int rcu_needs_cpu(u64 basemono, u64 *nextevt)
1491{ 1489{
1492 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); 1490 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
1491 unsigned long dj;
1493 1492
1494 if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) { 1493 if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) {
1495 *dj = ULONG_MAX; 1494 *nextevt = KTIME_MAX;
1496 return 0; 1495 return 0;
1497 } 1496 }
1498 1497
@@ -1501,7 +1500,7 @@ int rcu_needs_cpu(unsigned long *dj)
1501 1500
1502 /* If no callbacks, RCU doesn't need the CPU. */ 1501 /* If no callbacks, RCU doesn't need the CPU. */
1503 if (!rcu_cpu_has_callbacks(&rdtp->all_lazy)) { 1502 if (!rcu_cpu_has_callbacks(&rdtp->all_lazy)) {
1504 *dj = ULONG_MAX; 1503 *nextevt = KTIME_MAX;
1505 return 0; 1504 return 0;
1506 } 1505 }
1507 1506
@@ -1515,11 +1514,12 @@ int rcu_needs_cpu(unsigned long *dj)
1515 1514
1516 /* Request timer delay depending on laziness, and round. */ 1515 /* Request timer delay depending on laziness, and round. */
1517 if (!rdtp->all_lazy) { 1516 if (!rdtp->all_lazy) {
1518 *dj = round_up(rcu_idle_gp_delay + jiffies, 1517 dj = round_up(rcu_idle_gp_delay + jiffies,
1519 rcu_idle_gp_delay) - jiffies; 1518 rcu_idle_gp_delay) - jiffies;
1520 } else { 1519 } else {
1521 *dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies; 1520 dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies;
1522 } 1521 }
1522 *nextevt = basemono + dj * TICK_NSEC;
1523 return 0; 1523 return 0;
1524} 1524}
1525 1525
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f89ca9bcf42a..c9a707b59331 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -90,26 +90,6 @@
90#define CREATE_TRACE_POINTS 90#define CREATE_TRACE_POINTS
91#include <trace/events/sched.h> 91#include <trace/events/sched.h>
92 92
93void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
94{
95 unsigned long delta;
96 ktime_t soft, hard, now;
97
98 for (;;) {
99 if (hrtimer_active(period_timer))
100 break;
101
102 now = hrtimer_cb_get_time(period_timer);
103 hrtimer_forward(period_timer, now, period);
104
105 soft = hrtimer_get_softexpires(period_timer);
106 hard = hrtimer_get_expires(period_timer);
107 delta = ktime_to_ns(ktime_sub(hard, soft));
108 __hrtimer_start_range_ns(period_timer, soft, delta,
109 HRTIMER_MODE_ABS_PINNED, 0);
110 }
111}
112
113DEFINE_MUTEX(sched_domains_mutex); 93DEFINE_MUTEX(sched_domains_mutex);
114DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); 94DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
115 95
@@ -355,12 +335,11 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
355 335
356#ifdef CONFIG_SMP 336#ifdef CONFIG_SMP
357 337
358static int __hrtick_restart(struct rq *rq) 338static void __hrtick_restart(struct rq *rq)
359{ 339{
360 struct hrtimer *timer = &rq->hrtick_timer; 340 struct hrtimer *timer = &rq->hrtick_timer;
361 ktime_t time = hrtimer_get_softexpires(timer);
362 341
363 return __hrtimer_start_range_ns(timer, time, 0, HRTIMER_MODE_ABS_PINNED, 0); 342 hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
364} 343}
365 344
366/* 345/*
@@ -440,8 +419,8 @@ void hrtick_start(struct rq *rq, u64 delay)
440 * doesn't make sense. Rely on vruntime for fairness. 419 * doesn't make sense. Rely on vruntime for fairness.
441 */ 420 */
442 delay = max_t(u64, delay, 10000LL); 421 delay = max_t(u64, delay, 10000LL);
443 __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0, 422 hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay),
444 HRTIMER_MODE_REL_PINNED, 0); 423 HRTIMER_MODE_REL_PINNED);
445} 424}
446 425
447static inline void init_hrtick(void) 426static inline void init_hrtick(void)
@@ -639,13 +618,12 @@ void resched_cpu(int cpu)
639 * selecting an idle cpu will add more delays to the timers than intended 618 * selecting an idle cpu will add more delays to the timers than intended
640 * (as that cpu's timer base may not be uptodate wrt jiffies etc). 619 * (as that cpu's timer base may not be uptodate wrt jiffies etc).
641 */ 620 */
642int get_nohz_timer_target(int pinned) 621int get_nohz_timer_target(void)
643{ 622{
644 int cpu = smp_processor_id(); 623 int i, cpu = smp_processor_id();
645 int i;
646 struct sched_domain *sd; 624 struct sched_domain *sd;
647 625
648 if (pinned || !get_sysctl_timer_migration() || !idle_cpu(cpu)) 626 if (!idle_cpu(cpu))
649 return cpu; 627 return cpu;
650 628
651 rcu_read_lock(); 629 rcu_read_lock();
@@ -7126,8 +7104,6 @@ void __init sched_init_smp(void)
7126} 7104}
7127#endif /* CONFIG_SMP */ 7105#endif /* CONFIG_SMP */
7128 7106
7129const_debug unsigned int sysctl_timer_migration = 1;
7130
7131int in_sched_functions(unsigned long addr) 7107int in_sched_functions(unsigned long addr)
7132{ 7108{
7133 return in_lock_functions(addr) || 7109 return in_lock_functions(addr) ||
@@ -8163,10 +8139,8 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
8163 8139
8164 __refill_cfs_bandwidth_runtime(cfs_b); 8140 __refill_cfs_bandwidth_runtime(cfs_b);
8165 /* restart the period timer (if active) to handle new period expiry */ 8141 /* restart the period timer (if active) to handle new period expiry */
8166 if (runtime_enabled && cfs_b->timer_active) { 8142 if (runtime_enabled)
8167 /* force a reprogram */ 8143 start_cfs_bandwidth(cfs_b);
8168 __start_cfs_bandwidth(cfs_b, true);
8169 }
8170 raw_spin_unlock_irq(&cfs_b->lock); 8144 raw_spin_unlock_irq(&cfs_b->lock);
8171 8145
8172 for_each_online_cpu(i) { 8146 for_each_online_cpu(i) {
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 392e8fb94db3..eac20c557a55 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -503,8 +503,6 @@ static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted)
503 struct dl_rq *dl_rq = dl_rq_of_se(dl_se); 503 struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
504 struct rq *rq = rq_of_dl_rq(dl_rq); 504 struct rq *rq = rq_of_dl_rq(dl_rq);
505 ktime_t now, act; 505 ktime_t now, act;
506 ktime_t soft, hard;
507 unsigned long range;
508 s64 delta; 506 s64 delta;
509 507
510 if (boosted) 508 if (boosted)
@@ -527,15 +525,9 @@ static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted)
527 if (ktime_us_delta(act, now) < 0) 525 if (ktime_us_delta(act, now) < 0)
528 return 0; 526 return 0;
529 527
530 hrtimer_set_expires(&dl_se->dl_timer, act); 528 hrtimer_start(&dl_se->dl_timer, act, HRTIMER_MODE_ABS);
531 529
532 soft = hrtimer_get_softexpires(&dl_se->dl_timer); 530 return 1;
533 hard = hrtimer_get_expires(&dl_se->dl_timer);
534 range = ktime_to_ns(ktime_sub(hard, soft));
535 __hrtimer_start_range_ns(&dl_se->dl_timer, soft,
536 range, HRTIMER_MODE_ABS, 0);
537
538 return hrtimer_active(&dl_se->dl_timer);
539} 531}
540 532
541/* 533/*
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 704683cc9042..315c68e015d9 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -232,8 +232,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
232#endif 232#endif
233#endif 233#endif
234#ifdef CONFIG_CFS_BANDWIDTH 234#ifdef CONFIG_CFS_BANDWIDTH
235 SEQ_printf(m, " .%-30s: %d\n", "tg->cfs_bandwidth.timer_active",
236 cfs_rq->tg->cfs_bandwidth.timer_active);
237 SEQ_printf(m, " .%-30s: %d\n", "throttled", 235 SEQ_printf(m, " .%-30s: %d\n", "throttled",
238 cfs_rq->throttled); 236 cfs_rq->throttled);
239 SEQ_printf(m, " .%-30s: %d\n", "throttle_count", 237 SEQ_printf(m, " .%-30s: %d\n", "throttle_count",
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 433061d984ea..40a7fcbf491e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3504,16 +3504,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
3504 if (cfs_b->quota == RUNTIME_INF) 3504 if (cfs_b->quota == RUNTIME_INF)
3505 amount = min_amount; 3505 amount = min_amount;
3506 else { 3506 else {
3507 /* 3507 start_cfs_bandwidth(cfs_b);
3508 * If the bandwidth pool has become inactive, then at least one
3509 * period must have elapsed since the last consumption.
3510 * Refresh the global state and ensure bandwidth timer becomes
3511 * active.
3512 */
3513 if (!cfs_b->timer_active) {
3514 __refill_cfs_bandwidth_runtime(cfs_b);
3515 __start_cfs_bandwidth(cfs_b, false);
3516 }
3517 3508
3518 if (cfs_b->runtime > 0) { 3509 if (cfs_b->runtime > 0) {
3519 amount = min(cfs_b->runtime, min_amount); 3510 amount = min(cfs_b->runtime, min_amount);
@@ -3662,6 +3653,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
3662 struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); 3653 struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
3663 struct sched_entity *se; 3654 struct sched_entity *se;
3664 long task_delta, dequeue = 1; 3655 long task_delta, dequeue = 1;
3656 bool empty;
3665 3657
3666 se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))]; 3658 se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
3667 3659
@@ -3691,13 +3683,21 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
3691 cfs_rq->throttled = 1; 3683 cfs_rq->throttled = 1;
3692 cfs_rq->throttled_clock = rq_clock(rq); 3684 cfs_rq->throttled_clock = rq_clock(rq);
3693 raw_spin_lock(&cfs_b->lock); 3685 raw_spin_lock(&cfs_b->lock);
3686 empty = list_empty(&cfs_rq->throttled_list);
3687
3694 /* 3688 /*
3695 * Add to the _head_ of the list, so that an already-started 3689 * Add to the _head_ of the list, so that an already-started
3696 * distribute_cfs_runtime will not see us 3690 * distribute_cfs_runtime will not see us
3697 */ 3691 */
3698 list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); 3692 list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq);
3699 if (!cfs_b->timer_active) 3693
3700 __start_cfs_bandwidth(cfs_b, false); 3694 /*
3695 * If we're the first throttled task, make sure the bandwidth
3696 * timer is running.
3697 */
3698 if (empty)
3699 start_cfs_bandwidth(cfs_b);
3700
3701 raw_spin_unlock(&cfs_b->lock); 3701 raw_spin_unlock(&cfs_b->lock);
3702} 3702}
3703 3703
@@ -3812,13 +3812,6 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
3812 if (cfs_b->idle && !throttled) 3812 if (cfs_b->idle && !throttled)
3813 goto out_deactivate; 3813 goto out_deactivate;
3814 3814
3815 /*
3816 * if we have relooped after returning idle once, we need to update our
3817 * status as actually running, so that other cpus doing
3818 * __start_cfs_bandwidth will stop trying to cancel us.
3819 */
3820 cfs_b->timer_active = 1;
3821
3822 __refill_cfs_bandwidth_runtime(cfs_b); 3815 __refill_cfs_bandwidth_runtime(cfs_b);
3823 3816
3824 if (!throttled) { 3817 if (!throttled) {
@@ -3863,7 +3856,6 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
3863 return 0; 3856 return 0;
3864 3857
3865out_deactivate: 3858out_deactivate:
3866 cfs_b->timer_active = 0;
3867 return 1; 3859 return 1;
3868} 3860}
3869 3861
@@ -3878,7 +3870,7 @@ static const u64 cfs_bandwidth_slack_period = 5 * NSEC_PER_MSEC;
3878 * Are we near the end of the current quota period? 3870 * Are we near the end of the current quota period?
3879 * 3871 *
3880 * Requires cfs_b->lock for hrtimer_expires_remaining to be safe against the 3872 * Requires cfs_b->lock for hrtimer_expires_remaining to be safe against the
3881 * hrtimer base being cleared by __hrtimer_start_range_ns. In the case of 3873 * hrtimer base being cleared by hrtimer_start. In the case of
3882 * migrate_hrtimers, base is never cleared, so we are fine. 3874 * migrate_hrtimers, base is never cleared, so we are fine.
3883 */ 3875 */
3884static int runtime_refresh_within(struct cfs_bandwidth *cfs_b, u64 min_expire) 3876static int runtime_refresh_within(struct cfs_bandwidth *cfs_b, u64 min_expire)
@@ -3906,8 +3898,9 @@ static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b)
3906 if (runtime_refresh_within(cfs_b, min_left)) 3898 if (runtime_refresh_within(cfs_b, min_left))
3907 return; 3899 return;
3908 3900
3909 start_bandwidth_timer(&cfs_b->slack_timer, 3901 hrtimer_start(&cfs_b->slack_timer,
3910 ns_to_ktime(cfs_bandwidth_slack_period)); 3902 ns_to_ktime(cfs_bandwidth_slack_period),
3903 HRTIMER_MODE_REL);
3911} 3904}
3912 3905
3913/* we know any runtime found here is valid as update_curr() precedes return */ 3906/* we know any runtime found here is valid as update_curr() precedes return */
@@ -4027,6 +4020,7 @@ static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
4027{ 4020{
4028 struct cfs_bandwidth *cfs_b = 4021 struct cfs_bandwidth *cfs_b =
4029 container_of(timer, struct cfs_bandwidth, slack_timer); 4022 container_of(timer, struct cfs_bandwidth, slack_timer);
4023
4030 do_sched_cfs_slack_timer(cfs_b); 4024 do_sched_cfs_slack_timer(cfs_b);
4031 4025
4032 return HRTIMER_NORESTART; 4026 return HRTIMER_NORESTART;
@@ -4036,20 +4030,19 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
4036{ 4030{
4037 struct cfs_bandwidth *cfs_b = 4031 struct cfs_bandwidth *cfs_b =
4038 container_of(timer, struct cfs_bandwidth, period_timer); 4032 container_of(timer, struct cfs_bandwidth, period_timer);
4039 ktime_t now;
4040 int overrun; 4033 int overrun;
4041 int idle = 0; 4034 int idle = 0;
4042 4035
4043 raw_spin_lock(&cfs_b->lock); 4036 raw_spin_lock(&cfs_b->lock);
4044 for (;;) { 4037 for (;;) {
4045 now = hrtimer_cb_get_time(timer); 4038 overrun = hrtimer_forward_now(timer, cfs_b->period);
4046 overrun = hrtimer_forward(timer, now, cfs_b->period);
4047
4048 if (!overrun) 4039 if (!overrun)
4049 break; 4040 break;
4050 4041
4051 idle = do_sched_cfs_period_timer(cfs_b, overrun); 4042 idle = do_sched_cfs_period_timer(cfs_b, overrun);
4052 } 4043 }
4044 if (idle)
4045 cfs_b->period_active = 0;
4053 raw_spin_unlock(&cfs_b->lock); 4046 raw_spin_unlock(&cfs_b->lock);
4054 4047
4055 return idle ? HRTIMER_NORESTART : HRTIMER_RESTART; 4048 return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
@@ -4063,7 +4056,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
4063 cfs_b->period = ns_to_ktime(default_cfs_period()); 4056 cfs_b->period = ns_to_ktime(default_cfs_period());
4064 4057
4065 INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq); 4058 INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
4066 hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 4059 hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
4067 cfs_b->period_timer.function = sched_cfs_period_timer; 4060 cfs_b->period_timer.function = sched_cfs_period_timer;
4068 hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 4061 hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
4069 cfs_b->slack_timer.function = sched_cfs_slack_timer; 4062 cfs_b->slack_timer.function = sched_cfs_slack_timer;
@@ -4075,28 +4068,15 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
4075 INIT_LIST_HEAD(&cfs_rq->throttled_list); 4068 INIT_LIST_HEAD(&cfs_rq->throttled_list);
4076} 4069}
4077 4070
4078/* requires cfs_b->lock, may release to reprogram timer */ 4071void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
4079void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b, bool force)
4080{ 4072{
4081 /* 4073 lockdep_assert_held(&cfs_b->lock);
4082 * The timer may be active because we're trying to set a new bandwidth
4083 * period or because we're racing with the tear-down path
4084 * (timer_active==0 becomes visible before the hrtimer call-back
4085 * terminates). In either case we ensure that it's re-programmed
4086 */
4087 while (unlikely(hrtimer_active(&cfs_b->period_timer)) &&
4088 hrtimer_try_to_cancel(&cfs_b->period_timer) < 0) {
4089 /* bounce the lock to allow do_sched_cfs_period_timer to run */
4090 raw_spin_unlock(&cfs_b->lock);
4091 cpu_relax();
4092 raw_spin_lock(&cfs_b->lock);
4093 /* if someone else restarted the timer then we're done */
4094 if (!force && cfs_b->timer_active)
4095 return;
4096 }
4097 4074
4098 cfs_b->timer_active = 1; 4075 if (!cfs_b->period_active) {
4099 start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period); 4076 cfs_b->period_active = 1;
4077 hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
4078 hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED);
4079 }
4100} 4080}
4101 4081
4102static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) 4082static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
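
The rewritten start_cfs_bandwidth() replaces the cancel-and-relock dance with a single period_active flag: the first caller under cfs_b->lock forwards the period timer past now and arms it pinned-absolute, later callers see the flag and return immediately, and the callback clears the flag when a period ends idle. A condensed, hedged sketch of that arm-once idiom with illustrative names:

#include <linux/hrtimer.h>
#include <linux/spinlock.h>

/* Illustrative stand-in for struct cfs_bandwidth. */
struct example_bw {
	raw_spinlock_t	lock;
	ktime_t		period;
	int		period_active;
	struct hrtimer	period_timer;
};

static void example_start_bandwidth(struct example_bw *b)
{
	lockdep_assert_held(&b->lock);

	if (b->period_active)
		return;			/* already armed: nothing to cancel or bounce */

	b->period_active = 1;
	/* Push the expiry to the next period boundary, then arm it. */
	hrtimer_forward_now(&b->period_timer, b->period);
	hrtimer_start_expires(&b->period_timer, HRTIMER_MODE_ABS_PINNED);
}

Because arming and disarming both happen under the same lock, the old timer_active handshake and the lock-bouncing loop are no longer needed.
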
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 560d2fa623c3..7d7093c51f8d 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -18,19 +18,22 @@ static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
18{ 18{
19 struct rt_bandwidth *rt_b = 19 struct rt_bandwidth *rt_b =
20 container_of(timer, struct rt_bandwidth, rt_period_timer); 20 container_of(timer, struct rt_bandwidth, rt_period_timer);
21 ktime_t now;
22 int overrun;
23 int idle = 0; 21 int idle = 0;
22 int overrun;
24 23
24 raw_spin_lock(&rt_b->rt_runtime_lock);
25 for (;;) { 25 for (;;) {
26 now = hrtimer_cb_get_time(timer); 26 overrun = hrtimer_forward_now(timer, rt_b->rt_period);
27 overrun = hrtimer_forward(timer, now, rt_b->rt_period);
28
29 if (!overrun) 27 if (!overrun)
30 break; 28 break;
31 29
30 raw_spin_unlock(&rt_b->rt_runtime_lock);
32 idle = do_sched_rt_period_timer(rt_b, overrun); 31 idle = do_sched_rt_period_timer(rt_b, overrun);
32 raw_spin_lock(&rt_b->rt_runtime_lock);
33 } 33 }
34 if (idle)
35 rt_b->rt_period_active = 0;
36 raw_spin_unlock(&rt_b->rt_runtime_lock);
34 37
35 return idle ? HRTIMER_NORESTART : HRTIMER_RESTART; 38 return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
36} 39}
@@ -52,11 +55,12 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
52 if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) 55 if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
53 return; 56 return;
54 57
55 if (hrtimer_active(&rt_b->rt_period_timer))
56 return;
57
58 raw_spin_lock(&rt_b->rt_runtime_lock); 58 raw_spin_lock(&rt_b->rt_runtime_lock);
59 start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period); 59 if (!rt_b->rt_period_active) {
60 rt_b->rt_period_active = 1;
61 hrtimer_forward_now(&rt_b->rt_period_timer, rt_b->rt_period);
62 hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
63 }
60 raw_spin_unlock(&rt_b->rt_runtime_lock); 64 raw_spin_unlock(&rt_b->rt_runtime_lock);
61} 65}
62 66
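
The RT period timer callback now takes rt_runtime_lock itself, forwards the timer inside the loop, drops the lock around the per-period accounting, and clears rt_period_active before letting the hrtimer die when a period ends idle. A hedged sketch of that callback shape, continuing the example_bw structure from the CFS sketch above (example_do_period() is a hypothetical stand-in for the per-period work):

static enum hrtimer_restart example_period_timer(struct hrtimer *timer)
{
	struct example_bw *b = container_of(timer, struct example_bw, period_timer);
	int idle = 0;
	u64 overrun;

	raw_spin_lock(&b->lock);
	for (;;) {
		overrun = hrtimer_forward_now(timer, b->period);
		if (!overrun)
			break;

		/* Drop the lock so the period work can take other locks safely. */
		raw_spin_unlock(&b->lock);
		idle = example_do_period(b, overrun);
		raw_spin_lock(&b->lock);
	}
	if (idle)
		b->period_active = 0;	/* the next example_start_bandwidth() re-arms */
	raw_spin_unlock(&b->lock);

	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
}
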
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d62b2882232b..aea7c1f393cb 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -137,6 +137,7 @@ struct rt_bandwidth {
137 ktime_t rt_period; 137 ktime_t rt_period;
138 u64 rt_runtime; 138 u64 rt_runtime;
139 struct hrtimer rt_period_timer; 139 struct hrtimer rt_period_timer;
140 unsigned int rt_period_active;
140}; 141};
141 142
142void __dl_clear_params(struct task_struct *p); 143void __dl_clear_params(struct task_struct *p);
@@ -221,7 +222,7 @@ struct cfs_bandwidth {
221 s64 hierarchical_quota; 222 s64 hierarchical_quota;
222 u64 runtime_expires; 223 u64 runtime_expires;
223 224
224 int idle, timer_active; 225 int idle, period_active;
225 struct hrtimer period_timer, slack_timer; 226 struct hrtimer period_timer, slack_timer;
226 struct list_head throttled_cfs_rq; 227 struct list_head throttled_cfs_rq;
227 228
@@ -312,7 +313,7 @@ extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
312extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); 313extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
313 314
314extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b); 315extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
315extern void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b, bool force); 316extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
316extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq); 317extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
317 318
318extern void free_rt_sched_group(struct task_group *tg); 319extern void free_rt_sched_group(struct task_group *tg);
@@ -1409,8 +1410,6 @@ static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
1409static inline void sched_avg_update(struct rq *rq) { } 1410static inline void sched_avg_update(struct rq *rq) { }
1410#endif 1411#endif
1411 1412
1412extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
1413
1414/* 1413/*
1415 * __task_rq_lock - lock the rq @p resides on. 1414 * __task_rq_lock - lock the rq @p resides on.
1416 */ 1415 */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 2082b1a88fb9..b13e9d2de302 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -349,15 +349,6 @@ static struct ctl_table kern_table[] = {
349 .mode = 0644, 349 .mode = 0644,
350 .proc_handler = proc_dointvec, 350 .proc_handler = proc_dointvec,
351 }, 351 },
352 {
353 .procname = "timer_migration",
354 .data = &sysctl_timer_migration,
355 .maxlen = sizeof(unsigned int),
356 .mode = 0644,
357 .proc_handler = proc_dointvec_minmax,
358 .extra1 = &zero,
359 .extra2 = &one,
360 },
361#endif /* CONFIG_SMP */ 352#endif /* CONFIG_SMP */
362#ifdef CONFIG_NUMA_BALANCING 353#ifdef CONFIG_NUMA_BALANCING
363 { 354 {
@@ -1132,6 +1123,15 @@ static struct ctl_table kern_table[] = {
1132 .extra1 = &zero, 1123 .extra1 = &zero,
1133 .extra2 = &one, 1124 .extra2 = &one,
1134 }, 1125 },
1126#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
1127 {
1128 .procname = "timer_migration",
1129 .data = &sysctl_timer_migration,
1130 .maxlen = sizeof(unsigned int),
1131 .mode = 0644,
1132 .proc_handler = timer_migration_handler,
1133 },
1134#endif
1135 { } 1135 { }
1136}; 1136};
1137 1137
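
The knob does not just move below CONFIG_SMP && CONFIG_NO_HZ_COMMON; it also switches from plain proc_dointvec_minmax to a dedicated timer_migration_handler, whose body lives in the timer core and is not part of this hunk. A hedged sketch of what such a handler typically looks like, with illustrative names: validate the write with the stock helper, then propagate the new value to wherever it is cached.

#include <linux/sysctl.h>

/* Illustrative only: the real timer_migration_handler is defined in
 * kernel/time/timer.c and may differ in detail. */
static int example_migration_handler(struct ctl_table *table, int write,
				     void __user *buffer, size_t *lenp,
				     loff_t *ppos)
{
	int ret;

	/* Range-check and store the 0/1 value via the generic helper first. */
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	/* On a successful write, push the setting to the per-cpu timer bases. */
	example_update_migration_cache();	/* hypothetical helper */
	return 0;
}
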
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 01f0312419b3..ffc4cc3dcd47 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -13,19 +13,4 @@ obj-$(CONFIG_TIMER_STATS) += timer_stats.o
13obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o 13obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
14obj-$(CONFIG_TEST_UDELAY) += test_udelay.o 14obj-$(CONFIG_TEST_UDELAY) += test_udelay.o
15 15
16$(obj)/time.o: $(obj)/timeconst.h 16$(obj)/time.o: $(objtree)/include/config/
17
18quiet_cmd_hzfile = HZFILE $@
19 cmd_hzfile = echo "hz=$(CONFIG_HZ)" > $@
20
21targets += hz.bc
22$(obj)/hz.bc: $(objtree)/include/config/hz.h FORCE
23 $(call if_changed,hzfile)
24
25quiet_cmd_bc = BC $@
26 cmd_bc = bc -q $(filter-out FORCE,$^) > $@
27
28targets += timeconst.h
29$(obj)/timeconst.h: $(obj)/hz.bc $(src)/timeconst.bc FORCE
30 $(call if_changed,bc)
31
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 1b001ed1edb9..7fbba635a549 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -317,19 +317,16 @@ EXPORT_SYMBOL_GPL(alarm_init);
317 * @alarm: ptr to alarm to set 317 * @alarm: ptr to alarm to set
318 * @start: time to run the alarm 318 * @start: time to run the alarm
319 */ 319 */
320int alarm_start(struct alarm *alarm, ktime_t start) 320void alarm_start(struct alarm *alarm, ktime_t start)
321{ 321{
322 struct alarm_base *base = &alarm_bases[alarm->type]; 322 struct alarm_base *base = &alarm_bases[alarm->type];
323 unsigned long flags; 323 unsigned long flags;
324 int ret;
325 324
326 spin_lock_irqsave(&base->lock, flags); 325 spin_lock_irqsave(&base->lock, flags);
327 alarm->node.expires = start; 326 alarm->node.expires = start;
328 alarmtimer_enqueue(base, alarm); 327 alarmtimer_enqueue(base, alarm);
329 ret = hrtimer_start(&alarm->timer, alarm->node.expires, 328 hrtimer_start(&alarm->timer, alarm->node.expires, HRTIMER_MODE_ABS);
330 HRTIMER_MODE_ABS);
331 spin_unlock_irqrestore(&base->lock, flags); 329 spin_unlock_irqrestore(&base->lock, flags);
332 return ret;
333} 330}
334EXPORT_SYMBOL_GPL(alarm_start); 331EXPORT_SYMBOL_GPL(alarm_start);
335 332
@@ -338,12 +335,12 @@ EXPORT_SYMBOL_GPL(alarm_start);
338 * @alarm: ptr to alarm to set 335 * @alarm: ptr to alarm to set
339 * @start: time relative to now to run the alarm 336 * @start: time relative to now to run the alarm
340 */ 337 */
341int alarm_start_relative(struct alarm *alarm, ktime_t start) 338void alarm_start_relative(struct alarm *alarm, ktime_t start)
342{ 339{
343 struct alarm_base *base = &alarm_bases[alarm->type]; 340 struct alarm_base *base = &alarm_bases[alarm->type];
344 341
345 start = ktime_add(start, base->gettime()); 342 start = ktime_add(start, base->gettime());
346 return alarm_start(alarm, start); 343 alarm_start(alarm, start);
347} 344}
348EXPORT_SYMBOL_GPL(alarm_start_relative); 345EXPORT_SYMBOL_GPL(alarm_start_relative);
349 346
@@ -495,12 +492,12 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm,
495 */ 492 */
496static int alarm_clock_getres(const clockid_t which_clock, struct timespec *tp) 493static int alarm_clock_getres(const clockid_t which_clock, struct timespec *tp)
497{ 494{
498 clockid_t baseid = alarm_bases[clock2alarm(which_clock)].base_clockid;
499
500 if (!alarmtimer_get_rtcdev()) 495 if (!alarmtimer_get_rtcdev())
501 return -EINVAL; 496 return -EINVAL;
502 497
503 return hrtimer_get_res(baseid, tp); 498 tp->tv_sec = 0;
499 tp->tv_nsec = hrtimer_resolution;
500 return 0;
504} 501}
505 502
506/** 503/**
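
With hrtimer_start() now void, alarm_start() and alarm_start_relative() lose their return value too, and alarm_clock_getres() reports the global hrtimer_resolution instead of querying the removed per-base resolution. A short hedged usage sketch (names are illustrative):

#include <linux/alarmtimer.h>
#include <linux/ktime.h>

static struct alarm example_alarm;	/* hypothetical */

static enum alarmtimer_restart example_alarm_fired(struct alarm *a, ktime_t now)
{
	/* fires even across suspend, backed by the RTC */
	return ALARMTIMER_NORESTART;
}

static void example_arm_alarm_in(u64 seconds)
{
	alarm_init(&example_alarm, ALARM_REALTIME, example_alarm_fired);
	/* After this change both start helpers return void, so there is
	 * no "was it already active" result to check. */
	alarm_start_relative(&example_alarm, ktime_set(seconds, 0));
}
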
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 637a09461c1d..08ccc3da3ca0 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -94,8 +94,8 @@ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
94} 94}
95EXPORT_SYMBOL_GPL(clockevent_delta2ns); 95EXPORT_SYMBOL_GPL(clockevent_delta2ns);
96 96
97static int __clockevents_set_state(struct clock_event_device *dev, 97static int __clockevents_switch_state(struct clock_event_device *dev,
98 enum clock_event_state state) 98 enum clock_event_state state)
99{ 99{
100 /* Transition with legacy set_mode() callback */ 100 /* Transition with legacy set_mode() callback */
101 if (dev->set_mode) { 101 if (dev->set_mode) {
@@ -134,32 +134,44 @@ static int __clockevents_set_state(struct clock_event_device *dev,
134 return -ENOSYS; 134 return -ENOSYS;
135 return dev->set_state_oneshot(dev); 135 return dev->set_state_oneshot(dev);
136 136
137 case CLOCK_EVT_STATE_ONESHOT_STOPPED:
138 /* Core internal bug */
139 if (WARN_ONCE(!clockevent_state_oneshot(dev),
140 "Current state: %d\n",
141 clockevent_get_state(dev)))
142 return -EINVAL;
143
144 if (dev->set_state_oneshot_stopped)
145 return dev->set_state_oneshot_stopped(dev);
146 else
147 return -ENOSYS;
148
137 default: 149 default:
138 return -ENOSYS; 150 return -ENOSYS;
139 } 151 }
140} 152}
141 153
142/** 154/**
143 * clockevents_set_state - set the operating state of a clock event device 155 * clockevents_switch_state - set the operating state of a clock event device
144 * @dev: device to modify 156 * @dev: device to modify
145 * @state: new state 157 * @state: new state
146 * 158 *
147 * Must be called with interrupts disabled ! 159 * Must be called with interrupts disabled !
148 */ 160 */
149void clockevents_set_state(struct clock_event_device *dev, 161void clockevents_switch_state(struct clock_event_device *dev,
150 enum clock_event_state state) 162 enum clock_event_state state)
151{ 163{
152 if (dev->state != state) { 164 if (clockevent_get_state(dev) != state) {
153 if (__clockevents_set_state(dev, state)) 165 if (__clockevents_switch_state(dev, state))
154 return; 166 return;
155 167
156 dev->state = state; 168 clockevent_set_state(dev, state);
157 169
158 /* 170 /*
159 * A nsec2cyc multiplicator of 0 is invalid and we'd crash 171 * A nsec2cyc multiplicator of 0 is invalid and we'd crash
160 * on it, so fix it up and emit a warning: 172 * on it, so fix it up and emit a warning:
161 */ 173 */
162 if (state == CLOCK_EVT_STATE_ONESHOT) { 174 if (clockevent_state_oneshot(dev)) {
163 if (unlikely(!dev->mult)) { 175 if (unlikely(!dev->mult)) {
164 dev->mult = 1; 176 dev->mult = 1;
165 WARN_ON(1); 177 WARN_ON(1);
@@ -174,7 +186,7 @@ void clockevents_set_state(struct clock_event_device *dev,
174 */ 186 */
175void clockevents_shutdown(struct clock_event_device *dev) 187void clockevents_shutdown(struct clock_event_device *dev)
176{ 188{
177 clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN); 189 clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
178 dev->next_event.tv64 = KTIME_MAX; 190 dev->next_event.tv64 = KTIME_MAX;
179} 191}
180 192
@@ -248,7 +260,7 @@ static int clockevents_program_min_delta(struct clock_event_device *dev)
248 delta = dev->min_delta_ns; 260 delta = dev->min_delta_ns;
249 dev->next_event = ktime_add_ns(ktime_get(), delta); 261 dev->next_event = ktime_add_ns(ktime_get(), delta);
250 262
251 if (dev->state == CLOCK_EVT_STATE_SHUTDOWN) 263 if (clockevent_state_shutdown(dev))
252 return 0; 264 return 0;
253 265
254 dev->retries++; 266 dev->retries++;
@@ -285,7 +297,7 @@ static int clockevents_program_min_delta(struct clock_event_device *dev)
285 delta = dev->min_delta_ns; 297 delta = dev->min_delta_ns;
286 dev->next_event = ktime_add_ns(ktime_get(), delta); 298 dev->next_event = ktime_add_ns(ktime_get(), delta);
287 299
288 if (dev->state == CLOCK_EVT_STATE_SHUTDOWN) 300 if (clockevent_state_shutdown(dev))
289 return 0; 301 return 0;
290 302
291 dev->retries++; 303 dev->retries++;
@@ -317,9 +329,13 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
317 329
318 dev->next_event = expires; 330 dev->next_event = expires;
319 331
320 if (dev->state == CLOCK_EVT_STATE_SHUTDOWN) 332 if (clockevent_state_shutdown(dev))
321 return 0; 333 return 0;
322 334
335 /* We must be in ONESHOT state here */
336 WARN_ONCE(!clockevent_state_oneshot(dev), "Current state: %d\n",
337 clockevent_get_state(dev));
338
323 /* Shortcut for clockevent devices that can deal with ktime. */ 339 /* Shortcut for clockevent devices that can deal with ktime. */
324 if (dev->features & CLOCK_EVT_FEAT_KTIME) 340 if (dev->features & CLOCK_EVT_FEAT_KTIME)
325 return dev->set_next_ktime(expires, dev); 341 return dev->set_next_ktime(expires, dev);
@@ -362,7 +378,7 @@ static int clockevents_replace(struct clock_event_device *ced)
362 struct clock_event_device *dev, *newdev = NULL; 378 struct clock_event_device *dev, *newdev = NULL;
363 379
364 list_for_each_entry(dev, &clockevent_devices, list) { 380 list_for_each_entry(dev, &clockevent_devices, list) {
365 if (dev == ced || dev->state != CLOCK_EVT_STATE_DETACHED) 381 if (dev == ced || !clockevent_state_detached(dev))
366 continue; 382 continue;
367 383
368 if (!tick_check_replacement(newdev, dev)) 384 if (!tick_check_replacement(newdev, dev))
@@ -388,7 +404,7 @@ static int clockevents_replace(struct clock_event_device *ced)
388static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu) 404static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu)
389{ 405{
390 /* Fast track. Device is unused */ 406 /* Fast track. Device is unused */
391 if (ced->state == CLOCK_EVT_STATE_DETACHED) { 407 if (clockevent_state_detached(ced)) {
392 list_del_init(&ced->list); 408 list_del_init(&ced->list);
393 return 0; 409 return 0;
394 } 410 }
@@ -445,7 +461,8 @@ static int clockevents_sanity_check(struct clock_event_device *dev)
445 if (dev->set_mode) { 461 if (dev->set_mode) {
446 /* We shouldn't be supporting new modes now */ 462 /* We shouldn't be supporting new modes now */
447 WARN_ON(dev->set_state_periodic || dev->set_state_oneshot || 463 WARN_ON(dev->set_state_periodic || dev->set_state_oneshot ||
448 dev->set_state_shutdown || dev->tick_resume); 464 dev->set_state_shutdown || dev->tick_resume ||
465 dev->set_state_oneshot_stopped);
449 466
450 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); 467 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
451 return 0; 468 return 0;
@@ -480,7 +497,7 @@ void clockevents_register_device(struct clock_event_device *dev)
480 BUG_ON(clockevents_sanity_check(dev)); 497 BUG_ON(clockevents_sanity_check(dev));
481 498
482 /* Initialize state to DETACHED */ 499 /* Initialize state to DETACHED */
483 dev->state = CLOCK_EVT_STATE_DETACHED; 500 clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED);
484 501
485 if (!dev->cpumask) { 502 if (!dev->cpumask) {
486 WARN_ON(num_possible_cpus() > 1); 503 WARN_ON(num_possible_cpus() > 1);
@@ -545,11 +562,11 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq)
545{ 562{
546 clockevents_config(dev, freq); 563 clockevents_config(dev, freq);
547 564
548 if (dev->state == CLOCK_EVT_STATE_ONESHOT) 565 if (clockevent_state_oneshot(dev))
549 return clockevents_program_event(dev, dev->next_event, false); 566 return clockevents_program_event(dev, dev->next_event, false);
550 567
551 if (dev->state == CLOCK_EVT_STATE_PERIODIC) 568 if (clockevent_state_periodic(dev))
552 return __clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC); 569 return __clockevents_switch_state(dev, CLOCK_EVT_STATE_PERIODIC);
553 570
554 return 0; 571 return 0;
555} 572}
@@ -603,13 +620,13 @@ void clockevents_exchange_device(struct clock_event_device *old,
603 */ 620 */
604 if (old) { 621 if (old) {
605 module_put(old->owner); 622 module_put(old->owner);
606 clockevents_set_state(old, CLOCK_EVT_STATE_DETACHED); 623 clockevents_switch_state(old, CLOCK_EVT_STATE_DETACHED);
607 list_del(&old->list); 624 list_del(&old->list);
608 list_add(&old->list, &clockevents_released); 625 list_add(&old->list, &clockevents_released);
609 } 626 }
610 627
611 if (new) { 628 if (new) {
612 BUG_ON(new->state != CLOCK_EVT_STATE_DETACHED); 629 BUG_ON(!clockevent_state_detached(new));
613 clockevents_shutdown(new); 630 clockevents_shutdown(new);
614 } 631 }
615} 632}
@@ -622,7 +639,7 @@ void clockevents_suspend(void)
622 struct clock_event_device *dev; 639 struct clock_event_device *dev;
623 640
624 list_for_each_entry_reverse(dev, &clockevent_devices, list) 641 list_for_each_entry_reverse(dev, &clockevent_devices, list)
625 if (dev->suspend) 642 if (dev->suspend && !clockevent_state_detached(dev))
626 dev->suspend(dev); 643 dev->suspend(dev);
627} 644}
628 645
@@ -634,7 +651,7 @@ void clockevents_resume(void)
634 struct clock_event_device *dev; 651 struct clock_event_device *dev;
635 652
636 list_for_each_entry(dev, &clockevent_devices, list) 653 list_for_each_entry(dev, &clockevent_devices, list)
637 if (dev->resume) 654 if (dev->resume && !clockevent_state_detached(dev))
638 dev->resume(dev); 655 dev->resume(dev);
639} 656}
640 657
@@ -665,7 +682,7 @@ void tick_cleanup_dead_cpu(int cpu)
665 if (cpumask_test_cpu(cpu, dev->cpumask) && 682 if (cpumask_test_cpu(cpu, dev->cpumask) &&
666 cpumask_weight(dev->cpumask) == 1 && 683 cpumask_weight(dev->cpumask) == 1 &&
667 !tick_is_broadcast_device(dev)) { 684 !tick_is_broadcast_device(dev)) {
668 BUG_ON(dev->state != CLOCK_EVT_STATE_DETACHED); 685 BUG_ON(!clockevent_state_detached(dev));
669 list_del(&dev->list); 686 list_del(&dev->list);
670 } 687 }
671 } 688 }
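
Throughout this file the raw dev->state comparisons give way to the clockevent_state_*() accessors and clockevents_switch_state(), and the new ONESHOT_STOPPED state lets the core quiesce a oneshot device that has nothing pending. Drivers opt in by providing set_state_oneshot_stopped(). A hedged driver-side sketch with made-up hardware hooks:

#include <linux/clockchips.h>

static int example_evt_shutdown(struct clock_event_device *evt)
{
	/* mask the timer interrupt and stop the counter */
	return 0;
}

static int example_evt_set_oneshot(struct clock_event_device *evt)
{
	/* switch the hardware into single-shot compare mode */
	return 0;
}

static int example_evt_oneshot_stopped(struct clock_event_device *evt)
{
	/* nothing queued: power the comparator down until the next program_event */
	return 0;
}

static struct clock_event_device example_evt = {
	.name				= "example-timer",
	.features			= CLOCK_EVT_FEAT_ONESHOT,
	.set_state_shutdown		= example_evt_shutdown,
	.set_state_oneshot		= example_evt_set_oneshot,
	.set_state_oneshot_stopped	= example_evt_oneshot_stopped,
	/* .set_next_event, .cpumask, .rating etc. omitted for brevity */
};
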
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 15facb1b9c60..841b72f720e8 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -23,6 +23,8 @@
23 * o Allow clocksource drivers to be unregistered 23 * o Allow clocksource drivers to be unregistered
24 */ 24 */
25 25
26#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
27
26#include <linux/device.h> 28#include <linux/device.h>
27#include <linux/clocksource.h> 29#include <linux/clocksource.h>
28#include <linux/init.h> 30#include <linux/init.h>
@@ -216,10 +218,11 @@ static void clocksource_watchdog(unsigned long data)
216 218
217 /* Check the deviation from the watchdog clocksource. */ 219 /* Check the deviation from the watchdog clocksource. */
218 if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) { 220 if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
219 pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name); 221 pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n",
220 pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n", 222 cs->name);
223 pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
221 watchdog->name, wdnow, wdlast, watchdog->mask); 224 watchdog->name, wdnow, wdlast, watchdog->mask);
222 pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n", 225 pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
223 cs->name, csnow, cslast, cs->mask); 226 cs->name, csnow, cslast, cs->mask);
224 __clocksource_unstable(cs); 227 __clocksource_unstable(cs);
225 continue; 228 continue;
@@ -567,9 +570,8 @@ static void __clocksource_select(bool skipcur)
567 */ 570 */
568 if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) { 571 if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
569 /* Override clocksource cannot be used. */ 572 /* Override clocksource cannot be used. */
570 printk(KERN_WARNING "Override clocksource %s is not " 573 pr_warn("Override clocksource %s is not HRT compatible - cannot switch while in HRT/NOHZ mode\n",
571 "HRT compatible. Cannot switch while in " 574 cs->name);
572 "HRT/NOHZ mode\n", cs->name);
573 override_name[0] = 0; 575 override_name[0] = 0;
574 } else 576 } else
575 /* Override clocksource can be used. */ 577 /* Override clocksource can be used. */
@@ -708,8 +710,8 @@ void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq
708 710
709 clocksource_update_max_deferment(cs); 711 clocksource_update_max_deferment(cs);
710 712
711 pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n", 713 pr_info("%s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
712 cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns); 714 cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
713} 715}
714EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale); 716EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
715 717
@@ -1008,12 +1010,10 @@ __setup("clocksource=", boot_override_clocksource);
1008static int __init boot_override_clock(char* str) 1010static int __init boot_override_clock(char* str)
1009{ 1011{
1010 if (!strcmp(str, "pmtmr")) { 1012 if (!strcmp(str, "pmtmr")) {
1011 printk("Warning: clock=pmtmr is deprecated. " 1013 pr_warn("clock=pmtmr is deprecated - use clocksource=acpi_pm\n");
1012 "Use clocksource=acpi_pm.\n");
1013 return boot_override_clocksource("acpi_pm"); 1014 return boot_override_clocksource("acpi_pm");
1014 } 1015 }
1015 printk("Warning! clock= boot option is deprecated. " 1016 pr_warn("clock= boot option is deprecated - use clocksource=xyz\n");
1016 "Use clocksource=xyz\n");
1017 return boot_override_clocksource(str); 1017 return boot_override_clocksource(str);
1018} 1018}
1019 1019
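
The printk conversions rely on the pr_fmt() convention: defining it before the first printk include makes every pr_warn()/pr_info() in the file carry the module-name prefix automatically, which is why the explicit "clocksource %s:" text could be dropped from the messages. A tiny sketch of the mechanism:

/* Must come before any header that pulls in <linux/printk.h>. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/printk.h>

static void example_report(const char *name)
{
	/* Emits e.g. "clocksource: Override clocksource foo is not HRT compatible ..." */
	pr_warn("Override clocksource %s is not HRT compatible - cannot switch while in HRT/NOHZ mode\n",
		name);
}
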
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 93ef7190bdea..5c7ae4b641c4 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -66,33 +66,29 @@
66 */ 66 */
67DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = 67DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
68{ 68{
69
70 .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock), 69 .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
70 .seq = SEQCNT_ZERO(hrtimer_bases.seq),
71 .clock_base = 71 .clock_base =
72 { 72 {
73 { 73 {
74 .index = HRTIMER_BASE_MONOTONIC, 74 .index = HRTIMER_BASE_MONOTONIC,
75 .clockid = CLOCK_MONOTONIC, 75 .clockid = CLOCK_MONOTONIC,
76 .get_time = &ktime_get, 76 .get_time = &ktime_get,
77 .resolution = KTIME_LOW_RES,
78 }, 77 },
79 { 78 {
80 .index = HRTIMER_BASE_REALTIME, 79 .index = HRTIMER_BASE_REALTIME,
81 .clockid = CLOCK_REALTIME, 80 .clockid = CLOCK_REALTIME,
82 .get_time = &ktime_get_real, 81 .get_time = &ktime_get_real,
83 .resolution = KTIME_LOW_RES,
84 }, 82 },
85 { 83 {
86 .index = HRTIMER_BASE_BOOTTIME, 84 .index = HRTIMER_BASE_BOOTTIME,
87 .clockid = CLOCK_BOOTTIME, 85 .clockid = CLOCK_BOOTTIME,
88 .get_time = &ktime_get_boottime, 86 .get_time = &ktime_get_boottime,
89 .resolution = KTIME_LOW_RES,
90 }, 87 },
91 { 88 {
92 .index = HRTIMER_BASE_TAI, 89 .index = HRTIMER_BASE_TAI,
93 .clockid = CLOCK_TAI, 90 .clockid = CLOCK_TAI,
94 .get_time = &ktime_get_clocktai, 91 .get_time = &ktime_get_clocktai,
95 .resolution = KTIME_LOW_RES,
96 }, 92 },
97 } 93 }
98}; 94};
@@ -109,27 +105,6 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id)
109 return hrtimer_clock_to_base_table[clock_id]; 105 return hrtimer_clock_to_base_table[clock_id];
110} 106}
111 107
112
113/*
114 * Get the coarse grained time at the softirq based on xtime and
115 * wall_to_monotonic.
116 */
117static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
118{
119 ktime_t xtim, mono, boot, tai;
120 ktime_t off_real, off_boot, off_tai;
121
122 mono = ktime_get_update_offsets_tick(&off_real, &off_boot, &off_tai);
123 boot = ktime_add(mono, off_boot);
124 xtim = ktime_add(mono, off_real);
125 tai = ktime_add(mono, off_tai);
126
127 base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
128 base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
129 base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
130 base->clock_base[HRTIMER_BASE_TAI].softirq_time = tai;
131}
132
133/* 108/*
134 * Functions and macros which are different for UP/SMP systems are kept in a 109 * Functions and macros which are different for UP/SMP systems are kept in a
135 * single place 110 * single place
@@ -137,6 +112,18 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
137#ifdef CONFIG_SMP 112#ifdef CONFIG_SMP
138 113
139/* 114/*
115 * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base()
116 * such that hrtimer_callback_running() can unconditionally dereference
117 * timer->base->cpu_base
118 */
119static struct hrtimer_cpu_base migration_cpu_base = {
120 .seq = SEQCNT_ZERO(migration_cpu_base),
121 .clock_base = { { .cpu_base = &migration_cpu_base, }, },
122};
123
124#define migration_base migration_cpu_base.clock_base[0]
125
126/*
140 * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock 127 * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
141 * means that all timers which are tied to this base via timer->base are 128 * means that all timers which are tied to this base via timer->base are
142 * locked, and the base itself is locked too. 129 * locked, and the base itself is locked too.
@@ -145,8 +132,8 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
145 * be found on the lists/queues. 132 * be found on the lists/queues.
146 * 133 *
147 * When the timer's base is locked, and the timer removed from list, it is 134 * When the timer's base is locked, and the timer removed from list, it is
148 * possible to set timer->base = NULL and drop the lock: the timer remains 135 * possible to set timer->base = &migration_base and drop the lock: the timer
149 * locked. 136 * remains locked.
150 */ 137 */
151static 138static
152struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, 139struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
@@ -156,7 +143,7 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
156 143
157 for (;;) { 144 for (;;) {
158 base = timer->base; 145 base = timer->base;
159 if (likely(base != NULL)) { 146 if (likely(base != &migration_base)) {
160 raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); 147 raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
161 if (likely(base == timer->base)) 148 if (likely(base == timer->base))
162 return base; 149 return base;
@@ -190,6 +177,24 @@ hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
190#endif 177#endif
191} 178}
192 179
180#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
181static inline
182struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
183 int pinned)
184{
185 if (pinned || !base->migration_enabled)
186 return this_cpu_ptr(&hrtimer_bases);
187 return &per_cpu(hrtimer_bases, get_nohz_timer_target());
188}
189#else
190static inline
191struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
192 int pinned)
193{
194 return this_cpu_ptr(&hrtimer_bases);
195}
196#endif
197
193/* 198/*
194 * Switch the timer base to the current CPU when possible. 199 * Switch the timer base to the current CPU when possible.
195 */ 200 */
@@ -197,14 +202,13 @@ static inline struct hrtimer_clock_base *
197switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, 202switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
198 int pinned) 203 int pinned)
199{ 204{
205 struct hrtimer_cpu_base *new_cpu_base, *this_base;
200 struct hrtimer_clock_base *new_base; 206 struct hrtimer_clock_base *new_base;
201 struct hrtimer_cpu_base *new_cpu_base;
202 int this_cpu = smp_processor_id();
203 int cpu = get_nohz_timer_target(pinned);
204 int basenum = base->index; 207 int basenum = base->index;
205 208
209 this_base = this_cpu_ptr(&hrtimer_bases);
210 new_cpu_base = get_target_base(this_base, pinned);
206again: 211again:
207 new_cpu_base = &per_cpu(hrtimer_bases, cpu);
208 new_base = &new_cpu_base->clock_base[basenum]; 212 new_base = &new_cpu_base->clock_base[basenum];
209 213
210 if (base != new_base) { 214 if (base != new_base) {
@@ -220,22 +224,24 @@ again:
220 if (unlikely(hrtimer_callback_running(timer))) 224 if (unlikely(hrtimer_callback_running(timer)))
221 return base; 225 return base;
222 226
223 /* See the comment in lock_timer_base() */ 227 /* See the comment in lock_hrtimer_base() */
224 timer->base = NULL; 228 timer->base = &migration_base;
225 raw_spin_unlock(&base->cpu_base->lock); 229 raw_spin_unlock(&base->cpu_base->lock);
226 raw_spin_lock(&new_base->cpu_base->lock); 230 raw_spin_lock(&new_base->cpu_base->lock);
227 231
228 if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { 232 if (new_cpu_base != this_base &&
229 cpu = this_cpu; 233 hrtimer_check_target(timer, new_base)) {
230 raw_spin_unlock(&new_base->cpu_base->lock); 234 raw_spin_unlock(&new_base->cpu_base->lock);
231 raw_spin_lock(&base->cpu_base->lock); 235 raw_spin_lock(&base->cpu_base->lock);
236 new_cpu_base = this_base;
232 timer->base = base; 237 timer->base = base;
233 goto again; 238 goto again;
234 } 239 }
235 timer->base = new_base; 240 timer->base = new_base;
236 } else { 241 } else {
237 if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { 242 if (new_cpu_base != this_base &&
238 cpu = this_cpu; 243 hrtimer_check_target(timer, new_base)) {
244 new_cpu_base = this_base;
239 goto again; 245 goto again;
240 } 246 }
241 } 247 }
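
The migration_base sentinel replaces the old NULL parking of timer->base during a base switch, so hrtimer_callback_running() and friends may dereference timer->base->cpu_base unconditionally. The consumer side is the classic read-lock-recheck loop; a hedged sketch of that idiom (not the kernel's exact code):

#include <linux/hrtimer.h>

/* Sketch: read the base pointer locklessly, lock it, then verify the timer
 * did not migrate in between. The sentinel is never a valid target, so a
 * timer parked on it simply spins until the move completes. */
static struct hrtimer_clock_base *
example_lock_base(struct hrtimer *timer, unsigned long *flags,
		  struct hrtimer_clock_base *sentinel)
{
	struct hrtimer_clock_base *base;

	for (;;) {
		base = READ_ONCE(timer->base);
		if (base != sentinel) {
			raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
			if (base == timer->base)
				return base;	/* still ours, and now locked */
			raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
		}
		cpu_relax();
	}
}
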
@@ -443,24 +449,35 @@ static inline void debug_deactivate(struct hrtimer *timer)
443} 449}
444 450
445#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) 451#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
452static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base,
453 struct hrtimer *timer)
454{
455#ifdef CONFIG_HIGH_RES_TIMERS
456 cpu_base->next_timer = timer;
457#endif
458}
459
446static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) 460static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
447{ 461{
448 struct hrtimer_clock_base *base = cpu_base->clock_base; 462 struct hrtimer_clock_base *base = cpu_base->clock_base;
449 ktime_t expires, expires_next = { .tv64 = KTIME_MAX }; 463 ktime_t expires, expires_next = { .tv64 = KTIME_MAX };
450 int i; 464 unsigned int active = cpu_base->active_bases;
451 465
452 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { 466 hrtimer_update_next_timer(cpu_base, NULL);
467 for (; active; base++, active >>= 1) {
453 struct timerqueue_node *next; 468 struct timerqueue_node *next;
454 struct hrtimer *timer; 469 struct hrtimer *timer;
455 470
456 next = timerqueue_getnext(&base->active); 471 if (!(active & 0x01))
457 if (!next)
458 continue; 472 continue;
459 473
474 next = timerqueue_getnext(&base->active);
460 timer = container_of(next, struct hrtimer, node); 475 timer = container_of(next, struct hrtimer, node);
461 expires = ktime_sub(hrtimer_get_expires(timer), base->offset); 476 expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
462 if (expires.tv64 < expires_next.tv64) 477 if (expires.tv64 < expires_next.tv64) {
463 expires_next = expires; 478 expires_next = expires;
479 hrtimer_update_next_timer(cpu_base, timer);
480 }
464 } 481 }
465 /* 482 /*
466 * clock_was_set() might have changed base->offset of any of 483 * clock_was_set() might have changed base->offset of any of
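
__hrtimer_get_next_event() no longer walks all HRTIMER_MAX_CLOCK_BASES slots; it consumes the active_bases bitmask so empty clock bases cost a single shift, and it records the soonest timer via hrtimer_update_next_timer(). The scan idiom in isolation, as a hedged sketch:

#include <linux/hrtimer.h>
#include <linux/printk.h>

static void example_scan_active(struct hrtimer_cpu_base *cpu_base)
{
	struct hrtimer_clock_base *base = cpu_base->clock_base;
	unsigned int active = cpu_base->active_bases;

	for (; active; base++, active >>= 1) {
		struct timerqueue_node *next;

		if (!(active & 0x01))
			continue;	/* no timers enqueued on this clock base */

		/* leftmost node = first expiring timer of this base */
		next = timerqueue_getnext(&base->active);
		if (next)
			pr_debug("base %u has a pending expiry\n", base->index);
	}
}
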
@@ -473,6 +490,16 @@ static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
473} 490}
474#endif 491#endif
475 492
493static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
494{
495 ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
496 ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
497 ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
498
499 return ktime_get_update_offsets_now(&base->clock_was_set_seq,
500 offs_real, offs_boot, offs_tai);
501}
502
476/* High resolution timer related functions */ 503/* High resolution timer related functions */
477#ifdef CONFIG_HIGH_RES_TIMERS 504#ifdef CONFIG_HIGH_RES_TIMERS
478 505
@@ -480,6 +507,8 @@ static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
480 * High resolution timer enabled ? 507 * High resolution timer enabled ?
481 */ 508 */
482static int hrtimer_hres_enabled __read_mostly = 1; 509static int hrtimer_hres_enabled __read_mostly = 1;
510unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC;
511EXPORT_SYMBOL_GPL(hrtimer_resolution);
483 512
484/* 513/*
485 * Enable / Disable high resolution mode 514 * Enable / Disable high resolution mode
@@ -508,9 +537,14 @@ static inline int hrtimer_is_hres_enabled(void)
508/* 537/*
509 * Is the high resolution mode active ? 538 * Is the high resolution mode active ?
510 */ 539 */
540static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)
541{
542 return cpu_base->hres_active;
543}
544
511static inline int hrtimer_hres_active(void) 545static inline int hrtimer_hres_active(void)
512{ 546{
513 return __this_cpu_read(hrtimer_bases.hres_active); 547 return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
514} 548}
515 549
516/* 550/*
@@ -521,7 +555,12 @@ static inline int hrtimer_hres_active(void)
521static void 555static void
522hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) 556hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
523{ 557{
524 ktime_t expires_next = __hrtimer_get_next_event(cpu_base); 558 ktime_t expires_next;
559
560 if (!cpu_base->hres_active)
561 return;
562
563 expires_next = __hrtimer_get_next_event(cpu_base);
525 564
526 if (skip_equal && expires_next.tv64 == cpu_base->expires_next.tv64) 565 if (skip_equal && expires_next.tv64 == cpu_base->expires_next.tv64)
527 return; 566 return;
@@ -545,63 +584,53 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
545 if (cpu_base->hang_detected) 584 if (cpu_base->hang_detected)
546 return; 585 return;
547 586
548 if (cpu_base->expires_next.tv64 != KTIME_MAX) 587 tick_program_event(cpu_base->expires_next, 1);
549 tick_program_event(cpu_base->expires_next, 1);
550} 588}
551 589
552/* 590/*
553 * Shared reprogramming for clock_realtime and clock_monotonic
554 *
555 * When a timer is enqueued and expires earlier than the already enqueued 591 * When a timer is enqueued and expires earlier than the already enqueued
556 * timers, we have to check, whether it expires earlier than the timer for 592 * timers, we have to check, whether it expires earlier than the timer for
557 * which the clock event device was armed. 593 * which the clock event device was armed.
558 * 594 *
559 * Note, that in case the state has HRTIMER_STATE_CALLBACK set, no reprogramming
560 * and no expiry check happens. The timer gets enqueued into the rbtree. The
561 * reprogramming and expiry check is done in the hrtimer_interrupt or in the
562 * softirq.
563 *
564 * Called with interrupts disabled and base->cpu_base.lock held 595 * Called with interrupts disabled and base->cpu_base.lock held
565 */ 596 */
566static int hrtimer_reprogram(struct hrtimer *timer, 597static void hrtimer_reprogram(struct hrtimer *timer,
567 struct hrtimer_clock_base *base) 598 struct hrtimer_clock_base *base)
568{ 599{
569 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 600 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
570 ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); 601 ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
571 int res;
572 602
573 WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); 603 WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
574 604
575 /* 605 /*
576 * When the callback is running, we do not reprogram the clock event 606 * If the timer is not on the current cpu, we cannot reprogram
577 * device. The timer callback is either running on a different CPU or 607 * the other cpus clock event device.
578 * the callback is executed in the hrtimer_interrupt context. The
579 * reprogramming is handled either by the softirq, which called the
580 * callback or at the end of the hrtimer_interrupt.
581 */ 608 */
582 if (hrtimer_callback_running(timer)) 609 if (base->cpu_base != cpu_base)
583 return 0; 610 return;
611
612 /*
613 * If the hrtimer interrupt is running, then it will
614 * reevaluate the clock bases and reprogram the clock event
615 * device. The callbacks are always executed in hard interrupt
616 * context so we don't need an extra check for a running
617 * callback.
618 */
619 if (cpu_base->in_hrtirq)
620 return;
584 621
585 /* 622 /*
586 * CLOCK_REALTIME timer might be requested with an absolute 623 * CLOCK_REALTIME timer might be requested with an absolute
587 * expiry time which is less than base->offset. Nothing wrong 624 * expiry time which is less than base->offset. Set it to 0.
588 * about that, just avoid to call into the tick code, which
589 * has now objections against negative expiry values.
590 */ 625 */
591 if (expires.tv64 < 0) 626 if (expires.tv64 < 0)
592 return -ETIME; 627 expires.tv64 = 0;
593 628
594 if (expires.tv64 >= cpu_base->expires_next.tv64) 629 if (expires.tv64 >= cpu_base->expires_next.tv64)
595 return 0; 630 return;
596 631
597 /* 632 /* Update the pointer to the next expiring timer */
598 * When the target cpu of the timer is currently executing 633 cpu_base->next_timer = timer;
599 * hrtimer_interrupt(), then we do not touch the clock event
600 * device. hrtimer_interrupt() will reevaluate all clock bases
601 * before reprogramming the device.
602 */
603 if (cpu_base->in_hrtirq)
604 return 0;
605 634
606 /* 635 /*
607 * If a hang was detected in the last timer interrupt then we 636 * If a hang was detected in the last timer interrupt then we
@@ -610,15 +639,14 @@ static int hrtimer_reprogram(struct hrtimer *timer,
610 * to make progress. 639 * to make progress.
611 */ 640 */
612 if (cpu_base->hang_detected) 641 if (cpu_base->hang_detected)
613 return 0; 642 return;
614 643
615 /* 644 /*
616 * Clockevents returns -ETIME, when the event was in the past. 645 * Program the timer hardware. We enforce the expiry for
646 * events which are already in the past.
617 */ 647 */
618 res = tick_program_event(expires, 0); 648 cpu_base->expires_next = expires;
619 if (!IS_ERR_VALUE(res)) 649 tick_program_event(expires, 1);
620 cpu_base->expires_next = expires;
621 return res;
622} 650}
623 651
624/* 652/*
@@ -630,15 +658,6 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
630 base->hres_active = 0; 658 base->hres_active = 0;
631} 659}
632 660
633static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
634{
635 ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
636 ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
637 ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
638
639 return ktime_get_update_offsets_now(offs_real, offs_boot, offs_tai);
640}
641
642/* 661/*
643 * Retrigger next event is called after clock was set 662 * Retrigger next event is called after clock was set
644 * 663 *
@@ -648,7 +667,7 @@ static void retrigger_next_event(void *arg)
648{ 667{
649 struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); 668 struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
650 669
651 if (!hrtimer_hres_active()) 670 if (!base->hres_active)
652 return; 671 return;
653 672
654 raw_spin_lock(&base->lock); 673 raw_spin_lock(&base->lock);
@@ -662,29 +681,19 @@ static void retrigger_next_event(void *arg)
662 */ 681 */
663static int hrtimer_switch_to_hres(void) 682static int hrtimer_switch_to_hres(void)
664{ 683{
665 int i, cpu = smp_processor_id(); 684 struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
666 struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu);
667 unsigned long flags;
668
669 if (base->hres_active)
670 return 1;
671
672 local_irq_save(flags);
673 685
674 if (tick_init_highres()) { 686 if (tick_init_highres()) {
675 local_irq_restore(flags);
676 printk(KERN_WARNING "Could not switch to high resolution " 687 printk(KERN_WARNING "Could not switch to high resolution "
677 "mode on CPU %d\n", cpu); 688 "mode on CPU %d\n", base->cpu);
678 return 0; 689 return 0;
679 } 690 }
680 base->hres_active = 1; 691 base->hres_active = 1;
681 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) 692 hrtimer_resolution = HIGH_RES_NSEC;
682 base->clock_base[i].resolution = KTIME_HIGH_RES;
683 693
684 tick_setup_sched_timer(); 694 tick_setup_sched_timer();
685 /* "Retrigger" the interrupt to get things going */ 695 /* "Retrigger" the interrupt to get things going */
686 retrigger_next_event(NULL); 696 retrigger_next_event(NULL);
687 local_irq_restore(flags);
688 return 1; 697 return 1;
689} 698}
690 699
@@ -706,6 +715,7 @@ void clock_was_set_delayed(void)
706 715
707#else 716#else
708 717
718static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *b) { return 0; }
709static inline int hrtimer_hres_active(void) { return 0; } 719static inline int hrtimer_hres_active(void) { return 0; }
710static inline int hrtimer_is_hres_enabled(void) { return 0; } 720static inline int hrtimer_is_hres_enabled(void) { return 0; }
711static inline int hrtimer_switch_to_hres(void) { return 0; } 721static inline int hrtimer_switch_to_hres(void) { return 0; }
@@ -803,6 +813,14 @@ void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
803 * 813 *
804 * Forward the timer expiry so it will expire in the future. 814 * Forward the timer expiry so it will expire in the future.
805 * Returns the number of overruns. 815 * Returns the number of overruns.
816 *
817 * Can be safely called from the callback function of @timer. If
818 * called from other contexts @timer must neither be enqueued nor
819 * running the callback and the caller needs to take care of
820 * serialization.
821 *
822 * Note: This only updates the timer expiry value and does not requeue
823 * the timer.
806 */ 824 */
807u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) 825u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
808{ 826{
@@ -814,8 +832,11 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
814 if (delta.tv64 < 0) 832 if (delta.tv64 < 0)
815 return 0; 833 return 0;
816 834
817 if (interval.tv64 < timer->base->resolution.tv64) 835 if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED))
818 interval.tv64 = timer->base->resolution.tv64; 836 return 0;
837
838 if (interval.tv64 < hrtimer_resolution)
839 interval.tv64 = hrtimer_resolution;
819 840
820 if (unlikely(delta.tv64 >= interval.tv64)) { 841 if (unlikely(delta.tv64 >= interval.tv64)) {
821 s64 incr = ktime_to_ns(interval); 842 s64 incr = ktime_to_ns(interval);
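
The expanded kerneldoc makes the contract explicit: hrtimer_forward() only pushes the expiry forward, it never requeues, and the new WARN_ON rejects calls on a timer that is still enqueued. The safe call sites are therefore the timer's own callback (returning HRTIMER_RESTART performs the requeue) or code that has cancelled the timer first. A short hedged sketch of the second case:

#include <linux/hrtimer.h>

/* Illustrative: put a cancelled periodic timer back on its next interval boundary. */
static void example_requeue_period(struct hrtimer *timer, ktime_t period)
{
	u64 overruns;

	hrtimer_cancel(timer);		/* now neither enqueued nor running */
	overruns = hrtimer_forward_now(timer, period);	/* updates the expiry only */
	if (overruns > 1)
		pr_debug("missed %llu periods\n",
			 (unsigned long long)(overruns - 1));
	hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}
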
@@ -849,16 +870,11 @@ static int enqueue_hrtimer(struct hrtimer *timer,
849{ 870{
850 debug_activate(timer); 871 debug_activate(timer);
851 872
852 timerqueue_add(&base->active, &timer->node);
853 base->cpu_base->active_bases |= 1 << base->index; 873 base->cpu_base->active_bases |= 1 << base->index;
854 874
855 /* 875 timer->state = HRTIMER_STATE_ENQUEUED;
856 * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
857 * state of a possibly running callback.
858 */
859 timer->state |= HRTIMER_STATE_ENQUEUED;
860 876
861 return (&timer->node == base->active.next); 877 return timerqueue_add(&base->active, &timer->node);
862} 878}
863 879
864/* 880/*
@@ -875,39 +891,38 @@ static void __remove_hrtimer(struct hrtimer *timer,
875 struct hrtimer_clock_base *base, 891 struct hrtimer_clock_base *base,
876 unsigned long newstate, int reprogram) 892 unsigned long newstate, int reprogram)
877{ 893{
878 struct timerqueue_node *next_timer; 894 struct hrtimer_cpu_base *cpu_base = base->cpu_base;
879 if (!(timer->state & HRTIMER_STATE_ENQUEUED)) 895 unsigned int state = timer->state;
880 goto out; 896
897 timer->state = newstate;
898 if (!(state & HRTIMER_STATE_ENQUEUED))
899 return;
900
901 if (!timerqueue_del(&base->active, &timer->node))
902 cpu_base->active_bases &= ~(1 << base->index);
881 903
882 next_timer = timerqueue_getnext(&base->active);
883 timerqueue_del(&base->active, &timer->node);
884 if (&timer->node == next_timer) {
885#ifdef CONFIG_HIGH_RES_TIMERS 904#ifdef CONFIG_HIGH_RES_TIMERS
886 /* Reprogram the clock event device. if enabled */ 905 /*
887 if (reprogram && hrtimer_hres_active()) { 906 * Note: If reprogram is false we do not update
888 ktime_t expires; 907 * cpu_base->next_timer. This happens when we remove the first
889 908 * timer on a remote cpu. No harm as we never dereference
 890 expires = ktime_sub(hrtimer_get_expires(timer), 909 * cpu_base->next_timer. So the worst thing that can happen is
 891 base->offset); 910 * a superfluous call to hrtimer_force_reprogram() on the
892 if (base->cpu_base->expires_next.tv64 == expires.tv64) 911 * remote cpu later on if the same timer gets enqueued again.
893 hrtimer_force_reprogram(base->cpu_base, 1); 912 */
894 } 913 if (reprogram && timer == cpu_base->next_timer)
914 hrtimer_force_reprogram(cpu_base, 1);
895#endif 915#endif
896 }
897 if (!timerqueue_getnext(&base->active))
898 base->cpu_base->active_bases &= ~(1 << base->index);
899out:
900 timer->state = newstate;
901} 916}
902 917
903/* 918/*
904 * remove hrtimer, called with base lock held 919 * remove hrtimer, called with base lock held
905 */ 920 */
906static inline int 921static inline int
907remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) 922remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
908{ 923{
909 if (hrtimer_is_queued(timer)) { 924 if (hrtimer_is_queued(timer)) {
910 unsigned long state; 925 unsigned long state = timer->state;
911 int reprogram; 926 int reprogram;
912 927
913 /* 928 /*
@@ -921,30 +936,35 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
921 debug_deactivate(timer); 936 debug_deactivate(timer);
922 timer_stats_hrtimer_clear_start_info(timer); 937 timer_stats_hrtimer_clear_start_info(timer);
923 reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases); 938 reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
924 /* 939
925 * We must preserve the CALLBACK state flag here, 940 if (!restart)
926 * otherwise we could move the timer base in 941 state = HRTIMER_STATE_INACTIVE;
927 * switch_hrtimer_base. 942
928 */
929 state = timer->state & HRTIMER_STATE_CALLBACK;
930 __remove_hrtimer(timer, base, state, reprogram); 943 __remove_hrtimer(timer, base, state, reprogram);
931 return 1; 944 return 1;
932 } 945 }
933 return 0; 946 return 0;
934} 947}
935 948
936int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, 949/**
937 unsigned long delta_ns, const enum hrtimer_mode mode, 950 * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
938 int wakeup) 951 * @timer: the timer to be added
952 * @tim: expiry time
953 * @delta_ns: "slack" range for the timer
954 * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or
955 * relative (HRTIMER_MODE_REL)
956 */
957void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
958 unsigned long delta_ns, const enum hrtimer_mode mode)
939{ 959{
940 struct hrtimer_clock_base *base, *new_base; 960 struct hrtimer_clock_base *base, *new_base;
941 unsigned long flags; 961 unsigned long flags;
942 int ret, leftmost; 962 int leftmost;
943 963
944 base = lock_hrtimer_base(timer, &flags); 964 base = lock_hrtimer_base(timer, &flags);
945 965
946 /* Remove an active timer from the queue: */ 966 /* Remove an active timer from the queue: */
947 ret = remove_hrtimer(timer, base); 967 remove_hrtimer(timer, base, true);
948 968
949 if (mode & HRTIMER_MODE_REL) { 969 if (mode & HRTIMER_MODE_REL) {
950 tim = ktime_add_safe(tim, base->get_time()); 970 tim = ktime_add_safe(tim, base->get_time());
@@ -956,7 +976,7 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
956 * timeouts. This will go away with the GTOD framework. 976 * timeouts. This will go away with the GTOD framework.
957 */ 977 */
958#ifdef CONFIG_TIME_LOW_RES 978#ifdef CONFIG_TIME_LOW_RES
959 tim = ktime_add_safe(tim, base->resolution); 979 tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution));
960#endif 980#endif
961 } 981 }
962 982
@@ -968,85 +988,25 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
968 timer_stats_hrtimer_set_start_info(timer); 988 timer_stats_hrtimer_set_start_info(timer);
969 989
970 leftmost = enqueue_hrtimer(timer, new_base); 990 leftmost = enqueue_hrtimer(timer, new_base);
971 991 if (!leftmost)
972 if (!leftmost) { 992 goto unlock;
973 unlock_hrtimer_base(timer, &flags);
974 return ret;
975 }
976 993
977 if (!hrtimer_is_hres_active(timer)) { 994 if (!hrtimer_is_hres_active(timer)) {
978 /* 995 /*
979 * Kick to reschedule the next tick to handle the new timer 996 * Kick to reschedule the next tick to handle the new timer
980 * on dynticks target. 997 * on dynticks target.
981 */ 998 */
982 wake_up_nohz_cpu(new_base->cpu_base->cpu); 999 if (new_base->cpu_base->nohz_active)
983 } else if (new_base->cpu_base == this_cpu_ptr(&hrtimer_bases) && 1000 wake_up_nohz_cpu(new_base->cpu_base->cpu);
984 hrtimer_reprogram(timer, new_base)) { 1001 } else {
985 /* 1002 hrtimer_reprogram(timer, new_base);
986 * Only allow reprogramming if the new base is on this CPU.
987 * (it might still be on another CPU if the timer was pending)
988 *
989 * XXX send_remote_softirq() ?
990 */
991 if (wakeup) {
992 /*
993 * We need to drop cpu_base->lock to avoid a
994 * lock ordering issue vs. rq->lock.
995 */
996 raw_spin_unlock(&new_base->cpu_base->lock);
997 raise_softirq_irqoff(HRTIMER_SOFTIRQ);
998 local_irq_restore(flags);
999 return ret;
1000 } else {
1001 __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
1002 }
1003 } 1003 }
1004 1004unlock:
1005 unlock_hrtimer_base(timer, &flags); 1005 unlock_hrtimer_base(timer, &flags);
1006
1007 return ret;
1008}
1009EXPORT_SYMBOL_GPL(__hrtimer_start_range_ns);
1010
1011/**
1012 * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
1013 * @timer: the timer to be added
1014 * @tim: expiry time
1015 * @delta_ns: "slack" range for the timer
1016 * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or
1017 * relative (HRTIMER_MODE_REL)
1018 *
1019 * Returns:
1020 * 0 on success
1021 * 1 when the timer was active
1022 */
1023int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
1024 unsigned long delta_ns, const enum hrtimer_mode mode)
1025{
1026 return __hrtimer_start_range_ns(timer, tim, delta_ns, mode, 1);
1027} 1006}
1028EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); 1007EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
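With the softirq wakeup path gone, starting a timer no longer reports whether it was already pending, so hrtimer_start_range_ns() becomes void and the out-of-line hrtimer_start() removed below can be folded into a header wrapper. A minimal sketch of that wrapper, assuming it moves into include/linux/hrtimer.h:

static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim,
				 const enum hrtimer_mode mode)
{
	/* Zero slack; everything else is handled by the range variant. */
	hrtimer_start_range_ns(timer, tim, 0, mode);
}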
1029 1008
1030/** 1009/**
1031 * hrtimer_start - (re)start an hrtimer on the current CPU
1032 * @timer: the timer to be added
1033 * @tim: expiry time
1034 * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or
1035 * relative (HRTIMER_MODE_REL)
1036 *
1037 * Returns:
1038 * 0 on success
1039 * 1 when the timer was active
1040 */
1041int
1042hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
1043{
1044 return __hrtimer_start_range_ns(timer, tim, 0, mode, 1);
1045}
1046EXPORT_SYMBOL_GPL(hrtimer_start);
1047
1048
1049/**
1050 * hrtimer_try_to_cancel - try to deactivate a timer 1010 * hrtimer_try_to_cancel - try to deactivate a timer
1051 * @timer: hrtimer to stop 1011 * @timer: hrtimer to stop
1052 * 1012 *
@@ -1062,10 +1022,19 @@ int hrtimer_try_to_cancel(struct hrtimer *timer)
1062 unsigned long flags; 1022 unsigned long flags;
1063 int ret = -1; 1023 int ret = -1;
1064 1024
1025 /*
1026 * Check lockless first. If the timer is not active (neither
1027	 * enqueued nor running the callback), nothing to do here. The
1028 * base lock does not serialize against a concurrent enqueue,
1029 * so we can avoid taking it.
1030 */
1031 if (!hrtimer_active(timer))
1032 return 0;
1033
1065 base = lock_hrtimer_base(timer, &flags); 1034 base = lock_hrtimer_base(timer, &flags);
1066 1035
1067 if (!hrtimer_callback_running(timer)) 1036 if (!hrtimer_callback_running(timer))
1068 ret = remove_hrtimer(timer, base); 1037 ret = remove_hrtimer(timer, base, false);
1069 1038
1070 unlock_hrtimer_base(timer, &flags); 1039 unlock_hrtimer_base(timer, &flags);
1071 1040
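The lockless hrtimer_active() check above lets the common "nothing queued" case return 0 without touching the base lock; otherwise the usual contract holds: -1 while the callback is running, 1 when a queued timer was removed. Roughly how hrtimer_cancel() spins on top of it (sketch, not the literal implementation):

int hrtimer_cancel(struct hrtimer *timer)
{
	for (;;) {
		int ret = hrtimer_try_to_cancel(timer);

		if (ret >= 0)
			return ret;
		cpu_relax();	/* callback still running, try again */
	}
}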
@@ -1115,26 +1084,22 @@ EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
1115/** 1084/**
1116 * hrtimer_get_next_event - get the time until next expiry event 1085 * hrtimer_get_next_event - get the time until next expiry event
1117 * 1086 *
1118 * Returns the delta to the next expiry event or KTIME_MAX if no timer 1087 * Returns the next expiry time or KTIME_MAX if no timer is pending.
1119 * is pending.
1120 */ 1088 */
1121ktime_t hrtimer_get_next_event(void) 1089u64 hrtimer_get_next_event(void)
1122{ 1090{
1123 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1091 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1124 ktime_t mindelta = { .tv64 = KTIME_MAX }; 1092 u64 expires = KTIME_MAX;
1125 unsigned long flags; 1093 unsigned long flags;
1126 1094
1127 raw_spin_lock_irqsave(&cpu_base->lock, flags); 1095 raw_spin_lock_irqsave(&cpu_base->lock, flags);
1128 1096
1129 if (!hrtimer_hres_active()) 1097 if (!__hrtimer_hres_active(cpu_base))
1130 mindelta = ktime_sub(__hrtimer_get_next_event(cpu_base), 1098 expires = __hrtimer_get_next_event(cpu_base).tv64;
1131 ktime_get());
1132 1099
1133 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1100 raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1134 1101
1135 if (mindelta.tv64 < 0) 1102 return expires;
1136 mindelta.tv64 = 0;
1137 return mindelta;
1138} 1103}
1139#endif 1104#endif
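hrtimer_get_next_event() now hands back the absolute expiry in nanoseconds instead of a zero-clamped ktime delta, which moves the "how far away is it" arithmetic to the caller. A hypothetical helper showing how a caller such as the timer-wheel code might combine it with its own next expiry (names and signature assumed):

static u64 demo_next_event(u64 basem, u64 wheel_expires)
{
	u64 hrt = hrtimer_get_next_event();	/* KTIME_MAX if nothing is pending */
	u64 expires = min(wheel_expires, hrt);

	/* Never report an expiry in the past. */
	return expires < basem ? basem : expires;
}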
1140 1105
@@ -1176,37 +1141,73 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1176} 1141}
1177EXPORT_SYMBOL_GPL(hrtimer_init); 1142EXPORT_SYMBOL_GPL(hrtimer_init);
1178 1143
1179/** 1144/*
1180 * hrtimer_get_res - get the timer resolution for a clock 1145 * A timer is active, when it is enqueued into the rbtree or the
1181 * @which_clock: which clock to query 1146 * callback function is running or it's in the state of being migrated
1182 * @tp: pointer to timespec variable to store the resolution 1147 * to another cpu.
1183 * 1148 *
1184 * Store the resolution of the clock selected by @which_clock in the 1149 * It is important for this function to not return a false negative.
1185 * variable pointed to by @tp.
1186 */ 1150 */
1187int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) 1151bool hrtimer_active(const struct hrtimer *timer)
1188{ 1152{
1189 struct hrtimer_cpu_base *cpu_base; 1153 struct hrtimer_cpu_base *cpu_base;
1190 int base = hrtimer_clockid_to_base(which_clock); 1154 unsigned int seq;
1191 1155
1192 cpu_base = raw_cpu_ptr(&hrtimer_bases); 1156 do {
1193 *tp = ktime_to_timespec(cpu_base->clock_base[base].resolution); 1157 cpu_base = READ_ONCE(timer->base->cpu_base);
1158 seq = raw_read_seqcount_begin(&cpu_base->seq);
1194 1159
1195 return 0; 1160 if (timer->state != HRTIMER_STATE_INACTIVE ||
1161 cpu_base->running == timer)
1162 return true;
1163
1164 } while (read_seqcount_retry(&cpu_base->seq, seq) ||
1165 cpu_base != READ_ONCE(timer->base->cpu_base));
1166
1167 return false;
1196} 1168}
1197EXPORT_SYMBOL_GPL(hrtimer_get_res); 1169EXPORT_SYMBOL_GPL(hrtimer_active);
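The read side above is the standard seqcount begin/retry idiom applied to two words (timer->state and cpu_base->running) that are updated at different times. A minimal, self-contained illustration of the same pattern on hypothetical data, not hrtimer code (the seqcount is assumed to be initialized with seqcount_init() elsewhere):

#include <linux/seqlock.h>

struct demo_state {
	seqcount_t	seq;
	bool		queued;
	void		*running;
};

static bool demo_active(struct demo_state *s, void *item)
{
	unsigned int seq;
	bool active;

	do {
		seq = raw_read_seqcount_begin(&s->seq);
		active = s->queued || s->running == item;
	} while (read_seqcount_retry(&s->seq, seq));

	return active;
}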
1198 1170
1199static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) 1171/*
1172 * The write_seqcount_barrier()s in __run_hrtimer() split the thing into 3
1173 * distinct sections:
1174 *
1175 * - queued: the timer is queued
1176 * - callback:	the timer is being run
1177 * - post: the timer is inactive or (re)queued
1178 *
1179 * On the read side we ensure we observe timer->state and cpu_base->running
1180 * from the same section, if anything changed while we looked at it, we retry.
1181 * This includes timer->base changing because sequence numbers alone are
1182 * insufficient for that.
1183 *
1184 * The sequence numbers are required because otherwise we could still observe
1185 * a false negative if the read side got smeared over multiple consecutive
1186 * __run_hrtimer() invocations.
1187 */
1188
1189static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
1190 struct hrtimer_clock_base *base,
1191 struct hrtimer *timer, ktime_t *now)
1200{ 1192{
1201 struct hrtimer_clock_base *base = timer->base;
1202 struct hrtimer_cpu_base *cpu_base = base->cpu_base;
1203 enum hrtimer_restart (*fn)(struct hrtimer *); 1193 enum hrtimer_restart (*fn)(struct hrtimer *);
1204 int restart; 1194 int restart;
1205 1195
1206 WARN_ON(!irqs_disabled()); 1196 lockdep_assert_held(&cpu_base->lock);
1207 1197
1208 debug_deactivate(timer); 1198 debug_deactivate(timer);
1209 __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); 1199 cpu_base->running = timer;
1200
1201 /*
1202 * Separate the ->running assignment from the ->state assignment.
1203 *
1204 * As with a regular write barrier, this ensures the read side in
1205 * hrtimer_active() cannot observe cpu_base->running == NULL &&
1206 * timer->state == INACTIVE.
1207 */
1208 raw_write_seqcount_barrier(&cpu_base->seq);
1209
1210 __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
1210 timer_stats_account_hrtimer(timer); 1211 timer_stats_account_hrtimer(timer);
1211 fn = timer->function; 1212 fn = timer->function;
1212 1213
@@ -1222,58 +1223,43 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
1222 raw_spin_lock(&cpu_base->lock); 1223 raw_spin_lock(&cpu_base->lock);
1223 1224
1224 /* 1225 /*
1225 * Note: We clear the CALLBACK bit after enqueue_hrtimer and 1226 * Note: We clear the running state after enqueue_hrtimer and
1226	 * we do not reprogram the event hardware. Happens either in 1227	 * we do not reprogram the event hardware. Happens either in
1227 * hrtimer_start_range_ns() or in hrtimer_interrupt() 1228 * hrtimer_start_range_ns() or in hrtimer_interrupt()
1229 *
1230 * Note: Because we dropped the cpu_base->lock above,
1231 * hrtimer_start_range_ns() can have popped in and enqueued the timer
1232 * for us already.
1228 */ 1233 */
1229 if (restart != HRTIMER_NORESTART) { 1234 if (restart != HRTIMER_NORESTART &&
1230 BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); 1235 !(timer->state & HRTIMER_STATE_ENQUEUED))
1231 enqueue_hrtimer(timer, base); 1236 enqueue_hrtimer(timer, base);
1232 }
1233 1237
1234 WARN_ON_ONCE(!(timer->state & HRTIMER_STATE_CALLBACK)); 1238 /*
1239 * Separate the ->running assignment from the ->state assignment.
1240 *
1241 * As with a regular write barrier, this ensures the read side in
1242 * hrtimer_active() cannot observe cpu_base->running == NULL &&
1243 * timer->state == INACTIVE.
1244 */
1245 raw_write_seqcount_barrier(&cpu_base->seq);
1235 1246
1236 timer->state &= ~HRTIMER_STATE_CALLBACK; 1247 WARN_ON_ONCE(cpu_base->running != timer);
1248 cpu_base->running = NULL;
1237} 1249}
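Because the timer is dequeued to INACTIVE before its function runs, a callback may either return HRTIMER_RESTART and let __run_hrtimer() requeue it, or re-arm itself directly with hrtimer_start(); the ENQUEUED check above keeps the two paths from colliding. A typical restarting callback, for illustration (the 10 ms period is made up):

static enum hrtimer_restart demo_tick(struct hrtimer *t)
{
	/* Push the expiry past 'now' by one period and ask to be requeued. */
	hrtimer_forward_now(t, ms_to_ktime(10));
	return HRTIMER_RESTART;
}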
1238 1250
1239#ifdef CONFIG_HIGH_RES_TIMERS 1251static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
1240
1241/*
1242 * High resolution timer interrupt
1243 * Called with interrupts disabled
1244 */
1245void hrtimer_interrupt(struct clock_event_device *dev)
1246{ 1252{
1247 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1253 struct hrtimer_clock_base *base = cpu_base->clock_base;
1248 ktime_t expires_next, now, entry_time, delta; 1254 unsigned int active = cpu_base->active_bases;
1249 int i, retries = 0;
1250
1251 BUG_ON(!cpu_base->hres_active);
1252 cpu_base->nr_events++;
1253 dev->next_event.tv64 = KTIME_MAX;
1254
1255 raw_spin_lock(&cpu_base->lock);
1256 entry_time = now = hrtimer_update_base(cpu_base);
1257retry:
1258 cpu_base->in_hrtirq = 1;
1259 /*
1260 * We set expires_next to KTIME_MAX here with cpu_base->lock
1261 * held to prevent that a timer is enqueued in our queue via
1262 * the migration code. This does not affect enqueueing of
1263 * timers which run their callback and need to be requeued on
1264 * this CPU.
1265 */
1266 cpu_base->expires_next.tv64 = KTIME_MAX;
1267 1255
1268 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { 1256 for (; active; base++, active >>= 1) {
1269 struct hrtimer_clock_base *base;
1270 struct timerqueue_node *node; 1257 struct timerqueue_node *node;
1271 ktime_t basenow; 1258 ktime_t basenow;
1272 1259
1273 if (!(cpu_base->active_bases & (1 << i))) 1260 if (!(active & 0x01))
1274 continue; 1261 continue;
1275 1262
1276 base = cpu_base->clock_base + i;
1277 basenow = ktime_add(now, base->offset); 1263 basenow = ktime_add(now, base->offset);
1278 1264
1279 while ((node = timerqueue_getnext(&base->active))) { 1265 while ((node = timerqueue_getnext(&base->active))) {
@@ -1296,9 +1282,42 @@ retry:
1296 if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) 1282 if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer))
1297 break; 1283 break;
1298 1284
1299 __run_hrtimer(timer, &basenow); 1285 __run_hrtimer(cpu_base, base, timer, &basenow);
1300 } 1286 }
1301 } 1287 }
1288}
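A short worked trace of the bit walk above, for a made-up mask:

/*
 * active_bases == 0b0101: the loop visits clock_base[0], skips
 * clock_base[1] (bit clear), visits clock_base[2], and then stops
 * because 'active' has been shifted down to zero -- idle bases are
 * never touched and iteration ends at the highest set bit.
 */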
1289
1290#ifdef CONFIG_HIGH_RES_TIMERS
1291
1292/*
1293 * High resolution timer interrupt
1294 * Called with interrupts disabled
1295 */
1296void hrtimer_interrupt(struct clock_event_device *dev)
1297{
1298 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1299 ktime_t expires_next, now, entry_time, delta;
1300 int retries = 0;
1301
1302 BUG_ON(!cpu_base->hres_active);
1303 cpu_base->nr_events++;
1304 dev->next_event.tv64 = KTIME_MAX;
1305
1306 raw_spin_lock(&cpu_base->lock);
1307 entry_time = now = hrtimer_update_base(cpu_base);
1308retry:
1309 cpu_base->in_hrtirq = 1;
1310 /*
1311 * We set expires_next to KTIME_MAX here with cpu_base->lock
1312 * held to prevent that a timer is enqueued in our queue via
1313 * the migration code. This does not affect enqueueing of
1314 * timers which run their callback and need to be requeued on
1315 * this CPU.
1316 */
1317 cpu_base->expires_next.tv64 = KTIME_MAX;
1318
1319 __hrtimer_run_queues(cpu_base, now);
1320
1302 /* Reevaluate the clock bases for the next expiry */ 1321 /* Reevaluate the clock bases for the next expiry */
1303 expires_next = __hrtimer_get_next_event(cpu_base); 1322 expires_next = __hrtimer_get_next_event(cpu_base);
1304 /* 1323 /*
@@ -1310,8 +1329,7 @@ retry:
1310 raw_spin_unlock(&cpu_base->lock); 1329 raw_spin_unlock(&cpu_base->lock);
1311 1330
1312 /* Reprogramming necessary ? */ 1331 /* Reprogramming necessary ? */
1313 if (expires_next.tv64 == KTIME_MAX || 1332 if (!tick_program_event(expires_next, 0)) {
1314 !tick_program_event(expires_next, 0)) {
1315 cpu_base->hang_detected = 0; 1333 cpu_base->hang_detected = 0;
1316 return; 1334 return;
1317 } 1335 }
@@ -1344,8 +1362,8 @@ retry:
1344 cpu_base->hang_detected = 1; 1362 cpu_base->hang_detected = 1;
1345 raw_spin_unlock(&cpu_base->lock); 1363 raw_spin_unlock(&cpu_base->lock);
1346 delta = ktime_sub(now, entry_time); 1364 delta = ktime_sub(now, entry_time);
1347 if (delta.tv64 > cpu_base->max_hang_time.tv64) 1365 if ((unsigned int)delta.tv64 > cpu_base->max_hang_time)
1348 cpu_base->max_hang_time = delta; 1366 cpu_base->max_hang_time = (unsigned int) delta.tv64;
1349 /* 1367 /*
1350 * Limit it to a sensible value as we enforce a longer 1368 * Limit it to a sensible value as we enforce a longer
1351 * delay. Give the CPU at least 100ms to catch up. 1369 * delay. Give the CPU at least 100ms to catch up.
@@ -1363,7 +1381,7 @@ retry:
1363 * local version of hrtimer_peek_ahead_timers() called with interrupts 1381 * local version of hrtimer_peek_ahead_timers() called with interrupts
1364 * disabled. 1382 * disabled.
1365 */ 1383 */
1366static void __hrtimer_peek_ahead_timers(void) 1384static inline void __hrtimer_peek_ahead_timers(void)
1367{ 1385{
1368 struct tick_device *td; 1386 struct tick_device *td;
1369 1387
@@ -1375,29 +1393,6 @@ static void __hrtimer_peek_ahead_timers(void)
1375 hrtimer_interrupt(td->evtdev); 1393 hrtimer_interrupt(td->evtdev);
1376} 1394}
1377 1395
1378/**
1379 * hrtimer_peek_ahead_timers -- run soft-expired timers now
1380 *
1381 * hrtimer_peek_ahead_timers will peek at the timer queue of
1382 * the current cpu and check if there are any timers for which
1383 * the soft expires time has passed. If any such timers exist,
1384 * they are run immediately and then removed from the timer queue.
1385 *
1386 */
1387void hrtimer_peek_ahead_timers(void)
1388{
1389 unsigned long flags;
1390
1391 local_irq_save(flags);
1392 __hrtimer_peek_ahead_timers();
1393 local_irq_restore(flags);
1394}
1395
1396static void run_hrtimer_softirq(struct softirq_action *h)
1397{
1398 hrtimer_peek_ahead_timers();
1399}
1400
1401#else /* CONFIG_HIGH_RES_TIMERS */ 1396#else /* CONFIG_HIGH_RES_TIMERS */
1402 1397
1403static inline void __hrtimer_peek_ahead_timers(void) { } 1398static inline void __hrtimer_peek_ahead_timers(void) { }
@@ -1405,66 +1400,32 @@ static inline void __hrtimer_peek_ahead_timers(void) { }
1405#endif /* !CONFIG_HIGH_RES_TIMERS */ 1400#endif /* !CONFIG_HIGH_RES_TIMERS */
1406 1401
1407/* 1402/*
1408 * Called from timer softirq every jiffy, expire hrtimers: 1403 * Called from run_local_timers in hardirq context every jiffy
1409 *
1410 * For HRT its the fall back code to run the softirq in the timer
1411 * softirq context in case the hrtimer initialization failed or has
1412 * not been done yet.
1413 */ 1404 */
1414void hrtimer_run_pending(void) 1405void hrtimer_run_queues(void)
1415{ 1406{
1416 if (hrtimer_hres_active()) 1407 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1408 ktime_t now;
1409
1410 if (__hrtimer_hres_active(cpu_base))
1417 return; 1411 return;
1418 1412
1419 /* 1413 /*
1420 * This _is_ ugly: We have to check in the softirq context, 1414 * This _is_ ugly: We have to check periodically, whether we
1421 * whether we can switch to highres and / or nohz mode. The 1415 * can switch to highres and / or nohz mode. The clocksource
1422 * clocksource switch happens in the timer interrupt with 1416 * switch happens with xtime_lock held. Notification from
1423 * xtime_lock held. Notification from there only sets the 1417 * there only sets the check bit in the tick_oneshot code,
1424 * check bit in the tick_oneshot code, otherwise we might 1418 * otherwise we might deadlock vs. xtime_lock.
1425 * deadlock vs. xtime_lock.
1426 */ 1419 */
1427 if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) 1420 if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) {
1428 hrtimer_switch_to_hres(); 1421 hrtimer_switch_to_hres();
1429}
1430
1431/*
1432 * Called from hardirq context every jiffy
1433 */
1434void hrtimer_run_queues(void)
1435{
1436 struct timerqueue_node *node;
1437 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1438 struct hrtimer_clock_base *base;
1439 int index, gettime = 1;
1440
1441 if (hrtimer_hres_active())
1442 return; 1422 return;
1443
1444 for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
1445 base = &cpu_base->clock_base[index];
1446 if (!timerqueue_getnext(&base->active))
1447 continue;
1448
1449 if (gettime) {
1450 hrtimer_get_softirq_time(cpu_base);
1451 gettime = 0;
1452 }
1453
1454 raw_spin_lock(&cpu_base->lock);
1455
1456 while ((node = timerqueue_getnext(&base->active))) {
1457 struct hrtimer *timer;
1458
1459 timer = container_of(node, struct hrtimer, node);
1460 if (base->softirq_time.tv64 <=
1461 hrtimer_get_expires_tv64(timer))
1462 break;
1463
1464 __run_hrtimer(timer, &base->softirq_time);
1465 }
1466 raw_spin_unlock(&cpu_base->lock);
1467 } 1423 }
1424
1425 raw_spin_lock(&cpu_base->lock);
1426 now = hrtimer_update_base(cpu_base);
1427 __hrtimer_run_queues(cpu_base, now);
1428 raw_spin_unlock(&cpu_base->lock);
1468} 1429}
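In low resolution mode the hrtimer queues are now expired straight from the per-tick hardirq path instead of the removed HRTIMER_SOFTIRQ. A sketch of the assumed call site in kernel/time/timer.c:

void run_local_timers(void)
{
	hrtimer_run_queues();		/* low-res hrtimers; no-op when highres is active */
	raise_softirq(TIMER_SOFTIRQ);	/* the timer wheel still runs from softirq */
}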
1469 1430
1470/* 1431/*
@@ -1497,8 +1458,6 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
1497 do { 1458 do {
1498 set_current_state(TASK_INTERRUPTIBLE); 1459 set_current_state(TASK_INTERRUPTIBLE);
1499 hrtimer_start_expires(&t->timer, mode); 1460 hrtimer_start_expires(&t->timer, mode);
1500 if (!hrtimer_active(&t->timer))
1501 t->task = NULL;
1502 1461
1503 if (likely(t->task)) 1462 if (likely(t->task))
1504 freezable_schedule(); 1463 freezable_schedule();
@@ -1642,11 +1601,11 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
1642 debug_deactivate(timer); 1601 debug_deactivate(timer);
1643 1602
1644 /* 1603 /*
1645 * Mark it as STATE_MIGRATE not INACTIVE otherwise the 1604 * Mark it as ENQUEUED not INACTIVE otherwise the
1646 * timer could be seen as !active and just vanish away 1605 * timer could be seen as !active and just vanish away
1647 * under us on another CPU 1606 * under us on another CPU
1648 */ 1607 */
1649 __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0); 1608 __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0);
1650 timer->base = new_base; 1609 timer->base = new_base;
1651 /* 1610 /*
1652 * Enqueue the timers on the new cpu. This does not 1611 * Enqueue the timers on the new cpu. This does not
@@ -1657,9 +1616,6 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
1657 * event device. 1616 * event device.
1658 */ 1617 */
1659 enqueue_hrtimer(timer, new_base); 1618 enqueue_hrtimer(timer, new_base);
1660
1661 /* Clear the migration state bit */
1662 timer->state &= ~HRTIMER_STATE_MIGRATE;
1663 } 1619 }
1664} 1620}
1665 1621
@@ -1731,9 +1687,6 @@ void __init hrtimers_init(void)
1731 hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, 1687 hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
1732 (void *)(long)smp_processor_id()); 1688 (void *)(long)smp_processor_id());
1733 register_cpu_notifier(&hrtimers_nb); 1689 register_cpu_notifier(&hrtimers_nb);
1734#ifdef CONFIG_HIGH_RES_TIMERS
1735 open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
1736#endif
1737} 1690}
1738 1691
1739/** 1692/**
@@ -1772,8 +1725,6 @@ schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
1772 hrtimer_init_sleeper(&t, current); 1725 hrtimer_init_sleeper(&t, current);
1773 1726
1774 hrtimer_start_expires(&t.timer, mode); 1727 hrtimer_start_expires(&t.timer, mode);
1775 if (!hrtimer_active(&t.timer))
1776 t.task = NULL;
1777 1728
1778 if (likely(t.task)) 1729 if (likely(t.task))
1779 schedule(); 1730 schedule();
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 7a681003001c..fb4d98c7fd43 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -35,6 +35,7 @@ unsigned long tick_nsec;
35static u64 tick_length; 35static u64 tick_length;
36static u64 tick_length_base; 36static u64 tick_length_base;
37 37
38#define SECS_PER_DAY 86400
38#define MAX_TICKADJ 500LL /* usecs */ 39#define MAX_TICKADJ 500LL /* usecs */
39#define MAX_TICKADJ_SCALED \ 40#define MAX_TICKADJ_SCALED \
40 (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) 41 (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
@@ -76,6 +77,9 @@ static long time_adjust;
76/* constant (boot-param configurable) NTP tick adjustment (upscaled) */ 77/* constant (boot-param configurable) NTP tick adjustment (upscaled) */
77static s64 ntp_tick_adj; 78static s64 ntp_tick_adj;
78 79
80/* second value of the next pending leapsecond, or TIME64_MAX if no leap */
81static time64_t ntp_next_leap_sec = TIME64_MAX;
82
79#ifdef CONFIG_NTP_PPS 83#ifdef CONFIG_NTP_PPS
80 84
81/* 85/*
@@ -349,6 +353,7 @@ void ntp_clear(void)
349 tick_length = tick_length_base; 353 tick_length = tick_length_base;
350 time_offset = 0; 354 time_offset = 0;
351 355
356 ntp_next_leap_sec = TIME64_MAX;
352 /* Clear PPS state variables */ 357 /* Clear PPS state variables */
353 pps_clear(); 358 pps_clear();
354} 359}
@@ -359,6 +364,21 @@ u64 ntp_tick_length(void)
359 return tick_length; 364 return tick_length;
360} 365}
361 366
367/**
368 * ntp_get_next_leap - Returns the next leapsecond in CLOCK_REALTIME ktime_t
369 *
370 * Provides the time of the next leapsecond against CLOCK_REALTIME in
371 * a ktime_t format. Returns KTIME_MAX if no leapsecond is pending.
372 */
373ktime_t ntp_get_next_leap(void)
374{
375 ktime_t ret;
376
377 if ((time_state == TIME_INS) && (time_status & STA_INS))
378 return ktime_set(ntp_next_leap_sec, 0);
379 ret.tv64 = KTIME_MAX;
380 return ret;
381}
362 382
363/* 383/*
364 * this routine handles the overflow of the microsecond field 384 * this routine handles the overflow of the microsecond field
@@ -382,15 +402,21 @@ int second_overflow(unsigned long secs)
382 */ 402 */
383 switch (time_state) { 403 switch (time_state) {
384 case TIME_OK: 404 case TIME_OK:
385 if (time_status & STA_INS) 405 if (time_status & STA_INS) {
386 time_state = TIME_INS; 406 time_state = TIME_INS;
387 else if (time_status & STA_DEL) 407 ntp_next_leap_sec = secs + SECS_PER_DAY -
408 (secs % SECS_PER_DAY);
409 } else if (time_status & STA_DEL) {
388 time_state = TIME_DEL; 410 time_state = TIME_DEL;
411 ntp_next_leap_sec = secs + SECS_PER_DAY -
412 ((secs+1) % SECS_PER_DAY);
413 }
389 break; 414 break;
390 case TIME_INS: 415 case TIME_INS:
391 if (!(time_status & STA_INS)) 416 if (!(time_status & STA_INS)) {
417 ntp_next_leap_sec = TIME64_MAX;
392 time_state = TIME_OK; 418 time_state = TIME_OK;
393 else if (secs % 86400 == 0) { 419 } else if (secs % SECS_PER_DAY == 0) {
394 leap = -1; 420 leap = -1;
395 time_state = TIME_OOP; 421 time_state = TIME_OOP;
396 printk(KERN_NOTICE 422 printk(KERN_NOTICE
@@ -398,19 +424,21 @@ int second_overflow(unsigned long secs)
398 } 424 }
399 break; 425 break;
400 case TIME_DEL: 426 case TIME_DEL:
401 if (!(time_status & STA_DEL)) 427 if (!(time_status & STA_DEL)) {
428 ntp_next_leap_sec = TIME64_MAX;
402 time_state = TIME_OK; 429 time_state = TIME_OK;
403 else if ((secs + 1) % 86400 == 0) { 430 } else if ((secs + 1) % SECS_PER_DAY == 0) {
404 leap = 1; 431 leap = 1;
432 ntp_next_leap_sec = TIME64_MAX;
405 time_state = TIME_WAIT; 433 time_state = TIME_WAIT;
406 printk(KERN_NOTICE 434 printk(KERN_NOTICE
407 "Clock: deleting leap second 23:59:59 UTC\n"); 435 "Clock: deleting leap second 23:59:59 UTC\n");
408 } 436 }
409 break; 437 break;
410 case TIME_OOP: 438 case TIME_OOP:
439 ntp_next_leap_sec = TIME64_MAX;
411 time_state = TIME_WAIT; 440 time_state = TIME_WAIT;
412 break; 441 break;
413
414 case TIME_WAIT: 442 case TIME_WAIT:
415 if (!(time_status & (STA_INS | STA_DEL))) 443 if (!(time_status & (STA_INS | STA_DEL)))
416 time_state = TIME_OK; 444 time_state = TIME_OK;
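ntp_next_leap_sec is simply the current second rounded up to the start of the next UTC day. A worked example with made-up values:

/*
 * Insertion armed at secs = 1435676400 (2015-06-30 15:00:00 UTC):
 *
 *	secs % SECS_PER_DAY           = 54000		(15h into the day)
 *	secs + SECS_PER_DAY - 54000   = 1435708800	(2015-07-01 00:00:00 UTC)
 *
 * so the leap edge is the upcoming midnight. For a deletion the same
 * rounding uses (secs + 1), which lands on 23:59:59 -- the second that
 * gets dropped.
 */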
@@ -547,6 +575,7 @@ static inline void process_adj_status(struct timex *txc, struct timespec64 *ts)
547 if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) { 575 if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) {
548 time_state = TIME_OK; 576 time_state = TIME_OK;
549 time_status = STA_UNSYNC; 577 time_status = STA_UNSYNC;
578 ntp_next_leap_sec = TIME64_MAX;
550 /* restart PPS frequency calibration */ 579 /* restart PPS frequency calibration */
551 pps_reset_freq_interval(); 580 pps_reset_freq_interval();
552 } 581 }
@@ -711,6 +740,24 @@ int __do_adjtimex(struct timex *txc, struct timespec64 *ts, s32 *time_tai)
711 if (!(time_status & STA_NANO)) 740 if (!(time_status & STA_NANO))
712 txc->time.tv_usec /= NSEC_PER_USEC; 741 txc->time.tv_usec /= NSEC_PER_USEC;
713 742
743 /* Handle leapsec adjustments */
744 if (unlikely(ts->tv_sec >= ntp_next_leap_sec)) {
745 if ((time_state == TIME_INS) && (time_status & STA_INS)) {
746 result = TIME_OOP;
747 txc->tai++;
748 txc->time.tv_sec--;
749 }
750 if ((time_state == TIME_DEL) && (time_status & STA_DEL)) {
751 result = TIME_WAIT;
752 txc->tai--;
753 txc->time.tv_sec++;
754 }
755 if ((time_state == TIME_OOP) &&
756 (ts->tv_sec == ntp_next_leap_sec)) {
757 result = TIME_WAIT;
758 }
759 }
760
714 return result; 761 return result;
715} 762}
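These adjustments are what userspace observes through adjtimex(2) around the edge: TIME_INS beforehand, TIME_OOP while the inserted second plays out (with tai bumped and tv_sec pulled back), then TIME_WAIT. A small read-only probe, for illustration:

#include <stdio.h>
#include <sys/timex.h>

int main(void)
{
	struct timex tx = { 0 };
	int state = adjtimex(&tx);	/* modes == 0: read-only query */

	printf("state=%d tai=%ld time=%ld.%06ld\n",
	       state, tx.tai, (long)tx.time.tv_sec, (long)tx.time.tv_usec);
	return 0;
}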
716 763
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index bbd102ad9df7..65430504ca26 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -5,6 +5,7 @@ extern void ntp_init(void);
5extern void ntp_clear(void); 5extern void ntp_clear(void);
6/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */ 6/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
7extern u64 ntp_tick_length(void); 7extern u64 ntp_tick_length(void);
8extern ktime_t ntp_get_next_leap(void);
8extern int second_overflow(unsigned long secs); 9extern int second_overflow(unsigned long secs);
9extern int ntp_validate_timex(struct timex *); 10extern int ntp_validate_timex(struct timex *);
10extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *); 11extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *);
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 31ea01f42e1f..31d11ac9fa47 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -272,13 +272,20 @@ static int posix_get_tai(clockid_t which_clock, struct timespec *tp)
272 return 0; 272 return 0;
273} 273}
274 274
275static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec *tp)
276{
277 tp->tv_sec = 0;
278 tp->tv_nsec = hrtimer_resolution;
279 return 0;
280}
281
275/* 282/*
276 * Initialize everything, well, just everything in Posix clocks/timers ;) 283 * Initialize everything, well, just everything in Posix clocks/timers ;)
277 */ 284 */
278static __init int init_posix_timers(void) 285static __init int init_posix_timers(void)
279{ 286{
280 struct k_clock clock_realtime = { 287 struct k_clock clock_realtime = {
281 .clock_getres = hrtimer_get_res, 288 .clock_getres = posix_get_hrtimer_res,
282 .clock_get = posix_clock_realtime_get, 289 .clock_get = posix_clock_realtime_get,
283 .clock_set = posix_clock_realtime_set, 290 .clock_set = posix_clock_realtime_set,
284 .clock_adj = posix_clock_realtime_adj, 291 .clock_adj = posix_clock_realtime_adj,
@@ -290,7 +297,7 @@ static __init int init_posix_timers(void)
290 .timer_del = common_timer_del, 297 .timer_del = common_timer_del,
291 }; 298 };
292 struct k_clock clock_monotonic = { 299 struct k_clock clock_monotonic = {
293 .clock_getres = hrtimer_get_res, 300 .clock_getres = posix_get_hrtimer_res,
294 .clock_get = posix_ktime_get_ts, 301 .clock_get = posix_ktime_get_ts,
295 .nsleep = common_nsleep, 302 .nsleep = common_nsleep,
296 .nsleep_restart = hrtimer_nanosleep_restart, 303 .nsleep_restart = hrtimer_nanosleep_restart,
@@ -300,7 +307,7 @@ static __init int init_posix_timers(void)
300 .timer_del = common_timer_del, 307 .timer_del = common_timer_del,
301 }; 308 };
302 struct k_clock clock_monotonic_raw = { 309 struct k_clock clock_monotonic_raw = {
303 .clock_getres = hrtimer_get_res, 310 .clock_getres = posix_get_hrtimer_res,
304 .clock_get = posix_get_monotonic_raw, 311 .clock_get = posix_get_monotonic_raw,
305 }; 312 };
306 struct k_clock clock_realtime_coarse = { 313 struct k_clock clock_realtime_coarse = {
@@ -312,7 +319,7 @@ static __init int init_posix_timers(void)
312 .clock_get = posix_get_monotonic_coarse, 319 .clock_get = posix_get_monotonic_coarse,
313 }; 320 };
314 struct k_clock clock_tai = { 321 struct k_clock clock_tai = {
315 .clock_getres = hrtimer_get_res, 322 .clock_getres = posix_get_hrtimer_res,
316 .clock_get = posix_get_tai, 323 .clock_get = posix_get_tai,
317 .nsleep = common_nsleep, 324 .nsleep = common_nsleep,
318 .nsleep_restart = hrtimer_nanosleep_restart, 325 .nsleep_restart = hrtimer_nanosleep_restart,
@@ -322,7 +329,7 @@ static __init int init_posix_timers(void)
322 .timer_del = common_timer_del, 329 .timer_del = common_timer_del,
323 }; 330 };
324 struct k_clock clock_boottime = { 331 struct k_clock clock_boottime = {
325 .clock_getres = hrtimer_get_res, 332 .clock_getres = posix_get_hrtimer_res,
326 .clock_get = posix_get_boottime, 333 .clock_get = posix_get_boottime,
327 .nsleep = common_nsleep, 334 .nsleep = common_nsleep,
328 .nsleep_restart = hrtimer_nanosleep_restart, 335 .nsleep_restart = hrtimer_nanosleep_restart,
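For userspace the visible change is only where the value comes from: clock_getres() on the hrtimer-backed clocks now reports hrtimer_resolution (1 ns with high resolution timers active, the tick period otherwise). A quick check, for illustration:

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	if (clock_getres(CLOCK_MONOTONIC, &ts) == 0)
		printf("CLOCK_MONOTONIC resolution: %ld ns\n", ts.tv_nsec);
	return 0;
}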
diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
index 6aac4beedbbe..3e7db49a2381 100644
--- a/kernel/time/tick-broadcast-hrtimer.c
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -22,6 +22,7 @@ static void bc_set_mode(enum clock_event_mode mode,
22 struct clock_event_device *bc) 22 struct clock_event_device *bc)
23{ 23{
24 switch (mode) { 24 switch (mode) {
25 case CLOCK_EVT_MODE_UNUSED:
25 case CLOCK_EVT_MODE_SHUTDOWN: 26 case CLOCK_EVT_MODE_SHUTDOWN:
26 /* 27 /*
27 * Note, we cannot cancel the timer here as we might 28 * Note, we cannot cancel the timer here as we might
@@ -66,9 +67,11 @@ static int bc_set_next(ktime_t expires, struct clock_event_device *bc)
66 * hrtimer_{start/cancel} functions call into tracing, 67 * hrtimer_{start/cancel} functions call into tracing,
67 * calls to these functions must be bound within RCU_NONIDLE. 68 * calls to these functions must be bound within RCU_NONIDLE.
68 */ 69 */
69 RCU_NONIDLE(bc_moved = (hrtimer_try_to_cancel(&bctimer) >= 0) ? 70 RCU_NONIDLE({
70 !hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED) : 71 bc_moved = hrtimer_try_to_cancel(&bctimer) >= 0;
71 0); 72 if (bc_moved)
73 hrtimer_start(&bctimer, expires,
74 HRTIMER_MODE_ABS_PINNED);});
72 if (bc_moved) { 75 if (bc_moved) {
73 /* Bind the "device" to the cpu */ 76 /* Bind the "device" to the cpu */
74 bc->bound_on = smp_processor_id(); 77 bc->bound_on = smp_processor_id();
@@ -99,10 +102,13 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t)
99{ 102{
100 ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer); 103 ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer);
101 104
102 if (ce_broadcast_hrtimer.next_event.tv64 == KTIME_MAX) 105 switch (ce_broadcast_hrtimer.mode) {
106 case CLOCK_EVT_MODE_ONESHOT:
107 if (ce_broadcast_hrtimer.next_event.tv64 != KTIME_MAX)
108 return HRTIMER_RESTART;
109 default:
103 return HRTIMER_NORESTART; 110 return HRTIMER_NORESTART;
104 111 }
105 return HRTIMER_RESTART;
106} 112}
107 113
108void tick_setup_hrtimer_broadcast(void) 114void tick_setup_hrtimer_broadcast(void)
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 7e8ca4f448a8..d39f32cdd1b5 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -255,18 +255,18 @@ int tick_receive_broadcast(void)
255/* 255/*
256 * Broadcast the event to the cpus, which are set in the mask (mangled). 256 * Broadcast the event to the cpus, which are set in the mask (mangled).
257 */ 257 */
258static void tick_do_broadcast(struct cpumask *mask) 258static bool tick_do_broadcast(struct cpumask *mask)
259{ 259{
260 int cpu = smp_processor_id(); 260 int cpu = smp_processor_id();
261 struct tick_device *td; 261 struct tick_device *td;
262 bool local = false;
262 263
263 /* 264 /*
264 * Check, if the current cpu is in the mask 265 * Check, if the current cpu is in the mask
265 */ 266 */
266 if (cpumask_test_cpu(cpu, mask)) { 267 if (cpumask_test_cpu(cpu, mask)) {
267 cpumask_clear_cpu(cpu, mask); 268 cpumask_clear_cpu(cpu, mask);
268 td = &per_cpu(tick_cpu_device, cpu); 269 local = true;
269 td->evtdev->event_handler(td->evtdev);
270 } 270 }
271 271
272 if (!cpumask_empty(mask)) { 272 if (!cpumask_empty(mask)) {
@@ -279,16 +279,17 @@ static void tick_do_broadcast(struct cpumask *mask)
279 td = &per_cpu(tick_cpu_device, cpumask_first(mask)); 279 td = &per_cpu(tick_cpu_device, cpumask_first(mask));
280 td->evtdev->broadcast(mask); 280 td->evtdev->broadcast(mask);
281 } 281 }
282 return local;
282} 283}
283 284
284/* 285/*
285 * Periodic broadcast: 286 * Periodic broadcast:
286 * - invoke the broadcast handlers 287 * - invoke the broadcast handlers
287 */ 288 */
288static void tick_do_periodic_broadcast(void) 289static bool tick_do_periodic_broadcast(void)
289{ 290{
290 cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask); 291 cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
291 tick_do_broadcast(tmpmask); 292 return tick_do_broadcast(tmpmask);
292} 293}
293 294
294/* 295/*
@@ -296,34 +297,26 @@ static void tick_do_periodic_broadcast(void)
296 */ 297 */
297static void tick_handle_periodic_broadcast(struct clock_event_device *dev) 298static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
298{ 299{
299 ktime_t next; 300 struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
301 bool bc_local;
300 302
301 raw_spin_lock(&tick_broadcast_lock); 303 raw_spin_lock(&tick_broadcast_lock);
304 bc_local = tick_do_periodic_broadcast();
302 305
303 tick_do_periodic_broadcast(); 306 if (clockevent_state_oneshot(dev)) {
307 ktime_t next = ktime_add(dev->next_event, tick_period);
304 308
305 /* 309 clockevents_program_event(dev, next, true);
306 * The device is in periodic mode. No reprogramming necessary: 310 }
307 */ 311 raw_spin_unlock(&tick_broadcast_lock);
308 if (dev->state == CLOCK_EVT_STATE_PERIODIC)
309 goto unlock;
310 312
311 /* 313 /*
312 * Setup the next period for devices, which do not have 314 * We run the handler of the local cpu after dropping
313 * periodic mode. We read dev->next_event first and add to it 315 * tick_broadcast_lock because the handler might deadlock when
314 * when the event already expired. clockevents_program_event() 316 * trying to switch to oneshot mode.
315 * sets dev->next_event only when the event is really
316 * programmed to the device.
317 */ 317 */
318 for (next = dev->next_event; ;) { 318 if (bc_local)
319 next = ktime_add(next, tick_period); 319 td->evtdev->event_handler(td->evtdev);
320
321 if (!clockevents_program_event(dev, next, false))
322 goto unlock;
323 tick_do_periodic_broadcast();
324 }
325unlock:
326 raw_spin_unlock(&tick_broadcast_lock);
327} 320}
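The reordering above follows a general rule: a callback that may take tick_broadcast_lock itself (the handler can switch the local device to oneshot mode) must never be invoked while that lock is held. The shape of the fix, with a hypothetical helper standing in for the broadcast bookkeeping:

static bool demo_prepare_broadcast(void);	/* assumed: kick remote cpus, report if we are in the mask */

static void demo_broadcast(struct tick_device *td, raw_spinlock_t *lock)
{
	bool run_local;

	raw_spin_lock(lock);
	run_local = demo_prepare_broadcast();
	raw_spin_unlock(lock);

	if (run_local)
		td->evtdev->event_handler(td->evtdev);	/* safe: lock dropped */
}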
328 321
329/** 322/**
@@ -532,23 +525,19 @@ static void tick_broadcast_set_affinity(struct clock_event_device *bc,
532 irq_set_affinity(bc->irq, bc->cpumask); 525 irq_set_affinity(bc->irq, bc->cpumask);
533} 526}
534 527
535static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu, 528static void tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
536 ktime_t expires, int force) 529 ktime_t expires)
537{ 530{
538 int ret; 531 if (!clockevent_state_oneshot(bc))
539 532 clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
540 if (bc->state != CLOCK_EVT_STATE_ONESHOT)
541 clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
542 533
543 ret = clockevents_program_event(bc, expires, force); 534 clockevents_program_event(bc, expires, 1);
544 if (!ret) 535 tick_broadcast_set_affinity(bc, cpumask_of(cpu));
545 tick_broadcast_set_affinity(bc, cpumask_of(cpu));
546 return ret;
547} 536}
548 537
549static void tick_resume_broadcast_oneshot(struct clock_event_device *bc) 538static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
550{ 539{
551 clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT); 540 clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
552} 541}
553 542
554/* 543/*
@@ -566,7 +555,7 @@ void tick_check_oneshot_broadcast_this_cpu(void)
566 * switched over, leave the device alone. 555 * switched over, leave the device alone.
567 */ 556 */
568 if (td->mode == TICKDEV_MODE_ONESHOT) { 557 if (td->mode == TICKDEV_MODE_ONESHOT) {
569 clockevents_set_state(td->evtdev, 558 clockevents_switch_state(td->evtdev,
570 CLOCK_EVT_STATE_ONESHOT); 559 CLOCK_EVT_STATE_ONESHOT);
571 } 560 }
572 } 561 }
@@ -580,9 +569,9 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
580 struct tick_device *td; 569 struct tick_device *td;
581 ktime_t now, next_event; 570 ktime_t now, next_event;
582 int cpu, next_cpu = 0; 571 int cpu, next_cpu = 0;
572 bool bc_local;
583 573
584 raw_spin_lock(&tick_broadcast_lock); 574 raw_spin_lock(&tick_broadcast_lock);
585again:
586 dev->next_event.tv64 = KTIME_MAX; 575 dev->next_event.tv64 = KTIME_MAX;
587 next_event.tv64 = KTIME_MAX; 576 next_event.tv64 = KTIME_MAX;
588 cpumask_clear(tmpmask); 577 cpumask_clear(tmpmask);
@@ -624,7 +613,7 @@ again:
624 /* 613 /*
625 * Wakeup the cpus which have an expired event. 614 * Wakeup the cpus which have an expired event.
626 */ 615 */
627 tick_do_broadcast(tmpmask); 616 bc_local = tick_do_broadcast(tmpmask);
628 617
629 /* 618 /*
630 * Two reasons for reprogram: 619 * Two reasons for reprogram:
@@ -636,15 +625,15 @@ again:
636 * - There are pending events on sleeping CPUs which were not 625 * - There are pending events on sleeping CPUs which were not
637 * in the event mask 626 * in the event mask
638 */ 627 */
639 if (next_event.tv64 != KTIME_MAX) { 628 if (next_event.tv64 != KTIME_MAX)
640 /* 629 tick_broadcast_set_event(dev, next_cpu, next_event);
641 * Rearm the broadcast device. If event expired, 630
642 * repeat the above
643 */
644 if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
645 goto again;
646 }
647 raw_spin_unlock(&tick_broadcast_lock); 631 raw_spin_unlock(&tick_broadcast_lock);
632
633 if (bc_local) {
634 td = this_cpu_ptr(&tick_cpu_device);
635 td->evtdev->event_handler(td->evtdev);
636 }
648} 637}
649 638
650static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu) 639static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
@@ -670,7 +659,7 @@ static void broadcast_shutdown_local(struct clock_event_device *bc,
670 if (dev->next_event.tv64 < bc->next_event.tv64) 659 if (dev->next_event.tv64 < bc->next_event.tv64)
671 return; 660 return;
672 } 661 }
673 clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN); 662 clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
674} 663}
675 664
676/** 665/**
@@ -726,7 +715,7 @@ int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
726 */ 715 */
727 if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) && 716 if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
728 dev->next_event.tv64 < bc->next_event.tv64) 717 dev->next_event.tv64 < bc->next_event.tv64)
729 tick_broadcast_set_event(bc, cpu, dev->next_event, 1); 718 tick_broadcast_set_event(bc, cpu, dev->next_event);
730 } 719 }
731 /* 720 /*
732 * If the current CPU owns the hrtimer broadcast 721 * If the current CPU owns the hrtimer broadcast
@@ -740,7 +729,7 @@ int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
740 cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); 729 cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
741 } else { 730 } else {
742 if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { 731 if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
743 clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); 732 clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
744 /* 733 /*
745 * The cpu which was handling the broadcast 734 * The cpu which was handling the broadcast
746 * timer marked this cpu in the broadcast 735 * timer marked this cpu in the broadcast
@@ -842,7 +831,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
842 831
843 /* Set it up only once ! */ 832 /* Set it up only once ! */
844 if (bc->event_handler != tick_handle_oneshot_broadcast) { 833 if (bc->event_handler != tick_handle_oneshot_broadcast) {
845 int was_periodic = bc->state == CLOCK_EVT_STATE_PERIODIC; 834 int was_periodic = clockevent_state_periodic(bc);
846 835
847 bc->event_handler = tick_handle_oneshot_broadcast; 836 bc->event_handler = tick_handle_oneshot_broadcast;
848 837
@@ -858,10 +847,10 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
858 tick_broadcast_oneshot_mask, tmpmask); 847 tick_broadcast_oneshot_mask, tmpmask);
859 848
860 if (was_periodic && !cpumask_empty(tmpmask)) { 849 if (was_periodic && !cpumask_empty(tmpmask)) {
861 clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT); 850 clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
862 tick_broadcast_init_next_event(tmpmask, 851 tick_broadcast_init_next_event(tmpmask,
863 tick_next_period); 852 tick_next_period);
864 tick_broadcast_set_event(bc, cpu, tick_next_period, 1); 853 tick_broadcast_set_event(bc, cpu, tick_next_period);
865 } else 854 } else
866 bc->next_event.tv64 = KTIME_MAX; 855 bc->next_event.tv64 = KTIME_MAX;
867 } else { 856 } else {
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 3ae6afa1eb98..17f144450050 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -102,7 +102,17 @@ void tick_handle_periodic(struct clock_event_device *dev)
102 102
103 tick_periodic(cpu); 103 tick_periodic(cpu);
104 104
105 if (dev->state != CLOCK_EVT_STATE_ONESHOT) 105#if defined(CONFIG_HIGH_RES_TIMERS) || defined(CONFIG_NO_HZ_COMMON)
106 /*
107 * The cpu might have transitioned to HIGHRES or NOHZ mode via
108 * update_process_times() -> run_local_timers() ->
109 * hrtimer_run_queues().
110 */
111 if (dev->event_handler != tick_handle_periodic)
112 return;
113#endif
114
115 if (!clockevent_state_oneshot(dev))
106 return; 116 return;
107 for (;;) { 117 for (;;) {
108 /* 118 /*
@@ -140,7 +150,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
140 150
141 if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && 151 if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
142 !tick_broadcast_oneshot_active()) { 152 !tick_broadcast_oneshot_active()) {
143 clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC); 153 clockevents_switch_state(dev, CLOCK_EVT_STATE_PERIODIC);
144 } else { 154 } else {
145 unsigned long seq; 155 unsigned long seq;
146 ktime_t next; 156 ktime_t next;
@@ -150,7 +160,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
150 next = tick_next_period; 160 next = tick_next_period;
151 } while (read_seqretry(&jiffies_lock, seq)); 161 } while (read_seqretry(&jiffies_lock, seq));
152 162
153 clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); 163 clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
154 164
155 for (;;) { 165 for (;;) {
156 if (!clockevents_program_event(dev, next, false)) 166 if (!clockevents_program_event(dev, next, false))
@@ -367,7 +377,7 @@ void tick_shutdown(unsigned int cpu)
367 * Prevent that the clock events layer tries to call 377 * Prevent that the clock events layer tries to call
368 * the set mode function! 378 * the set mode function!
369 */ 379 */
370 dev->state = CLOCK_EVT_STATE_DETACHED; 380 clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED);
371 dev->mode = CLOCK_EVT_MODE_UNUSED; 381 dev->mode = CLOCK_EVT_MODE_UNUSED;
372 clockevents_exchange_device(dev, NULL); 382 clockevents_exchange_device(dev, NULL);
373 dev->event_handler = clockevents_handle_noop; 383 dev->event_handler = clockevents_handle_noop;
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index b64fdd8054c5..966a5a6fdd0a 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -36,11 +36,22 @@ static inline int tick_device_is_functional(struct clock_event_device *dev)
36 return !(dev->features & CLOCK_EVT_FEAT_DUMMY); 36 return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
37} 37}
38 38
39static inline enum clock_event_state clockevent_get_state(struct clock_event_device *dev)
40{
41 return dev->state_use_accessors;
42}
43
44static inline void clockevent_set_state(struct clock_event_device *dev,
45 enum clock_event_state state)
46{
47 dev->state_use_accessors = state;
48}
49
39extern void clockevents_shutdown(struct clock_event_device *dev); 50extern void clockevents_shutdown(struct clock_event_device *dev);
40extern void clockevents_exchange_device(struct clock_event_device *old, 51extern void clockevents_exchange_device(struct clock_event_device *old,
41 struct clock_event_device *new); 52 struct clock_event_device *new);
42extern void clockevents_set_state(struct clock_event_device *dev, 53extern void clockevents_switch_state(struct clock_event_device *dev,
43 enum clock_event_state state); 54 enum clock_event_state state);
44extern int clockevents_program_event(struct clock_event_device *dev, 55extern int clockevents_program_event(struct clock_event_device *dev,
45 ktime_t expires, bool force); 56 ktime_t expires, bool force);
46extern void clockevents_handle_noop(struct clock_event_device *dev); 57extern void clockevents_handle_noop(struct clock_event_device *dev);
@@ -137,3 +148,19 @@ extern void tick_nohz_init(void);
137# else 148# else
138static inline void tick_nohz_init(void) { } 149static inline void tick_nohz_init(void) { }
139#endif 150#endif
151
152#ifdef CONFIG_NO_HZ_COMMON
153extern unsigned long tick_nohz_active;
154#else
155#define tick_nohz_active (0)
156#endif
157
158#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
159extern void timers_update_migration(bool update_nohz);
160#else
161static inline void timers_update_migration(bool update_nohz) { }
162#endif
163
164DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
165
166extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
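The new get/set accessors pair up with the per-state predicates used throughout this series (clockevent_state_oneshot(), clockevent_state_periodic(), ...). A sketch of how those predicates are assumed to look on top of clockevent_get_state(), wherever they end up living:

static inline bool clockevent_state_periodic(struct clock_event_device *dev)
{
	return clockevent_get_state(dev) == CLOCK_EVT_STATE_PERIODIC;
}

static inline bool clockevent_state_oneshot(struct clock_event_device *dev)
{
	return clockevent_get_state(dev) == CLOCK_EVT_STATE_ONESHOT;
}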
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 67a64b1670bf..b51344652330 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -28,6 +28,22 @@ int tick_program_event(ktime_t expires, int force)
28{ 28{
29 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); 29 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
30 30
31 if (unlikely(expires.tv64 == KTIME_MAX)) {
32 /*
33 * We don't need the clock event device any more, stop it.
34 */
35 clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED);
36 return 0;
37 }
38
39 if (unlikely(clockevent_state_oneshot_stopped(dev))) {
40 /*
41 * We need the clock event again, configure it in ONESHOT mode
42 * before using it.
43 */
44 clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
45 }
46
31 return clockevents_program_event(dev, expires, force); 47 return clockevents_program_event(dev, expires, force);
32} 48}
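Stopping and restarting the per-cpu device is now hidden entirely behind tick_program_event(): handing in KTIME_MAX parks the device in ONESHOT_STOPPED, and the next real expiry switches it back before programming. Illustrative caller flow (hypothetical helper):

static void demo_park_or_arm(ktime_t next, bool park)
{
	if (park) {
		ktime_t max = { .tv64 = KTIME_MAX };

		tick_program_event(max, 1);	/* no timer pending: device stopped */
	} else {
		tick_program_event(next, 0);	/* real expiry: ONESHOT restored */
	}
}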
33 49
@@ -38,7 +54,7 @@ void tick_resume_oneshot(void)
38{ 54{
39 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); 55 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
40 56
41 clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); 57 clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
42 clockevents_program_event(dev, ktime_get(), true); 58 clockevents_program_event(dev, ktime_get(), true);
43} 59}
44 60
@@ -50,7 +66,7 @@ void tick_setup_oneshot(struct clock_event_device *newdev,
50 ktime_t next_event) 66 ktime_t next_event)
51{ 67{
52 newdev->event_handler = handler; 68 newdev->event_handler = handler;
53 clockevents_set_state(newdev, CLOCK_EVT_STATE_ONESHOT); 69 clockevents_switch_state(newdev, CLOCK_EVT_STATE_ONESHOT);
54 clockevents_program_event(newdev, next_event, true); 70 clockevents_program_event(newdev, next_event, true);
55} 71}
56 72
@@ -81,7 +97,7 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
81 97
82 td->mode = TICKDEV_MODE_ONESHOT; 98 td->mode = TICKDEV_MODE_ONESHOT;
83 dev->event_handler = handler; 99 dev->event_handler = handler;
84 clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); 100 clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
85 tick_broadcast_switch_to_oneshot(); 101 tick_broadcast_switch_to_oneshot();
86 return 0; 102 return 0;
87} 103}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 914259128145..c792429e98c6 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -399,7 +399,7 @@ void __init tick_nohz_init(void)
399 * NO HZ enabled ? 399 * NO HZ enabled ?
400 */ 400 */
401static int tick_nohz_enabled __read_mostly = 1; 401static int tick_nohz_enabled __read_mostly = 1;
402int tick_nohz_active __read_mostly; 402unsigned long tick_nohz_active __read_mostly;
403/* 403/*
404 * Enable / Disable tickless mode 404 * Enable / Disable tickless mode
405 */ 405 */
@@ -565,156 +565,144 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
565} 565}
566EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); 566EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
567 567
568static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
569{
570 hrtimer_cancel(&ts->sched_timer);
571 hrtimer_set_expires(&ts->sched_timer, ts->last_tick);
572
573 /* Forward the time to expire in the future */
574 hrtimer_forward(&ts->sched_timer, now, tick_period);
575
576 if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
577 hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
578 else
579 tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
580}
581
568static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, 582static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
569 ktime_t now, int cpu) 583 ktime_t now, int cpu)
570{ 584{
571 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
572 ktime_t last_update, expires, ret = { .tv64 = 0 };
573 unsigned long rcu_delta_jiffies;
574 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); 585 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
575 u64 time_delta; 586 u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
576 587 unsigned long seq, basejiff;
577 time_delta = timekeeping_max_deferment(); 588 ktime_t tick;
578 589
579 /* Read jiffies and the time when jiffies were updated last */ 590 /* Read jiffies and the time when jiffies were updated last */
580 do { 591 do {
581 seq = read_seqbegin(&jiffies_lock); 592 seq = read_seqbegin(&jiffies_lock);
582 last_update = last_jiffies_update; 593 basemono = last_jiffies_update.tv64;
583 last_jiffies = jiffies; 594 basejiff = jiffies;
584 } while (read_seqretry(&jiffies_lock, seq)); 595 } while (read_seqretry(&jiffies_lock, seq));
596 ts->last_jiffies = basejiff;
585 597
586 if (rcu_needs_cpu(&rcu_delta_jiffies) || 598 if (rcu_needs_cpu(basemono, &next_rcu) ||
587 arch_needs_cpu() || irq_work_needs_cpu()) { 599 arch_needs_cpu() || irq_work_needs_cpu()) {
588 next_jiffies = last_jiffies + 1; 600 next_tick = basemono + TICK_NSEC;
589 delta_jiffies = 1;
590 } else { 601 } else {
591 /* Get the next timer wheel timer */ 602 /*
592 next_jiffies = get_next_timer_interrupt(last_jiffies); 603 * Get the next pending timer. If high resolution
593 delta_jiffies = next_jiffies - last_jiffies; 604 * timers are enabled this only takes the timer wheel
594 if (rcu_delta_jiffies < delta_jiffies) { 605 * timers into account. If high resolution timers are
595 next_jiffies = last_jiffies + rcu_delta_jiffies; 606 * disabled this also looks at the next expiring
596 delta_jiffies = rcu_delta_jiffies; 607 * hrtimer.
597 } 608 */
609 next_tmr = get_next_timer_interrupt(basejiff, basemono);
610 ts->next_timer = next_tmr;
611 /* Take the next rcu event into account */
612 next_tick = next_rcu < next_tmr ? next_rcu : next_tmr;
598 } 613 }
599 614
600 /* 615 /*
601 * Do not stop the tick, if we are only one off (or less) 616 * If the tick is due in the next period, keep it ticking or
602 * or if the cpu is required for RCU: 617 * restart it proper.
603 */ 618 */
604 if (!ts->tick_stopped && delta_jiffies <= 1) 619 delta = next_tick - basemono;
605 goto out; 620 if (delta <= (u64)TICK_NSEC) {
606 621 tick.tv64 = 0;
607 /* Schedule the tick, if we are at least one jiffie off */ 622 if (!ts->tick_stopped)
608 if ((long)delta_jiffies >= 1) { 623 goto out;
609 624 if (delta == 0) {
610 /* 625 /* Tick is stopped, but required now. Enforce it */
611 * If this cpu is the one which updates jiffies, then 626 tick_nohz_restart(ts, now);
612 * give up the assignment and let it be taken by the 627 goto out;
613 * cpu which runs the tick timer next, which might be
614 * this cpu as well. If we don't drop this here the
615 * jiffies might be stale and do_timer() never
616 * invoked. Keep track of the fact that it was the one
617 * which had the do_timer() duty last. If this cpu is
618 * the one which had the do_timer() duty last, we
619 * limit the sleep time to the timekeeping
620 * max_deferement value which we retrieved
621 * above. Otherwise we can sleep as long as we want.
622 */
623 if (cpu == tick_do_timer_cpu) {
624 tick_do_timer_cpu = TICK_DO_TIMER_NONE;
625 ts->do_timer_last = 1;
626 } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
627 time_delta = KTIME_MAX;
628 ts->do_timer_last = 0;
629 } else if (!ts->do_timer_last) {
630 time_delta = KTIME_MAX;
631 } 628 }
629 }
630
631 /*
632 * If this cpu is the one which updates jiffies, then give up
633 * the assignment and let it be taken by the cpu which runs
634 * the tick timer next, which might be this cpu as well. If we
635 * don't drop this here the jiffies might be stale and
636 * do_timer() never invoked. Keep track of the fact that it
637 * was the one which had the do_timer() duty last. If this cpu
638 * is the one which had the do_timer() duty last, we limit the
639	 * sleep time to the timekeeping max_deferment value.
640 * Otherwise we can sleep as long as we want.
641 */
642 delta = timekeeping_max_deferment();
643 if (cpu == tick_do_timer_cpu) {
644 tick_do_timer_cpu = TICK_DO_TIMER_NONE;
645 ts->do_timer_last = 1;
646 } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
647 delta = KTIME_MAX;
648 ts->do_timer_last = 0;
649 } else if (!ts->do_timer_last) {
650 delta = KTIME_MAX;
651 }
632 652
633#ifdef CONFIG_NO_HZ_FULL 653#ifdef CONFIG_NO_HZ_FULL
634 if (!ts->inidle) { 654 /* Limit the tick delta to the maximum scheduler deferment */
635 time_delta = min(time_delta, 655 if (!ts->inidle)
636 scheduler_tick_max_deferment()); 656 delta = min(delta, scheduler_tick_max_deferment());
637 }
638#endif 657#endif
639 658
640 /* 659 /* Calculate the next expiry time */
641 * calculate the expiry time for the next timer wheel 660 if (delta < (KTIME_MAX - basemono))
642 * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals 661 expires = basemono + delta;
643 * that there is no timer pending or at least extremely 662 else
644 * far into the future (12 days for HZ=1000). In this 663 expires = KTIME_MAX;
645 * case we set the expiry to the end of time.
646 */
647 if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) {
648 /*
649 * Calculate the time delta for the next timer event.
650 * If the time delta exceeds the maximum time delta
651 * permitted by the current clocksource then adjust
652 * the time delta accordingly to ensure the
653 * clocksource does not wrap.
654 */
655 time_delta = min_t(u64, time_delta,
656 tick_period.tv64 * delta_jiffies);
657 }
658
659 if (time_delta < KTIME_MAX)
660 expires = ktime_add_ns(last_update, time_delta);
661 else
662 expires.tv64 = KTIME_MAX;
663
664	/* Skip reprogram of event if it's not changed */
665 if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
666 goto out;
667 664
668 ret = expires; 665 expires = min_t(u64, expires, next_tick);
666 tick.tv64 = expires;
669 667
670	/* 668	/* Skip reprogram of event if it's not changed */
671 * nohz_stop_sched_tick can be called several times before 669 if (ts->tick_stopped && (expires == dev->next_event.tv64))
672 * the nohz_restart_sched_tick is called. This happens when 670 goto out;
673 * interrupts arrive which do not cause a reschedule. In the
674 * first call we save the current tick time, so we can restart
675 * the scheduler tick in nohz_restart_sched_tick.
676 */
677 if (!ts->tick_stopped) {
678 nohz_balance_enter_idle(cpu);
679 calc_load_enter_idle();
680 671
681 ts->last_tick = hrtimer_get_expires(&ts->sched_timer); 672 /*
682 ts->tick_stopped = 1; 673 * nohz_stop_sched_tick can be called several times before
683 trace_tick_stop(1, " "); 674 * the nohz_restart_sched_tick is called. This happens when
684 } 675 * interrupts arrive which do not cause a reschedule. In the
676 * first call we save the current tick time, so we can restart
677 * the scheduler tick in nohz_restart_sched_tick.
678 */
679 if (!ts->tick_stopped) {
680 nohz_balance_enter_idle(cpu);
681 calc_load_enter_idle();
685 682
686 /* 683 ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
687 * If the expiration time == KTIME_MAX, then 684 ts->tick_stopped = 1;
688 * in this case we simply stop the tick timer. 685 trace_tick_stop(1, " ");
689 */ 686 }
690 if (unlikely(expires.tv64 == KTIME_MAX)) {
691 if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
692 hrtimer_cancel(&ts->sched_timer);
693 goto out;
694 }
695 687
696 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { 688 /*
697 hrtimer_start(&ts->sched_timer, expires, 689 * If the expiration time == KTIME_MAX, then we simply stop
698 HRTIMER_MODE_ABS_PINNED); 690 * the tick timer.
699 /* Check, if the timer was already in the past */ 691 */
700 if (hrtimer_active(&ts->sched_timer)) 692 if (unlikely(expires == KTIME_MAX)) {
701 goto out; 693 if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
702 } else if (!tick_program_event(expires, 0)) 694 hrtimer_cancel(&ts->sched_timer);
703 goto out; 695 goto out;
704 /*
705 * We are past the event already. So we crossed a
706 * jiffie boundary. Update jiffies and raise the
707 * softirq.
708 */
709 tick_do_update_jiffies64(ktime_get());
710 } 696 }
711 raise_softirq_irqoff(TIMER_SOFTIRQ); 697
698 if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
699 hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED);
700 else
701 tick_program_event(tick, 1);
712out: 702out:
713 ts->next_jiffies = next_jiffies; 703 /* Update the estimated sleep length */
714 ts->last_jiffies = last_jiffies;
715 ts->sleep_length = ktime_sub(dev->next_event, now); 704 ts->sleep_length = ktime_sub(dev->next_event, now);
716 705 return tick;
717 return ret;
718} 706}
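
The rework above drops the delta_jiffies bookkeeping: tick_nohz_stop_sched_tick() now works in nanoseconds, takes the earlier of the next timer wheel expiry and the next RCU event as next_tick, and keeps the periodic tick whenever that candidate is at most one tick period away. A minimal standalone sketch of that decision follows; TICK_NSEC is derived from an assumed HZ=250 and the sample values are made up.

/*
 * Standalone sketch (not kernel code) of the nanosecond-based decision
 * the reworked tick_nohz_stop_sched_tick() makes.  TICK_NSEC is shown
 * for an assumed HZ=250; the real value depends on the kernel config.
 */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC	1000000000ULL
#define HZ		250ULL			/* assumption for the example */
#define TICK_NSEC	(NSEC_PER_SEC / HZ)

int main(void)
{
	uint64_t basemono  = 1000 * TICK_NSEC;		/* "now", tick aligned */
	uint64_t next_tmr  = basemono + 50 * TICK_NSEC;	/* next wheel timer */
	uint64_t next_rcu  = basemono + 10 * TICK_NSEC;	/* next RCU event */
	uint64_t next_tick = next_rcu < next_tmr ? next_rcu : next_tmr;
	uint64_t delta     = next_tick - basemono;

	if (delta <= TICK_NSEC)
		printf("keep the periodic tick running\n");
	else
		printf("stop the tick, reprogram for +%llu ns\n",
		       (unsigned long long)delta);
	return 0;
}

Everything after this point (do_timer handoff, scheduler deferment, clamping against KTIME_MAX) only trims the expiry further; it never moves it earlier than next_tick.
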
719 707
720static void tick_nohz_full_stop_tick(struct tick_sched *ts) 708static void tick_nohz_full_stop_tick(struct tick_sched *ts)
@@ -876,32 +864,6 @@ ktime_t tick_nohz_get_sleep_length(void)
876 return ts->sleep_length; 864 return ts->sleep_length;
877} 865}
878 866
879static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
880{
881 hrtimer_cancel(&ts->sched_timer);
882 hrtimer_set_expires(&ts->sched_timer, ts->last_tick);
883
884 while (1) {
885 /* Forward the time to expire in the future */
886 hrtimer_forward(&ts->sched_timer, now, tick_period);
887
888 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
889 hrtimer_start_expires(&ts->sched_timer,
890 HRTIMER_MODE_ABS_PINNED);
891 /* Check, if the timer was already in the past */
892 if (hrtimer_active(&ts->sched_timer))
893 break;
894 } else {
895 if (!tick_program_event(
896 hrtimer_get_expires(&ts->sched_timer), 0))
897 break;
898 }
899 /* Reread time and update jiffies */
900 now = ktime_get();
901 tick_do_update_jiffies64(now);
902 }
903}
904
905static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) 867static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
906{ 868{
907 /* Update jiffies first */ 869 /* Update jiffies first */
@@ -972,12 +934,6 @@ void tick_nohz_idle_exit(void)
972 local_irq_enable(); 934 local_irq_enable();
973} 935}
974 936
975static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
976{
977 hrtimer_forward(&ts->sched_timer, now, tick_period);
978 return tick_program_event(hrtimer_get_expires(&ts->sched_timer), 0);
979}
980
981/* 937/*
982 * The nohz low res interrupt handler 938 * The nohz low res interrupt handler
983 */ 939 */
@@ -996,10 +952,18 @@ static void tick_nohz_handler(struct clock_event_device *dev)
996 if (unlikely(ts->tick_stopped)) 952 if (unlikely(ts->tick_stopped))
997 return; 953 return;
998 954
999 while (tick_nohz_reprogram(ts, now)) { 955 hrtimer_forward(&ts->sched_timer, now, tick_period);
1000 now = ktime_get(); 956 tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
1001 tick_do_update_jiffies64(now); 957}
1002 } 958
959static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
960{
961 if (!tick_nohz_enabled)
962 return;
963 ts->nohz_mode = mode;
964 /* One update is enough */
965 if (!test_and_set_bit(0, &tick_nohz_active))
966 timers_update_migration(true);
1003} 967}
1004 968
1005/** 969/**
@@ -1013,13 +977,8 @@ static void tick_nohz_switch_to_nohz(void)
1013 if (!tick_nohz_enabled) 977 if (!tick_nohz_enabled)
1014 return; 978 return;
1015 979
1016 local_irq_disable(); 980 if (tick_switch_to_oneshot(tick_nohz_handler))
1017 if (tick_switch_to_oneshot(tick_nohz_handler)) {
1018 local_irq_enable();
1019 return; 981 return;
1020 }
1021 tick_nohz_active = 1;
1022 ts->nohz_mode = NOHZ_MODE_LOWRES;
1023 982
1024 /* 983 /*
1025 * Recycle the hrtimer in ts, so we can share the 984 * Recycle the hrtimer in ts, so we can share the
@@ -1029,13 +988,10 @@ static void tick_nohz_switch_to_nohz(void)
1029 /* Get the next period */ 988 /* Get the next period */
1030 next = tick_init_jiffy_update(); 989 next = tick_init_jiffy_update();
1031 990
1032 for (;;) { 991 hrtimer_forward_now(&ts->sched_timer, tick_period);
1033 hrtimer_set_expires(&ts->sched_timer, next); 992 hrtimer_set_expires(&ts->sched_timer, next);
1034 if (!tick_program_event(next, 0)) 993 tick_program_event(next, 1);
1035 break; 994 tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
1036 next = ktime_add(next, tick_period);
1037 }
1038 local_irq_enable();
1039} 995}
1040 996
1041/* 997/*
@@ -1087,6 +1043,7 @@ static inline void tick_nohz_irq_enter(void)
1087 1043
1088static inline void tick_nohz_switch_to_nohz(void) { } 1044static inline void tick_nohz_switch_to_nohz(void) { }
1089static inline void tick_nohz_irq_enter(void) { } 1045static inline void tick_nohz_irq_enter(void) { }
1046static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { }
1090 1047
1091#endif /* CONFIG_NO_HZ_COMMON */ 1048#endif /* CONFIG_NO_HZ_COMMON */
1092 1049
@@ -1167,22 +1124,9 @@ void tick_setup_sched_timer(void)
1167 hrtimer_add_expires_ns(&ts->sched_timer, offset); 1124 hrtimer_add_expires_ns(&ts->sched_timer, offset);
1168 } 1125 }
1169 1126
1170 for (;;) { 1127 hrtimer_forward(&ts->sched_timer, now, tick_period);
1171 hrtimer_forward(&ts->sched_timer, now, tick_period); 1128 hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
1172 hrtimer_start_expires(&ts->sched_timer, 1129 tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
1173 HRTIMER_MODE_ABS_PINNED);
1174 /* Check, if the timer was already in the past */
1175 if (hrtimer_active(&ts->sched_timer))
1176 break;
1177 now = ktime_get();
1178 }
1179
1180#ifdef CONFIG_NO_HZ_COMMON
1181 if (tick_nohz_enabled) {
1182 ts->nohz_mode = NOHZ_MODE_HIGHRES;
1183 tick_nohz_active = 1;
1184 }
1185#endif
1186} 1130}
1187#endif /* HIGH_RES_TIMERS */ 1131#endif /* HIGH_RES_TIMERS */
1188 1132
@@ -1227,7 +1171,7 @@ void tick_oneshot_notify(void)
1227 * Called cyclic from the hrtimer softirq (driven by the timer 1171 * Called cyclic from the hrtimer softirq (driven by the timer
1228 * softirq) allow_nohz signals, that we can switch into low-res nohz 1172 * softirq) allow_nohz signals, that we can switch into low-res nohz
1229 * mode, because high resolution timers are disabled (either compile 1173 * mode, because high resolution timers are disabled (either compile
1230 * or runtime). 1174 * or runtime). Called with interrupts disabled.
1231 */ 1175 */
1232int tick_check_oneshot_change(int allow_nohz) 1176int tick_check_oneshot_change(int allow_nohz)
1233{ 1177{
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 28b5da3e1a17..42fdf4958bcc 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -57,7 +57,7 @@ struct tick_sched {
57 ktime_t iowait_sleeptime; 57 ktime_t iowait_sleeptime;
58 ktime_t sleep_length; 58 ktime_t sleep_length;
59 unsigned long last_jiffies; 59 unsigned long last_jiffies;
60 unsigned long next_jiffies; 60 u64 next_timer;
61 ktime_t idle_expires; 61 ktime_t idle_expires;
62 int do_timer_last; 62 int do_timer_last;
63}; 63};
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 2c85b7724af4..85d5bb1d67eb 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -41,7 +41,7 @@
41#include <asm/uaccess.h> 41#include <asm/uaccess.h>
42#include <asm/unistd.h> 42#include <asm/unistd.h>
43 43
44#include "timeconst.h" 44#include <generated/timeconst.h>
45#include "timekeeping.h" 45#include "timekeeping.h"
46 46
47/* 47/*
@@ -173,6 +173,10 @@ int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz)
173 return error; 173 return error;
174 174
175 if (tz) { 175 if (tz) {
 176		/* Verify we're within the +-15 hrs range */
177 if (tz->tz_minuteswest > 15*60 || tz->tz_minuteswest < -15*60)
178 return -EINVAL;
179
176 sys_tz = *tz; 180 sys_tz = *tz;
177 update_vsyscall_tz(); 181 update_vsyscall_tz();
178 if (firsttime) { 182 if (firsttime) {
@@ -483,9 +487,11 @@ struct timespec64 ns_to_timespec64(const s64 nsec)
483} 487}
484EXPORT_SYMBOL(ns_to_timespec64); 488EXPORT_SYMBOL(ns_to_timespec64);
485#endif 489#endif
486/* 490/**
487 * When we convert to jiffies then we interpret incoming values 491 * msecs_to_jiffies: - convert milliseconds to jiffies
488 * the following way: 492 * @m: time in milliseconds
493 *
494 * conversion is done as follows:
489 * 495 *
490 * - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET) 496 * - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET)
491 * 497 *
@@ -493,66 +499,36 @@ EXPORT_SYMBOL(ns_to_timespec64);
493 * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too. 499 * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too.
494 * 500 *
495 * - all other values are converted to jiffies by either multiplying 501 * - all other values are converted to jiffies by either multiplying
496 * the input value by a factor or dividing it with a factor 502 * the input value by a factor or dividing it with a factor and
497 * 503 * handling any 32-bit overflows.
498 * We must also be careful about 32-bit overflows. 504 * for the details see __msecs_to_jiffies()
505 *
506 * msecs_to_jiffies() checks for the passed in value being a constant
507 * via __builtin_constant_p() allowing gcc to eliminate most of the
508 * code, __msecs_to_jiffies() is called if the value passed does not
509 * allow constant folding and the actual conversion must be done at
510 * runtime.
511 * the _msecs_to_jiffies helpers are the HZ dependent conversion
512 * routines found in include/linux/jiffies.h
499 */ 513 */
500unsigned long msecs_to_jiffies(const unsigned int m) 514unsigned long __msecs_to_jiffies(const unsigned int m)
501{ 515{
502 /* 516 /*
503 * Negative value, means infinite timeout: 517 * Negative value, means infinite timeout:
504 */ 518 */
505 if ((int)m < 0) 519 if ((int)m < 0)
506 return MAX_JIFFY_OFFSET; 520 return MAX_JIFFY_OFFSET;
507 521 return _msecs_to_jiffies(m);
508#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
509 /*
510 * HZ is equal to or smaller than 1000, and 1000 is a nice
511 * round multiple of HZ, divide with the factor between them,
512 * but round upwards:
513 */
514 return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
515#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
516 /*
517 * HZ is larger than 1000, and HZ is a nice round multiple of
518 * 1000 - simply multiply with the factor between them.
519 *
520 * But first make sure the multiplication result cannot
521 * overflow:
522 */
523 if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
524 return MAX_JIFFY_OFFSET;
525
526 return m * (HZ / MSEC_PER_SEC);
527#else
528 /*
529 * Generic case - multiply, round and divide. But first
530 * check that if we are doing a net multiplication, that
531 * we wouldn't overflow:
532 */
533 if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
534 return MAX_JIFFY_OFFSET;
535
536 return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32)
537 >> MSEC_TO_HZ_SHR32;
538#endif
539} 522}
540EXPORT_SYMBOL(msecs_to_jiffies); 523EXPORT_SYMBOL(__msecs_to_jiffies);
541 524
542unsigned long usecs_to_jiffies(const unsigned int u) 525unsigned long __usecs_to_jiffies(const unsigned int u)
543{ 526{
544 if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET)) 527 if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET))
545 return MAX_JIFFY_OFFSET; 528 return MAX_JIFFY_OFFSET;
546#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) 529 return _usecs_to_jiffies(u);
547 return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ);
548#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC)
549 return u * (HZ / USEC_PER_SEC);
550#else
551 return (USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32)
552 >> USEC_TO_HZ_SHR32;
553#endif
554} 530}
555EXPORT_SYMBOL(usecs_to_jiffies); 531EXPORT_SYMBOL(__usecs_to_jiffies);
556 532
557/* 533/*
558 * The TICK_NSEC - 1 rounds up the value to the next resolution. Note 534 * The TICK_NSEC - 1 rounds up the value to the next resolution. Note
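
The conversion special cases move out of kernel/time/time.c: per the new comment, msecs_to_jiffies() folds constant arguments at compile time and __msecs_to_jiffies() handles the rest at runtime via the _msecs_to_jiffies() helpers in include/linux/jiffies.h. A standalone illustration of the HZ <= 1000 rounding the removed open-coded path performed (and which the helpers must preserve); HZ=250 is an assumption for the example.

/*
 * Standalone illustration of the HZ <= 1000 path: divide by
 * MSEC_PER_SEC/HZ, rounding up so a short timeout never collapses
 * to zero jiffies.  HZ=250 is assumed.
 */
#include <stdio.h>

#define MSEC_PER_SEC	1000U
#define HZ		250U	/* assumption */

static unsigned long msecs_to_jiffies_roundup(unsigned int m)
{
	return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
}

int main(void)
{
	/* 1 ms still costs a full 4 ms jiffy at HZ=250 */
	printf("%lu\n", msecs_to_jiffies_roundup(1));	/* 1 */
	printf("%lu\n", msecs_to_jiffies_roundup(10));	/* 3 */
	return 0;
}
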
diff --git a/kernel/time/timeconst.bc b/kernel/time/timeconst.bc
index 511bdf2cafda..c7388dee8635 100644
--- a/kernel/time/timeconst.bc
+++ b/kernel/time/timeconst.bc
@@ -50,7 +50,7 @@ define timeconst(hz) {
50 print "#include <linux/types.h>\n\n" 50 print "#include <linux/types.h>\n\n"
51 51
52 print "#if HZ != ", hz, "\n" 52 print "#if HZ != ", hz, "\n"
53 print "#error \qkernel/timeconst.h has the wrong HZ value!\q\n" 53 print "#error \qinclude/generated/timeconst.h has the wrong HZ value!\q\n"
54 print "#endif\n\n" 54 print "#endif\n\n"
55 55
56 if (hz < 2) { 56 if (hz < 2) {
@@ -105,4 +105,5 @@ define timeconst(hz) {
105 halt 105 halt
106} 106}
107 107
108hz = read();
108timeconst(hz) 109timeconst(hz)
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 946acb72179f..30b7a409bf1e 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -118,18 +118,6 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
118 118
119#ifdef CONFIG_DEBUG_TIMEKEEPING 119#ifdef CONFIG_DEBUG_TIMEKEEPING
120#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */ 120#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
121/*
122 * These simple flag variables are managed
123 * without locks, which is racy, but ok since
124 * we don't really care about being super
125 * precise about how many events were seen,
126 * just that a problem was observed.
127 */
128static int timekeeping_underflow_seen;
129static int timekeeping_overflow_seen;
130
131/* last_warning is only modified under the timekeeping lock */
132static long timekeeping_last_warning;
133 121
134static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) 122static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
135{ 123{
@@ -149,29 +137,30 @@ static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
149 } 137 }
150 } 138 }
151 139
152 if (timekeeping_underflow_seen) { 140 if (tk->underflow_seen) {
153 if (jiffies - timekeeping_last_warning > WARNING_FREQ) { 141 if (jiffies - tk->last_warning > WARNING_FREQ) {
154 printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name); 142 printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
155 printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); 143 printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
156 printk_deferred(" Your kernel is probably still fine.\n"); 144 printk_deferred(" Your kernel is probably still fine.\n");
157 timekeeping_last_warning = jiffies; 145 tk->last_warning = jiffies;
158 } 146 }
159 timekeeping_underflow_seen = 0; 147 tk->underflow_seen = 0;
160 } 148 }
161 149
162 if (timekeeping_overflow_seen) { 150 if (tk->overflow_seen) {
163 if (jiffies - timekeeping_last_warning > WARNING_FREQ) { 151 if (jiffies - tk->last_warning > WARNING_FREQ) {
164 printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name); 152 printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
165 printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); 153 printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
166 printk_deferred(" Your kernel is probably still fine.\n"); 154 printk_deferred(" Your kernel is probably still fine.\n");
167 timekeeping_last_warning = jiffies; 155 tk->last_warning = jiffies;
168 } 156 }
169 timekeeping_overflow_seen = 0; 157 tk->overflow_seen = 0;
170 } 158 }
171} 159}
172 160
173static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr) 161static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
174{ 162{
163 struct timekeeper *tk = &tk_core.timekeeper;
175 cycle_t now, last, mask, max, delta; 164 cycle_t now, last, mask, max, delta;
176 unsigned int seq; 165 unsigned int seq;
177 166
@@ -197,13 +186,13 @@ static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
197 * mask-relative negative values. 186 * mask-relative negative values.
198 */ 187 */
199 if (unlikely((~delta & mask) < (mask >> 3))) { 188 if (unlikely((~delta & mask) < (mask >> 3))) {
200 timekeeping_underflow_seen = 1; 189 tk->underflow_seen = 1;
201 delta = 0; 190 delta = 0;
202 } 191 }
203 192
204 /* Cap delta value to the max_cycles values to avoid mult overflows */ 193 /* Cap delta value to the max_cycles values to avoid mult overflows */
205 if (unlikely(delta > max)) { 194 if (unlikely(delta > max)) {
206 timekeeping_overflow_seen = 1; 195 tk->overflow_seen = 1;
207 delta = tkr->clock->max_cycles; 196 delta = tkr->clock->max_cycles;
208 } 197 }
209 198
@@ -551,6 +540,17 @@ int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
551EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier); 540EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
552 541
553/* 542/*
543 * tk_update_leap_state - helper to update the next_leap_ktime
544 */
545static inline void tk_update_leap_state(struct timekeeper *tk)
546{
547 tk->next_leap_ktime = ntp_get_next_leap();
548 if (tk->next_leap_ktime.tv64 != KTIME_MAX)
549 /* Convert to monotonic time */
550 tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real);
551}
552
553/*
554 * Update the ktime_t based scalar nsec members of the timekeeper 554 * Update the ktime_t based scalar nsec members of the timekeeper
555 */ 555 */
556static inline void tk_update_ktime_data(struct timekeeper *tk) 556static inline void tk_update_ktime_data(struct timekeeper *tk)
@@ -591,17 +591,25 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
591 ntp_clear(); 591 ntp_clear();
592 } 592 }
593 593
594 tk_update_leap_state(tk);
594 tk_update_ktime_data(tk); 595 tk_update_ktime_data(tk);
595 596
596 update_vsyscall(tk); 597 update_vsyscall(tk);
597 update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); 598 update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
598 599
600 update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
601 update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw);
602
603 if (action & TK_CLOCK_WAS_SET)
604 tk->clock_was_set_seq++;
605 /*
606 * The mirroring of the data to the shadow-timekeeper needs
607 * to happen last here to ensure we don't over-write the
608 * timekeeper structure on the next update with stale data
609 */
599 if (action & TK_MIRROR) 610 if (action & TK_MIRROR)
600 memcpy(&shadow_timekeeper, &tk_core.timekeeper, 611 memcpy(&shadow_timekeeper, &tk_core.timekeeper,
601 sizeof(tk_core.timekeeper)); 612 sizeof(tk_core.timekeeper));
602
603 update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
604 update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw);
605} 613}
606 614
607/** 615/**
@@ -699,6 +707,23 @@ ktime_t ktime_get(void)
699} 707}
700EXPORT_SYMBOL_GPL(ktime_get); 708EXPORT_SYMBOL_GPL(ktime_get);
701 709
710u32 ktime_get_resolution_ns(void)
711{
712 struct timekeeper *tk = &tk_core.timekeeper;
713 unsigned int seq;
714 u32 nsecs;
715
716 WARN_ON(timekeeping_suspended);
717
718 do {
719 seq = read_seqcount_begin(&tk_core.seq);
720 nsecs = tk->tkr_mono.mult >> tk->tkr_mono.shift;
721 } while (read_seqcount_retry(&tk_core.seq, seq));
722
723 return nsecs;
724}
725EXPORT_SYMBOL_GPL(ktime_get_resolution_ns);
726
702static ktime_t *offsets[TK_OFFS_MAX] = { 727static ktime_t *offsets[TK_OFFS_MAX] = {
703 [TK_OFFS_REAL] = &tk_core.timekeeper.offs_real, 728 [TK_OFFS_REAL] = &tk_core.timekeeper.offs_real,
704 [TK_OFFS_BOOT] = &tk_core.timekeeper.offs_boot, 729 [TK_OFFS_BOOT] = &tk_core.timekeeper.offs_boot,
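
The new ktime_get_resolution_ns() reports the monotonic clocksource granularity as tkr_mono.mult >> tkr_mono.shift nanoseconds. A standalone sketch of that arithmetic with a made-up mult/shift pair roughly matching a 24 MHz clocksource.

/*
 * Standalone sketch of what ktime_get_resolution_ns() computes: the
 * nanoseconds per clocksource cycle, mult >> shift.  The mult/shift
 * values below are invented for a ~24 MHz clocksource, purely to show
 * the arithmetic.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t mult  = 2796202667u;	/* ~41.67 ns/cycle scaled by 2^26 (assumed) */
	uint32_t shift = 26;		/* assumed */

	printf("resolution: %u ns per cycle\n", mult >> shift);	/* 41 */
	return 0;
}
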
@@ -1179,28 +1204,20 @@ void __weak read_persistent_clock64(struct timespec64 *ts64)
1179} 1204}
1180 1205
1181/** 1206/**
1182 * read_boot_clock - Return time of the system start. 1207 * read_boot_clock64 - Return time of the system start.
1183 * 1208 *
1184 * Weak dummy function for arches that do not yet support it. 1209 * Weak dummy function for arches that do not yet support it.
1185 * Function to read the exact time the system has been started. 1210 * Function to read the exact time the system has been started.
1186 * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported. 1211 * Returns a timespec64 with tv_sec=0 and tv_nsec=0 if unsupported.
1187 * 1212 *
1188 * XXX - Do be sure to remove it once all arches implement it. 1213 * XXX - Do be sure to remove it once all arches implement it.
1189 */ 1214 */
1190void __weak read_boot_clock(struct timespec *ts) 1215void __weak read_boot_clock64(struct timespec64 *ts)
1191{ 1216{
1192 ts->tv_sec = 0; 1217 ts->tv_sec = 0;
1193 ts->tv_nsec = 0; 1218 ts->tv_nsec = 0;
1194} 1219}
1195 1220
1196void __weak read_boot_clock64(struct timespec64 *ts64)
1197{
1198 struct timespec ts;
1199
1200 read_boot_clock(&ts);
1201 *ts64 = timespec_to_timespec64(ts);
1202}
1203
1204/* Flag for if timekeeping_resume() has injected sleeptime */ 1221/* Flag for if timekeeping_resume() has injected sleeptime */
1205static bool sleeptime_injected; 1222static bool sleeptime_injected;
1206 1223
@@ -1836,8 +1853,9 @@ void update_wall_time(void)
1836 * memcpy under the tk_core.seq against one before we start 1853 * memcpy under the tk_core.seq against one before we start
1837 * updating. 1854 * updating.
1838 */ 1855 */
1856 timekeeping_update(tk, clock_set);
1839 memcpy(real_tk, tk, sizeof(*tk)); 1857 memcpy(real_tk, tk, sizeof(*tk));
1840 timekeeping_update(real_tk, clock_set); 1858 /* The memcpy must come last. Do not put anything here! */
1841 write_seqcount_end(&tk_core.seq); 1859 write_seqcount_end(&tk_core.seq);
1842out: 1860out:
1843 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 1861 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -1926,47 +1944,20 @@ void do_timer(unsigned long ticks)
1926} 1944}
1927 1945
1928/** 1946/**
1929 * ktime_get_update_offsets_tick - hrtimer helper
1930 * @offs_real: pointer to storage for monotonic -> realtime offset
1931 * @offs_boot: pointer to storage for monotonic -> boottime offset
1932 * @offs_tai: pointer to storage for monotonic -> clock tai offset
1933 *
1934 * Returns monotonic time at last tick and various offsets
1935 */
1936ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot,
1937 ktime_t *offs_tai)
1938{
1939 struct timekeeper *tk = &tk_core.timekeeper;
1940 unsigned int seq;
1941 ktime_t base;
1942 u64 nsecs;
1943
1944 do {
1945 seq = read_seqcount_begin(&tk_core.seq);
1946
1947 base = tk->tkr_mono.base;
1948 nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
1949
1950 *offs_real = tk->offs_real;
1951 *offs_boot = tk->offs_boot;
1952 *offs_tai = tk->offs_tai;
1953 } while (read_seqcount_retry(&tk_core.seq, seq));
1954
1955 return ktime_add_ns(base, nsecs);
1956}
1957
1958#ifdef CONFIG_HIGH_RES_TIMERS
1959/**
1960 * ktime_get_update_offsets_now - hrtimer helper 1947 * ktime_get_update_offsets_now - hrtimer helper
1948 * @cwsseq: pointer to check and store the clock was set sequence number
1961 * @offs_real: pointer to storage for monotonic -> realtime offset 1949 * @offs_real: pointer to storage for monotonic -> realtime offset
1962 * @offs_boot: pointer to storage for monotonic -> boottime offset 1950 * @offs_boot: pointer to storage for monotonic -> boottime offset
1963 * @offs_tai: pointer to storage for monotonic -> clock tai offset 1951 * @offs_tai: pointer to storage for monotonic -> clock tai offset
1964 * 1952 *
1965 * Returns current monotonic time and updates the offsets 1953 * Returns current monotonic time and updates the offsets if the
1954 * sequence number in @cwsseq and timekeeper.clock_was_set_seq are
1955 * different.
1956 *
1966 * Called from hrtimer_interrupt() or retrigger_next_event() 1957 * Called from hrtimer_interrupt() or retrigger_next_event()
1967 */ 1958 */
1968ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot, 1959ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
1969 ktime_t *offs_tai) 1960 ktime_t *offs_boot, ktime_t *offs_tai)
1970{ 1961{
1971 struct timekeeper *tk = &tk_core.timekeeper; 1962 struct timekeeper *tk = &tk_core.timekeeper;
1972 unsigned int seq; 1963 unsigned int seq;
@@ -1978,15 +1969,23 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot,
1978 1969
1979 base = tk->tkr_mono.base; 1970 base = tk->tkr_mono.base;
1980 nsecs = timekeeping_get_ns(&tk->tkr_mono); 1971 nsecs = timekeeping_get_ns(&tk->tkr_mono);
1972 base = ktime_add_ns(base, nsecs);
1973
1974 if (*cwsseq != tk->clock_was_set_seq) {
1975 *cwsseq = tk->clock_was_set_seq;
1976 *offs_real = tk->offs_real;
1977 *offs_boot = tk->offs_boot;
1978 *offs_tai = tk->offs_tai;
1979 }
1980
1981 /* Handle leapsecond insertion adjustments */
1982 if (unlikely(base.tv64 >= tk->next_leap_ktime.tv64))
1983 *offs_real = ktime_sub(tk->offs_real, ktime_set(1, 0));
1981 1984
1982 *offs_real = tk->offs_real;
1983 *offs_boot = tk->offs_boot;
1984 *offs_tai = tk->offs_tai;
1985 } while (read_seqcount_retry(&tk_core.seq, seq)); 1985 } while (read_seqcount_retry(&tk_core.seq, seq));
1986 1986
1987 return ktime_add_ns(base, nsecs); 1987 return base;
1988} 1988}
1989#endif
1990 1989
1991/** 1990/**
1992 * do_adjtimex() - Accessor function to NTP __do_adjtimex function 1991 * do_adjtimex() - Accessor function to NTP __do_adjtimex function
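
The new @cwsseq argument lets the hrtimer interrupt cache the offsets: they are re-read only when timekeeper.clock_was_set_seq has advanced, and the separate leap-second check then subtracts one second from offs_real once base crosses next_leap_ktime. A standalone model of the sequence-number caching; the struct and field names below are simplified stand-ins, not the kernel types.

/*
 * Standalone model (assumed simplification) of the clock_was_set_seq
 * caching: the caller keeps its own sequence copy and the offsets are
 * copied only when the timekeeper's sequence moved on.
 */
#include <stdint.h>
#include <stdio.h>

struct tk { unsigned int clock_was_set_seq; int64_t offs_real; };

static void get_offsets(const struct tk *tk, unsigned int *cwsseq,
			int64_t *offs_real)
{
	if (*cwsseq != tk->clock_was_set_seq) {
		*cwsseq = tk->clock_was_set_seq;
		*offs_real = tk->offs_real;	/* copy only on change */
	}
}

int main(void)
{
	struct tk tk = { .clock_was_set_seq = 1, .offs_real = 1000 };
	unsigned int seq = 0;
	int64_t offs = 0;

	get_offsets(&tk, &seq, &offs);	/* copies: seq 0 -> 1 */
	tk.offs_real = 2000;		/* no clock-was-set event ... */
	get_offsets(&tk, &seq, &offs);	/* ... so the cached 1000 stays */
	printf("%lld\n", (long long)offs);
	return 0;
}
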
@@ -2027,6 +2026,8 @@ int do_adjtimex(struct timex *txc)
2027 __timekeeping_set_tai_offset(tk, tai); 2026 __timekeeping_set_tai_offset(tk, tai);
2028 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); 2027 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
2029 } 2028 }
2029 tk_update_leap_state(tk);
2030
2030 write_seqcount_end(&tk_core.seq); 2031 write_seqcount_end(&tk_core.seq);
2031 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 2032 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
2032 2033
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
index ead8794b9a4e..704f595ce83f 100644
--- a/kernel/time/timekeeping.h
+++ b/kernel/time/timekeeping.h
@@ -3,19 +3,16 @@
3/* 3/*
4 * Internal interfaces for kernel/time/ 4 * Internal interfaces for kernel/time/
5 */ 5 */
6extern ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, 6extern ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq,
7 ktime_t *offs_boot, 7 ktime_t *offs_real,
8 ktime_t *offs_tai); 8 ktime_t *offs_boot,
9extern ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, 9 ktime_t *offs_tai);
10 ktime_t *offs_boot,
11 ktime_t *offs_tai);
12 10
13extern int timekeeping_valid_for_hres(void); 11extern int timekeeping_valid_for_hres(void);
14extern u64 timekeeping_max_deferment(void); 12extern u64 timekeeping_max_deferment(void);
15extern int timekeeping_inject_offset(struct timespec *ts); 13extern int timekeeping_inject_offset(struct timespec *ts);
16extern s32 timekeeping_get_tai_offset(void); 14extern s32 timekeeping_get_tai_offset(void);
17extern void timekeeping_set_tai_offset(s32 tai_offset); 15extern void timekeeping_set_tai_offset(s32 tai_offset);
18extern void timekeeping_clocktai(struct timespec *ts);
19extern int timekeeping_suspend(void); 16extern int timekeeping_suspend(void);
20extern void timekeeping_resume(void); 17extern void timekeeping_resume(void);
21 18
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 2ece3aa5069c..520499dd85af 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -49,6 +49,8 @@
49#include <asm/timex.h> 49#include <asm/timex.h>
50#include <asm/io.h> 50#include <asm/io.h>
51 51
52#include "tick-internal.h"
53
52#define CREATE_TRACE_POINTS 54#define CREATE_TRACE_POINTS
53#include <trace/events/timer.h> 55#include <trace/events/timer.h>
54 56
@@ -68,11 +70,11 @@ EXPORT_SYMBOL(jiffies_64);
68#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1)) 70#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1))
69 71
70struct tvec { 72struct tvec {
71 struct list_head vec[TVN_SIZE]; 73 struct hlist_head vec[TVN_SIZE];
72}; 74};
73 75
74struct tvec_root { 76struct tvec_root {
75 struct list_head vec[TVR_SIZE]; 77 struct hlist_head vec[TVR_SIZE];
76}; 78};
77 79
78struct tvec_base { 80struct tvec_base {
@@ -83,6 +85,8 @@ struct tvec_base {
83 unsigned long active_timers; 85 unsigned long active_timers;
84 unsigned long all_timers; 86 unsigned long all_timers;
85 int cpu; 87 int cpu;
88 bool migration_enabled;
89 bool nohz_active;
86 struct tvec_root tv1; 90 struct tvec_root tv1;
87 struct tvec tv2; 91 struct tvec tv2;
88 struct tvec tv3; 92 struct tvec tv3;
@@ -90,43 +94,60 @@ struct tvec_base {
90 struct tvec tv5; 94 struct tvec tv5;
91} ____cacheline_aligned; 95} ____cacheline_aligned;
92 96
93/*
94 * __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've
95 * made NULL special, hint: lock_timer_base()) and we cannot get a compile time
96 * pointer to per-cpu entries because we don't know where we'll map the section,
97 * even for the boot cpu.
98 *
99 * And so we use boot_tvec_bases for boot CPU and per-cpu __tvec_bases for the
100 * rest of them.
101 */
102struct tvec_base boot_tvec_bases;
103EXPORT_SYMBOL(boot_tvec_bases);
104 97
105static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; 98static DEFINE_PER_CPU(struct tvec_base, tvec_bases);
99
100#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
101unsigned int sysctl_timer_migration = 1;
106 102
107/* Functions below help us manage 'deferrable' flag */ 103void timers_update_migration(bool update_nohz)
108static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
109{ 104{
110 return ((unsigned int)(unsigned long)base & TIMER_DEFERRABLE); 105 bool on = sysctl_timer_migration && tick_nohz_active;
106 unsigned int cpu;
107
108 /* Avoid the loop, if nothing to update */
109 if (this_cpu_read(tvec_bases.migration_enabled) == on)
110 return;
111
112 for_each_possible_cpu(cpu) {
113 per_cpu(tvec_bases.migration_enabled, cpu) = on;
114 per_cpu(hrtimer_bases.migration_enabled, cpu) = on;
115 if (!update_nohz)
116 continue;
117 per_cpu(tvec_bases.nohz_active, cpu) = true;
118 per_cpu(hrtimer_bases.nohz_active, cpu) = true;
119 }
111} 120}
112 121
113static inline unsigned int tbase_get_irqsafe(struct tvec_base *base) 122int timer_migration_handler(struct ctl_table *table, int write,
123 void __user *buffer, size_t *lenp,
124 loff_t *ppos)
114{ 125{
115 return ((unsigned int)(unsigned long)base & TIMER_IRQSAFE); 126 static DEFINE_MUTEX(mutex);
127 int ret;
128
129 mutex_lock(&mutex);
130 ret = proc_dointvec(table, write, buffer, lenp, ppos);
131 if (!ret && write)
132 timers_update_migration(false);
133 mutex_unlock(&mutex);
134 return ret;
116} 135}
117 136
118static inline struct tvec_base *tbase_get_base(struct tvec_base *base) 137static inline struct tvec_base *get_target_base(struct tvec_base *base,
138 int pinned)
119{ 139{
120 return ((struct tvec_base *)((unsigned long)base & ~TIMER_FLAG_MASK)); 140 if (pinned || !base->migration_enabled)
141 return this_cpu_ptr(&tvec_bases);
142 return per_cpu_ptr(&tvec_bases, get_nohz_timer_target());
121} 143}
122 144#else
123static inline void 145static inline struct tvec_base *get_target_base(struct tvec_base *base,
124timer_set_base(struct timer_list *timer, struct tvec_base *new_base) 146 int pinned)
125{ 147{
126 unsigned long flags = (unsigned long)timer->base & TIMER_FLAG_MASK; 148 return this_cpu_ptr(&tvec_bases);
127
128 timer->base = (struct tvec_base *)((unsigned long)(new_base) | flags);
129} 149}
150#endif
130 151
131static unsigned long round_jiffies_common(unsigned long j, int cpu, 152static unsigned long round_jiffies_common(unsigned long j, int cpu,
132 bool force_up) 153 bool force_up)
@@ -349,26 +370,12 @@ void set_timer_slack(struct timer_list *timer, int slack_hz)
349} 370}
350EXPORT_SYMBOL_GPL(set_timer_slack); 371EXPORT_SYMBOL_GPL(set_timer_slack);
351 372
352/*
353 * If the list is empty, catch up ->timer_jiffies to the current time.
354 * The caller must hold the tvec_base lock. Returns true if the list
355 * was empty and therefore ->timer_jiffies was updated.
356 */
357static bool catchup_timer_jiffies(struct tvec_base *base)
358{
359 if (!base->all_timers) {
360 base->timer_jiffies = jiffies;
361 return true;
362 }
363 return false;
364}
365
366static void 373static void
367__internal_add_timer(struct tvec_base *base, struct timer_list *timer) 374__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
368{ 375{
369 unsigned long expires = timer->expires; 376 unsigned long expires = timer->expires;
370 unsigned long idx = expires - base->timer_jiffies; 377 unsigned long idx = expires - base->timer_jiffies;
371 struct list_head *vec; 378 struct hlist_head *vec;
372 379
373 if (idx < TVR_SIZE) { 380 if (idx < TVR_SIZE) {
374 int i = expires & TVR_MASK; 381 int i = expires & TVR_MASK;
@@ -401,25 +408,25 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer)
401 i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; 408 i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
402 vec = base->tv5.vec + i; 409 vec = base->tv5.vec + i;
403 } 410 }
404 /* 411
405 * Timers are FIFO: 412 hlist_add_head(&timer->entry, vec);
406 */
407 list_add_tail(&timer->entry, vec);
408} 413}
409 414
410static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) 415static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
411{ 416{
412 (void)catchup_timer_jiffies(base); 417 /* Advance base->jiffies, if the base is empty */
418 if (!base->all_timers++)
419 base->timer_jiffies = jiffies;
420
413 __internal_add_timer(base, timer); 421 __internal_add_timer(base, timer);
414 /* 422 /*
415 * Update base->active_timers and base->next_timer 423 * Update base->active_timers and base->next_timer
416 */ 424 */
417 if (!tbase_get_deferrable(timer->base)) { 425 if (!(timer->flags & TIMER_DEFERRABLE)) {
418 if (!base->active_timers++ || 426 if (!base->active_timers++ ||
419 time_before(timer->expires, base->next_timer)) 427 time_before(timer->expires, base->next_timer))
420 base->next_timer = timer->expires; 428 base->next_timer = timer->expires;
421 } 429 }
422 base->all_timers++;
423 430
424 /* 431 /*
425 * Check whether the other CPU is in dynticks mode and needs 432 * Check whether the other CPU is in dynticks mode and needs
@@ -434,8 +441,11 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
434 * require special care against races with idle_cpu(), lets deal 441 * require special care against races with idle_cpu(), lets deal
435 * with that later. 442 * with that later.
436 */ 443 */
437 if (!tbase_get_deferrable(base) || tick_nohz_full_cpu(base->cpu)) 444 if (base->nohz_active) {
438 wake_up_nohz_cpu(base->cpu); 445 if (!(timer->flags & TIMER_DEFERRABLE) ||
446 tick_nohz_full_cpu(base->cpu))
447 wake_up_nohz_cpu(base->cpu);
448 }
439} 449}
440 450
441#ifdef CONFIG_TIMER_STATS 451#ifdef CONFIG_TIMER_STATS
@@ -451,15 +461,12 @@ void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
451 461
452static void timer_stats_account_timer(struct timer_list *timer) 462static void timer_stats_account_timer(struct timer_list *timer)
453{ 463{
454 unsigned int flag = 0;
455
456 if (likely(!timer->start_site)) 464 if (likely(!timer->start_site))
457 return; 465 return;
458 if (unlikely(tbase_get_deferrable(timer->base)))
459 flag |= TIMER_STATS_FLAG_DEFERRABLE;
460 466
461 timer_stats_update_stats(timer, timer->start_pid, timer->start_site, 467 timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
462 timer->function, timer->start_comm, flag); 468 timer->function, timer->start_comm,
469 timer->flags);
463} 470}
464 471
465#else 472#else
@@ -516,8 +523,8 @@ static int timer_fixup_activate(void *addr, enum debug_obj_state state)
516 * statically initialized. We just make sure that it 523 * statically initialized. We just make sure that it
517 * is tracked in the object tracker. 524 * is tracked in the object tracker.
518 */ 525 */
519 if (timer->entry.next == NULL && 526 if (timer->entry.pprev == NULL &&
520 timer->entry.prev == TIMER_ENTRY_STATIC) { 527 timer->entry.next == TIMER_ENTRY_STATIC) {
521 debug_object_init(timer, &timer_debug_descr); 528 debug_object_init(timer, &timer_debug_descr);
522 debug_object_activate(timer, &timer_debug_descr); 529 debug_object_activate(timer, &timer_debug_descr);
523 return 0; 530 return 0;
@@ -563,7 +570,7 @@ static int timer_fixup_assert_init(void *addr, enum debug_obj_state state)
563 570
564 switch (state) { 571 switch (state) {
565 case ODEBUG_STATE_NOTAVAILABLE: 572 case ODEBUG_STATE_NOTAVAILABLE:
566 if (timer->entry.prev == TIMER_ENTRY_STATIC) { 573 if (timer->entry.next == TIMER_ENTRY_STATIC) {
567 /* 574 /*
568 * This is not really a fixup. The timer was 575 * This is not really a fixup. The timer was
569 * statically initialized. We just make sure that it 576 * statically initialized. We just make sure that it
@@ -648,7 +655,7 @@ static inline void
648debug_activate(struct timer_list *timer, unsigned long expires) 655debug_activate(struct timer_list *timer, unsigned long expires)
649{ 656{
650 debug_timer_activate(timer); 657 debug_timer_activate(timer);
651 trace_timer_start(timer, expires); 658 trace_timer_start(timer, expires, timer->flags);
652} 659}
653 660
654static inline void debug_deactivate(struct timer_list *timer) 661static inline void debug_deactivate(struct timer_list *timer)
@@ -665,10 +672,8 @@ static inline void debug_assert_init(struct timer_list *timer)
665static void do_init_timer(struct timer_list *timer, unsigned int flags, 672static void do_init_timer(struct timer_list *timer, unsigned int flags,
666 const char *name, struct lock_class_key *key) 673 const char *name, struct lock_class_key *key)
667{ 674{
668 struct tvec_base *base = raw_cpu_read(tvec_bases); 675 timer->entry.pprev = NULL;
669 676 timer->flags = flags | raw_smp_processor_id();
670 timer->entry.next = NULL;
671 timer->base = (void *)((unsigned long)base | flags);
672 timer->slack = -1; 677 timer->slack = -1;
673#ifdef CONFIG_TIMER_STATS 678#ifdef CONFIG_TIMER_STATS
674 timer->start_site = NULL; 679 timer->start_site = NULL;
@@ -699,24 +704,23 @@ EXPORT_SYMBOL(init_timer_key);
699 704
700static inline void detach_timer(struct timer_list *timer, bool clear_pending) 705static inline void detach_timer(struct timer_list *timer, bool clear_pending)
701{ 706{
702 struct list_head *entry = &timer->entry; 707 struct hlist_node *entry = &timer->entry;
703 708
704 debug_deactivate(timer); 709 debug_deactivate(timer);
705 710
706 __list_del(entry->prev, entry->next); 711 __hlist_del(entry);
707 if (clear_pending) 712 if (clear_pending)
708 entry->next = NULL; 713 entry->pprev = NULL;
709 entry->prev = LIST_POISON2; 714 entry->next = LIST_POISON2;
710} 715}
711 716
712static inline void 717static inline void
713detach_expired_timer(struct timer_list *timer, struct tvec_base *base) 718detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
714{ 719{
715 detach_timer(timer, true); 720 detach_timer(timer, true);
716 if (!tbase_get_deferrable(timer->base)) 721 if (!(timer->flags & TIMER_DEFERRABLE))
717 base->active_timers--; 722 base->active_timers--;
718 base->all_timers--; 723 base->all_timers--;
719 (void)catchup_timer_jiffies(base);
720} 724}
721 725
722static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, 726static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
@@ -726,13 +730,14 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
726 return 0; 730 return 0;
727 731
728 detach_timer(timer, clear_pending); 732 detach_timer(timer, clear_pending);
729 if (!tbase_get_deferrable(timer->base)) { 733 if (!(timer->flags & TIMER_DEFERRABLE)) {
730 base->active_timers--; 734 base->active_timers--;
731 if (timer->expires == base->next_timer) 735 if (timer->expires == base->next_timer)
732 base->next_timer = base->timer_jiffies; 736 base->next_timer = base->timer_jiffies;
733 } 737 }
734 base->all_timers--; 738 /* If this was the last timer, advance base->jiffies */
735 (void)catchup_timer_jiffies(base); 739 if (!--base->all_timers)
740 base->timer_jiffies = jiffies;
736 return 1; 741 return 1;
737} 742}
738 743
@@ -744,24 +749,22 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
744 * So __run_timers/migrate_timers can safely modify all timers which could 749 * So __run_timers/migrate_timers can safely modify all timers which could
745 * be found on ->tvX lists. 750 * be found on ->tvX lists.
746 * 751 *
747 * When the timer's base is locked, and the timer removed from list, it is 752 * When the timer's base is locked and removed from the list, the
748 * possible to set timer->base = NULL and drop the lock: the timer remains 753 * TIMER_MIGRATING flag is set, FIXME
749 * locked.
750 */ 754 */
751static struct tvec_base *lock_timer_base(struct timer_list *timer, 755static struct tvec_base *lock_timer_base(struct timer_list *timer,
752 unsigned long *flags) 756 unsigned long *flags)
753 __acquires(timer->base->lock) 757 __acquires(timer->base->lock)
754{ 758{
755 struct tvec_base *base;
756
757 for (;;) { 759 for (;;) {
758 struct tvec_base *prelock_base = timer->base; 760 u32 tf = timer->flags;
759 base = tbase_get_base(prelock_base); 761 struct tvec_base *base;
760 if (likely(base != NULL)) { 762
763 if (!(tf & TIMER_MIGRATING)) {
764 base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK);
761 spin_lock_irqsave(&base->lock, *flags); 765 spin_lock_irqsave(&base->lock, *flags);
762 if (likely(prelock_base == timer->base)) 766 if (timer->flags == tf)
763 return base; 767 return base;
764 /* The timer has migrated to another CPU */
765 spin_unlock_irqrestore(&base->lock, *flags); 768 spin_unlock_irqrestore(&base->lock, *flags);
766 } 769 }
767 cpu_relax(); 770 cpu_relax();
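
With timer->base gone, the owning CPU and the state bits live together in timer->flags, and lock_timer_base() now spins while TIMER_MIGRATING is set instead of waiting for a NULL base pointer. A standalone model of that encoding; the mask values below are assumptions for the sketch, the real definitions live in include/linux/timer.h.

/*
 * Standalone model (assumed bit layout, for illustration only) of how
 * the reworked timer code packs the owning CPU into the low bits of
 * timer->flags and masks it out again in lock_timer_base().
 */
#include <stdint.h>
#include <stdio.h>

#define TIMER_CPUMASK	0x0003FFFFu	/* assumption for the sketch */
#define TIMER_MIGRATING	0x00040000u	/* assumption for the sketch */
#define TIMER_BASEMASK	(TIMER_CPUMASK | TIMER_MIGRATING)

int main(void)
{
	uint32_t flags = TIMER_MIGRATING | 0;	/* owned by CPU 0, move in flight */

	if (!(flags & TIMER_MIGRATING))
		printf("lock base of CPU %u\n", flags & TIMER_CPUMASK);
	else
		printf("migration in flight, spin and retry\n");

	/* what __mod_timer() does once the new base is locked */
	flags &= ~TIMER_BASEMASK;
	flags |= 3;				/* new owning CPU */
	printf("now owned by CPU %u\n", flags & TIMER_CPUMASK);
	return 0;
}
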
@@ -770,11 +773,11 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
770 773
771static inline int 774static inline int
772__mod_timer(struct timer_list *timer, unsigned long expires, 775__mod_timer(struct timer_list *timer, unsigned long expires,
773 bool pending_only, int pinned) 776 bool pending_only, int pinned)
774{ 777{
775 struct tvec_base *base, *new_base; 778 struct tvec_base *base, *new_base;
776 unsigned long flags; 779 unsigned long flags;
777 int ret = 0 , cpu; 780 int ret = 0;
778 781
779 timer_stats_timer_set_start_info(timer); 782 timer_stats_timer_set_start_info(timer);
780 BUG_ON(!timer->function); 783 BUG_ON(!timer->function);
@@ -787,8 +790,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
787 790
788 debug_activate(timer, expires); 791 debug_activate(timer, expires);
789 792
790 cpu = get_nohz_timer_target(pinned); 793 new_base = get_target_base(base, pinned);
791 new_base = per_cpu(tvec_bases, cpu);
792 794
793 if (base != new_base) { 795 if (base != new_base) {
794 /* 796 /*
@@ -800,11 +802,13 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
800 */ 802 */
801 if (likely(base->running_timer != timer)) { 803 if (likely(base->running_timer != timer)) {
802 /* See the comment in lock_timer_base() */ 804 /* See the comment in lock_timer_base() */
803 timer_set_base(timer, NULL); 805 timer->flags |= TIMER_MIGRATING;
806
804 spin_unlock(&base->lock); 807 spin_unlock(&base->lock);
805 base = new_base; 808 base = new_base;
806 spin_lock(&base->lock); 809 spin_lock(&base->lock);
807 timer_set_base(timer, base); 810 timer->flags &= ~TIMER_BASEMASK;
811 timer->flags |= base->cpu;
808 } 812 }
809 } 813 }
810 814
@@ -966,13 +970,13 @@ EXPORT_SYMBOL(add_timer);
966 */ 970 */
967void add_timer_on(struct timer_list *timer, int cpu) 971void add_timer_on(struct timer_list *timer, int cpu)
968{ 972{
969 struct tvec_base *base = per_cpu(tvec_bases, cpu); 973 struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu);
970 unsigned long flags; 974 unsigned long flags;
971 975
972 timer_stats_timer_set_start_info(timer); 976 timer_stats_timer_set_start_info(timer);
973 BUG_ON(timer_pending(timer) || !timer->function); 977 BUG_ON(timer_pending(timer) || !timer->function);
974 spin_lock_irqsave(&base->lock, flags); 978 spin_lock_irqsave(&base->lock, flags);
975 timer_set_base(timer, base); 979 timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
976 debug_activate(timer, timer->expires); 980 debug_activate(timer, timer->expires);
977 internal_add_timer(base, timer); 981 internal_add_timer(base, timer);
978 spin_unlock_irqrestore(&base->lock, flags); 982 spin_unlock_irqrestore(&base->lock, flags);
@@ -1037,8 +1041,6 @@ int try_to_del_timer_sync(struct timer_list *timer)
1037EXPORT_SYMBOL(try_to_del_timer_sync); 1041EXPORT_SYMBOL(try_to_del_timer_sync);
1038 1042
1039#ifdef CONFIG_SMP 1043#ifdef CONFIG_SMP
1040static DEFINE_PER_CPU(struct tvec_base, __tvec_bases);
1041
1042/** 1044/**
1043 * del_timer_sync - deactivate a timer and wait for the handler to finish. 1045 * del_timer_sync - deactivate a timer and wait for the handler to finish.
1044 * @timer: the timer to be deactivated 1046 * @timer: the timer to be deactivated
@@ -1093,7 +1095,7 @@ int del_timer_sync(struct timer_list *timer)
1093 * don't use it in hardirq context, because it 1095 * don't use it in hardirq context, because it
1094 * could lead to deadlock. 1096 * could lead to deadlock.
1095 */ 1097 */
1096 WARN_ON(in_irq() && !tbase_get_irqsafe(timer->base)); 1098 WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));
1097 for (;;) { 1099 for (;;) {
1098 int ret = try_to_del_timer_sync(timer); 1100 int ret = try_to_del_timer_sync(timer);
1099 if (ret >= 0) 1101 if (ret >= 0)
@@ -1107,17 +1109,17 @@ EXPORT_SYMBOL(del_timer_sync);
1107static int cascade(struct tvec_base *base, struct tvec *tv, int index) 1109static int cascade(struct tvec_base *base, struct tvec *tv, int index)
1108{ 1110{
1109 /* cascade all the timers from tv up one level */ 1111 /* cascade all the timers from tv up one level */
1110 struct timer_list *timer, *tmp; 1112 struct timer_list *timer;
1111 struct list_head tv_list; 1113 struct hlist_node *tmp;
1114 struct hlist_head tv_list;
1112 1115
1113 list_replace_init(tv->vec + index, &tv_list); 1116 hlist_move_list(tv->vec + index, &tv_list);
1114 1117
1115 /* 1118 /*
1116 * We are removing _all_ timers from the list, so we 1119 * We are removing _all_ timers from the list, so we
1117 * don't have to detach them individually. 1120 * don't have to detach them individually.
1118 */ 1121 */
1119 list_for_each_entry_safe(timer, tmp, &tv_list, entry) { 1122 hlist_for_each_entry_safe(timer, tmp, &tv_list, entry) {
1120 BUG_ON(tbase_get_base(timer->base) != base);
1121 /* No accounting, while moving them */ 1123 /* No accounting, while moving them */
1122 __internal_add_timer(base, timer); 1124 __internal_add_timer(base, timer);
1123 } 1125 }
@@ -1182,14 +1184,18 @@ static inline void __run_timers(struct tvec_base *base)
1182 struct timer_list *timer; 1184 struct timer_list *timer;
1183 1185
1184 spin_lock_irq(&base->lock); 1186 spin_lock_irq(&base->lock);
1185 if (catchup_timer_jiffies(base)) { 1187
1186 spin_unlock_irq(&base->lock);
1187 return;
1188 }
1189 while (time_after_eq(jiffies, base->timer_jiffies)) { 1188 while (time_after_eq(jiffies, base->timer_jiffies)) {
1190 struct list_head work_list; 1189 struct hlist_head work_list;
1191 struct list_head *head = &work_list; 1190 struct hlist_head *head = &work_list;
1192 int index = base->timer_jiffies & TVR_MASK; 1191 int index;
1192
1193 if (!base->all_timers) {
1194 base->timer_jiffies = jiffies;
1195 break;
1196 }
1197
1198 index = base->timer_jiffies & TVR_MASK;
1193 1199
1194 /* 1200 /*
1195 * Cascade timers: 1201 * Cascade timers:
@@ -1200,16 +1206,16 @@ static inline void __run_timers(struct tvec_base *base)
1200 !cascade(base, &base->tv4, INDEX(2))) 1206 !cascade(base, &base->tv4, INDEX(2)))
1201 cascade(base, &base->tv5, INDEX(3)); 1207 cascade(base, &base->tv5, INDEX(3));
1202 ++base->timer_jiffies; 1208 ++base->timer_jiffies;
1203 list_replace_init(base->tv1.vec + index, head); 1209 hlist_move_list(base->tv1.vec + index, head);
1204 while (!list_empty(head)) { 1210 while (!hlist_empty(head)) {
1205 void (*fn)(unsigned long); 1211 void (*fn)(unsigned long);
1206 unsigned long data; 1212 unsigned long data;
1207 bool irqsafe; 1213 bool irqsafe;
1208 1214
1209 timer = list_first_entry(head, struct timer_list,entry); 1215 timer = hlist_entry(head->first, struct timer_list, entry);
1210 fn = timer->function; 1216 fn = timer->function;
1211 data = timer->data; 1217 data = timer->data;
1212 irqsafe = tbase_get_irqsafe(timer->base); 1218 irqsafe = timer->flags & TIMER_IRQSAFE;
1213 1219
1214 timer_stats_account_timer(timer); 1220 timer_stats_account_timer(timer);
1215 1221
@@ -1248,8 +1254,8 @@ static unsigned long __next_timer_interrupt(struct tvec_base *base)
1248 /* Look for timer events in tv1. */ 1254 /* Look for timer events in tv1. */
1249 index = slot = timer_jiffies & TVR_MASK; 1255 index = slot = timer_jiffies & TVR_MASK;
1250 do { 1256 do {
1251 list_for_each_entry(nte, base->tv1.vec + slot, entry) { 1257 hlist_for_each_entry(nte, base->tv1.vec + slot, entry) {
1252 if (tbase_get_deferrable(nte->base)) 1258 if (nte->flags & TIMER_DEFERRABLE)
1253 continue; 1259 continue;
1254 1260
1255 found = 1; 1261 found = 1;
@@ -1279,8 +1285,8 @@ cascade:
1279 1285
1280 index = slot = timer_jiffies & TVN_MASK; 1286 index = slot = timer_jiffies & TVN_MASK;
1281 do { 1287 do {
1282 list_for_each_entry(nte, varp->vec + slot, entry) { 1288 hlist_for_each_entry(nte, varp->vec + slot, entry) {
1283 if (tbase_get_deferrable(nte->base)) 1289 if (nte->flags & TIMER_DEFERRABLE)
1284 continue; 1290 continue;
1285 1291
1286 found = 1; 1292 found = 1;
@@ -1311,54 +1317,48 @@ cascade:
1311 * Check, if the next hrtimer event is before the next timer wheel 1317 * Check, if the next hrtimer event is before the next timer wheel
1312 * event: 1318 * event:
1313 */ 1319 */
1314static unsigned long cmp_next_hrtimer_event(unsigned long now, 1320static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
1315 unsigned long expires)
1316{ 1321{
1317 ktime_t hr_delta = hrtimer_get_next_event(); 1322 u64 nextevt = hrtimer_get_next_event();
1318 struct timespec tsdelta;
1319 unsigned long delta;
1320
1321 if (hr_delta.tv64 == KTIME_MAX)
1322 return expires;
1323 1323
1324 /* 1324 /*
1325 * Expired timer available, let it expire in the next tick 1325 * If high resolution timers are enabled
1326 * hrtimer_get_next_event() returns KTIME_MAX.
1326 */ 1327 */
1327 if (hr_delta.tv64 <= 0) 1328 if (expires <= nextevt)
1328 return now + 1; 1329 return expires;
1329
1330 tsdelta = ktime_to_timespec(hr_delta);
1331 delta = timespec_to_jiffies(&tsdelta);
1332 1330
1333 /* 1331 /*
1334 * Limit the delta to the max value, which is checked in 1332 * If the next timer is already expired, return the tick base
1335 * tick_nohz_stop_sched_tick(): 1333 * time so the tick is fired immediately.
1336 */ 1334 */
1337 if (delta > NEXT_TIMER_MAX_DELTA) 1335 if (nextevt <= basem)
1338 delta = NEXT_TIMER_MAX_DELTA; 1336 return basem;
1339 1337
1340 /* 1338 /*
1341 * Take rounding errors in to account and make sure, that it 1339 * Round up to the next jiffie. High resolution timers are
1342 * expires in the next tick. Otherwise we go into an endless 1340 * off, so the hrtimers are expired in the tick and we need to
1343 * ping pong due to tick_nohz_stop_sched_tick() retriggering 1341 * make sure that this tick really expires the timer to avoid
1344 * the timer softirq 1342 * a ping pong of the nohz stop code.
1343 *
1344 * Use DIV_ROUND_UP_ULL to prevent gcc calling __divdi3
1345 */ 1345 */
1346 if (delta < 1) 1346 return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC;
1347 delta = 1;
1348 now += delta;
1349 if (time_before(now, expires))
1350 return now;
1351 return expires;
1352} 1347}
1353 1348
1354/** 1349/**
1355 * get_next_timer_interrupt - return the jiffy of the next pending timer 1350 * get_next_timer_interrupt - return the time (clock mono) of the next timer
1356 * @now: current time (in jiffies) 1351 * @basej: base time jiffies
1352 * @basem: base time clock monotonic
1353 *
1354 * Returns the tick aligned clock monotonic time of the next pending
1355 * timer or KTIME_MAX if no timer is pending.
1357 */ 1356 */
1358unsigned long get_next_timer_interrupt(unsigned long now) 1357u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
1359{ 1358{
1360 struct tvec_base *base = __this_cpu_read(tvec_bases); 1359 struct tvec_base *base = this_cpu_ptr(&tvec_bases);
1361 unsigned long expires = now + NEXT_TIMER_MAX_DELTA; 1360 u64 expires = KTIME_MAX;
1361 unsigned long nextevt;
1362 1362
1363 /* 1363 /*
1364 * Pretend that there is no timer pending if the cpu is offline. 1364 * Pretend that there is no timer pending if the cpu is offline.
@@ -1371,14 +1371,15 @@ unsigned long get_next_timer_interrupt(unsigned long now)
1371 if (base->active_timers) { 1371 if (base->active_timers) {
1372 if (time_before_eq(base->next_timer, base->timer_jiffies)) 1372 if (time_before_eq(base->next_timer, base->timer_jiffies))
1373 base->next_timer = __next_timer_interrupt(base); 1373 base->next_timer = __next_timer_interrupt(base);
1374 expires = base->next_timer; 1374 nextevt = base->next_timer;
1375 if (time_before_eq(nextevt, basej))
1376 expires = basem;
1377 else
1378 expires = basem + (nextevt - basej) * TICK_NSEC;
1375 } 1379 }
1376 spin_unlock(&base->lock); 1380 spin_unlock(&base->lock);
1377 1381
1378 if (time_before_eq(expires, now)) 1382 return cmp_next_hrtimer_event(basem, expires);
1379 return now;
1380
1381 return cmp_next_hrtimer_event(now, expires);
1382} 1383}
1383#endif 1384#endif
1384 1385
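
get_next_timer_interrupt() now returns a tick-aligned CLOCK_MONOTONIC expiry in nanoseconds, and cmp_next_hrtimer_event() rounds a pending hrtimer up to the next jiffy when high resolution timers are disabled. A standalone sketch of both conversions; HZ=250 is assumed and the sample values are made up.

/*
 * Standalone sketch of the two conversions the reworked
 * get_next_timer_interrupt()/cmp_next_hrtimer_event() perform:
 * jiffies delta -> clock-mono nanoseconds, and rounding an hrtimer
 * expiry up to the next tick boundary.  HZ=250 is assumed.
 */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC	1000000000ULL
#define HZ		250ULL			/* assumption */
#define TICK_NSEC	(NSEC_PER_SEC / HZ)

#define DIV_ROUND_UP_ULL(x, d)	(((x) + (d) - 1) / (d))

int main(void)
{
	uint64_t basej = 5000, basem = basej * TICK_NSEC;
	uint64_t nextevt_jiffies = 5003;		/* next wheel timer */
	uint64_t wheel = basem + (nextevt_jiffies - basej) * TICK_NSEC;

	uint64_t hrt = basem + TICK_NSEC / 2;		/* hrtimer mid-tick */
	uint64_t hrt_aligned = DIV_ROUND_UP_ULL(hrt, TICK_NSEC) * TICK_NSEC;

	printf("wheel expiry:   %llu ns\n", (unsigned long long)wheel);
	printf("hrtimer expiry: %llu ns (tick aligned)\n",
	       (unsigned long long)hrt_aligned);
	return 0;
}
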
@@ -1407,9 +1408,7 @@ void update_process_times(int user_tick)
1407 */ 1408 */
1408static void run_timer_softirq(struct softirq_action *h) 1409static void run_timer_softirq(struct softirq_action *h)
1409{ 1410{
1410 struct tvec_base *base = __this_cpu_read(tvec_bases); 1411 struct tvec_base *base = this_cpu_ptr(&tvec_bases);
1411
1412 hrtimer_run_pending();
1413 1412
1414 if (time_after_eq(jiffies, base->timer_jiffies)) 1413 if (time_after_eq(jiffies, base->timer_jiffies))
1415 __run_timers(base); 1414 __run_timers(base);
@@ -1545,15 +1544,16 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
1545EXPORT_SYMBOL(schedule_timeout_uninterruptible); 1544EXPORT_SYMBOL(schedule_timeout_uninterruptible);
1546 1545
1547#ifdef CONFIG_HOTPLUG_CPU 1546#ifdef CONFIG_HOTPLUG_CPU
1548static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head) 1547static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head)
1549{ 1548{
1550 struct timer_list *timer; 1549 struct timer_list *timer;
1550 int cpu = new_base->cpu;
1551 1551
1552 while (!list_empty(head)) { 1552 while (!hlist_empty(head)) {
1553 timer = list_first_entry(head, struct timer_list, entry); 1553 timer = hlist_entry(head->first, struct timer_list, entry);
1554 /* We ignore the accounting on the dying cpu */ 1554 /* We ignore the accounting on the dying cpu */
1555 detach_timer(timer, false); 1555 detach_timer(timer, false);
1556 timer_set_base(timer, new_base); 1556 timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
1557 internal_add_timer(new_base, timer); 1557 internal_add_timer(new_base, timer);
1558 } 1558 }
1559} 1559}
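
With the per-cpu base pointer gone from struct timer_list, migrate_timer_list() above re-targets a timer by rewriting the CPU bits held in the low end of timer->flags. A minimal user-space sketch of that masking follows; the bit layout (TIMER_CPUMASK, TIMER_MIGRATING) is an assumption for illustration only, not copied from the headers touched elsewhere in this series.

/*
 * Sketch of the flag rewrite in migrate_timer_list() above: the owning CPU
 * lives in the low bits of timer->flags and is swapped by masking with
 * TIMER_BASEMASK while the remaining flag bits are preserved.
 */
#include <stdio.h>
#include <stdint.h>

#define TIMER_CPUMASK   0x0003FFFFu   /* assumed: low bits hold the CPU */
#define TIMER_MIGRATING 0x00040000u   /* assumed flag above the CPU bits */
#define TIMER_BASEMASK  (TIMER_CPUMASK | TIMER_MIGRATING)

static uint32_t retarget_timer(uint32_t flags, unsigned int new_cpu)
{
	/* Clear the base/CPU bits, keep everything else, insert the new CPU. */
	return (flags & ~TIMER_BASEMASK) | new_cpu;
}

int main(void)
{
	uint32_t flags = 3 /* old CPU */ | 0x00100000u /* some unrelated flag */;

	flags = retarget_timer(flags, 7);
	printf("new cpu: %u, preserved flags: %#x\n",
	       flags & TIMER_CPUMASK, flags & ~TIMER_BASEMASK);
	return 0;
}
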
@@ -1565,8 +1565,8 @@ static void migrate_timers(int cpu)
1565 int i; 1565 int i;
1566 1566
1567 BUG_ON(cpu_online(cpu)); 1567 BUG_ON(cpu_online(cpu));
1568 old_base = per_cpu(tvec_bases, cpu); 1568 old_base = per_cpu_ptr(&tvec_bases, cpu);
1569 new_base = get_cpu_var(tvec_bases); 1569 new_base = this_cpu_ptr(&tvec_bases);
1570 /* 1570 /*
1571 * The caller is globally serialized and nobody else 1571 * The caller is globally serialized and nobody else
1572 * takes two locks at once, deadlock is not possible. 1572 * takes two locks at once, deadlock is not possible.
@@ -1590,7 +1590,6 @@ static void migrate_timers(int cpu)
1590 1590
1591 spin_unlock(&old_base->lock); 1591 spin_unlock(&old_base->lock);
1592 spin_unlock_irq(&new_base->lock); 1592 spin_unlock_irq(&new_base->lock);
1593 put_cpu_var(tvec_bases);
1594} 1593}
1595 1594
1596static int timer_cpu_notify(struct notifier_block *self, 1595static int timer_cpu_notify(struct notifier_block *self,
@@ -1616,52 +1615,27 @@ static inline void timer_register_cpu_notifier(void)
1616static inline void timer_register_cpu_notifier(void) { } 1615static inline void timer_register_cpu_notifier(void) { }
1617#endif /* CONFIG_HOTPLUG_CPU */ 1616#endif /* CONFIG_HOTPLUG_CPU */
1618 1617
1619static void __init init_timer_cpu(struct tvec_base *base, int cpu) 1618static void __init init_timer_cpu(int cpu)
1620{ 1619{
1621 int j; 1620 struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu);
1622
1623 BUG_ON(base != tbase_get_base(base));
1624 1621
1625 base->cpu = cpu; 1622 base->cpu = cpu;
1626 per_cpu(tvec_bases, cpu) = base;
1627 spin_lock_init(&base->lock); 1623 spin_lock_init(&base->lock);
1628 1624
1629 for (j = 0; j < TVN_SIZE; j++) {
1630 INIT_LIST_HEAD(base->tv5.vec + j);
1631 INIT_LIST_HEAD(base->tv4.vec + j);
1632 INIT_LIST_HEAD(base->tv3.vec + j);
1633 INIT_LIST_HEAD(base->tv2.vec + j);
1634 }
1635 for (j = 0; j < TVR_SIZE; j++)
1636 INIT_LIST_HEAD(base->tv1.vec + j);
1637
1638 base->timer_jiffies = jiffies; 1625 base->timer_jiffies = jiffies;
1639 base->next_timer = base->timer_jiffies; 1626 base->next_timer = base->timer_jiffies;
1640} 1627}
1641 1628
1642static void __init init_timer_cpus(void) 1629static void __init init_timer_cpus(void)
1643{ 1630{
1644 struct tvec_base *base;
1645 int local_cpu = smp_processor_id();
1646 int cpu; 1631 int cpu;
1647 1632
1648 for_each_possible_cpu(cpu) { 1633 for_each_possible_cpu(cpu)
1649 if (cpu == local_cpu) 1634 init_timer_cpu(cpu);
1650 base = &boot_tvec_bases;
1651#ifdef CONFIG_SMP
1652 else
1653 base = per_cpu_ptr(&__tvec_bases, cpu);
1654#endif
1655
1656 init_timer_cpu(base, cpu);
1657 }
1658} 1635}
1659 1636
1660void __init init_timers(void) 1637void __init init_timers(void)
1661{ 1638{
1662 /* ensure there are enough low bits for flags in timer->base pointer */
1663 BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK);
1664
1665 init_timer_cpus(); 1639 init_timer_cpus();
1666 init_timer_stats(); 1640 init_timer_stats();
1667 timer_register_cpu_notifier(); 1641 timer_register_cpu_notifier();
@@ -1697,14 +1671,14 @@ unsigned long msleep_interruptible(unsigned int msecs)
1697 1671
1698EXPORT_SYMBOL(msleep_interruptible); 1672EXPORT_SYMBOL(msleep_interruptible);
1699 1673
1700static int __sched do_usleep_range(unsigned long min, unsigned long max) 1674static void __sched do_usleep_range(unsigned long min, unsigned long max)
1701{ 1675{
1702 ktime_t kmin; 1676 ktime_t kmin;
1703 unsigned long delta; 1677 unsigned long delta;
1704 1678
1705 kmin = ktime_set(0, min * NSEC_PER_USEC); 1679 kmin = ktime_set(0, min * NSEC_PER_USEC);
1706 delta = (max - min) * NSEC_PER_USEC; 1680 delta = (max - min) * NSEC_PER_USEC;
1707 return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL); 1681 schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
1708} 1682}
1709 1683
1710/** 1684/**
@@ -1712,7 +1686,7 @@ static int __sched do_usleep_range(unsigned long min, unsigned long max)
1712 * @min: Minimum time in usecs to sleep 1686 * @min: Minimum time in usecs to sleep
1713 * @max: Maximum time in usecs to sleep 1687 * @max: Maximum time in usecs to sleep
1714 */ 1688 */
1715void usleep_range(unsigned long min, unsigned long max) 1689void __sched usleep_range(unsigned long min, unsigned long max)
1716{ 1690{
1717 __set_current_state(TASK_UNINTERRUPTIBLE); 1691 __set_current_state(TASK_UNINTERRUPTIBLE);
1718 do_usleep_range(min, max); 1692 do_usleep_range(min, max);
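
The reworked get_next_timer_interrupt() above no longer hands back a jiffy: it converts the next pending wheel timer into a tick-aligned CLOCK_MONOTONIC nanosecond value (or KTIME_MAX when nothing is pending), and cmp_next_hrtimer_event() rounds that value up to a tick boundary when high resolution timers are off. The stand-alone sketch below mirrors that arithmetic; TICK_NSEC (a 250 HZ tick is assumed) and the sample inputs are illustrative only, and jiffies wraparound handling is omitted.

/*
 * User-space sketch of the basej/basem conversion and the low-resolution
 * tick rounding shown in the timer.c hunks above.
 */
#include <stdio.h>
#include <stdint.h>

#define TICK_NSEC 4000000ULL                       /* assumed 4 ms tick (250 HZ) */
#define DIV_ROUND_UP_ULL(x, d) (((x) + (d) - 1) / (d))

/* Mirror of: expires = basem + (nextevt - basej) * TICK_NSEC */
static uint64_t next_event_ns(uint64_t basej, uint64_t basem, uint64_t nextevt)
{
	if (nextevt <= basej)
		return basem;                      /* already expired: fire at base time */
	return basem + (nextevt - basej) * TICK_NSEC;
}

/* Mirror of the DIV_ROUND_UP_ULL rounding in cmp_next_hrtimer_event(). */
static uint64_t round_up_to_tick(uint64_t expires_ns)
{
	return DIV_ROUND_UP_ULL(expires_ns, TICK_NSEC) * TICK_NSEC;
}

int main(void)
{
	uint64_t basej = 1000, basem = 123456789ULL;

	for (uint64_t nextevt = basej; nextevt < basej + 3; nextevt++) {
		uint64_t ns = next_event_ns(basej, basem, nextevt);

		printf("nextevt=%llu -> %llu ns (tick aligned: %llu ns)\n",
		       (unsigned long long)nextevt,
		       (unsigned long long)ns,
		       (unsigned long long)round_up_to_tick(ns));
	}
	return 0;
}
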
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index e878c2e0ba45..a4536e1e3e2a 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -29,19 +29,24 @@ struct timer_list_iter {
29 29
30typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes); 30typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes);
31 31
32DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
33
34/* 32/*
35 * This allows printing both to /proc/timer_list and 33 * This allows printing both to /proc/timer_list and
36 * to the console (on SysRq-Q): 34 * to the console (on SysRq-Q):
37 */ 35 */
38#define SEQ_printf(m, x...) \ 36__printf(2, 3)
39 do { \ 37static void SEQ_printf(struct seq_file *m, const char *fmt, ...)
40 if (m) \ 38{
41 seq_printf(m, x); \ 39 va_list args;
42 else \ 40
43 printk(x); \ 41 va_start(args, fmt);
44 } while (0) 42
43 if (m)
44 seq_vprintf(m, fmt, args);
45 else
46 vprintk(fmt, args);
47
48 va_end(args);
49}
45 50
46static void print_name_offset(struct seq_file *m, void *sym) 51static void print_name_offset(struct seq_file *m, void *sym)
47{ 52{
@@ -120,10 +125,10 @@ static void
120print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now) 125print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now)
121{ 126{
122 SEQ_printf(m, " .base: %pK\n", base); 127 SEQ_printf(m, " .base: %pK\n", base);
123 SEQ_printf(m, " .index: %d\n", 128 SEQ_printf(m, " .index: %d\n", base->index);
124 base->index); 129
125 SEQ_printf(m, " .resolution: %Lu nsecs\n", 130 SEQ_printf(m, " .resolution: %u nsecs\n", (unsigned) hrtimer_resolution);
126 (unsigned long long)ktime_to_ns(base->resolution)); 131
127 SEQ_printf(m, " .get_time: "); 132 SEQ_printf(m, " .get_time: ");
128 print_name_offset(m, base->get_time); 133 print_name_offset(m, base->get_time);
129 SEQ_printf(m, "\n"); 134 SEQ_printf(m, "\n");
@@ -158,7 +163,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
158 P(nr_events); 163 P(nr_events);
159 P(nr_retries); 164 P(nr_retries);
160 P(nr_hangs); 165 P(nr_hangs);
161 P_ns(max_hang_time); 166 P(max_hang_time);
162#endif 167#endif
163#undef P 168#undef P
164#undef P_ns 169#undef P_ns
@@ -184,7 +189,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
184 P_ns(idle_sleeptime); 189 P_ns(idle_sleeptime);
185 P_ns(iowait_sleeptime); 190 P_ns(iowait_sleeptime);
186 P(last_jiffies); 191 P(last_jiffies);
187 P(next_jiffies); 192 P(next_timer);
188 P_ns(idle_expires); 193 P_ns(idle_expires);
189 SEQ_printf(m, "jiffies: %Lu\n", 194 SEQ_printf(m, "jiffies: %Lu\n",
190 (unsigned long long)jiffies); 195 (unsigned long long)jiffies);
@@ -251,6 +256,12 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
251 SEQ_printf(m, "\n"); 256 SEQ_printf(m, "\n");
252 } 257 }
253 258
259 if (dev->set_state_oneshot_stopped) {
260 SEQ_printf(m, " oneshot stopped: ");
261 print_name_offset(m, dev->set_state_oneshot_stopped);
262 SEQ_printf(m, "\n");
263 }
264
254 if (dev->tick_resume) { 265 if (dev->tick_resume) {
255 SEQ_printf(m, " resume: "); 266 SEQ_printf(m, " resume: ");
256 print_name_offset(m, dev->tick_resume); 267 print_name_offset(m, dev->tick_resume);
@@ -269,11 +280,11 @@ static void timer_list_show_tickdevices_header(struct seq_file *m)
269{ 280{
270#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST 281#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
271 print_tickdevice(m, tick_get_broadcast_device(), -1); 282 print_tickdevice(m, tick_get_broadcast_device(), -1);
272 SEQ_printf(m, "tick_broadcast_mask: %08lx\n", 283 SEQ_printf(m, "tick_broadcast_mask: %*pb\n",
273 cpumask_bits(tick_get_broadcast_mask())[0]); 284 cpumask_pr_args(tick_get_broadcast_mask()));
274#ifdef CONFIG_TICK_ONESHOT 285#ifdef CONFIG_TICK_ONESHOT
275 SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n", 286 SEQ_printf(m, "tick_broadcast_oneshot_mask: %*pb\n",
276 cpumask_bits(tick_get_broadcast_oneshot_mask())[0]); 287 cpumask_pr_args(tick_get_broadcast_oneshot_mask()));
277#endif 288#endif
278 SEQ_printf(m, "\n"); 289 SEQ_printf(m, "\n");
279#endif 290#endif
@@ -282,7 +293,7 @@ static void timer_list_show_tickdevices_header(struct seq_file *m)
282 293
283static inline void timer_list_header(struct seq_file *m, u64 now) 294static inline void timer_list_header(struct seq_file *m, u64 now)
284{ 295{
285 SEQ_printf(m, "Timer List Version: v0.7\n"); 296 SEQ_printf(m, "Timer List Version: v0.8\n");
286 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); 297 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
287 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); 298 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
288 SEQ_printf(m, "\n"); 299 SEQ_printf(m, "\n");
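
The timer_list.c hunk above replaces the old SEQ_printf macro with a variadic function carrying the __printf(2, 3) annotation, so format strings are checked by the compiler while output still goes either to the seq_file or to the console. A user-space sketch of the same pattern, with FILE * standing in for struct seq_file (an assumption of the sketch):

/*
 * Variadic helper with a printf format attribute: the compiler can now
 * diagnose mismatched format strings, which the old macro could not.
 */
#include <stdio.h>
#include <stdarg.h>

__attribute__((format(printf, 2, 3)))
static void seq_or_console_printf(FILE *m, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	if (m)
		vfprintf(m, fmt, args);        /* "seq_file" destination */
	else
		vfprintf(stderr, fmt, args);   /* console fallback */
	va_end(args);
}

int main(void)
{
	seq_or_console_printf(stdout, "jiffies: %lu\n", 4294937296UL);
	seq_or_console_printf(NULL, "printed via the console path\n");
	return 0;
}
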
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index 1fb08f21302e..1adecb4b87c8 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -68,7 +68,7 @@ struct entry {
68 * Number of timeout events: 68 * Number of timeout events:
69 */ 69 */
70 unsigned long count; 70 unsigned long count;
71 unsigned int timer_flag; 71 u32 flags;
72 72
73 /* 73 /*
74 * We save the command-line string to preserve 74 * We save the command-line string to preserve
@@ -227,13 +227,13 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm)
227 * @startf: pointer to the function which did the timer setup 227 * @startf: pointer to the function which did the timer setup
228 * @timerf: pointer to the timer callback function of the timer 228 * @timerf: pointer to the timer callback function of the timer
229 * @comm: name of the process which set up the timer 229 * @comm: name of the process which set up the timer
230 * @tflags: The flags field of the timer
230 * 231 *
231 * When the timer is already registered, then the event counter is 232 * When the timer is already registered, then the event counter is
232 * incremented. Otherwise the timer is registered in a free slot. 233 * incremented. Otherwise the timer is registered in a free slot.
233 */ 234 */
234void timer_stats_update_stats(void *timer, pid_t pid, void *startf, 235void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
235 void *timerf, char *comm, 236 void *timerf, char *comm, u32 tflags)
236 unsigned int timer_flag)
237{ 237{
238 /* 238 /*
239 * It doesn't matter which lock we take: 239 * It doesn't matter which lock we take:
@@ -251,7 +251,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
251 input.start_func = startf; 251 input.start_func = startf;
252 input.expire_func = timerf; 252 input.expire_func = timerf;
253 input.pid = pid; 253 input.pid = pid;
254 input.timer_flag = timer_flag; 254 input.flags = tflags;
255 255
256 raw_spin_lock_irqsave(lock, flags); 256 raw_spin_lock_irqsave(lock, flags);
257 if (!timer_stats_active) 257 if (!timer_stats_active)
@@ -306,7 +306,7 @@ static int tstats_show(struct seq_file *m, void *v)
306 306
307 for (i = 0; i < nr_entries; i++) { 307 for (i = 0; i < nr_entries; i++) {
308 entry = entries + i; 308 entry = entries + i;
309 if (entry->timer_flag & TIMER_STATS_FLAG_DEFERRABLE) { 309 if (entry->flags & TIMER_DEFERRABLE) {
310 seq_printf(m, "%4luD, %5d %-16s ", 310 seq_printf(m, "%4luD, %5d %-16s ",
311 entry->count, entry->pid, entry->comm); 311 entry->count, entry->pid, entry->comm);
312 } else { 312 } else {
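
The /proc/timer_stats output above now keys the "D" (deferrable) marker off the timer's u32 flags word rather than a private timer_flag copy. A small sketch of that formatting decision; the TIMER_DEFERRABLE bit value and the sample entries are assumptions for illustration.

/*
 * Sketch of the per-entry formatting choice in tstats_show() above: a
 * deferrable timer gets the "D"-suffixed count column.
 */
#include <stdio.h>
#include <stdint.h>

#define TIMER_DEFERRABLE 0x00080000u   /* assumed bit in the new timer->flags */

struct entry { unsigned long count; int pid; const char *comm; uint32_t flags; };

static void print_entry(const struct entry *e)
{
	if (e->flags & TIMER_DEFERRABLE)
		printf("%4luD, %5d %-16s\n", e->count, e->pid, e->comm);
	else
		printf("%4lu,  %5d %-16s\n", e->count, e->pid, e->comm);
}

int main(void)
{
	struct entry e1 = { 12, 1234, "kworker/0:1", TIMER_DEFERRABLE };
	struct entry e2 = { 3, 1, "systemd", 0 };

	print_entry(&e1);
	print_entry(&e2);
	return 0;
}
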
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
index a382e4a32609..782ae8ca2c06 100644
--- a/lib/timerqueue.c
+++ b/lib/timerqueue.c
@@ -36,7 +36,7 @@
36 * Adds the timer node to the timerqueue, sorted by the 36 * Adds the timer node to the timerqueue, sorted by the
37 * node's expires value. 37 * node's expires value.
38 */ 38 */
39void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) 39bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
40{ 40{
41 struct rb_node **p = &head->head.rb_node; 41 struct rb_node **p = &head->head.rb_node;
42 struct rb_node *parent = NULL; 42 struct rb_node *parent = NULL;
@@ -56,8 +56,11 @@ void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
56 rb_link_node(&node->node, parent, p); 56 rb_link_node(&node->node, parent, p);
57 rb_insert_color(&node->node, &head->head); 57 rb_insert_color(&node->node, &head->head);
58 58
59 if (!head->next || node->expires.tv64 < head->next->expires.tv64) 59 if (!head->next || node->expires.tv64 < head->next->expires.tv64) {
60 head->next = node; 60 head->next = node;
61 return true;
62 }
63 return false;
61} 64}
62EXPORT_SYMBOL_GPL(timerqueue_add); 65EXPORT_SYMBOL_GPL(timerqueue_add);
63 66
@@ -69,7 +72,7 @@ EXPORT_SYMBOL_GPL(timerqueue_add);
69 * 72 *
70 * Removes the timer node from the timerqueue. 73 * Removes the timer node from the timerqueue.
71 */ 74 */
72void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) 75bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
73{ 76{
74 WARN_ON_ONCE(RB_EMPTY_NODE(&node->node)); 77 WARN_ON_ONCE(RB_EMPTY_NODE(&node->node));
75 78
@@ -82,6 +85,7 @@ void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
82 } 85 }
83 rb_erase(&node->node, &head->head); 86 rb_erase(&node->node, &head->head);
84 RB_CLEAR_NODE(&node->node); 87 RB_CLEAR_NODE(&node->node);
88 return head->next != NULL;
85} 89}
86EXPORT_SYMBOL_GPL(timerqueue_del); 90EXPORT_SYMBOL_GPL(timerqueue_del);
87 91
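
timerqueue_add() and timerqueue_del() now report whether the earliest-expiring node changed, which lets callers such as the hrtimer enqueue path decide whether the underlying event source needs reprogramming without rescanning the queue. A minimal user-space sketch of that caller pattern follows; queue_add() and reprogram() are stand-ins for illustration, not kernel API.

/*
 * Caller-side use of a bool "head changed" return: only reprogram the
 * event source when the new node became the earliest one.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct node  { uint64_t expires; };
struct queue { struct node *next; };   /* earliest node, or NULL */

/* Pretend insert: returns true when the new node becomes the earliest. */
static bool queue_add(struct queue *q, struct node *n)
{
	if (!q->next || n->expires < q->next->expires) {
		q->next = n;
		return true;
	}
	return false;
}

static void reprogram(uint64_t expires)
{
	printf("reprogramming event source for %llu ns\n",
	       (unsigned long long)expires);
}

int main(void)
{
	struct queue q = { .next = NULL };
	struct node a = { .expires = 2000 }, b = { .expires = 1000 };

	if (queue_add(&q, &a))
		reprogram(a.expires);   /* first timer: head changed */
	if (queue_add(&q, &b))
		reprogram(b.expires);   /* earlier timer: head changed again */
	return 0;
}
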
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 508155b283dd..54817d365366 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2212,8 +2212,6 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
2212 do { 2212 do {
2213 set_current_state(TASK_INTERRUPTIBLE); 2213 set_current_state(TASK_INTERRUPTIBLE);
2214 hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS); 2214 hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
2215 if (!hrtimer_active(&t.timer))
2216 t.task = NULL;
2217 2215
2218 if (likely(t.task)) 2216 if (likely(t.task))
2219 schedule(); 2217 schedule();
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 1e1c89e51a11..73a123daa2cc 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1885,13 +1885,10 @@ EXPORT_SYMBOL(tcf_destroy_chain);
1885#ifdef CONFIG_PROC_FS 1885#ifdef CONFIG_PROC_FS
1886static int psched_show(struct seq_file *seq, void *v) 1886static int psched_show(struct seq_file *seq, void *v)
1887{ 1887{
1888 struct timespec ts;
1889
1890 hrtimer_get_res(CLOCK_MONOTONIC, &ts);
1891 seq_printf(seq, "%08x %08x %08x %08x\n", 1888 seq_printf(seq, "%08x %08x %08x %08x\n",
1892 (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1), 1889 (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
1893 1000000, 1890 1000000,
1894 (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts))); 1891 (u32)NSEC_PER_SEC / hrtimer_resolution);
1895 1892
1896 return 0; 1893 return 0;
1897} 1894}
diff --git a/sound/core/hrtimer.c b/sound/core/hrtimer.c
index 886be7da989d..f845ecf7e172 100644
--- a/sound/core/hrtimer.c
+++ b/sound/core/hrtimer.c
@@ -121,16 +121,9 @@ static struct snd_timer *mytimer;
121static int __init snd_hrtimer_init(void) 121static int __init snd_hrtimer_init(void)
122{ 122{
123 struct snd_timer *timer; 123 struct snd_timer *timer;
124 struct timespec tp;
125 int err; 124 int err;
126 125
127 hrtimer_get_res(CLOCK_MONOTONIC, &tp); 126 resolution = hrtimer_resolution;
128 if (tp.tv_sec > 0 || !tp.tv_nsec) {
129 pr_err("snd-hrtimer: Invalid resolution %u.%09u",
130 (unsigned)tp.tv_sec, (unsigned)tp.tv_nsec);
131 return -EINVAL;
132 }
133 resolution = tp.tv_nsec;
134 127
135 /* Create a new timer and set up the fields */ 128 /* Create a new timer and set up the fields */
136 err = snd_timer_global_new("hrtimer", SNDRV_TIMER_GLOBAL_HRTIMER, 129 err = snd_timer_global_new("hrtimer", SNDRV_TIMER_GLOBAL_HRTIMER,
diff --git a/sound/drivers/pcsp/pcsp.c b/sound/drivers/pcsp/pcsp.c
index d9647bd84d0f..27e25bb78c97 100644
--- a/sound/drivers/pcsp/pcsp.c
+++ b/sound/drivers/pcsp/pcsp.c
@@ -42,16 +42,13 @@ struct snd_pcsp pcsp_chip;
42static int snd_pcsp_create(struct snd_card *card) 42static int snd_pcsp_create(struct snd_card *card)
43{ 43{
44 static struct snd_device_ops ops = { }; 44 static struct snd_device_ops ops = { };
45 struct timespec tp; 45 unsigned int resolution = hrtimer_resolution;
46 int err; 46 int err, div, min_div, order;
47 int div, min_div, order;
48
49 hrtimer_get_res(CLOCK_MONOTONIC, &tp);
50 47
51 if (!nopcm) { 48 if (!nopcm) {
52 if (tp.tv_sec || tp.tv_nsec > PCSP_MAX_PERIOD_NS) { 49 if (resolution > PCSP_MAX_PERIOD_NS) {
53 printk(KERN_ERR "PCSP: Timer resolution is not sufficient " 50 printk(KERN_ERR "PCSP: Timer resolution is not sufficient "
54 "(%linS)\n", tp.tv_nsec); 51 "(%unS)\n", resolution);
55 printk(KERN_ERR "PCSP: Make sure you have HPET and ACPI " 52 printk(KERN_ERR "PCSP: Make sure you have HPET and ACPI "
56 "enabled.\n"); 53 "enabled.\n");
57 printk(KERN_ERR "PCSP: Turned into nopcm mode.\n"); 54 printk(KERN_ERR "PCSP: Turned into nopcm mode.\n");
@@ -59,13 +56,13 @@ static int snd_pcsp_create(struct snd_card *card)
59 } 56 }
60 } 57 }
61 58
62 if (loops_per_jiffy >= PCSP_MIN_LPJ && tp.tv_nsec <= PCSP_MIN_PERIOD_NS) 59 if (loops_per_jiffy >= PCSP_MIN_LPJ && resolution <= PCSP_MIN_PERIOD_NS)
63 min_div = MIN_DIV; 60 min_div = MIN_DIV;
64 else 61 else
65 min_div = MAX_DIV; 62 min_div = MAX_DIV;
66#if PCSP_DEBUG 63#if PCSP_DEBUG
67 printk(KERN_DEBUG "PCSP: lpj=%li, min_div=%i, res=%li\n", 64 printk(KERN_DEBUG "PCSP: lpj=%li, min_div=%i, res=%u\n",
68 loops_per_jiffy, min_div, tp.tv_nsec); 65 loops_per_jiffy, min_div, resolution);
69#endif 66#endif
70 67
71 div = MAX_DIV / min_div; 68 div = MAX_DIV / min_div;
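
The psched_show(), snd_hrtimer_init() and snd_pcsp_create() hunks above all drop hrtimer_get_res(), which filled a struct timespec, in favour of reading the plain nanosecond hrtimer_resolution value, so the old tv_sec/tv_nsec double checks collapse into single comparisons. A small sketch of the before/after shape of those checks; the resolution value and PCSP_MAX_PERIOD_NS used here are assumptions for illustration.

/*
 * With the resolution expressed as a single nanosecond count, the driver
 * checks and the psched "ticks per second" figure become one-liners.
 */
#include <stdio.h>

#define NSEC_PER_SEC       1000000000u
#define PCSP_MAX_PERIOD_NS 1000000u       /* assumed driver limit, 1 ms */

int main(void)
{
	unsigned int resolution = 1;      /* assumed hrtimer_resolution in highres mode */

	/* Old style: tp.tv_sec || tp.tv_nsec > PCSP_MAX_PERIOD_NS
	 * New style: one comparison on the nanosecond value.        */
	if (resolution > PCSP_MAX_PERIOD_NS)
		printf("timer resolution too coarse for PCM: %u ns\n", resolution);
	else
		printf("timer resolution ok: %u ns\n", resolution);

	/* psched_show()-style figure: clock ticks per second. */
	printf("clock ticks per second: %u\n", NSEC_PER_SEC / resolution);
	return 0;
}
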
diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c
index b8272e6c4b3b..fb46ad6ac92c 100644
--- a/tools/testing/selftests/timers/leap-a-day.c
+++ b/tools/testing/selftests/timers/leap-a-day.c
@@ -44,6 +44,7 @@
44#include <time.h> 44#include <time.h>
45#include <sys/time.h> 45#include <sys/time.h>
46#include <sys/timex.h> 46#include <sys/timex.h>
47#include <sys/errno.h>
47#include <string.h> 48#include <string.h>
48#include <signal.h> 49#include <signal.h>
49#include <unistd.h> 50#include <unistd.h>
@@ -63,6 +64,9 @@ static inline int ksft_exit_fail(void)
63#define NSEC_PER_SEC 1000000000ULL 64#define NSEC_PER_SEC 1000000000ULL
64#define CLOCK_TAI 11 65#define CLOCK_TAI 11
65 66
67time_t next_leap;
68int error_found;
69
66/* returns 1 if a <= b, 0 otherwise */ 70/* returns 1 if a <= b, 0 otherwise */
67static inline int in_order(struct timespec a, struct timespec b) 71static inline int in_order(struct timespec a, struct timespec b)
68{ 72{
@@ -134,6 +138,35 @@ void handler(int unused)
134 exit(0); 138 exit(0);
135} 139}
136 140
141void sigalarm(int signo)
142{
143 struct timex tx;
144 int ret;
145
146 tx.modes = 0;
147 ret = adjtimex(&tx);
148
149 if (tx.time.tv_sec < next_leap) {
150 printf("Error: Early timer expiration! (Should be %ld)\n", next_leap);
151 error_found = 1;
152 printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n",
153 tx.time.tv_sec,
154 tx.time.tv_usec,
155 tx.tai,
156 time_state_str(ret));
157 }
158 if (ret != TIME_WAIT) {
159 printf("Error: Timer seeing incorrect NTP state? (Should be TIME_WAIT)\n");
160 error_found = 1;
161 printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n",
162 tx.time.tv_sec,
163 tx.time.tv_usec,
164 tx.tai,
165 time_state_str(ret));
166 }
167}
168
169
137/* Test for known hrtimer failure */ 170/* Test for known hrtimer failure */
138void test_hrtimer_failure(void) 171void test_hrtimer_failure(void)
139{ 172{
@@ -144,12 +177,19 @@ void test_hrtimer_failure(void)
144 clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &target, NULL); 177 clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &target, NULL);
145 clock_gettime(CLOCK_REALTIME, &now); 178 clock_gettime(CLOCK_REALTIME, &now);
146 179
147 if (!in_order(target, now)) 180 if (!in_order(target, now)) {
148 printf("ERROR: hrtimer early expiration failure observed.\n"); 181 printf("ERROR: hrtimer early expiration failure observed.\n");
182 error_found = 1;
183 }
149} 184}
150 185
151int main(int argc, char **argv) 186int main(int argc, char **argv)
152{ 187{
188 timer_t tm1;
189 struct itimerspec its1;
190 struct sigevent se;
191 struct sigaction act;
192 int signum = SIGRTMAX;
153 int settime = 0; 193 int settime = 0;
154 int tai_time = 0; 194 int tai_time = 0;
155 int insert = 1; 195 int insert = 1;
@@ -191,6 +231,12 @@ int main(int argc, char **argv)
191 signal(SIGINT, handler); 231 signal(SIGINT, handler);
192 signal(SIGKILL, handler); 232 signal(SIGKILL, handler);
193 233
234 /* Set up timer signal handler: */
235 sigfillset(&act.sa_mask);
236 act.sa_flags = 0;
237 act.sa_handler = sigalarm;
238 sigaction(signum, &act, NULL);
239
194 if (iterations < 0) 240 if (iterations < 0)
195 printf("This runs continuously. Press ctrl-c to stop\n"); 241 printf("This runs continuously. Press ctrl-c to stop\n");
196 else 242 else
@@ -201,7 +247,7 @@ int main(int argc, char **argv)
201 int ret; 247 int ret;
202 struct timespec ts; 248 struct timespec ts;
203 struct timex tx; 249 struct timex tx;
204 time_t now, next_leap; 250 time_t now;
205 251
206 /* Get the current time */ 252 /* Get the current time */
207 clock_gettime(CLOCK_REALTIME, &ts); 253 clock_gettime(CLOCK_REALTIME, &ts);
@@ -251,10 +297,27 @@ int main(int argc, char **argv)
251 297
252 printf("Scheduling leap second for %s", ctime(&next_leap)); 298 printf("Scheduling leap second for %s", ctime(&next_leap));
253 299
300 /* Set up timer */
301 printf("Setting timer for %ld - %s", next_leap, ctime(&next_leap));
302 memset(&se, 0, sizeof(se));
303 se.sigev_notify = SIGEV_SIGNAL;
304 se.sigev_signo = signum;
305 se.sigev_value.sival_int = 0;
306 if (timer_create(CLOCK_REALTIME, &se, &tm1) == -1) {
307 printf("Error: timer_create failed\n");
308 return ksft_exit_fail();
309 }
310 its1.it_value.tv_sec = next_leap;
311 its1.it_value.tv_nsec = 0;
312 its1.it_interval.tv_sec = 0;
313 its1.it_interval.tv_nsec = 0;
314 timer_settime(tm1, TIMER_ABSTIME, &its1, NULL);
315
254 /* Wake up 3 seconds before leap */ 316 /* Wake up 3 seconds before leap */
255 ts.tv_sec = next_leap - 3; 317 ts.tv_sec = next_leap - 3;
256 ts.tv_nsec = 0; 318 ts.tv_nsec = 0;
257 319
320
258 while (clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &ts, NULL)) 321 while (clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &ts, NULL))
259 printf("Something woke us up, returning to sleep\n"); 322 printf("Something woke us up, returning to sleep\n");
260 323
@@ -276,6 +339,7 @@ int main(int argc, char **argv)
276 while (now < next_leap + 2) { 339 while (now < next_leap + 2) {
277 char buf[26]; 340 char buf[26];
278 struct timespec tai; 341 struct timespec tai;
342 int ret;
279 343
280 tx.modes = 0; 344 tx.modes = 0;
281 ret = adjtimex(&tx); 345 ret = adjtimex(&tx);
@@ -308,8 +372,13 @@ int main(int argc, char **argv)
308 /* Note if kernel has known hrtimer failure */ 372 /* Note if kernel has known hrtimer failure */
309 test_hrtimer_failure(); 373 test_hrtimer_failure();
310 374
311 printf("Leap complete\n\n"); 375 printf("Leap complete\n");
312 376 if (error_found) {
377 printf("Errors observed\n");
378 clear_time_state();
379 return ksft_exit_fail();
380 }
381 printf("\n");
313 if ((iterations != -1) && !(--iterations)) 382 if ((iterations != -1) && !(--iterations))
314 break; 383 break;
315 } 384 }
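
The leap-a-day selftest above arms an absolute CLOCK_REALTIME POSIX timer exactly at the leap-second boundary and lets a real-time signal handler verify, via adjtimex(), that the timer neither fired early nor observed the wrong NTP state. A stand-alone sketch of that absolute-timer-plus-signal pattern (build with -lrt; the 5-second target is an arbitrary assumption):

/*
 * Arm an absolute CLOCK_REALTIME timer and catch it with a real-time signal,
 * mirroring the timer_create()/timer_settime() setup in the test above.
 */
#define _POSIX_C_SOURCE 200809L
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

static void on_timer(int signo)
{
	(void)signo;
	write(STDOUT_FILENO, "timer fired\n", 12);   /* async-signal-safe */
}

int main(void)
{
	struct sigaction act;
	struct sigevent se;
	struct itimerspec its;
	timer_t tm;
	time_t target = time(NULL) + 5;              /* assumed test offset */

	memset(&act, 0, sizeof(act));
	act.sa_handler = on_timer;
	sigfillset(&act.sa_mask);
	sigaction(SIGRTMAX, &act, NULL);

	memset(&se, 0, sizeof(se));
	se.sigev_notify = SIGEV_SIGNAL;
	se.sigev_signo = SIGRTMAX;
	if (timer_create(CLOCK_REALTIME, &se, &tm) == -1) {
		perror("timer_create");
		return 1;
	}

	memset(&its, 0, sizeof(its));
	its.it_value.tv_sec = target;                /* absolute wall-clock expiry */
	timer_settime(tm, TIMER_ABSTIME, &its, NULL);

	pause();                                     /* wait for the signal */
	return 0;
}
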