author    Linus Torvalds <torvalds@linux-foundation.org>  2009-12-08 22:27:08 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2009-12-08 22:27:08 -0500
commit    60d8ce2cd6c283132928c11f3fd57ff4187287e0 (patch)
tree      36d08a2ead7a7d8c3c081d484215ccca00bf6aab
parent    849e8dea099aafa56db9e74b580b0d858b956533 (diff)
parent    feae3203d711db0a9965300ee6d592257fdaae4f (diff)
Merge branch 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  timers, init: Limit the number of per cpu calibration bootup messages
  posix-cpu-timers: optimize and document timer_create callback
  clockevents: Add missing include to pacify sparse
  x86: vmiclock: Fix printk format
  x86: Fix printk format due to variable type change
  sparc: fix printk for change of variable type
  clocksource/events: Fix fallout of generic code changes
  nohz: Allow 32-bit machines to sleep for more than 2.15 seconds
  nohz: Track last do_timer() cpu
  nohz: Prevent clocksource wrapping during idle
  nohz: Type cast printk argument
  mips: Use generic mult/shift factor calculation for clocks
  clocksource: Provide a generic mult/shift factor calculation
  clockevents: Use u32 for mult and shift factors
  nohz: Introduce arch_needs_cpu
  nohz: Reuse ktime in sub-functions of tick_check_idle.
  time: Remove xtime_cache
  time: Implement logarithmic time accumulation
-rw-r--r--  arch/mips/include/asm/time.h     |  14
-rw-r--r--  arch/mips/kernel/time.c          |  33
-rw-r--r--  arch/powerpc/kernel/time.c       |   2
-rw-r--r--  arch/s390/include/asm/cputime.h  |   8
-rw-r--r--  arch/s390/kernel/s390_ext.c      |   2
-rw-r--r--  arch/s390/kernel/vtime.c         |   2
-rw-r--r--  arch/sparc/kernel/time_64.c      |   2
-rw-r--r--  arch/x86/kernel/apic/apic.c      |   2
-rw-r--r--  arch/x86/kernel/vmiclock_32.c    |   2
-rw-r--r--  drivers/s390/cio/cio.c           |   1
-rw-r--r--  include/linux/clockchips.h       |  19
-rw-r--r--  include/linux/clocksource.h      |  12
-rw-r--r--  include/linux/tick.h             |   5
-rw-r--r--  include/linux/time.h             |   1
-rw-r--r--  include/linux/timex.h            |   4
-rw-r--r--  init/calibrate.c                 |  24
-rw-r--r--  kernel/cpu.c                     |   5
-rw-r--r--  kernel/hrtimer.c                 |   3
-rw-r--r--  kernel/posix-cpu-timers.c        |   5
-rw-r--r--  kernel/time.c                    |   1
-rw-r--r--  kernel/time/clockevents.c        |  13
-rw-r--r--  kernel/time/clocksource.c        |  97
-rw-r--r--  kernel/time/tick-oneshot.c       |   4
-rw-r--r--  kernel/time/tick-sched.c         | 141
-rw-r--r--  kernel/time/timekeeping.c        | 119
-rw-r--r--  kernel/time/timer_list.c         |  10
26 files changed, 353 insertions(+), 178 deletions(-)
diff --git a/arch/mips/include/asm/time.h b/arch/mips/include/asm/time.h
index df6a430de5eb..c7f1bfef1574 100644
--- a/arch/mips/include/asm/time.h
+++ b/arch/mips/include/asm/time.h
@@ -84,8 +84,16 @@ static inline int init_mips_clocksource(void)
 #endif
 }
 
-extern void clocksource_set_clock(struct clocksource *cs, unsigned int clock);
-extern void clockevent_set_clock(struct clock_event_device *cd,
-		unsigned int clock);
+static inline void clocksource_set_clock(struct clocksource *cs,
+					 unsigned int clock)
+{
+	clocksource_calc_mult_shift(cs, clock, 4);
+}
+
+static inline void clockevent_set_clock(struct clock_event_device *cd,
+					unsigned int clock)
+{
+	clockevents_calc_mult_shift(cd, clock, 4);
+}
 
 #endif /* _ASM_TIME_H */
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index 1f467d534642..fb7497405510 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -71,39 +71,6 @@ EXPORT_SYMBOL(perf_irq);
 
 unsigned int mips_hpt_frequency;
 
-void __init clocksource_set_clock(struct clocksource *cs, unsigned int clock)
-{
-	u64 temp;
-	u32 shift;
-
-	/* Find a shift value */
-	for (shift = 32; shift > 0; shift--) {
-		temp = (u64) NSEC_PER_SEC << shift;
-		do_div(temp, clock);
-		if ((temp >> 32) == 0)
-			break;
-	}
-	cs->shift = shift;
-	cs->mult = (u32) temp;
-}
-
-void __cpuinit clockevent_set_clock(struct clock_event_device *cd,
-	unsigned int clock)
-{
-	u64 temp;
-	u32 shift;
-
-	/* Find a shift value */
-	for (shift = 32; shift > 0; shift--) {
-		temp = (u64) clock << shift;
-		do_div(temp, NSEC_PER_SEC);
-		if ((temp >> 32) == 0)
-			break;
-	}
-	cd->shift = shift;
-	cd->mult = (u32) temp;
-}
-
 /*
  * This function exists in order to cause an error due to a duplicate
  * definition if platform code should have its own implementation. The hook
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 36707dec94d7..d18a7f04edec 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -924,7 +924,7 @@ static void register_decrementer_clockevent(int cpu)
 	*dec = decrementer_clockevent;
 	dec->cpumask = cpumask_of(cpu);
 
-	printk(KERN_DEBUG "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n",
+	printk(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n",
 	       dec->name, dec->mult, dec->shift, cpu);
 
 	clockevents_register_device(dec);
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index f23961ada7fb..258ba88b7b50 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -183,6 +183,7 @@ struct s390_idle_data {
 	unsigned long long idle_count;
 	unsigned long long idle_enter;
 	unsigned long long idle_time;
+	int nohz_delay;
 };
 
 DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
@@ -198,4 +199,11 @@ static inline void s390_idle_check(void)
 		vtime_start_cpu();
 }
 
+static inline int s390_nohz_delay(int cpu)
+{
+	return per_cpu(s390_idle, cpu).nohz_delay != 0;
+}
+
+#define arch_needs_cpu(cpu) s390_nohz_delay(cpu)
+
 #endif /* _S390_CPUTIME_H */
diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c
index 0de305b598ce..59618bcd99b7 100644
--- a/arch/s390/kernel/s390_ext.c
+++ b/arch/s390/kernel/s390_ext.c
@@ -126,6 +126,8 @@ void __irq_entry do_extint(struct pt_regs *regs, unsigned short code)
 		/* Serve timer interrupts first. */
 		clock_comparator_work();
 	kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++;
+	if (code != 0x1004)
+		__get_cpu_var(s390_idle).nohz_delay = 1;
 	index = ext_hash(code);
 	for (p = ext_int_hash[index]; p; p = p->next) {
 		if (likely(p->code == code))
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index c41bb0d416e1..b59a812a010e 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -167,6 +167,8 @@ void vtime_stop_cpu(void)
 	/* Wait for external, I/O or machine check interrupt. */
 	psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT;
 
+	idle->nohz_delay = 0;
+
 	/* Check if the CPU timer needs to be reprogrammed. */
 	if (vq->do_spt) {
 		__u64 vmax = VTIMER_MAX_SLICE;
diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c
index da1218e8ee87..63f73ae8a892 100644
--- a/arch/sparc/kernel/time_64.c
+++ b/arch/sparc/kernel/time_64.c
@@ -847,7 +847,7 @@ void __init time_init(void)
 		sparc64_clockevent.min_delta_ns =
 			clockevent_delta2ns(0xF, &sparc64_clockevent);
 
-		printk("clockevent: mult[%lx] shift[%d]\n",
+		printk("clockevent: mult[%ux] shift[%d]\n",
 		       sparc64_clockevent.mult, sparc64_clockevent.shift);
 
 		setup_sparc64_timer();
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ad8c75b9e453..efb2b9cd132c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -647,7 +647,7 @@ static int __init calibrate_APIC_clock(void)
 	calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
 
 	apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
-	apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult);
+	apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
 	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
 		    calibration_result);
 
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 611b9e2360d3..74c92bb194df 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void)
 	evt->min_delta_ns = clockevent_delta2ns(1, evt);
 	evt->cpumask = cpumask_of(cpu);
 
-	printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n",
+	printk(KERN_WARNING "vmi: registering clock event %s. mult=%u shift=%u\n",
 	       evt->name, evt->mult, evt->shift);
 	clockevents_register_device(evt);
 }
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 138124fcfcad..126f240715a4 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -618,6 +618,7 @@ void __irq_entry do_IRQ(struct pt_regs *regs)
 	old_regs = set_irq_regs(regs);
 	s390_idle_check();
 	irq_enter();
+	__get_cpu_var(s390_idle).nohz_delay = 1;
 	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
 		/* Serve timer interrupts first. */
 		clock_comparator_work();
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 3a1dbba4d3ae..0cf725bdd2a1 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -77,10 +77,10 @@ enum clock_event_nofitiers {
 struct clock_event_device {
 	const char		*name;
 	unsigned int		features;
-	unsigned long		max_delta_ns;
-	unsigned long		min_delta_ns;
-	unsigned long		mult;
-	int			shift;
+	u64			max_delta_ns;
+	u64			min_delta_ns;
+	u32			mult;
+	u32			shift;
 	int			rating;
 	int			irq;
 	const struct cpumask	*cpumask;
@@ -116,8 +116,8 @@ static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec,
 }
 
 /* Clock event layer functions */
-extern unsigned long clockevent_delta2ns(unsigned long latch,
-					 struct clock_event_device *evt);
+extern u64 clockevent_delta2ns(unsigned long latch,
+			       struct clock_event_device *evt);
 extern void clockevents_register_device(struct clock_event_device *dev);
 
 extern void clockevents_exchange_device(struct clock_event_device *old,
@@ -130,6 +130,13 @@ extern int clockevents_program_event(struct clock_event_device *dev,
 
 extern void clockevents_handle_noop(struct clock_event_device *dev);
 
+static inline void
+clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec)
+{
+	return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC,
+				      freq, minsec);
+}
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 extern void clockevents_notify(unsigned long reason, void *arg);
 #else
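
With mult and shift, a clock_event_device turns a nanosecond delta into device ticks as ticks = (ns * mult) >> shift, so the width of these fields bounds the longest programmable sleep; that is what the u64/u32 widening above is about. A minimal userspace sketch (not kernel code; the numbers are illustrative) of the 32-bit cap this lifts:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Old layout on a 32-bit kernel: max_delta_ns was 'unsigned long'
	 * and clockevent_delta2ns() clamped to LONG_MAX, so an idle sleep
	 * could never exceed LONG_MAX nanoseconds: */
	uint32_t long_max_32 = 0x7fffffff;		/* LONG_MAX on 32 bit */
	printf("old cap: %.3f s\n", long_max_32 / 1e9);	/* ~2.147 s */

	/* New layout: u64 fields clamped to KTIME_MAX instead: */
	int64_t ktime_max = (int64_t)~((uint64_t)1 << 63);
	printf("new cap: %.1f years\n",
	       ktime_max / 1e9 / 3600 / 24 / 365);	/* ~292 years */
	return 0;
}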
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 83d2fbd81b93..279c5478e8a6 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -151,6 +151,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc,
  *			subtraction of non 64 bit counters
  * @mult:		cycle to nanosecond multiplier
  * @shift:		cycle to nanosecond divisor (power of two)
+ * @max_idle_ns:	max idle time permitted by the clocksource (nsecs)
  * @flags:		flags describing special properties
  * @vread:		vsyscall based read
  * @resume:		resume function for the clocksource, if necessary
@@ -168,6 +169,7 @@ struct clocksource {
 	cycle_t mask;
 	u32 mult;
 	u32 shift;
+	u64 max_idle_ns;
 	unsigned long flags;
 	cycle_t (*vread)(void);
 	void (*resume)(void);
@@ -279,6 +281,16 @@ extern void clocksource_resume(void);
 extern struct clocksource * __init __weak clocksource_default_clock(void);
 extern void clocksource_mark_unstable(struct clocksource *cs);
 
+extern void
+clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
+
+static inline void
+clocksource_calc_mult_shift(struct clocksource *cs, u32 freq, u32 minsec)
+{
+	return clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
+				      NSEC_PER_SEC, minsec);
+}
+
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL
 extern void update_vsyscall(struct timespec *ts, struct clocksource *c);
 extern void update_vsyscall_tz(void);
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 0482229c07db..d2ae79e21be3 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -43,6 +43,7 @@ enum tick_nohz_mode {
  * @idle_exittime:	Time when the idle state was left
  * @idle_sleeptime:	Sum of the time slept in idle with sched tick stopped
  * @sleep_length:	Duration of the current idle sleep
+ * @do_timer_lst:	CPU was the last one doing do_timer before going idle
  */
 struct tick_sched {
 	struct hrtimer			sched_timer;
@@ -64,6 +65,7 @@ struct tick_sched {
 	unsigned long			last_jiffies;
 	unsigned long			next_jiffies;
 	ktime_t				idle_expires;
+	int				do_timer_last;
 };
 
 extern void __init tick_init(void);
@@ -98,6 +100,9 @@ extern int tick_check_oneshot_change(int allow_nohz);
 extern struct tick_sched *tick_get_tick_sched(int cpu);
 extern void tick_check_idle(int cpu);
 extern int tick_oneshot_mode_active(void);
+# ifndef arch_needs_cpu
+#  define arch_needs_cpu(cpu) (0)
+# endif
 # else
 static inline void tick_clock_notify(void) { }
 static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
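
The `# ifndef arch_needs_cpu` block above is a compile-time hook with a zero default; an architecture that defines the macro first (as s390 does via cputime.h earlier in this merge) wins. A small userspace sketch of the pattern and of the check the nohz code performs with it (every helper here is a stand-in, not a kernel API):

#include <stdio.h>

/* "arch" side: pretend recent interrupts make idle sleeping unattractive */
static int fake_arch_nohz_delay(int cpu) { (void)cpu; return 1; }
#define arch_needs_cpu(cpu) fake_arch_nohz_delay(cpu)

/* generic side: fall back to 0 when no architecture override exists */
#ifndef arch_needs_cpu
# define arch_needs_cpu(cpu) (0)
#endif

static int rcu_needs(int cpu)    { (void)cpu; return 0; }
static int printk_needs(int cpu) { (void)cpu; return 0; }

int main(void)
{
	int cpu = 0;

	/* mirrors the new condition in tick_nohz_stop_sched_tick() */
	if (rcu_needs(cpu) || printk_needs(cpu) || arch_needs_cpu(cpu))
		printf("CPU%d: keep ticking, next event in one jiffy\n", cpu);
	else
		printf("CPU%d: stop the tick until the next timer\n", cpu);
	return 0;
}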
diff --git a/include/linux/time.h b/include/linux/time.h
index fe04e5ef6a59..6e026e45a179 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -148,6 +148,7 @@ extern void monotonic_to_bootbased(struct timespec *ts);
 
 extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
 extern int timekeeping_valid_for_hres(void);
+extern u64 timekeeping_max_deferment(void);
 extern void update_wall_time(void);
 extern void update_xtime_cache(u64 nsec);
 extern void timekeeping_leap_insert(int leapsecond);
diff --git a/include/linux/timex.h b/include/linux/timex.h
index e6967d10d9e5..0c0ef7d4db7c 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -261,11 +261,7 @@ static inline int ntp_synced(void)
 
 #define NTP_SCALE_SHIFT		32
 
-#ifdef CONFIG_NO_HZ
-#define NTP_INTERVAL_FREQ  (2)
-#else
 #define NTP_INTERVAL_FREQ  (HZ)
-#endif
 #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ)
 
 /* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
diff --git a/init/calibrate.c b/init/calibrate.c
index a379c9061199..6eb48e53d61c 100644
--- a/init/calibrate.c
+++ b/init/calibrate.c
@@ -123,23 +123,26 @@ void __cpuinit calibrate_delay(void)
 {
 	unsigned long ticks, loopbit;
 	int lps_precision = LPS_PREC;
+	static bool printed;
 
 	if (preset_lpj) {
 		loops_per_jiffy = preset_lpj;
-		printk(KERN_INFO
-			"Calibrating delay loop (skipped) preset value.. ");
-	} else if ((smp_processor_id() == 0) && lpj_fine) {
+		if (!printed)
+			pr_info("Calibrating delay loop (skipped) "
+				"preset value.. ");
+	} else if ((!printed) && lpj_fine) {
 		loops_per_jiffy = lpj_fine;
-		printk(KERN_INFO
-			"Calibrating delay loop (skipped), "
+		pr_info("Calibrating delay loop (skipped), "
 			"value calculated using timer frequency.. ");
 	} else if ((loops_per_jiffy = calibrate_delay_direct()) != 0) {
-		printk(KERN_INFO
-			"Calibrating delay using timer specific routine.. ");
+		if (!printed)
+			pr_info("Calibrating delay using timer "
+				"specific routine.. ");
 	} else {
 		loops_per_jiffy = (1<<12);
 
-		printk(KERN_INFO "Calibrating delay loop... ");
+		if (!printed)
+			pr_info("Calibrating delay loop... ");
 		while ((loops_per_jiffy <<= 1) != 0) {
 			/* wait for "start of" clock tick */
 			ticks = jiffies;
@@ -170,7 +173,10 @@ void __cpuinit calibrate_delay(void)
 				loops_per_jiffy &= ~loopbit;
 		}
 	}
-	printk(KERN_CONT "%lu.%02lu BogoMIPS (lpj=%lu)\n",
+	if (!printed)
+		pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n",
 			loops_per_jiffy/(500000/HZ),
 			(loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy);
+
+	printed = true;
 }
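
For reference, the BogoMIPS line above is pure integer arithmetic on loops_per_jiffy: lpj/(500000/HZ) is the integer part of lpj*HZ/500000, and (lpj/(5000/HZ)) % 100 supplies the two decimals. A standalone sketch with made-up numbers (HZ=250 and lpj=4992000 print 2496.00):

#include <stdio.h>

#define HZ 250				/* assumed tick rate for the demo */

int main(void)
{
	unsigned long lpj = 4992000;	/* made-up loops_per_jiffy */

	/* same expression as the pr_cont() in calibrate_delay() */
	printf("%lu.%02lu BogoMIPS (lpj=%lu)\n",
	       lpj / (500000 / HZ),
	       (lpj / (5000 / HZ)) % 100, lpj);
	return 0;
}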
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6ba0f1ecb212..7c4e2713df0a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -392,10 +392,9 @@ int disable_nonboot_cpus(void)
 		if (cpu == first_cpu)
 			continue;
 		error = _cpu_down(cpu, 1);
-		if (!error) {
+		if (!error)
 			cpumask_set_cpu(cpu, frozen_cpus);
-			printk("CPU%d is down\n", cpu);
-		} else {
+		else {
 			printk(KERN_ERR "Error taking CPU%d down: %d\n",
 				cpu, error);
 			break;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 3e1c36e7998f..ede527708123 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1238,7 +1238,8 @@ hrtimer_interrupt_hanging(struct clock_event_device *dev,
 	force_clock_reprogram = 1;
 	dev->min_delta_ns = (unsigned long)try_time.tv64 * 3;
 	printk(KERN_WARNING "hrtimer: interrupt too slow, "
-		"forcing clock min delta to %lu ns\n", dev->min_delta_ns);
+	       "forcing clock min delta to %llu ns\n",
+	       (unsigned long long) dev->min_delta_ns);
 }
 /*
  * High resolution timer interrupt
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 5c9dc228747b..438ff4523513 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -384,7 +384,8 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 
 /*
  * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
- * This is called from sys_timer_create with the new timer already locked.
+ * This is called from sys_timer_create() and do_cpu_nanosleep() with the
+ * new timer already all-zeros initialized.
  */
 int posix_cpu_timer_create(struct k_itimer *new_timer)
 {
@@ -396,8 +397,6 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
 		return -EINVAL;
 
 	INIT_LIST_HEAD(&new_timer->it.cpu.entry);
-	new_timer->it.cpu.incr.sched = 0;
-	new_timer->it.cpu.expires.sched = 0;
 
 	read_lock(&tasklist_lock);
 	if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
diff --git a/kernel/time.c b/kernel/time.c
index 804798005d19..c6324d96009e 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -136,7 +136,6 @@ static inline void warp_clock(void)
 	write_seqlock_irq(&xtime_lock);
 	wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60;
 	xtime.tv_sec += sys_tz.tz_minuteswest * 60;
-	update_xtime_cache(0);
 	write_sequnlock_irq(&xtime_lock);
 	clock_was_set();
 }
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 620b58abdc32..20a8920029ee 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -20,6 +20,8 @@
 #include <linux/sysdev.h>
 #include <linux/tick.h>
 
+#include "tick-internal.h"
+
 /* The registered clock event devices */
 static LIST_HEAD(clockevent_devices);
 static LIST_HEAD(clockevents_released);
@@ -37,10 +39,9 @@ static DEFINE_SPINLOCK(clockevents_lock);
  *
  * Math helper, returns latch value converted to nanoseconds (bound checked)
  */
-unsigned long clockevent_delta2ns(unsigned long latch,
-				  struct clock_event_device *evt)
+u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
 {
-	u64 clc = ((u64) latch << evt->shift);
+	u64 clc = (u64) latch << evt->shift;
 
 	if (unlikely(!evt->mult)) {
 		evt->mult = 1;
@@ -50,10 +51,10 @@ unsigned long clockevent_delta2ns(unsigned long latch,
 	do_div(clc, evt->mult);
 	if (clc < 1000)
 		clc = 1000;
-	if (clc > LONG_MAX)
-		clc = LONG_MAX;
+	if (clc > KTIME_MAX)
+		clc = KTIME_MAX;
 
-	return (unsigned long) clc;
+	return clc;
 }
 EXPORT_SYMBOL_GPL(clockevent_delta2ns);
 
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 4a310906b3e8..d422c7b2236b 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -107,6 +107,59 @@ u64 timecounter_cyc2time(struct timecounter *tc,
 }
 EXPORT_SYMBOL_GPL(timecounter_cyc2time);
 
+/**
+ * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
+ * @mult:	pointer to mult variable
+ * @shift:	pointer to shift variable
+ * @from:	frequency to convert from
+ * @to:		frequency to convert to
+ * @minsec:	guaranteed runtime conversion range in seconds
+ *
+ * The function evaluates the shift/mult pair for the scaled math
+ * operations of clocksources and clockevents.
+ *
+ * @to and @from are frequency values in HZ. For clock sources @to is
+ * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
+ * event @to is the counter frequency and @from is NSEC_PER_SEC.
+ *
+ * The @minsec conversion range argument controls the time frame in
+ * seconds which must be covered by the runtime conversion with the
+ * calculated mult and shift factors. This guarantees that no 64bit
+ * overflow happens when the input value of the conversion is
+ * multiplied with the calculated mult factor. Larger ranges may
+ * reduce the conversion accuracy by chosing smaller mult and shift
+ * factors.
+ */
+void
+clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec)
+{
+	u64 tmp;
+	u32 sft, sftacc= 32;
+
+	/*
+	 * Calculate the shift factor which is limiting the conversion
+	 * range:
+	 */
+	tmp = ((u64)minsec * from) >> 32;
+	while (tmp) {
+		tmp >>=1;
+		sftacc--;
+	}
+
+	/*
+	 * Find the conversion shift/mult pair which has the best
+	 * accuracy and fits the maxsec conversion range:
+	 */
+	for (sft = 32; sft > 0; sft--) {
+		tmp = (u64) to << sft;
+		do_div(tmp, from);
+		if ((tmp >> sftacc) == 0)
+			break;
+	}
+	*mult = tmp;
+	*shift = sft;
+}
+
 /*[Clocksource internal variables]---------
  * curr_clocksource:
  *	currently selected clocksource.
@@ -413,6 +466,47 @@ void clocksource_touch_watchdog(void)
 	clocksource_resume_watchdog();
 }
 
+/**
+ * clocksource_max_deferment - Returns max time the clocksource can be deferred
+ * @cs:         Pointer to clocksource
+ *
+ */
+static u64 clocksource_max_deferment(struct clocksource *cs)
+{
+	u64 max_nsecs, max_cycles;
+
+	/*
+	 * Calculate the maximum number of cycles that we can pass to the
+	 * cyc2ns function without overflowing a 64-bit signed result. The
+	 * maximum number of cycles is equal to ULLONG_MAX/cs->mult which
+	 * is equivalent to the below.
+	 * max_cycles < (2^63)/cs->mult
+	 * max_cycles < 2^(log2((2^63)/cs->mult))
+	 * max_cycles < 2^(log2(2^63) - log2(cs->mult))
+	 * max_cycles < 2^(63 - log2(cs->mult))
+	 * max_cycles < 1 << (63 - log2(cs->mult))
+	 * Please note that we add 1 to the result of the log2 to account for
+	 * any rounding errors, ensure the above inequality is satisfied and
+	 * no overflow will occur.
+	 */
+	max_cycles = 1ULL << (63 - (ilog2(cs->mult) + 1));
+
+	/*
+	 * The actual maximum number of cycles we can defer the clocksource is
+	 * determined by the minimum of max_cycles and cs->mask.
+	 */
+	max_cycles = min_t(u64, max_cycles, (u64) cs->mask);
+	max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult, cs->shift);
+
+	/*
+	 * To ensure that the clocksource does not wrap whilst we are idle,
+	 * limit the time the clocksource can be deferred by 12.5%. Please
+	 * note a margin of 12.5% is used because this can be computed with
+	 * a shift, versus say 10% which would require division.
+	 */
+	return max_nsecs - (max_nsecs >> 5);
+}
+
 #ifdef CONFIG_GENERIC_TIME
 
 /**
@@ -511,6 +605,9 @@ static void clocksource_enqueue(struct clocksource *cs)
  */
 int clocksource_register(struct clocksource *cs)
 {
+	/* calculate max idle time permitted for this clocksource */
+	cs->max_idle_ns = clocksource_max_deferment(cs);
+
 	mutex_lock(&clocksource_mutex);
 	clocksource_enqueue(cs);
 	clocksource_select();
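
The mult/shift search in clocks_calc_mult_shift() ports directly to userspace, which makes it easy to see which factors it picks. A sketch (do_div() replaced by plain 64-bit division; the 1 MHz counter and 600 s range are made-up inputs):

#include <stdio.h>
#include <stdint.h>

static void calc_mult_shift(uint32_t *mult, uint32_t *shift,
			    uint32_t from, uint32_t to, uint32_t minsec)
{
	uint64_t tmp;
	uint32_t sft, sftacc = 32;

	/* shrink the accuracy budget until minsec seconds of input fit */
	tmp = ((uint64_t)minsec * from) >> 32;
	while (tmp) {
		tmp >>= 1;
		sftacc--;
	}
	/* largest shift whose mult keeps the product inside 64 bits */
	for (sft = 32; sft > 0; sft--) {
		tmp = ((uint64_t)to << sft) / from;
		if ((tmp >> sftacc) == 0)
			break;
	}
	*mult = tmp;
	*shift = sft;
}

int main(void)
{
	uint32_t mult, shift;
	uint64_t cycles = 1000000;	/* one second of a 1 MHz counter */

	/* clocksource direction: counter frequency -> NSEC_PER_SEC */
	calc_mult_shift(&mult, &shift, 1000000, 1000000000, 600);
	printf("mult=%u shift=%u: %llu cycles -> %llu ns\n", mult, shift,
	       (unsigned long long)cycles,
	       (unsigned long long)((cycles * mult) >> shift));
	return 0;
}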
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index a96c0e2b89cf..0a8a213016f0 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -50,9 +50,9 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires,
 			dev->min_delta_ns += dev->min_delta_ns >> 1;
 
 			printk(KERN_WARNING
-			       "CE: %s increasing min_delta_ns to %lu nsec\n",
+			       "CE: %s increasing min_delta_ns to %llu nsec\n",
 			       dev->name ? dev->name : "?",
-			       dev->min_delta_ns << 1);
+			       (unsigned long long) dev->min_delta_ns << 1);
 
 			i = 0;
 		}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 89aed5933ed4..f992762d7f51 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -134,18 +134,13 @@ __setup("nohz=", setup_tick_nohz);
  * value. We do this unconditionally on any cpu, as we don't know whether the
  * cpu, which has the update task assigned is in a long sleep.
  */
-static void tick_nohz_update_jiffies(void)
+static void tick_nohz_update_jiffies(ktime_t now)
 {
 	int cpu = smp_processor_id();
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
 	unsigned long flags;
-	ktime_t now;
-
-	if (!ts->tick_stopped)
-		return;
 
 	cpumask_clear_cpu(cpu, nohz_cpu_mask);
-	now = ktime_get();
 	ts->idle_waketime = now;
 
 	local_irq_save(flags);
@@ -155,20 +150,17 @@ static void tick_nohz_update_jiffies(void)
 	touch_softlockup_watchdog();
 }
 
-static void tick_nohz_stop_idle(int cpu)
+static void tick_nohz_stop_idle(int cpu, ktime_t now)
 {
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+	ktime_t delta;
 
-	if (ts->idle_active) {
-		ktime_t now, delta;
-		now = ktime_get();
-		delta = ktime_sub(now, ts->idle_entrytime);
-		ts->idle_lastupdate = now;
-		ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
-		ts->idle_active = 0;
+	delta = ktime_sub(now, ts->idle_entrytime);
+	ts->idle_lastupdate = now;
+	ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
+	ts->idle_active = 0;
 
-		sched_clock_idle_wakeup_event(0);
-	}
+	sched_clock_idle_wakeup_event(0);
 }
 
 static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
@@ -216,6 +208,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 	struct tick_sched *ts;
 	ktime_t last_update, expires, now;
 	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
+	u64 time_delta;
 	int cpu;
 
 	local_irq_save(flags);
@@ -263,7 +256,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 
 		if (ratelimit < 10) {
 			printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
-			       local_softirq_pending());
+			       (unsigned int) local_softirq_pending());
 			ratelimit++;
 		}
 		goto end;
@@ -275,14 +268,18 @@ void tick_nohz_stop_sched_tick(int inidle)
 		seq = read_seqbegin(&xtime_lock);
 		last_update = last_jiffies_update;
 		last_jiffies = jiffies;
+		time_delta = timekeeping_max_deferment();
 	} while (read_seqretry(&xtime_lock, seq));
 
-	/* Get the next timer wheel timer */
-	next_jiffies = get_next_timer_interrupt(last_jiffies);
-	delta_jiffies = next_jiffies - last_jiffies;
-
-	if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu))
+	if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
+	    arch_needs_cpu(cpu)) {
+		next_jiffies = last_jiffies + 1;
 		delta_jiffies = 1;
+	} else {
+		/* Get the next timer wheel timer */
+		next_jiffies = get_next_timer_interrupt(last_jiffies);
+		delta_jiffies = next_jiffies - last_jiffies;
+	}
 	/*
 	 * Do not stop the tick, if we are only one off
 	 * or if the cpu is required for rcu
@@ -294,22 +291,51 @@ void tick_nohz_stop_sched_tick(int inidle)
 	if ((long)delta_jiffies >= 1) {
 
 		/*
-		 * calculate the expiry time for the next timer wheel
-		 * timer
-		 */
-		expires = ktime_add_ns(last_update, tick_period.tv64 *
-				   delta_jiffies);
-
-		/*
 		 * If this cpu is the one which updates jiffies, then
 		 * give up the assignment and let it be taken by the
 		 * cpu which runs the tick timer next, which might be
 		 * this cpu as well. If we don't drop this here the
 		 * jiffies might be stale and do_timer() never
-		 * invoked.
+		 * invoked. Keep track of the fact that it was the one
+		 * which had the do_timer() duty last. If this cpu is
+		 * the one which had the do_timer() duty last, we
+		 * limit the sleep time to the timekeeping
+		 * max_deferement value which we retrieved
+		 * above. Otherwise we can sleep as long as we want.
 		 */
-		if (cpu == tick_do_timer_cpu)
+		if (cpu == tick_do_timer_cpu) {
 			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+			ts->do_timer_last = 1;
+		} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
+			time_delta = KTIME_MAX;
+			ts->do_timer_last = 0;
+		} else if (!ts->do_timer_last) {
+			time_delta = KTIME_MAX;
+		}
+
+		/*
+		 * calculate the expiry time for the next timer wheel
+		 * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
+		 * that there is no timer pending or at least extremely
+		 * far into the future (12 days for HZ=1000). In this
+		 * case we set the expiry to the end of time.
+		 */
+		if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) {
+			/*
+			 * Calculate the time delta for the next timer event.
+			 * If the time delta exceeds the maximum time delta
+			 * permitted by the current clocksource then adjust
+			 * the time delta accordingly to ensure the
+			 * clocksource does not wrap.
+			 */
+			time_delta = min_t(u64, time_delta,
+					   tick_period.tv64 * delta_jiffies);
+		}
+
+		if (time_delta < KTIME_MAX)
+			expires = ktime_add_ns(last_update, time_delta);
+		else
+			expires.tv64 = KTIME_MAX;
 
 		if (delta_jiffies > 1)
 			cpumask_set_cpu(cpu, nohz_cpu_mask);
@@ -342,22 +368,19 @@ void tick_nohz_stop_sched_tick(int inidle)
 
 		ts->idle_sleeps++;
 
+		/* Mark expires */
+		ts->idle_expires = expires;
+
 		/*
-		 * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that
-		 * there is no timer pending or at least extremly far
-		 * into the future (12 days for HZ=1000). In this case
-		 * we simply stop the tick timer:
+		 * If the expiration time == KTIME_MAX, then
+		 * in this case we simply stop the tick timer.
 		 */
-		if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) {
-			ts->idle_expires.tv64 = KTIME_MAX;
+		if (unlikely(expires.tv64 == KTIME_MAX)) {
 			if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
 				hrtimer_cancel(&ts->sched_timer);
 			goto out;
 		}
 
-		/* Mark expiries */
-		ts->idle_expires = expires;
-
 		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
 			hrtimer_start(&ts->sched_timer, expires,
 				      HRTIMER_MODE_ABS_PINNED);
@@ -436,7 +459,11 @@ void tick_nohz_restart_sched_tick(void)
 	ktime_t now;
 
 	local_irq_disable();
-	tick_nohz_stop_idle(cpu);
+	if (ts->idle_active || (ts->inidle && ts->tick_stopped))
+		now = ktime_get();
+
+	if (ts->idle_active)
+		tick_nohz_stop_idle(cpu, now);
 
 	if (!ts->inidle || !ts->tick_stopped) {
 		ts->inidle = 0;
@@ -450,7 +477,6 @@ void tick_nohz_restart_sched_tick(void)
 
 	/* Update jiffies first */
 	select_nohz_load_balancer(0);
-	now = ktime_get();
 	tick_do_update_jiffies64(now);
 	cpumask_clear_cpu(cpu, nohz_cpu_mask);
 
@@ -584,22 +610,18 @@ static void tick_nohz_switch_to_nohz(void)
  * timer and do not touch the other magic bits which need to be done
  * when idle is left.
  */
-static void tick_nohz_kick_tick(int cpu)
+static void tick_nohz_kick_tick(int cpu, ktime_t now)
 {
 #if 0
 	/* Switch back to 2.6.27 behaviour */
 
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
-	ktime_t delta, now;
-
-	if (!ts->tick_stopped)
-		return;
+	ktime_t delta;
 
 	/*
 	 * Do not touch the tick device, when the next expiry is either
 	 * already reached or less/equal than the tick period.
 	 */
-	now = ktime_get();
 	delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now);
 	if (delta.tv64 <= tick_period.tv64)
 		return;
@@ -608,9 +630,26 @@ static void tick_nohz_kick_tick(int cpu)
 #endif
 }
 
+static inline void tick_check_nohz(int cpu)
+{
+	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+	ktime_t now;
+
+	if (!ts->idle_active && !ts->tick_stopped)
+		return;
+	now = ktime_get();
+	if (ts->idle_active)
+		tick_nohz_stop_idle(cpu, now);
+	if (ts->tick_stopped) {
+		tick_nohz_update_jiffies(now);
+		tick_nohz_kick_tick(cpu, now);
+	}
+}
+
 #else
 
 static inline void tick_nohz_switch_to_nohz(void) { }
+static inline void tick_check_nohz(int cpu) { }
 
 #endif /* NO_HZ */
 
@@ -620,11 +659,7 @@ static inline void tick_nohz_switch_to_nohz(void) { }
 void tick_check_idle(int cpu)
 {
 	tick_check_oneshot_broadcast(cpu);
-#ifdef CONFIG_NO_HZ
-	tick_nohz_stop_idle(cpu);
-	tick_nohz_update_jiffies();
-	tick_nohz_kick_tick(cpu);
-#endif
+	tick_check_nohz(cpu);
 }
 
 /*
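
The net effect of the do_timer_last bookkeeping and the min_t() clamp above: only the CPU that last carried the do_timer() duty limits its sleep to timekeeping_max_deferment(); the others may sleep until their next pending timer. A toy userspace model of that decision (all numbers invented):

#include <stdio.h>
#include <stdint.h>

#define KTIME_MAX ((int64_t)~((uint64_t)1 << 63))

/* pick how long a CPU entering nohz idle may sleep, in ns */
static uint64_t pick_sleep_ns(int was_do_timer_cpu, uint64_t max_defer_ns,
			      uint64_t next_timer_ns)
{
	/* only the last do_timer() CPU must honour the clocksource cap */
	uint64_t time_delta = was_do_timer_cpu ? max_defer_ns
					       : (uint64_t)KTIME_MAX;

	if (next_timer_ns < time_delta)		/* min_t(u64, ...) */
		time_delta = next_timer_ns;
	return time_delta;
}

int main(void)
{
	uint64_t max_defer = 2000000000ULL;	  /* 2 s clocksource cap */
	uint64_t next_timer = 10ULL * 1000000000; /* next timer in 10 s */

	printf("last do_timer() cpu: %llu ns\n",
	       (unsigned long long)pick_sleep_ns(1, max_defer, next_timer));
	printf("any other cpu:       %llu ns\n",
	       (unsigned long long)pick_sleep_ns(0, max_defer, next_timer));
	return 0;
}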
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index c3a4e2907eaa..d1aebd73b191 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -165,13 +165,6 @@ struct timespec raw_time;
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
 
-static struct timespec xtime_cache __attribute__ ((aligned (16)));
-void update_xtime_cache(u64 nsec)
-{
-	xtime_cache = xtime;
-	timespec_add_ns(&xtime_cache, nsec);
-}
-
 /* must hold xtime_lock */
 void timekeeping_leap_insert(int leapsecond)
 {
@@ -332,8 +325,6 @@ int do_settimeofday(struct timespec *tv)
 
 	xtime = *tv;
 
-	update_xtime_cache(0);
-
 	timekeeper.ntp_error = 0;
 	ntp_clear();
 
@@ -488,6 +479,17 @@ int timekeeping_valid_for_hres(void)
 }
 
 /**
+ * timekeeping_max_deferment - Returns max time the clocksource can be deferred
+ *
+ * Caller must observe xtime_lock via read_seqbegin/read_seqretry to
+ * ensure that the clocksource does not change!
+ */
+u64 timekeeping_max_deferment(void)
+{
+	return timekeeper.clock->max_idle_ns;
+}
+
+/**
  * read_persistent_clock - Return time from the persistent clock.
  *
  * Weak dummy function for arches that do not yet support it.
@@ -548,7 +550,6 @@ void __init timekeeping_init(void)
 	}
 	set_normalized_timespec(&wall_to_monotonic,
 				-boot.tv_sec, -boot.tv_nsec);
-	update_xtime_cache(0);
 	total_sleep_time.tv_sec = 0;
 	total_sleep_time.tv_nsec = 0;
 	write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -582,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev)
 		wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
 		total_sleep_time = timespec_add_safe(total_sleep_time, ts);
 	}
-	update_xtime_cache(0);
 	/* re-base the last cycle value */
 	timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
 	timekeeper.ntp_error = 0;
@@ -723,6 +723,49 @@ static void timekeeping_adjust(s64 offset)
 }
 
 /**
+ * logarithmic_accumulation - shifted accumulation of cycles
+ *
+ * This functions accumulates a shifted interval of cycles into
+ * into a shifted interval nanoseconds. Allows for O(log) accumulation
+ * loop.
+ *
+ * Returns the unconsumed cycles.
+ */
+static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
+{
+	u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
+
+	/* If the offset is smaller then a shifted interval, do nothing */
+	if (offset < timekeeper.cycle_interval<<shift)
+		return offset;
+
+	/* Accumulate one shifted interval */
+	offset -= timekeeper.cycle_interval << shift;
+	timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift;
+
+	timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
+	while (timekeeper.xtime_nsec >= nsecps) {
+		timekeeper.xtime_nsec -= nsecps;
+		xtime.tv_sec++;
+		second_overflow();
+	}
+
+	/* Accumulate into raw time */
+	raw_time.tv_nsec += timekeeper.raw_interval << shift;;
+	while (raw_time.tv_nsec >= NSEC_PER_SEC) {
+		raw_time.tv_nsec -= NSEC_PER_SEC;
+		raw_time.tv_sec++;
+	}
+
+	/* Accumulate error between NTP and clock interval */
+	timekeeper.ntp_error += tick_length << shift;
+	timekeeper.ntp_error -= timekeeper.xtime_interval <<
+				(timekeeper.ntp_error_shift + shift);
+
+	return offset;
+}
+
+/**
  * update_wall_time - Uses the current clocksource to increment the wall time
 *
 * Called from the timer interrupt, must hold a write on xtime_lock.
@@ -731,7 +774,7 @@ void update_wall_time(void)
 {
 	struct clocksource *clock;
 	cycle_t offset;
-	u64 nsecs;
+	int shift = 0, maxshift;
 
 	/* Make sure we're fully resumed: */
 	if (unlikely(timekeeping_suspended))
@@ -745,33 +788,22 @@ void update_wall_time(void)
 #endif
 	timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift;
 
-	/* normally this loop will run just once, however in the
-	 * case of lost or late ticks, it will accumulate correctly.
+	/*
+	 * With NO_HZ we may have to accumulate many cycle_intervals
+	 * (think "ticks") worth of time at once. To do this efficiently,
+	 * we calculate the largest doubling multiple of cycle_intervals
+	 * that is smaller then the offset. We then accumulate that
+	 * chunk in one go, and then try to consume the next smaller
+	 * doubled multiple.
 	 */
+	shift = ilog2(offset) - ilog2(timekeeper.cycle_interval);
+	shift = max(0, shift);
+	/* Bound shift to one less then what overflows tick_length */
+	maxshift = (8*sizeof(tick_length) - (ilog2(tick_length)+1)) - 1;
+	shift = min(shift, maxshift);
 	while (offset >= timekeeper.cycle_interval) {
-		u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
-
-		/* accumulate one interval */
-		offset -= timekeeper.cycle_interval;
-		clock->cycle_last += timekeeper.cycle_interval;
-
-		timekeeper.xtime_nsec += timekeeper.xtime_interval;
-		if (timekeeper.xtime_nsec >= nsecps) {
-			timekeeper.xtime_nsec -= nsecps;
-			xtime.tv_sec++;
-			second_overflow();
-		}
-
-		raw_time.tv_nsec += timekeeper.raw_interval;
-		if (raw_time.tv_nsec >= NSEC_PER_SEC) {
-			raw_time.tv_nsec -= NSEC_PER_SEC;
-			raw_time.tv_sec++;
-		}
-
-		/* accumulate error between NTP and clock interval */
-		timekeeper.ntp_error += tick_length;
-		timekeeper.ntp_error -= timekeeper.xtime_interval <<
-					timekeeper.ntp_error_shift;
+		offset = logarithmic_accumulation(offset, shift);
+		shift--;
 	}
 
 	/* correct the clock when NTP error is too big */
@@ -807,9 +839,6 @@ void update_wall_time(void)
 	timekeeper.ntp_error += timekeeper.xtime_nsec <<
 				timekeeper.ntp_error_shift;
 
-	nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift);
-	update_xtime_cache(nsecs);
-
 	/* check to see if there is a new clocksource to use */
 	update_vsyscall(&xtime, timekeeper.clock);
 }
@@ -846,13 +875,13 @@ void monotonic_to_bootbased(struct timespec *ts)
 
 unsigned long get_seconds(void)
 {
-	return xtime_cache.tv_sec;
+	return xtime.tv_sec;
 }
 EXPORT_SYMBOL(get_seconds);
 
 struct timespec __current_kernel_time(void)
 {
-	return xtime_cache;
+	return xtime;
 }
 
 struct timespec current_kernel_time(void)
@@ -862,8 +891,7 @@ struct timespec current_kernel_time(void)
 
 	do {
 		seq = read_seqbegin(&xtime_lock);
-
-		now = xtime_cache;
+		now = xtime;
 	} while (read_seqretry(&xtime_lock, seq));
 
 	return now;
@@ -877,8 +905,7 @@ struct timespec get_monotonic_coarse(void)
 
 	do {
 		seq = read_seqbegin(&xtime_lock);
-
-		now = xtime_cache;
+		now = xtime;
 		mono = wall_to_monotonic;
 	} while (read_seqretry(&xtime_lock, seq));
 
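
logarithmic_accumulation() keeps update_wall_time() cheap after a long nohz sleep by consuming 2^shift tick intervals per pass instead of one. A standalone sketch of the loop (shift is guarded at zero here; the interval and offset are made up):

#include <stdio.h>
#include <stdint.h>

static int ilog2_u64(uint64_t v)	/* floor(log2(v)), v > 0 */
{
	int l = -1;

	while (v) {
		v >>= 1;
		l++;
	}
	return l;
}

/* one step modeled on logarithmic_accumulation(): consume 2^shift
 * cycle intervals at once if they fit, otherwise consume nothing */
static uint64_t accumulate(uint64_t offset, uint64_t interval, int shift,
			   uint64_t *ticks)
{
	if (offset < (interval << shift))
		return offset;
	offset -= interval << shift;
	*ticks += 1ULL << shift;
	return offset;
}

int main(void)
{
	uint64_t interval = 1000;	/* cycles per tick, made up */
	uint64_t offset = 12345678;	/* cycles slept, made up */
	uint64_t ticks = 0;
	int passes = 0;
	int shift = ilog2_u64(offset) - ilog2_u64(interval);

	if (shift < 0)
		shift = 0;
	while (offset >= interval) {
		offset = accumulate(offset, interval, shift, &ticks);
		if (shift > 0)
			shift--;
		passes++;
	}
	printf("%llu ticks in %d passes (a linear loop needs %llu)\n",
	       (unsigned long long)ticks, passes, (unsigned long long)ticks);
	return 0;
}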
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 1b5b7aa2fdfd..665c76edbf17 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -204,10 +204,12 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
 		return;
 	}
 	SEQ_printf(m, "%s\n", dev->name);
-	SEQ_printf(m, " max_delta_ns:   %lu\n", dev->max_delta_ns);
-	SEQ_printf(m, " min_delta_ns:   %lu\n", dev->min_delta_ns);
-	SEQ_printf(m, " mult:           %lu\n", dev->mult);
-	SEQ_printf(m, " shift:          %d\n", dev->shift);
+	SEQ_printf(m, " max_delta_ns:   %llu\n",
+		   (unsigned long long) dev->max_delta_ns);
+	SEQ_printf(m, " min_delta_ns:   %llu\n",
+		   (unsigned long long) dev->min_delta_ns);
+	SEQ_printf(m, " mult:           %u\n", dev->mult);
+	SEQ_printf(m, " shift:          %u\n", dev->shift);
 	SEQ_printf(m, " mode:           %d\n", dev->mode);
 	SEQ_printf(m, " next_event:     %Ld nsecs\n",
 		   (unsigned long long) ktime_to_ns(dev->next_event));