about summary refs log tree commit diff stats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/include/asm/mwait.h 43
-rw-r--r-- arch/x86/include/asm/processor.h 23
-rw-r--r-- arch/x86/include/asm/timer.h 77
-rw-r--r-- arch/x86/kernel/acpi/cstate.c 23
-rw-r--r-- arch/x86/kernel/cpu/amd.c 2
-rw-r--r-- arch/x86/kernel/cpu/intel.c 2
-rw-r--r-- arch/x86/kernel/cpu/perf_event.c 16
-rw-r--r-- arch/x86/kernel/smpboot.c 2
-rw-r--r-- arch/x86/kernel/tsc.c 318
-rw-r--r-- arch/x86/platform/uv/tlb_uv.c 66
-rw-r--r-- arch/x86/syscalls/syscall_32.tbl 2
-rw-r--r-- arch/x86/syscalls/syscall_64.tbl 2
12 files changed, 377 insertions, 199 deletions
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 2f366d0ac6b4..1da25a5f96f9 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -1,6 +1,8 @@
1#ifndef _ASM_X86_MWAIT_H 1#ifndef _ASM_X86_MWAIT_H
2#define _ASM_X86_MWAIT_H 2#define _ASM_X86_MWAIT_H
3 3
4#include <linux/sched.h>
5
4#define MWAIT_SUBSTATE_MASK 0xf 6#define MWAIT_SUBSTATE_MASK 0xf
5#define MWAIT_CSTATE_MASK 0xf 7#define MWAIT_CSTATE_MASK 0xf
6#define MWAIT_SUBSTATE_SIZE 4 8#define MWAIT_SUBSTATE_SIZE 4
@@ -13,4 +15,45 @@
13 15
14#define MWAIT_ECX_INTERRUPT_BREAK 0x1 16#define MWAIT_ECX_INTERRUPT_BREAK 0x1
15 17
18static inline void __monitor(const void *eax, unsigned long ecx,
19 unsigned long edx)
20{
21 /* "monitor %eax, %ecx, %edx;" */
22 asm volatile(".byte 0x0f, 0x01, 0xc8;"
23 :: "a" (eax), "c" (ecx), "d"(edx));
24}
25
26static inline void __mwait(unsigned long eax, unsigned long ecx)
27{
28 /* "mwait %eax, %ecx;" */
29 asm volatile(".byte 0x0f, 0x01, 0xc9;"
30 :: "a" (eax), "c" (ecx));
31}
32
33/*
34 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
35 * which can obviate IPI to trigger checking of need_resched.
36 * We execute MONITOR against need_resched and enter optimized wait state
37 * through MWAIT. Whenever someone changes need_resched, we would be woken
38 * up from MWAIT (without an IPI).
39 *
40 * New with Core Duo processors, MWAIT can take some hints based on CPU
41 * capability.
42 */
43static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
44{
45 if (!current_set_polling_and_test()) {
46 if (static_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) {
47 mb();
48 clflush((void *)&current_thread_info()->flags);
49 mb();
50 }
51
52 __monitor((void *)&current_thread_info()->flags, 0, 0);
53 if (!need_resched())
54 __mwait(eax, ecx);
55 }
56 current_clr_polling();
57}
58
16#endif /* _ASM_X86_MWAIT_H */ 59#endif /* _ASM_X86_MWAIT_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 7b034a4057f9..24821f5768bc 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -700,29 +700,6 @@ static inline void sync_core(void)
700#endif 700#endif
701} 701}
702 702
703static inline void __monitor(const void *eax, unsigned long ecx,
704 unsigned long edx)
705{
706 /* "monitor %eax, %ecx, %edx;" */
707 asm volatile(".byte 0x0f, 0x01, 0xc8;"
708 :: "a" (eax), "c" (ecx), "d"(edx));
709}
710
711static inline void __mwait(unsigned long eax, unsigned long ecx)
712{
713 /* "mwait %eax, %ecx;" */
714 asm volatile(".byte 0x0f, 0x01, 0xc9;"
715 :: "a" (eax), "c" (ecx));
716}
717
718static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
719{
720 trace_hardirqs_on();
721 /* "mwait %eax, %ecx;" */
722 asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
723 :: "a" (eax), "c" (ecx));
724}
725
726extern void select_idle_routine(const struct cpuinfo_x86 *c); 703extern void select_idle_routine(const struct cpuinfo_x86 *c);
727extern void init_amd_e400_c1e_mask(void); 704extern void init_amd_e400_c1e_mask(void);
728 705
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 34baa0eb5d0c..3de54ef0aea5 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -4,6 +4,7 @@
4#include <linux/pm.h> 4#include <linux/pm.h>
5#include <linux/percpu.h> 5#include <linux/percpu.h>
6#include <linux/interrupt.h> 6#include <linux/interrupt.h>
7#include <linux/math64.h>
7 8
8#define TICK_SIZE (tick_nsec / 1000) 9#define TICK_SIZE (tick_nsec / 1000)
9 10
@@ -12,68 +13,26 @@ extern int recalibrate_cpu_khz(void);
12 13
13extern int no_timer_check; 14extern int no_timer_check;
14 15
15/* Accelerators for sched_clock() 16/*
16 * convert from cycles(64bits) => nanoseconds (64bits) 17 * We use the full linear equation: f(x) = a + b*x, in order to allow
17 * basic equation: 18 * a continuous function in the face of dynamic freq changes.
18 * ns = cycles / (freq / ns_per_sec)
19 * ns = cycles * (ns_per_sec / freq)
20 * ns = cycles * (10^9 / (cpu_khz * 10^3))
21 * ns = cycles * (10^6 / cpu_khz)
22 * 19 *
23 * Then we use scaling math (suggested by george@mvista.com) to get: 20 * Continuity means that when our frequency changes our slope (b); we want to
24 * ns = cycles * (10^6 * SC / cpu_khz) / SC 21 * ensure that: f(t) == f'(t), which gives: a + b*t == a' + b'*t.
25 * ns = cycles * cyc2ns_scale / SC
26 * 22 *
27 * And since SC is a constant power of two, we can convert the div 23 * Without an offset (a) the above would not be possible.
28 * into a shift.
29 * 24 *
30 * We can use khz divisor instead of mhz to keep a better precision, since 25 * See the comment near cycles_2_ns() for details on how we compute (b).
31 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
32 * (mathieu.desnoyers@polymtl.ca)
33 *
34 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
35 *
36 * In:
37 *
38 * ns = cycles * cyc2ns_scale / SC
39 *
40 * Although we may still have enough bits to store the value of ns,
41 * in some cases, we may not have enough bits to store cycles * cyc2ns_scale,
42 * leading to an incorrect result.
43 *
44 * To avoid this, we can decompose 'cycles' into quotient and remainder
45 * of division by SC. Then,
46 *
47 * ns = (quot * SC + rem) * cyc2ns_scale / SC
48 * = quot * cyc2ns_scale + (rem * cyc2ns_scale) / SC
49 *
50 * - sqazi@google.com
51 */ 26 */
52 27struct cyc2ns_data {
53DECLARE_PER_CPU(unsigned long, cyc2ns); 28 u32 cyc2ns_mul;
54DECLARE_PER_CPU(unsigned long long, cyc2ns_offset); 29 u32 cyc2ns_shift;
55 30 u64 cyc2ns_offset;
56#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ 31 u32 __count;
57 32 /* u32 hole */
58static inline unsigned long long __cycles_2_ns(unsigned long long cyc) 33}; /* 24 bytes -- do not grow */
59{ 34
60 int cpu = smp_processor_id(); 35extern struct cyc2ns_data *cyc2ns_read_begin(void);
61 unsigned long long ns = per_cpu(cyc2ns_offset, cpu); 36extern void cyc2ns_read_end(struct cyc2ns_data *);
62 ns += mult_frac(cyc, per_cpu(cyc2ns, cpu),
63 (1UL << CYC2NS_SCALE_FACTOR));
64 return ns;
65}
66
67static inline unsigned long long cycles_2_ns(unsigned long long cyc)
68{
69 unsigned long long ns;
70 unsigned long flags;
71
72 local_irq_save(flags);
73 ns = __cycles_2_ns(cyc);
74 local_irq_restore(flags);
75
76 return ns;
77}
78 37
79#endif /* _ASM_X86_TIMER_H */ 38#endif /* _ASM_X86_TIMER_H */
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index d2b7f27781bc..e69182fd01cf 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -150,29 +150,6 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
150} 150}
151EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); 151EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
152 152
153/*
154 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
155 * which can obviate IPI to trigger checking of need_resched.
156 * We execute MONITOR against need_resched and enter optimized wait state
157 * through MWAIT. Whenever someone changes need_resched, we would be woken
158 * up from MWAIT (without an IPI).
159 *
160 * New with Core Duo processors, MWAIT can take some hints based on CPU
161 * capability.
162 */
163void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
164{
165 if (!need_resched()) {
166 if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
167 clflush((void *)&current_thread_info()->flags);
168
169 __monitor((void *)&current_thread_info()->flags, 0, 0);
170 smp_mb();
171 if (!need_resched())
172 __mwait(ax, cx);
173 }
174}
175
176void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) 153void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
177{ 154{
178 unsigned int cpu = smp_processor_id(); 155 unsigned int cpu = smp_processor_id();
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bca023bdd6b2..8bc79cddd9a2 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -487,7 +487,7 @@ static void early_init_amd(struct cpuinfo_x86 *c)
487 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 487 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
488 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); 488 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
489 if (!check_tsc_unstable()) 489 if (!check_tsc_unstable())
490 sched_clock_stable = 1; 490 set_sched_clock_stable();
491 } 491 }
492 492
493#ifdef CONFIG_X86_64 493#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index ea04b342c026..1a439c047ff3 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -93,7 +93,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
93 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 93 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
94 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); 94 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
95 if (!check_tsc_unstable()) 95 if (!check_tsc_unstable())
96 sched_clock_stable = 1; 96 set_sched_clock_stable();
97 } 97 }
98 98
99 /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ 99 /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8e132931614d..b88645191fe5 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1883,21 +1883,27 @@ static struct pmu pmu = {
1883 1883
1884void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) 1884void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
1885{ 1885{
1886 struct cyc2ns_data *data;
1887
1886 userpg->cap_user_time = 0; 1888 userpg->cap_user_time = 0;
1887 userpg->cap_user_time_zero = 0; 1889 userpg->cap_user_time_zero = 0;
1888 userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc; 1890 userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
1889 userpg->pmc_width = x86_pmu.cntval_bits; 1891 userpg->pmc_width = x86_pmu.cntval_bits;
1890 1892
1891 if (!sched_clock_stable) 1893 if (!sched_clock_stable())
1892 return; 1894 return;
1893 1895
1896 data = cyc2ns_read_begin();
1897
1894 userpg->cap_user_time = 1; 1898 userpg->cap_user_time = 1;
1895 userpg->time_mult = this_cpu_read(cyc2ns); 1899 userpg->time_mult = data->cyc2ns_mul;
1896 userpg->time_shift = CYC2NS_SCALE_FACTOR; 1900 userpg->time_shift = data->cyc2ns_shift;
1897 userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; 1901 userpg->time_offset = data->cyc2ns_offset - now;
1898 1902
1899 userpg->cap_user_time_zero = 1; 1903 userpg->cap_user_time_zero = 1;
1900 userpg->time_zero = this_cpu_read(cyc2ns_offset); 1904 userpg->time_zero = data->cyc2ns_offset;
1905
1906 cyc2ns_read_end(data);
1901} 1907}
1902 1908
1903/* 1909/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 85dc05a3aa02..f5252c4eec8c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1417,7 +1417,9 @@ static inline void mwait_play_dead(void)
1417 * The WBINVD is insufficient due to the spurious-wakeup 1417 * The WBINVD is insufficient due to the spurious-wakeup
1418 * case where we return around the loop. 1418 * case where we return around the loop.
1419 */ 1419 */
1420 mb();
1420 clflush(mwait_ptr); 1421 clflush(mwait_ptr);
1422 mb();
1421 __monitor(mwait_ptr, 0, 0); 1423 __monitor(mwait_ptr, 0, 0);
1422 mb(); 1424 mb();
1423 __mwait(eax, 0); 1425 __mwait(eax, 0);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 930e5d48f560..6377fb28b958 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -11,6 +11,7 @@
11#include <linux/clocksource.h> 11#include <linux/clocksource.h>
12#include <linux/percpu.h> 12#include <linux/percpu.h>
13#include <linux/timex.h> 13#include <linux/timex.h>
14#include <linux/static_key.h>
14 15
15#include <asm/hpet.h> 16#include <asm/hpet.h>
16#include <asm/timer.h> 17#include <asm/timer.h>
@@ -37,13 +38,244 @@ static int __read_mostly tsc_unstable;
37 erroneous rdtsc usage on !cpu_has_tsc processors */ 38 erroneous rdtsc usage on !cpu_has_tsc processors */
38static int __read_mostly tsc_disabled = -1; 39static int __read_mostly tsc_disabled = -1;
39 40
41static struct static_key __use_tsc = STATIC_KEY_INIT;
42
40int tsc_clocksource_reliable; 43int tsc_clocksource_reliable;
44
45/*
46 * Use a ring-buffer like data structure, where a writer advances the head by
47 * writing a new data entry and a reader advances the tail when it observes a
48 * new entry.
49 *
50 * Writers are made to wait on readers until there's space to write a new
51 * entry.
52 *
53 * This means that we can always use an {offset, mul} pair to compute a ns
54 * value that is 'roughly' in the right direction, even if we're writing a new
55 * {offset, mul} pair during the clock read.
56 *
57 * The down-side is that we can no longer guarantee strict monotonicity
58 * (assuming the TSC was that to begin with), because while we compute the
59 * intersection point of the two clock slopes and make sure the time is
60 * continuous at the point of switching; we can no longer guarantee a reader is
61 * strictly before or after the switch point.
62 *
63 * It does mean a reader no longer needs to disable IRQs in order to avoid
64 * CPU-Freq updates messing with his times, and similarly an NMI reader will
65 * no longer run the risk of hitting half-written state.
66 */
67
68struct cyc2ns {
69 struct cyc2ns_data data[2]; /* 0 + 2*24 = 48 */
70 struct cyc2ns_data *head; /* 48 + 8 = 56 */
71 struct cyc2ns_data *tail; /* 56 + 8 = 64 */
72}; /* exactly fits one cacheline */
73
74static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
75
76struct cyc2ns_data *cyc2ns_read_begin(void)
77{
78 struct cyc2ns_data *head;
79
80 preempt_disable();
81
82 head = this_cpu_read(cyc2ns.head);
83 /*
84 * Ensure we observe the entry when we observe the pointer to it.
85 * matches the wmb from cyc2ns_write_end().
86 */
87 smp_read_barrier_depends();
88 head->__count++;
89 barrier();
90
91 return head;
92}
93
94void cyc2ns_read_end(struct cyc2ns_data *head)
95{
96 barrier();
97 /*
98 * If we're the outermost nested read, update the tail pointer
99 * when we're done. This notifies possible pending writers
100 * that we've observed the head pointer and that the other
101 * entry is now free.
102 */
103 if (!--head->__count) {
104 /*
105 * x86-TSO does not reorder writes with older reads;
106 * therefore once this write becomes visible to another
107 * cpu, we must be finished reading the cyc2ns_data.
108 *
109 * matches with cyc2ns_write_begin().
110 */
111 this_cpu_write(cyc2ns.tail, head);
112 }
113 preempt_enable();
114}
115
116/*
117 * Begin writing a new @data entry for @cpu.
118 *
119 * Assumes some sort of write side lock; currently 'provided' by the assumption
120 * that cpufreq will call its notifiers sequentially.
121 */
122static struct cyc2ns_data *cyc2ns_write_begin(int cpu)
123{
124 struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
125 struct cyc2ns_data *data = c2n->data;
126
127 if (data == c2n->head)
128 data++;
129
130 /* XXX send an IPI to @cpu in order to guarantee a read? */
131
132 /*
133 * When we observe the tail write from cyc2ns_read_end(),
134 * the cpu must be done with that entry and its safe
135 * to start writing to it.
136 */
137 while (c2n->tail == data)
138 cpu_relax();
139
140 return data;
141}
142
143static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data)
144{
145 struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
146
147 /*
148 * Ensure the @data writes are visible before we publish the
149 * entry. Matches the data-dependency in cyc2ns_read_begin().
150 */
151 smp_wmb();
152
153 ACCESS_ONCE(c2n->head) = data;
154}
155
156/*
157 * Accelerators for sched_clock()
158 * convert from cycles(64bits) => nanoseconds (64bits)
159 * basic equation:
160 * ns = cycles / (freq / ns_per_sec)
161 * ns = cycles * (ns_per_sec / freq)
162 * ns = cycles * (10^9 / (cpu_khz * 10^3))
163 * ns = cycles * (10^6 / cpu_khz)
164 *
165 * Then we use scaling math (suggested by george@mvista.com) to get:
166 * ns = cycles * (10^6 * SC / cpu_khz) / SC
167 * ns = cycles * cyc2ns_scale / SC
168 *
169 * And since SC is a constant power of two, we can convert the div
170 * into a shift.
171 *
172 * We can use khz divisor instead of mhz to keep a better precision, since
173 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
174 * (mathieu.desnoyers@polymtl.ca)
175 *
176 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
177 */
178
179#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
180
181static void cyc2ns_data_init(struct cyc2ns_data *data)
182{
183 data->cyc2ns_mul = 1U << CYC2NS_SCALE_FACTOR;
184 data->cyc2ns_shift = CYC2NS_SCALE_FACTOR;
185 data->cyc2ns_offset = 0;
186 data->__count = 0;
187}
188
189static void cyc2ns_init(int cpu)
190{
191 struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
192
193 cyc2ns_data_init(&c2n->data[0]);
194 cyc2ns_data_init(&c2n->data[1]);
195
196 c2n->head = c2n->data;
197 c2n->tail = c2n->data;
198}
199
200static inline unsigned long long cycles_2_ns(unsigned long long cyc)
201{
202 struct cyc2ns_data *data, *tail;
203 unsigned long long ns;
204
205 /*
206 * See cyc2ns_read_*() for details; replicated in order to avoid
207 * an extra few instructions that came with the abstraction.
208 * Notably, it allows us to only do the __count and tail update
209 * dance when it's actually needed.
210 */
211
212 preempt_disable();
213 data = this_cpu_read(cyc2ns.head);
214 tail = this_cpu_read(cyc2ns.tail);
215
216 if (likely(data == tail)) {
217 ns = data->cyc2ns_offset;
218 ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR);
219 } else {
220 data->__count++;
221
222 barrier();
223
224 ns = data->cyc2ns_offset;
225 ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR);
226
227 barrier();
228
229 if (!--data->__count)
230 this_cpu_write(cyc2ns.tail, data);
231 }
232 preempt_enable();
233
234 return ns;
235}
236
237/* XXX surely we already have this someplace in the kernel?! */
238#define DIV_ROUND(n, d) (((n) + ((d) / 2)) / (d))
239
240static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
241{
242 unsigned long long tsc_now, ns_now;
243 struct cyc2ns_data *data;
244 unsigned long flags;
245
246 local_irq_save(flags);
247 sched_clock_idle_sleep_event();
248
249 if (!cpu_khz)
250 goto done;
251
252 data = cyc2ns_write_begin(cpu);
253
254 rdtscll(tsc_now);
255 ns_now = cycles_2_ns(tsc_now);
256
257 /*
258 * Compute a new multiplier as per the above comment and ensure our
259 * time function is continuous; see the comment near struct
260 * cyc2ns_data.
261 */
262 data->cyc2ns_mul = DIV_ROUND(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR, cpu_khz);
263 data->cyc2ns_shift = CYC2NS_SCALE_FACTOR;
264 data->cyc2ns_offset = ns_now -
265 mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR);
266
267 cyc2ns_write_end(cpu, data);
268
269done:
270 sched_clock_idle_wakeup_event(0);
271 local_irq_restore(flags);
272}
41/* 273/*
42 * Scheduler clock - returns current time in nanosec units. 274 * Scheduler clock - returns current time in nanosec units.
43 */ 275 */
44u64 native_sched_clock(void) 276u64 native_sched_clock(void)
45{ 277{
46 u64 this_offset; 278 u64 tsc_now;
47 279
48 /* 280 /*
49 * Fall back to jiffies if there's no TSC available: 281 * Fall back to jiffies if there's no TSC available:
@@ -53,16 +285,16 @@ u64 native_sched_clock(void)
53 * very important for it to be as fast as the platform 285 * very important for it to be as fast as the platform
54 * can achieve it. ) 286 * can achieve it. )
55 */ 287 */
56 if (unlikely(tsc_disabled)) { 288 if (!static_key_false(&__use_tsc)) {
57 /* No locking but a rare wrong value is not a big deal: */ 289 /* No locking but a rare wrong value is not a big deal: */
58 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); 290 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
59 } 291 }
60 292
61 /* read the Time Stamp Counter: */ 293 /* read the Time Stamp Counter: */
62 rdtscll(this_offset); 294 rdtscll(tsc_now);
63 295
64 /* return the value in ns */ 296 /* return the value in ns */
65 return __cycles_2_ns(this_offset); 297 return cycles_2_ns(tsc_now);
66} 298}
67 299
68/* We need to define a real function for sched_clock, to override the 300/* We need to define a real function for sched_clock, to override the
@@ -589,61 +821,11 @@ int recalibrate_cpu_khz(void)
589EXPORT_SYMBOL(recalibrate_cpu_khz); 821EXPORT_SYMBOL(recalibrate_cpu_khz);
590 822
591 823
592/* Accelerators for sched_clock()
593 * convert from cycles(64bits) => nanoseconds (64bits)
594 * basic equation:
595 * ns = cycles / (freq / ns_per_sec)
596 * ns = cycles * (ns_per_sec / freq)
597 * ns = cycles * (10^9 / (cpu_khz * 10^3))
598 * ns = cycles * (10^6 / cpu_khz)
599 *
600 * Then we use scaling math (suggested by george@mvista.com) to get:
601 * ns = cycles * (10^6 * SC / cpu_khz) / SC
602 * ns = cycles * cyc2ns_scale / SC
603 *
604 * And since SC is a constant power of two, we can convert the div
605 * into a shift.
606 *
607 * We can use khz divisor instead of mhz to keep a better precision, since
608 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
609 * (mathieu.desnoyers@polymtl.ca)
610 *
611 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
612 */
613
614DEFINE_PER_CPU(unsigned long, cyc2ns);
615DEFINE_PER_CPU(unsigned long long, cyc2ns_offset);
616
617static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
618{
619 unsigned long long tsc_now, ns_now, *offset;
620 unsigned long flags, *scale;
621
622 local_irq_save(flags);
623 sched_clock_idle_sleep_event();
624
625 scale = &per_cpu(cyc2ns, cpu);
626 offset = &per_cpu(cyc2ns_offset, cpu);
627
628 rdtscll(tsc_now);
629 ns_now = __cycles_2_ns(tsc_now);
630
631 if (cpu_khz) {
632 *scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) +
633 cpu_khz / 2) / cpu_khz;
634 *offset = ns_now - mult_frac(tsc_now, *scale,
635 (1UL << CYC2NS_SCALE_FACTOR));
636 }
637
638 sched_clock_idle_wakeup_event(0);
639 local_irq_restore(flags);
640}
641
642static unsigned long long cyc2ns_suspend; 824static unsigned long long cyc2ns_suspend;
643 825
644void tsc_save_sched_clock_state(void) 826void tsc_save_sched_clock_state(void)
645{ 827{
646 if (!sched_clock_stable) 828 if (!sched_clock_stable())
647 return; 829 return;
648 830
649 cyc2ns_suspend = sched_clock(); 831 cyc2ns_suspend = sched_clock();
@@ -663,16 +845,26 @@ void tsc_restore_sched_clock_state(void)
663 unsigned long flags; 845 unsigned long flags;
664 int cpu; 846 int cpu;
665 847
666 if (!sched_clock_stable) 848 if (!sched_clock_stable())
667 return; 849 return;
668 850
669 local_irq_save(flags); 851 local_irq_save(flags);
670 852
671 __this_cpu_write(cyc2ns_offset, 0); 853 /*
854 * We're coming out of suspend, there's no concurrency yet; don't
855 * bother being nice about the RCU stuff, just write to both
856 * data fields.
857 */
858
859 this_cpu_write(cyc2ns.data[0].cyc2ns_offset, 0);
860 this_cpu_write(cyc2ns.data[1].cyc2ns_offset, 0);
861
672 offset = cyc2ns_suspend - sched_clock(); 862 offset = cyc2ns_suspend - sched_clock();
673 863
674 for_each_possible_cpu(cpu) 864 for_each_possible_cpu(cpu) {
675 per_cpu(cyc2ns_offset, cpu) = offset; 865 per_cpu(cyc2ns.data[0].cyc2ns_offset, cpu) = offset;
866 per_cpu(cyc2ns.data[1].cyc2ns_offset, cpu) = offset;
867 }
676 868
677 local_irq_restore(flags); 869 local_irq_restore(flags);
678} 870}
@@ -795,7 +987,7 @@ void mark_tsc_unstable(char *reason)
795{ 987{
796 if (!tsc_unstable) { 988 if (!tsc_unstable) {
797 tsc_unstable = 1; 989 tsc_unstable = 1;
798 sched_clock_stable = 0; 990 clear_sched_clock_stable();
799 disable_sched_clock_irqtime(); 991 disable_sched_clock_irqtime();
800 pr_info("Marking TSC unstable due to %s\n", reason); 992 pr_info("Marking TSC unstable due to %s\n", reason);
801 /* Change only the rating, when not registered */ 993 /* Change only the rating, when not registered */
@@ -995,14 +1187,18 @@ void __init tsc_init(void)
995 * speed as the bootup CPU. (cpufreq notifiers will fix this 1187 * speed as the bootup CPU. (cpufreq notifiers will fix this
996 * up if their speed diverges) 1188 * up if their speed diverges)
997 */ 1189 */
998 for_each_possible_cpu(cpu) 1190 for_each_possible_cpu(cpu) {
1191 cyc2ns_init(cpu);
999 set_cyc2ns_scale(cpu_khz, cpu); 1192 set_cyc2ns_scale(cpu_khz, cpu);
1193 }
1000 1194
1001 if (tsc_disabled > 0) 1195 if (tsc_disabled > 0)
1002 return; 1196 return;
1003 1197
1004 /* now allow native_sched_clock() to use rdtsc */ 1198 /* now allow native_sched_clock() to use rdtsc */
1199
1005 tsc_disabled = 0; 1200 tsc_disabled = 0;
1201 static_key_slow_inc(&__use_tsc);
1006 1202
1007 if (!no_sched_irq_time) 1203 if (!no_sched_irq_time)
1008 enable_sched_clock_irqtime(); 1204 enable_sched_clock_irqtime();
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index efe4d7220397..dfe605ac1bcd 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -433,15 +433,49 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp)
433 return; 433 return;
434} 434}
435 435
436static inline unsigned long cycles_2_us(unsigned long long cyc) 436/*
437 * Not to be confused with cycles_2_ns() from tsc.c; this gives a relative
438 * number, not an absolute. It converts a duration in cycles to a duration in
439 * ns.
440 */
441static inline unsigned long long cycles_2_ns(unsigned long long cyc)
437{ 442{
443 struct cyc2ns_data *data = cyc2ns_read_begin();
438 unsigned long long ns; 444 unsigned long long ns;
439 unsigned long us;
440 int cpu = smp_processor_id();
441 445
442 ns = (cyc * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR; 446 ns = mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
443 us = ns / 1000; 447
444 return us; 448 cyc2ns_read_end(data);
449 return ns;
450}
451
452/*
453 * The reverse of the above; converts a duration in ns to a duration in cycles.
454 */
455static inline unsigned long long ns_2_cycles(unsigned long long ns)
456{
457 struct cyc2ns_data *data = cyc2ns_read_begin();
458 unsigned long long cyc;
459
460 cyc = (ns << data->cyc2ns_shift) / data->cyc2ns_mul;
461
462 cyc2ns_read_end(data);
463 return cyc;
464}
465
466static inline unsigned long cycles_2_us(unsigned long long cyc)
467{
468 return cycles_2_ns(cyc) / NSEC_PER_USEC;
469}
470
471static inline cycles_t sec_2_cycles(unsigned long sec)
472{
473 return ns_2_cycles(sec * NSEC_PER_SEC);
474}
475
476static inline unsigned long long usec_2_cycles(unsigned long usec)
477{
478 return ns_2_cycles(usec * NSEC_PER_USEC);
445} 479}
446 480
447/* 481/*
@@ -668,16 +702,6 @@ static int wait_completion(struct bau_desc *bau_desc,
668 bcp, try); 702 bcp, try);
669} 703}
670 704
671static inline cycles_t sec_2_cycles(unsigned long sec)
672{
673 unsigned long ns;
674 cycles_t cyc;
675
676 ns = sec * 1000000000;
677 cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id()));
678 return cyc;
679}
680
681/* 705/*
682 * Our retries are blocked by all destination sw ack resources being 706 * Our retries are blocked by all destination sw ack resources being
683 * in use, and a timeout is pending. In that case hardware immediately 707 * in use, and a timeout is pending. In that case hardware immediately
@@ -1327,16 +1351,6 @@ static void ptc_seq_stop(struct seq_file *file, void *data)
1327{ 1351{
1328} 1352}
1329 1353
1330static inline unsigned long long usec_2_cycles(unsigned long microsec)
1331{
1332 unsigned long ns;
1333 unsigned long long cyc;
1334
1335 ns = microsec * 1000;
1336 cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id()));
1337 return cyc;
1338}
1339
1340/* 1354/*
1341 * Display the statistics thru /proc/sgi_uv/ptc_statistics 1355 * Display the statistics thru /proc/sgi_uv/ptc_statistics
1342 * 'data' points to the cpu number 1356 * 'data' points to the cpu number
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index aabfb8380a1c..96bc506ac6de 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -357,3 +357,5 @@
357348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev 357348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
358349 i386 kcmp sys_kcmp 358349 i386 kcmp sys_kcmp
359350 i386 finit_module sys_finit_module 359350 i386 finit_module sys_finit_module
360351 i386 sched_setattr sys_sched_setattr
361352 i386 sched_getattr sys_sched_getattr
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 38ae65dfd14f..a12bddc7ccea 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -320,6 +320,8 @@
320311 64 process_vm_writev sys_process_vm_writev 320311 64 process_vm_writev sys_process_vm_writev
321312 common kcmp sys_kcmp 321312 common kcmp sys_kcmp
322313 common finit_module sys_finit_module 322313 common finit_module sys_finit_module
323314 common sched_setattr sys_sched_setattr
324315 common sched_getattr sys_sched_getattr
323 325
324# 326#
325# x32-specific system call numbers start at 512 to avoid cache impact 327# x32-specific system call numbers start at 512 to avoid cache impact