diff options
-rw-r--r-- | arch/x86_64/kernel/head.S | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/time.c | 13 | ||||
-rw-r--r-- | arch/x86_64/kernel/vmlinux.lds.S | 3 | ||||
-rw-r--r-- | arch/x86_64/kernel/vsyscall.c | 84 | ||||
-rw-r--r-- | include/asm-x86_64/segment.h | 5 | ||||
-rw-r--r-- | include/asm-x86_64/smp.h | 12 | ||||
-rw-r--r-- | include/asm-x86_64/vsyscall.h | 9 | ||||
-rw-r--r-- | include/linux/getcpu.h | 16 |
8 files changed, 130 insertions, 14 deletions
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index c9739ca81d06..505ec4a57506 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S | |||
@@ -371,7 +371,7 @@ ENTRY(cpu_gdt_table) | |||
371 | .quad 0,0 /* TSS */ | 371 | .quad 0,0 /* TSS */ |
372 | .quad 0,0 /* LDT */ | 372 | .quad 0,0 /* LDT */ |
373 | .quad 0,0,0 /* three TLS descriptors */ | 373 | .quad 0,0,0 /* three TLS descriptors */ |
374 | .quad 0 /* unused */ | 374 | .quad 0x0000f40000000000 /* node/CPU stored in limit */ |
375 | gdt_end: | 375 | gdt_end: |
376 | /* asm/segment.h:GDT_ENTRIES must match this */ | 376 | /* asm/segment.h:GDT_ENTRIES must match this */ |
377 | /* This should be a multiple of the cache line size */ | 377 | /* This should be a multiple of the cache line size */ |
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 97b9e46d1992..560ed944dc0e 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c | |||
@@ -899,12 +899,8 @@ static int __cpuinit | |||
899 | time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu) | 899 | time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu) |
900 | { | 900 | { |
901 | unsigned cpu = (unsigned long) hcpu; | 901 | unsigned cpu = (unsigned long) hcpu; |
902 | if (action == CPU_ONLINE && | 902 | if (action == CPU_ONLINE) |
903 | cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) { | 903 | vsyscall_set_cpu(cpu); |
904 | unsigned p; | ||
905 | p = smp_processor_id() | (cpu_to_node(smp_processor_id())<<12); | ||
906 | write_rdtscp_aux(p); | ||
907 | } | ||
908 | return NOTIFY_DONE; | 904 | return NOTIFY_DONE; |
909 | } | 905 | } |
910 | 906 | ||
@@ -993,6 +989,11 @@ void time_init_gtod(void) | |||
993 | if (unsynchronized_tsc()) | 989 | if (unsynchronized_tsc()) |
994 | notsc = 1; | 990 | notsc = 1; |
995 | 991 | ||
992 | if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) | ||
993 | vgetcpu_mode = VGETCPU_RDTSCP; | ||
994 | else | ||
995 | vgetcpu_mode = VGETCPU_LSL; | ||
996 | |||
996 | if (vxtime.hpet_address && notsc) { | 997 | if (vxtime.hpet_address && notsc) { |
997 | timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; | 998 | timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; |
998 | if (hpet_use_timer) | 999 | if (hpet_use_timer) |
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 7c4de31471d4..8d5a5149bb3a 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S | |||
@@ -99,6 +99,9 @@ SECTIONS | |||
99 | .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) } | 99 | .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) } |
100 | vxtime = VVIRT(.vxtime); | 100 | vxtime = VVIRT(.vxtime); |
101 | 101 | ||
102 | .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) } | ||
103 | vgetcpu_mode = VVIRT(.vgetcpu_mode); | ||
104 | |||
102 | .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) } | 105 | .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) } |
103 | wall_jiffies = VVIRT(.wall_jiffies); | 106 | wall_jiffies = VVIRT(.wall_jiffies); |
104 | 107 | ||
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index f603037df162..902783bc4d53 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/seqlock.h> | 26 | #include <linux/seqlock.h> |
27 | #include <linux/jiffies.h> | 27 | #include <linux/jiffies.h> |
28 | #include <linux/sysctl.h> | 28 | #include <linux/sysctl.h> |
29 | #include <linux/getcpu.h> | ||
29 | 30 | ||
30 | #include <asm/vsyscall.h> | 31 | #include <asm/vsyscall.h> |
31 | #include <asm/pgtable.h> | 32 | #include <asm/pgtable.h> |
@@ -33,11 +34,15 @@ | |||
33 | #include <asm/fixmap.h> | 34 | #include <asm/fixmap.h> |
34 | #include <asm/errno.h> | 35 | #include <asm/errno.h> |
35 | #include <asm/io.h> | 36 | #include <asm/io.h> |
37 | #include <asm/segment.h> | ||
38 | #include <asm/desc.h> | ||
39 | #include <asm/topology.h> | ||
36 | 40 | ||
37 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) | 41 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) |
38 | 42 | ||
39 | int __sysctl_vsyscall __section_sysctl_vsyscall = 1; | 43 | int __sysctl_vsyscall __section_sysctl_vsyscall = 1; |
40 | seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; | 44 | seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; |
45 | int __vgetcpu_mode __section_vgetcpu_mode; | ||
41 | 46 | ||
42 | #include <asm/unistd.h> | 47 | #include <asm/unistd.h> |
43 | 48 | ||
@@ -127,9 +132,46 @@ time_t __vsyscall(1) vtime(time_t *t) | |||
127 | return __xtime.tv_sec; | 132 | return __xtime.tv_sec; |
128 | } | 133 | } |
129 | 134 | ||
130 | long __vsyscall(2) venosys_0(void) | 135 | /* Fast way to get current CPU and node. |
136 | This helps to do per node and per CPU caches in user space. | ||
137 | The result is not guaranteed without CPU affinity, but usually | ||
138 | works out because the scheduler tries to keep a thread on the same | ||
139 | CPU. | ||
140 | |||
141 | tcache must point to a per-thread struct getcpu_cache (see linux/getcpu.h). | ||
142 | All arguments can be NULL. */ | ||
143 | long __vsyscall(2) | ||
144 | vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) | ||
131 | { | 145 | { |
132 | return -ENOSYS; | 146 | unsigned int dummy, p; |
147 | unsigned long j = 0; | ||
148 | |||
149 | /* Fast cache - only recompute value once per jiffy and avoid | ||
150 | relatively costly rdtscp/lsl otherwise. | ||
151 | This works because the scheduler usually keeps the process | ||
152 | on the same CPU and this syscall doesn't guarantee its | ||
153 | results anyway. | ||
154 | We do this here because otherwise user space would do it on | ||
155 | its own in a likely inferior way (no access to jiffies). | ||
156 | If you don't like it pass NULL. */ | ||
157 | if (tcache && tcache->t0 == (j = __jiffies)) { | ||
158 | p = tcache->t1; | ||
159 | } else if (__vgetcpu_mode == VGETCPU_RDTSCP) { | ||
160 | /* Load per CPU data from RDTSCP */ | ||
161 | rdtscp(dummy, dummy, p); | ||
162 | } else { | ||
163 | /* Load per CPU data from GDT */ | ||
164 | asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); | ||
165 | } | ||
166 | if (tcache) { | ||
167 | tcache->t0 = j; | ||
168 | tcache->t1 = p; | ||
169 | } | ||
170 | if (cpu) | ||
171 | *cpu = p & 0xfff; | ||
172 | if (node) | ||
173 | *node = p >> 12; | ||
174 | return 0; | ||
133 | } | 175 | } |
134 | 176 | ||
135 | long __vsyscall(3) venosys_1(void) | 177 | long __vsyscall(3) venosys_1(void) |
@@ -200,6 +242,43 @@ static ctl_table kernel_root_table2[] = { | |||
200 | 242 | ||
201 | #endif | 243 | #endif |
202 | 244 | ||
245 | static void __cpuinit write_rdtscp_cb(void *info) | ||
246 | { | ||
247 | write_rdtscp_aux((unsigned long)info); | ||
248 | } | ||
249 | |||
250 | void __cpuinit vsyscall_set_cpu(int cpu) | ||
251 | { | ||
252 | unsigned long *d; | ||
253 | unsigned long node = 0; | ||
254 | #ifdef CONFIG_NUMA | ||
255 | node = cpu_to_node[cpu]; | ||
256 | #endif | ||
257 | if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) { | ||
258 | void *info = (void *)((node << 12) | cpu); | ||
259 | /* Can happen on preemptive kernel */ | ||
260 | if (get_cpu() == cpu) | ||
261 | write_rdtscp_cb(info); | ||
262 | #ifdef CONFIG_SMP | ||
263 | else { | ||
264 | /* the notifier is unfortunately not executed on the | ||
265 | target CPU */ | ||
266 | smp_call_function_single(cpu,write_rdtscp_cb,info,0,1); | ||
267 | } | ||
268 | #endif | ||
269 | put_cpu(); | ||
270 | } | ||
271 | |||
272 | /* Store cpu number in limit so that it can be loaded quickly | ||
273 | in user space in vgetcpu. | ||
274 | 12 bits for the CPU and 8 bits for the node. */ | ||
275 | d = (unsigned long *)(cpu_gdt(cpu) + GDT_ENTRY_PER_CPU); | ||
276 | *d = 0x0f40000000000ULL; | ||
277 | *d |= cpu; | ||
278 | *d |= (node & 0xf) << 12; | ||
279 | *d |= (node >> 4) << 48; | ||
280 | } | ||
281 | |||
203 | static void __init map_vsyscall(void) | 282 | static void __init map_vsyscall(void) |
204 | { | 283 | { |
205 | extern char __vsyscall_0; | 284 | extern char __vsyscall_0; |
@@ -214,6 +293,7 @@ static int __init vsyscall_init(void) | |||
214 | VSYSCALL_ADDR(__NR_vgettimeofday))); | 293 | VSYSCALL_ADDR(__NR_vgettimeofday))); |
215 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); | 294 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); |
216 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); | 295 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); |
296 | BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu)); | ||
217 | map_vsyscall(); | 297 | map_vsyscall(); |
218 | #ifdef CONFIG_SYSCTL | 298 | #ifdef CONFIG_SYSCTL |
219 | register_sysctl_table(kernel_root_table2, 0); | 299 | register_sysctl_table(kernel_root_table2, 0); |
diff --git a/include/asm-x86_64/segment.h b/include/asm-x86_64/segment.h index d4bed33fb32c..334ddcdd8f92 100644 --- a/include/asm-x86_64/segment.h +++ b/include/asm-x86_64/segment.h | |||
@@ -20,15 +20,16 @@ | |||
20 | #define __USER_CS 0x33 /* 6*8+3 */ | 20 | #define __USER_CS 0x33 /* 6*8+3 */ |
21 | #define __USER32_DS __USER_DS | 21 | #define __USER32_DS __USER_DS |
22 | 22 | ||
23 | #define GDT_ENTRY_TLS 1 | ||
24 | #define GDT_ENTRY_TSS 8 /* needs two entries */ | 23 | #define GDT_ENTRY_TSS 8 /* needs two entries */ |
25 | #define GDT_ENTRY_LDT 10 /* needs two entries */ | 24 | #define GDT_ENTRY_LDT 10 /* needs two entries */ |
26 | #define GDT_ENTRY_TLS_MIN 12 | 25 | #define GDT_ENTRY_TLS_MIN 12 |
27 | #define GDT_ENTRY_TLS_MAX 14 | 26 | #define GDT_ENTRY_TLS_MAX 14 |
28 | /* 15 free */ | ||
29 | 27 | ||
30 | #define GDT_ENTRY_TLS_ENTRIES 3 | 28 | #define GDT_ENTRY_TLS_ENTRIES 3 |
31 | 29 | ||
30 | #define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */ | ||
31 | #define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3) | ||
32 | |||
32 | /* TLS indexes for 64bit - hardcoded in arch_prctl */ | 33 | /* TLS indexes for 64bit - hardcoded in arch_prctl */ |
33 | #define FS_TLS 0 | 34 | #define FS_TLS 0 |
34 | #define GS_TLS 1 | 35 | #define GS_TLS 1 |
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index 6805e1feb300..d61547fd833b 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h | |||
@@ -133,13 +133,19 @@ static __inline int logical_smp_processor_id(void) | |||
133 | /* we don't want to mark this access volatile - bad code generation */ | 133 | /* we don't want to mark this access volatile - bad code generation */ |
134 | return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); | 134 | return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); |
135 | } | 135 | } |
136 | #endif | ||
137 | 136 | ||
138 | #ifdef CONFIG_SMP | 137 | #ifdef CONFIG_SMP |
139 | #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] | 138 | #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] |
140 | #else | 139 | #else |
141 | #define cpu_physical_id(cpu) boot_cpu_id | 140 | #define cpu_physical_id(cpu) boot_cpu_id |
142 | #endif | 141 | static inline int smp_call_function_single(int cpuid, void (*func) (void *info), |
143 | 142 | void *info, int retry, int wait) | |
143 | { | ||
144 | /* Disable interrupts here? */ | ||
145 | func(info); | ||
146 | return 0; | ||
147 | } | ||
148 | #endif /* !CONFIG_SMP */ | ||
149 | #endif /* !__ASSEMBLY */ | ||
144 | #endif | 150 | #endif |
145 | 151 | ||
diff --git a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h index 146b24402a5f..2281e9399b96 100644 --- a/include/asm-x86_64/vsyscall.h +++ b/include/asm-x86_64/vsyscall.h | |||
@@ -4,6 +4,7 @@ | |||
4 | enum vsyscall_num { | 4 | enum vsyscall_num { |
5 | __NR_vgettimeofday, | 5 | __NR_vgettimeofday, |
6 | __NR_vtime, | 6 | __NR_vtime, |
7 | __NR_vgetcpu, | ||
7 | }; | 8 | }; |
8 | 9 | ||
9 | #define VSYSCALL_START (-10UL << 20) | 10 | #define VSYSCALL_START (-10UL << 20) |
@@ -15,6 +16,7 @@ enum vsyscall_num { | |||
15 | #include <linux/seqlock.h> | 16 | #include <linux/seqlock.h> |
16 | 17 | ||
17 | #define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16))) | 18 | #define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16))) |
19 | #define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16))) | ||
18 | #define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16))) | 20 | #define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16))) |
19 | #define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16))) | 21 | #define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16))) |
20 | #define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16))) | 22 | #define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16))) |
@@ -26,6 +28,9 @@ enum vsyscall_num { | |||
26 | #define VXTIME_HPET 2 | 28 | #define VXTIME_HPET 2 |
27 | #define VXTIME_PMTMR 3 | 29 | #define VXTIME_PMTMR 3 |
28 | 30 | ||
31 | #define VGETCPU_RDTSCP 1 | ||
32 | #define VGETCPU_LSL 2 | ||
33 | |||
29 | struct vxtime_data { | 34 | struct vxtime_data { |
30 | long hpet_address; /* HPET base address */ | 35 | long hpet_address; /* HPET base address */ |
31 | int last; | 36 | int last; |
@@ -40,6 +45,7 @@ struct vxtime_data { | |||
40 | 45 | ||
41 | /* vsyscall space (readonly) */ | 46 | /* vsyscall space (readonly) */ |
42 | extern struct vxtime_data __vxtime; | 47 | extern struct vxtime_data __vxtime; |
48 | extern int __vgetcpu_mode; | ||
43 | extern struct timespec __xtime; | 49 | extern struct timespec __xtime; |
44 | extern volatile unsigned long __jiffies; | 50 | extern volatile unsigned long __jiffies; |
45 | extern unsigned long __wall_jiffies; | 51 | extern unsigned long __wall_jiffies; |
@@ -48,6 +54,7 @@ extern seqlock_t __xtime_lock; | |||
48 | 54 | ||
49 | /* kernel space (writeable) */ | 55 | /* kernel space (writeable) */ |
50 | extern struct vxtime_data vxtime; | 56 | extern struct vxtime_data vxtime; |
57 | extern int vgetcpu_mode; | ||
51 | extern unsigned long wall_jiffies; | 58 | extern unsigned long wall_jiffies; |
52 | extern struct timezone sys_tz; | 59 | extern struct timezone sys_tz; |
53 | extern int sysctl_vsyscall; | 60 | extern int sysctl_vsyscall; |
@@ -55,6 +62,8 @@ extern seqlock_t xtime_lock; | |||
55 | 62 | ||
56 | extern int sysctl_vsyscall; | 63 | extern int sysctl_vsyscall; |
57 | 64 | ||
65 | extern void vsyscall_set_cpu(int cpu); | ||
66 | |||
58 | #define ARCH_HAVE_XTIME_LOCK 1 | 67 | #define ARCH_HAVE_XTIME_LOCK 1 |
59 | 68 | ||
60 | #endif /* __KERNEL__ */ | 69 | #endif /* __KERNEL__ */ |
diff --git a/include/linux/getcpu.h b/include/linux/getcpu.h new file mode 100644 index 000000000000..031ed3780e45 --- /dev/null +++ b/include/linux/getcpu.h | |||
@@ -0,0 +1,16 @@ | |||
1 | #ifndef _LINUX_GETCPU_H | ||
2 | #define _LINUX_GETCPU_H 1 | ||
3 | |||
4 | /* Cache for getcpu() to speed it up. Results might be up to a jiffy | ||
5 | out of date, but will be faster. | ||
6 | User programs should not refer to the contents of this structure. | ||
7 | It is only a cache for vgetcpu(). It might change in future kernels. | ||
8 | The user program must store this information per thread (__thread). | ||
9 | If you want 100% accurate information pass NULL instead. */ | ||
10 | struct getcpu_cache { | ||
11 | unsigned long t0; | ||
12 | unsigned long t1; | ||
13 | unsigned long res[4]; | ||
14 | }; | ||
15 | |||
16 | #endif | ||