about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Vojtech Pavlik <vojtech@suse.cz> 2006-09-26 04:52:28 -0400
committer: Andi Kleen <andi@basil.nowhere.org> 2006-09-26 04:52:28 -0400
commit: c08c820508233b424deab3302bc404bbecc6493a (patch)
tree: d6ed79fb2d03513f6c71fc7e47705c4a19512f53
parent: a670fad0adb1cc6202a607d250f10bd380593905 (diff)
[PATCH] Add the vgetcpu vsyscall
This patch adds a vgetcpu vsyscall which, depending on the CPU's RDTSCP capability, uses either RDTSCP or CPUID to obtain the CPU and node numbers and pass them to the program.

AK: Lots of changes over Vojtech's original code:

- Better prototype for vgetcpu(): it's better to pass the cpu / node numbers as separate arguments to avoid mistakes when going from SMP to NUMA.
- Also add a fast time stamp based cache using a user supplied argument to speed things up further.
- Use fast method from Chuck Ebbert to retrieve node/cpu from GDT limit instead of CPUID.
- Made sure RDTSCP init is always executed after node is known.
- Drop printk.

Signed-off-by: Vojtech Pavlik <vojtech@suse.cz>
Signed-off-by: Andi Kleen <ak@suse.de>
-rw-r--r--  arch/x86_64/kernel/head.S          2
-rw-r--r--  arch/x86_64/kernel/time.c         13
-rw-r--r--  arch/x86_64/kernel/vmlinux.lds.S   3
-rw-r--r--  arch/x86_64/kernel/vsyscall.c     84
-rw-r--r--  include/asm-x86_64/segment.h       5
-rw-r--r--  include/asm-x86_64/smp.h          12
-rw-r--r--  include/asm-x86_64/vsyscall.h      9
-rw-r--r--  include/linux/getcpu.h            16
8 files changed, 130 insertions, 14 deletions
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index c9739ca81d06..505ec4a57506 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -371,7 +371,7 @@ ENTRY(cpu_gdt_table)
371 .quad 0,0 /* TSS */ 371 .quad 0,0 /* TSS */
372 .quad 0,0 /* LDT */ 372 .quad 0,0 /* LDT */
373 .quad 0,0,0 /* three TLS descriptors */ 373 .quad 0,0,0 /* three TLS descriptors */
374 .quad 0 /* unused */ 374 .quad 0x0000f40000000000 /* node/CPU stored in limit */
375gdt_end: 375gdt_end:
376 /* asm/segment.h:GDT_ENTRIES must match this */ 376 /* asm/segment.h:GDT_ENTRIES must match this */
377 /* This should be a multiple of the cache line size */ 377 /* This should be a multiple of the cache line size */
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 97b9e46d1992..560ed944dc0e 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -899,12 +899,8 @@ static int __cpuinit
899time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu) 899time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu)
900{ 900{
901 unsigned cpu = (unsigned long) hcpu; 901 unsigned cpu = (unsigned long) hcpu;
902 if (action == CPU_ONLINE && 902 if (action == CPU_ONLINE)
903 cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) { 903 vsyscall_set_cpu(cpu);
904 unsigned p;
905 p = smp_processor_id() | (cpu_to_node(smp_processor_id())<<12);
906 write_rdtscp_aux(p);
907 }
908 return NOTIFY_DONE; 904 return NOTIFY_DONE;
909} 905}
910 906
@@ -993,6 +989,11 @@ void time_init_gtod(void)
993 if (unsynchronized_tsc()) 989 if (unsynchronized_tsc())
994 notsc = 1; 990 notsc = 1;
995 991
992 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
993 vgetcpu_mode = VGETCPU_RDTSCP;
994 else
995 vgetcpu_mode = VGETCPU_LSL;
996
996 if (vxtime.hpet_address && notsc) { 997 if (vxtime.hpet_address && notsc) {
997 timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; 998 timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
998 if (hpet_use_timer) 999 if (hpet_use_timer)
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 7c4de31471d4..8d5a5149bb3a 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -99,6 +99,9 @@ SECTIONS
99 .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) } 99 .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) }
100 vxtime = VVIRT(.vxtime); 100 vxtime = VVIRT(.vxtime);
101 101
102 .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) }
103 vgetcpu_mode = VVIRT(.vgetcpu_mode);
104
102 .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) } 105 .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) }
103 wall_jiffies = VVIRT(.wall_jiffies); 106 wall_jiffies = VVIRT(.wall_jiffies);
104 107
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index f603037df162..902783bc4d53 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -26,6 +26,7 @@
26#include <linux/seqlock.h> 26#include <linux/seqlock.h>
27#include <linux/jiffies.h> 27#include <linux/jiffies.h>
28#include <linux/sysctl.h> 28#include <linux/sysctl.h>
29#include <linux/getcpu.h>
29 30
30#include <asm/vsyscall.h> 31#include <asm/vsyscall.h>
31#include <asm/pgtable.h> 32#include <asm/pgtable.h>
@@ -33,11 +34,15 @@
33#include <asm/fixmap.h> 34#include <asm/fixmap.h>
34#include <asm/errno.h> 35#include <asm/errno.h>
35#include <asm/io.h> 36#include <asm/io.h>
37#include <asm/segment.h>
38#include <asm/desc.h>
39#include <asm/topology.h>
36 40
37#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) 41#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
38 42
39int __sysctl_vsyscall __section_sysctl_vsyscall = 1; 43int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
40seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; 44seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
45int __vgetcpu_mode __section_vgetcpu_mode;
41 46
42#include <asm/unistd.h> 47#include <asm/unistd.h>
43 48
@@ -127,9 +132,46 @@ time_t __vsyscall(1) vtime(time_t *t)
127 return __xtime.tv_sec; 132 return __xtime.tv_sec;
128} 133}
129 134
130long __vsyscall(2) venosys_0(void) 135/* Fast way to get current CPU and node.
136 This helps to do per node and per CPU caches in user space.
137 The result is not guaranteed without CPU affinity, but usually
138 works out because the scheduler tries to keep a thread on the same
139 CPU.
140
141 tcache must point to a two element sized long array.
142 All arguments can be NULL. */
143long __vsyscall(2)
144vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
131{ 145{
132 return -ENOSYS; 146 unsigned int dummy, p;
147 unsigned long j = 0;
148
149 /* Fast cache - only recompute value once per jiffies and avoid
150 relatively costly rdtscp/cpuid otherwise.
151 This works because the scheduler usually keeps the process
152 on the same CPU and this syscall doesn't guarantee its
153 results anyways.
154 We do this here because otherwise user space would do it on
155 its own in a likely inferior way (no access to jiffies).
156 If you don't like it pass NULL. */
157 if (tcache && tcache->t0 == (j = __jiffies)) {
158 p = tcache->t1;
159 } else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
160 /* Load per CPU data from RDTSCP */
161 rdtscp(dummy, dummy, p);
162 } else {
163 /* Load per CPU data from GDT */
164 asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
165 }
166 if (tcache) {
167 tcache->t0 = j;
168 tcache->t1 = p;
169 }
170 if (cpu)
171 *cpu = p & 0xfff;
172 if (node)
173 *node = p >> 12;
174 return 0;
133} 175}
134 176
135long __vsyscall(3) venosys_1(void) 177long __vsyscall(3) venosys_1(void)
@@ -200,6 +242,43 @@ static ctl_table kernel_root_table2[] = {
200 242
201#endif 243#endif
202 244
245static void __cpuinit write_rdtscp_cb(void *info)
246{
247 write_rdtscp_aux((unsigned long)info);
248}
249
250void __cpuinit vsyscall_set_cpu(int cpu)
251{
252 unsigned long *d;
253 unsigned long node = 0;
254#ifdef CONFIG_NUMA
255 node = cpu_to_node[cpu];
256#endif
257 if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) {
258 void *info = (void *)((node << 12) | cpu);
259 /* Can happen on preemptive kernel */
260 if (get_cpu() == cpu)
261 write_rdtscp_cb(info);
262#ifdef CONFIG_SMP
263 else {
264 /* the notifier is unfortunately not executed on the
265 target CPU */
266 smp_call_function_single(cpu,write_rdtscp_cb,info,0,1);
267 }
268#endif
269 put_cpu();
270 }
271
272 /* Store cpu number in limit so that it can be loaded quickly
273 in user space in vgetcpu.
274 12 bits for the CPU and 8 bits for the node. */
275 d = (unsigned long *)(cpu_gdt(cpu) + GDT_ENTRY_PER_CPU);
276 *d = 0x0f40000000000ULL;
277 *d |= cpu;
278 *d |= (node & 0xf) << 12;
279 *d |= (node >> 4) << 48;
280}
281
203static void __init map_vsyscall(void) 282static void __init map_vsyscall(void)
204{ 283{
205 extern char __vsyscall_0; 284 extern char __vsyscall_0;
@@ -214,6 +293,7 @@ static int __init vsyscall_init(void)
214 VSYSCALL_ADDR(__NR_vgettimeofday))); 293 VSYSCALL_ADDR(__NR_vgettimeofday)));
215 BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); 294 BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
216 BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); 295 BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
296 BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
217 map_vsyscall(); 297 map_vsyscall();
218#ifdef CONFIG_SYSCTL 298#ifdef CONFIG_SYSCTL
219 register_sysctl_table(kernel_root_table2, 0); 299 register_sysctl_table(kernel_root_table2, 0);
diff --git a/include/asm-x86_64/segment.h b/include/asm-x86_64/segment.h
index d4bed33fb32c..334ddcdd8f92 100644
--- a/include/asm-x86_64/segment.h
+++ b/include/asm-x86_64/segment.h
@@ -20,15 +20,16 @@
20#define __USER_CS 0x33 /* 6*8+3 */ 20#define __USER_CS 0x33 /* 6*8+3 */
21#define __USER32_DS __USER_DS 21#define __USER32_DS __USER_DS
22 22
23#define GDT_ENTRY_TLS 1
24#define GDT_ENTRY_TSS 8 /* needs two entries */ 23#define GDT_ENTRY_TSS 8 /* needs two entries */
25#define GDT_ENTRY_LDT 10 /* needs two entries */ 24#define GDT_ENTRY_LDT 10 /* needs two entries */
26#define GDT_ENTRY_TLS_MIN 12 25#define GDT_ENTRY_TLS_MIN 12
27#define GDT_ENTRY_TLS_MAX 14 26#define GDT_ENTRY_TLS_MAX 14
28/* 15 free */
29 27
30#define GDT_ENTRY_TLS_ENTRIES 3 28#define GDT_ENTRY_TLS_ENTRIES 3
31 29
30#define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */
31#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3)
32
32/* TLS indexes for 64bit - hardcoded in arch_prctl */ 33/* TLS indexes for 64bit - hardcoded in arch_prctl */
33#define FS_TLS 0 34#define FS_TLS 0
34#define GS_TLS 1 35#define GS_TLS 1
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
index 6805e1feb300..d61547fd833b 100644
--- a/include/asm-x86_64/smp.h
+++ b/include/asm-x86_64/smp.h
@@ -133,13 +133,19 @@ static __inline int logical_smp_processor_id(void)
133 /* we don't want to mark this access volatile - bad code generation */ 133 /* we don't want to mark this access volatile - bad code generation */
134 return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); 134 return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
135} 135}
136#endif
137 136
138#ifdef CONFIG_SMP 137#ifdef CONFIG_SMP
139#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] 138#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
140#else 139#else
141#define cpu_physical_id(cpu) boot_cpu_id 140#define cpu_physical_id(cpu) boot_cpu_id
142#endif 141static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
143 142 void *info, int retry, int wait)
143{
144 /* Disable interrupts here? */
145 func(info);
146 return 0;
147}
148#endif /* !CONFIG_SMP */
149#endif /* !__ASSEMBLY */
144#endif 150#endif
145 151
diff --git a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h
index 146b24402a5f..2281e9399b96 100644
--- a/include/asm-x86_64/vsyscall.h
+++ b/include/asm-x86_64/vsyscall.h
@@ -4,6 +4,7 @@
4enum vsyscall_num { 4enum vsyscall_num {
5 __NR_vgettimeofday, 5 __NR_vgettimeofday,
6 __NR_vtime, 6 __NR_vtime,
7 __NR_vgetcpu,
7}; 8};
8 9
9#define VSYSCALL_START (-10UL << 20) 10#define VSYSCALL_START (-10UL << 20)
@@ -15,6 +16,7 @@ enum vsyscall_num {
15#include <linux/seqlock.h> 16#include <linux/seqlock.h>
16 17
17#define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16))) 18#define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16)))
19#define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16)))
18#define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16))) 20#define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16)))
19#define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16))) 21#define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16)))
20#define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16))) 22#define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16)))
@@ -26,6 +28,9 @@ enum vsyscall_num {
26#define VXTIME_HPET 2 28#define VXTIME_HPET 2
27#define VXTIME_PMTMR 3 29#define VXTIME_PMTMR 3
28 30
31#define VGETCPU_RDTSCP 1
32#define VGETCPU_LSL 2
33
29struct vxtime_data { 34struct vxtime_data {
30 long hpet_address; /* HPET base address */ 35 long hpet_address; /* HPET base address */
31 int last; 36 int last;
@@ -40,6 +45,7 @@ struct vxtime_data {
40 45
41/* vsyscall space (readonly) */ 46/* vsyscall space (readonly) */
42extern struct vxtime_data __vxtime; 47extern struct vxtime_data __vxtime;
48extern int __vgetcpu_mode;
43extern struct timespec __xtime; 49extern struct timespec __xtime;
44extern volatile unsigned long __jiffies; 50extern volatile unsigned long __jiffies;
45extern unsigned long __wall_jiffies; 51extern unsigned long __wall_jiffies;
@@ -48,6 +54,7 @@ extern seqlock_t __xtime_lock;
48 54
49/* kernel space (writeable) */ 55/* kernel space (writeable) */
50extern struct vxtime_data vxtime; 56extern struct vxtime_data vxtime;
57extern int vgetcpu_mode;
51extern unsigned long wall_jiffies; 58extern unsigned long wall_jiffies;
52extern struct timezone sys_tz; 59extern struct timezone sys_tz;
53extern int sysctl_vsyscall; 60extern int sysctl_vsyscall;
@@ -55,6 +62,8 @@ extern seqlock_t xtime_lock;
55 62
56extern int sysctl_vsyscall; 63extern int sysctl_vsyscall;
57 64
65extern void vsyscall_set_cpu(int cpu);
66
58#define ARCH_HAVE_XTIME_LOCK 1 67#define ARCH_HAVE_XTIME_LOCK 1
59 68
60#endif /* __KERNEL__ */ 69#endif /* __KERNEL__ */
diff --git a/include/linux/getcpu.h b/include/linux/getcpu.h
new file mode 100644
index 000000000000..031ed3780e45
--- /dev/null
+++ b/include/linux/getcpu.h
@@ -0,0 +1,16 @@
1#ifndef _LINUX_GETCPU_H
2#define _LINUX_GETCPU_H 1
3
4/* Cache for getcpu() to speed it up. Results might be upto a jiffie
5 out of date, but will be faster.
6 User programs should not refer to the contents of this structure.
7 It is only a cache for vgetcpu(). It might change in future kernels.
8 The user program must store this information per thread (__thread)
9 If you want 100% accurate information pass NULL instead. */
10struct getcpu_cache {
11 unsigned long t0;
12 unsigned long t1;
13 unsigned long res[4];
14};
15
16#endif