about summary refs log tree commit diff stats
path: root/arch/x86_64
diff options
context:
space:
mode:
authorVojtech Pavlik <vojtech@suse.cz>2006-09-26 04:52:28 -0400
committerAndi Kleen <andi@basil.nowhere.org>2006-09-26 04:52:28 -0400
commitc08c820508233b424deab3302bc404bbecc6493a (patch)
treed6ed79fb2d03513f6c71fc7e47705c4a19512f53 /arch/x86_64
parenta670fad0adb1cc6202a607d250f10bd380593905 (diff)
[PATCH] Add the vgetcpu vsyscall
This patch adds a vgetcpu vsyscall, which depending on the CPU RDTSCP
capability uses either the RDTSCP or CPUID to obtain a CPU and node
numbers and pass them to the program.

AK: Lots of changes over Vojtech's original code:
- Better prototype for vgetcpu(). It's better to pass the cpu / node
  numbers as separate arguments to avoid mistakes when going from SMP
  to NUMA. Also add a fast time stamp based cache using a user supplied
  argument to speed things more up.
- Use fast method from Chuck Ebbert to retrieve node/cpu from GDT limit
  instead of CPUID
- Made sure RDTSCP init is always executed after node is known.
- Drop printk

Signed-off-by: Vojtech Pavlik <vojtech@suse.cz>
Signed-off-by: Andi Kleen <ak@suse.de>
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/kernel/head.S2
-rw-r--r--arch/x86_64/kernel/time.c13
-rw-r--r--arch/x86_64/kernel/vmlinux.lds.S3
-rw-r--r--arch/x86_64/kernel/vsyscall.c84
4 files changed, 93 insertions(+), 9 deletions(-)
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index c9739ca81d06..505ec4a57506 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -371,7 +371,7 @@ ENTRY(cpu_gdt_table)
 	.quad 0,0			/* TSS */
 	.quad 0,0			/* LDT */
 	.quad 0,0,0			/* three TLS descriptors */
-	.quad 0				/* unused */
+	.quad 0x0000f40000000000	/* node/CPU stored in limit */
 gdt_end:
 	/* asm/segment.h:GDT_ENTRIES must match this */
 	/* This should be a multiple of the cache line size */
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 97b9e46d1992..560ed944dc0e 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -899,12 +899,8 @@ static int __cpuinit
 time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu)
 {
 	unsigned cpu = (unsigned long) hcpu;
-	if (action == CPU_ONLINE &&
-	    cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) {
-		unsigned p;
-		p = smp_processor_id() | (cpu_to_node(smp_processor_id())<<12);
-		write_rdtscp_aux(p);
-	}
+	if (action == CPU_ONLINE)
+		vsyscall_set_cpu(cpu);
 	return NOTIFY_DONE;
 }
 
@@ -993,6 +989,11 @@ void time_init_gtod(void)
 	if (unsynchronized_tsc())
 		notsc = 1;
 
+	if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
+		vgetcpu_mode = VGETCPU_RDTSCP;
+	else
+		vgetcpu_mode = VGETCPU_LSL;
+
 	if (vxtime.hpet_address && notsc) {
 		timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
 		if (hpet_use_timer)
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 7c4de31471d4..8d5a5149bb3a 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -99,6 +99,9 @@ SECTIONS
   .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) }
   vxtime = VVIRT(.vxtime);
 
+  .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) }
+  vgetcpu_mode = VVIRT(.vgetcpu_mode);
+
   .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) }
   wall_jiffies = VVIRT(.wall_jiffies);
 
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index f603037df162..902783bc4d53 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -26,6 +26,7 @@
 #include <linux/seqlock.h>
 #include <linux/jiffies.h>
 #include <linux/sysctl.h>
+#include <linux/getcpu.h>
 
 #include <asm/vsyscall.h>
 #include <asm/pgtable.h>
@@ -33,11 +34,15 @@
 #include <asm/fixmap.h>
 #include <asm/errno.h>
 #include <asm/io.h>
+#include <asm/segment.h>
+#include <asm/desc.h>
+#include <asm/topology.h>
 
 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
 
 int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
 seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
+int __vgetcpu_mode __section_vgetcpu_mode;
 
 #include <asm/unistd.h>
 
@@ -127,9 +132,46 @@ time_t __vsyscall(1) vtime(time_t *t)
 	return __xtime.tv_sec;
 }
 
-long __vsyscall(2) venosys_0(void)
+/* Fast way to get current CPU and node.
+   This helps to do per node and per CPU caches in user space.
+   The result is not guaranteed without CPU affinity, but usually
+   works out because the scheduler tries to keep a thread on the same
+   CPU.
+
+   tcache must point to a two element sized long array.
+   All arguments can be NULL. */
+long __vsyscall(2)
+vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
 {
-	return -ENOSYS;
+	unsigned int dummy, p;
+	unsigned long j = 0;
+
+	/* Fast cache - only recompute value once per jiffies and avoid
+	   relatively costly rdtscp/cpuid otherwise.
+	   This works because the scheduler usually keeps the process
+	   on the same CPU and this syscall doesn't guarantee its
+	   results anyways.
+	   We do this here because otherwise user space would do it on
+	   its own in a likely inferior way (no access to jiffies).
+	   If you don't like it pass NULL. */
+	if (tcache && tcache->t0 == (j = __jiffies)) {
+		p = tcache->t1;
+	} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
+		/* Load per CPU data from RDTSCP */
+		rdtscp(dummy, dummy, p);
+	} else {
+		/* Load per CPU data from GDT */
+		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
+	}
+	if (tcache) {
+		tcache->t0 = j;
+		tcache->t1 = p;
+	}
+	if (cpu)
+		*cpu = p & 0xfff;
+	if (node)
+		*node = p >> 12;
+	return 0;
 }
 
 long __vsyscall(3) venosys_1(void)
@@ -200,6 +242,43 @@ static ctl_table kernel_root_table2[] = {
 
 #endif
 
+static void __cpuinit write_rdtscp_cb(void *info)
+{
+	write_rdtscp_aux((unsigned long)info);
+}
+
+void __cpuinit vsyscall_set_cpu(int cpu)
+{
+	unsigned long *d;
+	unsigned long node = 0;
+#ifdef CONFIG_NUMA
+	node = cpu_to_node[cpu];
+#endif
+	if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) {
+		void *info = (void *)((node << 12) | cpu);
+		/* Can happen on preemptive kernel */
+		if (get_cpu() == cpu)
+			write_rdtscp_cb(info);
+#ifdef CONFIG_SMP
+		else {
+			/* the notifier is unfortunately not executed on the
+			   target CPU */
+			smp_call_function_single(cpu,write_rdtscp_cb,info,0,1);
+		}
+#endif
+		put_cpu();
+	}
+
+	/* Store cpu number in limit so that it can be loaded quickly
+	   in user space in vgetcpu.
+	   12 bits for the CPU and 8 bits for the node. */
+	d = (unsigned long *)(cpu_gdt(cpu) + GDT_ENTRY_PER_CPU);
+	*d = 0x0f40000000000ULL;
+	*d |= cpu;
+	*d |= (node & 0xf) << 12;
+	*d |= (node >> 4) << 48;
+}
+
 static void __init map_vsyscall(void)
 {
 	extern char __vsyscall_0;
@@ -214,6 +293,7 @@ static int __init vsyscall_init(void)
 			VSYSCALL_ADDR(__NR_vgettimeofday)));
 	BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
 	BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
+	BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
 	map_vsyscall();
 #ifdef CONFIG_SYSCTL
 	register_sysctl_table(kernel_root_table2, 0);