diff options
author | Vojtech Pavlik <vojtech@suse.cz> | 2006-09-26 04:52:28 -0400 |
---|---|---|
committer | Andi Kleen <andi@basil.nowhere.org> | 2006-09-26 04:52:28 -0400 |
commit | c08c820508233b424deab3302bc404bbecc6493a (patch) | |
tree | d6ed79fb2d03513f6c71fc7e47705c4a19512f53 /arch/x86_64/kernel | |
parent | a670fad0adb1cc6202a607d250f10bd380593905 (diff) |
[PATCH] Add the vgetcpu vsyscall
This patch adds a vgetcpu vsyscall which, depending on the CPU's RDTSCP
capability, uses either RDTSCP or CPUID to obtain the CPU and node
numbers and pass them to the program.
AK: Lots of changes over Vojtech's original code:
Better prototype for vgetcpu()
It's better to pass the cpu / node numbers as separate arguments
to avoid mistakes when going from SMP to NUMA.
Also add a fast timestamp-based cache using a user-supplied
argument to speed things up further.
Use fast method from Chuck Ebbert to retrieve node/cpu from
GDT limit instead of CPUID
Made sure RDTSCP init is always executed after node is known.
Drop printk
Signed-off-by: Vojtech Pavlik <vojtech@suse.cz>
Signed-off-by: Andi Kleen <ak@suse.de>
Diffstat (limited to 'arch/x86_64/kernel')
-rw-r--r-- | arch/x86_64/kernel/head.S | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/time.c | 13 | ||||
-rw-r--r-- | arch/x86_64/kernel/vmlinux.lds.S | 3 | ||||
-rw-r--r-- | arch/x86_64/kernel/vsyscall.c | 84 |
4 files changed, 93 insertions, 9 deletions
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index c9739ca81d06..505ec4a57506 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S | |||
@@ -371,7 +371,7 @@ ENTRY(cpu_gdt_table) | |||
371 | .quad 0,0 /* TSS */ | 371 | .quad 0,0 /* TSS */ |
372 | .quad 0,0 /* LDT */ | 372 | .quad 0,0 /* LDT */ |
373 | .quad 0,0,0 /* three TLS descriptors */ | 373 | .quad 0,0,0 /* three TLS descriptors */ |
374 | .quad 0 /* unused */ | 374 | .quad 0x0000f40000000000 /* node/CPU stored in limit */ |
375 | gdt_end: | 375 | gdt_end: |
376 | /* asm/segment.h:GDT_ENTRIES must match this */ | 376 | /* asm/segment.h:GDT_ENTRIES must match this */ |
377 | /* This should be a multiple of the cache line size */ | 377 | /* This should be a multiple of the cache line size */ |
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 97b9e46d1992..560ed944dc0e 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c | |||
@@ -899,12 +899,8 @@ static int __cpuinit | |||
899 | time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu) | 899 | time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu) |
900 | { | 900 | { |
901 | unsigned cpu = (unsigned long) hcpu; | 901 | unsigned cpu = (unsigned long) hcpu; |
902 | if (action == CPU_ONLINE && | 902 | if (action == CPU_ONLINE) |
903 | cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) { | 903 | vsyscall_set_cpu(cpu); |
904 | unsigned p; | ||
905 | p = smp_processor_id() | (cpu_to_node(smp_processor_id())<<12); | ||
906 | write_rdtscp_aux(p); | ||
907 | } | ||
908 | return NOTIFY_DONE; | 904 | return NOTIFY_DONE; |
909 | } | 905 | } |
910 | 906 | ||
@@ -993,6 +989,11 @@ void time_init_gtod(void) | |||
993 | if (unsynchronized_tsc()) | 989 | if (unsynchronized_tsc()) |
994 | notsc = 1; | 990 | notsc = 1; |
995 | 991 | ||
992 | if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) | ||
993 | vgetcpu_mode = VGETCPU_RDTSCP; | ||
994 | else | ||
995 | vgetcpu_mode = VGETCPU_LSL; | ||
996 | |||
996 | if (vxtime.hpet_address && notsc) { | 997 | if (vxtime.hpet_address && notsc) { |
997 | timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; | 998 | timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; |
998 | if (hpet_use_timer) | 999 | if (hpet_use_timer) |
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 7c4de31471d4..8d5a5149bb3a 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S | |||
@@ -99,6 +99,9 @@ SECTIONS | |||
99 | .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) } | 99 | .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) } |
100 | vxtime = VVIRT(.vxtime); | 100 | vxtime = VVIRT(.vxtime); |
101 | 101 | ||
102 | .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) } | ||
103 | vgetcpu_mode = VVIRT(.vgetcpu_mode); | ||
104 | |||
102 | .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) } | 105 | .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) } |
103 | wall_jiffies = VVIRT(.wall_jiffies); | 106 | wall_jiffies = VVIRT(.wall_jiffies); |
104 | 107 | ||
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index f603037df162..902783bc4d53 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/seqlock.h> | 26 | #include <linux/seqlock.h> |
27 | #include <linux/jiffies.h> | 27 | #include <linux/jiffies.h> |
28 | #include <linux/sysctl.h> | 28 | #include <linux/sysctl.h> |
29 | #include <linux/getcpu.h> | ||
29 | 30 | ||
30 | #include <asm/vsyscall.h> | 31 | #include <asm/vsyscall.h> |
31 | #include <asm/pgtable.h> | 32 | #include <asm/pgtable.h> |
@@ -33,11 +34,15 @@ | |||
33 | #include <asm/fixmap.h> | 34 | #include <asm/fixmap.h> |
34 | #include <asm/errno.h> | 35 | #include <asm/errno.h> |
35 | #include <asm/io.h> | 36 | #include <asm/io.h> |
37 | #include <asm/segment.h> | ||
38 | #include <asm/desc.h> | ||
39 | #include <asm/topology.h> | ||
36 | 40 | ||
37 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) | 41 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) |
38 | 42 | ||
39 | int __sysctl_vsyscall __section_sysctl_vsyscall = 1; | 43 | int __sysctl_vsyscall __section_sysctl_vsyscall = 1; |
40 | seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; | 44 | seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; |
45 | int __vgetcpu_mode __section_vgetcpu_mode; | ||
41 | 46 | ||
42 | #include <asm/unistd.h> | 47 | #include <asm/unistd.h> |
43 | 48 | ||
@@ -127,9 +132,46 @@ time_t __vsyscall(1) vtime(time_t *t) | |||
127 | return __xtime.tv_sec; | 132 | return __xtime.tv_sec; |
128 | } | 133 | } |
129 | 134 | ||
130 | long __vsyscall(2) venosys_0(void) | 135 | /* Fast way to get current CPU and node. |
136 | This helps to do per node and per CPU caches in user space. | ||
137 | The result is not guaranteed without CPU affinity, but usually | ||
138 | works out because the scheduler tries to keep a thread on the same | ||
139 | CPU. | ||
140 | |||
141 | tcache must point to a two element sized long array. | ||
142 | All arguments can be NULL. */ | ||
143 | long __vsyscall(2) | ||
144 | vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) | ||
131 | { | 145 | { |
132 | return -ENOSYS; | 146 | unsigned int dummy, p; |
147 | unsigned long j = 0; | ||
148 | |||
149 | /* Fast cache - only recompute value once per jiffies and avoid | ||
150 | relatively costly rdtscp/cpuid otherwise. | ||
151 | This works because the scheduler usually keeps the process | ||
152 | on the same CPU and this syscall doesn't guarantee its | ||
153 | results anyways. | ||
154 | We do this here because otherwise user space would do it on | ||
155 | its own in a likely inferior way (no access to jiffies). | ||
156 | If you don't like it pass NULL. */ | ||
157 | if (tcache && tcache->t0 == (j = __jiffies)) { | ||
158 | p = tcache->t1; | ||
159 | } else if (__vgetcpu_mode == VGETCPU_RDTSCP) { | ||
160 | /* Load per CPU data from RDTSCP */ | ||
161 | rdtscp(dummy, dummy, p); | ||
162 | } else { | ||
163 | /* Load per CPU data from GDT */ | ||
164 | asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); | ||
165 | } | ||
166 | if (tcache) { | ||
167 | tcache->t0 = j; | ||
168 | tcache->t1 = p; | ||
169 | } | ||
170 | if (cpu) | ||
171 | *cpu = p & 0xfff; | ||
172 | if (node) | ||
173 | *node = p >> 12; | ||
174 | return 0; | ||
133 | } | 175 | } |
134 | 176 | ||
135 | long __vsyscall(3) venosys_1(void) | 177 | long __vsyscall(3) venosys_1(void) |
@@ -200,6 +242,43 @@ static ctl_table kernel_root_table2[] = { | |||
200 | 242 | ||
201 | #endif | 243 | #endif |
202 | 244 | ||
245 | static void __cpuinit write_rdtscp_cb(void *info) | ||
246 | { | ||
247 | write_rdtscp_aux((unsigned long)info); | ||
248 | } | ||
249 | |||
250 | void __cpuinit vsyscall_set_cpu(int cpu) | ||
251 | { | ||
252 | unsigned long *d; | ||
253 | unsigned long node = 0; | ||
254 | #ifdef CONFIG_NUMA | ||
255 | node = cpu_to_node[cpu]; | ||
256 | #endif | ||
257 | if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) { | ||
258 | void *info = (void *)((node << 12) | cpu); | ||
259 | /* Can happen on preemptive kernel */ | ||
260 | if (get_cpu() == cpu) | ||
261 | write_rdtscp_cb(info); | ||
262 | #ifdef CONFIG_SMP | ||
263 | else { | ||
264 | /* the notifier is unfortunately not executed on the | ||
265 | target CPU */ | ||
266 | smp_call_function_single(cpu,write_rdtscp_cb,info,0,1); | ||
267 | } | ||
268 | #endif | ||
269 | put_cpu(); | ||
270 | } | ||
271 | |||
272 | /* Store cpu number in limit so that it can be loaded quickly | ||
273 | in user space in vgetcpu. | ||
274 | 12 bits for the CPU and 8 bits for the node. */ | ||
275 | d = (unsigned long *)(cpu_gdt(cpu) + GDT_ENTRY_PER_CPU); | ||
276 | *d = 0x0f40000000000ULL; | ||
277 | *d |= cpu; | ||
278 | *d |= (node & 0xf) << 12; | ||
279 | *d |= (node >> 4) << 48; | ||
280 | } | ||
281 | |||
203 | static void __init map_vsyscall(void) | 282 | static void __init map_vsyscall(void) |
204 | { | 283 | { |
205 | extern char __vsyscall_0; | 284 | extern char __vsyscall_0; |
@@ -214,6 +293,7 @@ static int __init vsyscall_init(void) | |||
214 | VSYSCALL_ADDR(__NR_vgettimeofday))); | 293 | VSYSCALL_ADDR(__NR_vgettimeofday))); |
215 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); | 294 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); |
216 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); | 295 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); |
296 | BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu)); | ||
217 | map_vsyscall(); | 297 | map_vsyscall(); |
218 | #ifdef CONFIG_SYSCTL | 298 | #ifdef CONFIG_SYSCTL |
219 | register_sysctl_table(kernel_root_table2, 0); | 299 | register_sysctl_table(kernel_root_table2, 0); |