diff options
| -rw-r--r-- | arch/x86_64/kernel/head.S | 2 | ||||
| -rw-r--r-- | arch/x86_64/kernel/time.c | 13 | ||||
| -rw-r--r-- | arch/x86_64/kernel/vmlinux.lds.S | 3 | ||||
| -rw-r--r-- | arch/x86_64/kernel/vsyscall.c | 84 | ||||
| -rw-r--r-- | include/asm-x86_64/segment.h | 5 | ||||
| -rw-r--r-- | include/asm-x86_64/smp.h | 12 | ||||
| -rw-r--r-- | include/asm-x86_64/vsyscall.h | 9 | ||||
| -rw-r--r-- | include/linux/getcpu.h | 16 |
8 files changed, 130 insertions, 14 deletions
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index c9739ca81d06..505ec4a57506 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S | |||
| @@ -371,7 +371,7 @@ ENTRY(cpu_gdt_table) | |||
| 371 | .quad 0,0 /* TSS */ | 371 | .quad 0,0 /* TSS */ |
| 372 | .quad 0,0 /* LDT */ | 372 | .quad 0,0 /* LDT */ |
| 373 | .quad 0,0,0 /* three TLS descriptors */ | 373 | .quad 0,0,0 /* three TLS descriptors */ |
| 374 | .quad 0 /* unused */ | 374 | .quad 0x0000f40000000000 /* node/CPU stored in limit */ |
| 375 | gdt_end: | 375 | gdt_end: |
| 376 | /* asm/segment.h:GDT_ENTRIES must match this */ | 376 | /* asm/segment.h:GDT_ENTRIES must match this */ |
| 377 | /* This should be a multiple of the cache line size */ | 377 | /* This should be a multiple of the cache line size */ |
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 97b9e46d1992..560ed944dc0e 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c | |||
| @@ -899,12 +899,8 @@ static int __cpuinit | |||
| 899 | time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu) | 899 | time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu) |
| 900 | { | 900 | { |
| 901 | unsigned cpu = (unsigned long) hcpu; | 901 | unsigned cpu = (unsigned long) hcpu; |
| 902 | if (action == CPU_ONLINE && | 902 | if (action == CPU_ONLINE) |
| 903 | cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) { | 903 | vsyscall_set_cpu(cpu); |
| 904 | unsigned p; | ||
| 905 | p = smp_processor_id() | (cpu_to_node(smp_processor_id())<<12); | ||
| 906 | write_rdtscp_aux(p); | ||
| 907 | } | ||
| 908 | return NOTIFY_DONE; | 904 | return NOTIFY_DONE; |
| 909 | } | 905 | } |
| 910 | 906 | ||
| @@ -993,6 +989,11 @@ void time_init_gtod(void) | |||
| 993 | if (unsynchronized_tsc()) | 989 | if (unsynchronized_tsc()) |
| 994 | notsc = 1; | 990 | notsc = 1; |
| 995 | 991 | ||
| 992 | if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) | ||
| 993 | vgetcpu_mode = VGETCPU_RDTSCP; | ||
| 994 | else | ||
| 995 | vgetcpu_mode = VGETCPU_LSL; | ||
| 996 | |||
| 996 | if (vxtime.hpet_address && notsc) { | 997 | if (vxtime.hpet_address && notsc) { |
| 997 | timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; | 998 | timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; |
| 998 | if (hpet_use_timer) | 999 | if (hpet_use_timer) |
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 7c4de31471d4..8d5a5149bb3a 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S | |||
| @@ -99,6 +99,9 @@ SECTIONS | |||
| 99 | .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) } | 99 | .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) } |
| 100 | vxtime = VVIRT(.vxtime); | 100 | vxtime = VVIRT(.vxtime); |
| 101 | 101 | ||
| 102 | .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) } | ||
| 103 | vgetcpu_mode = VVIRT(.vgetcpu_mode); | ||
| 104 | |||
| 102 | .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) } | 105 | .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) } |
| 103 | wall_jiffies = VVIRT(.wall_jiffies); | 106 | wall_jiffies = VVIRT(.wall_jiffies); |
| 104 | 107 | ||
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index f603037df162..902783bc4d53 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #include <linux/seqlock.h> | 26 | #include <linux/seqlock.h> |
| 27 | #include <linux/jiffies.h> | 27 | #include <linux/jiffies.h> |
| 28 | #include <linux/sysctl.h> | 28 | #include <linux/sysctl.h> |
| 29 | #include <linux/getcpu.h> | ||
| 29 | 30 | ||
| 30 | #include <asm/vsyscall.h> | 31 | #include <asm/vsyscall.h> |
| 31 | #include <asm/pgtable.h> | 32 | #include <asm/pgtable.h> |
| @@ -33,11 +34,15 @@ | |||
| 33 | #include <asm/fixmap.h> | 34 | #include <asm/fixmap.h> |
| 34 | #include <asm/errno.h> | 35 | #include <asm/errno.h> |
| 35 | #include <asm/io.h> | 36 | #include <asm/io.h> |
| 37 | #include <asm/segment.h> | ||
| 38 | #include <asm/desc.h> | ||
| 39 | #include <asm/topology.h> | ||
| 36 | 40 | ||
| 37 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) | 41 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) |
| 38 | 42 | ||
| 39 | int __sysctl_vsyscall __section_sysctl_vsyscall = 1; | 43 | int __sysctl_vsyscall __section_sysctl_vsyscall = 1; |
| 40 | seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; | 44 | seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; |
| 45 | int __vgetcpu_mode __section_vgetcpu_mode; | ||
| 41 | 46 | ||
| 42 | #include <asm/unistd.h> | 47 | #include <asm/unistd.h> |
| 43 | 48 | ||
| @@ -127,9 +132,46 @@ time_t __vsyscall(1) vtime(time_t *t) | |||
| 127 | return __xtime.tv_sec; | 132 | return __xtime.tv_sec; |
| 128 | } | 133 | } |
| 129 | 134 | ||
| 130 | long __vsyscall(2) venosys_0(void) | 135 | /* Fast way to get current CPU and node. |
| 136 | This helps to do per node and per CPU caches in user space. | ||
| 137 | The result is not guaranteed without CPU affinity, but usually | ||
| 138 | works out because the scheduler tries to keep a thread on the same | ||
| 139 | CPU. | ||
| 140 | |||
| 141 | tcache must point to a struct getcpu_cache; only t0 and t1 are used here. | ||
| 142 | All arguments can be NULL. */ | ||
| 143 | long __vsyscall(2) | ||
| 144 | vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) | ||
| 131 | { | 145 | { |
| 132 | return -ENOSYS; | 146 | unsigned int dummy, p; |
| 147 | unsigned long j = 0; | ||
| 148 | |||
| 149 | /* Fast cache - only recompute value once per jiffy and avoid | ||
| 150 | relatively costly rdtscp/cpuid otherwise. | ||
| 151 | This works because the scheduler usually keeps the process | ||
| 152 | on the same CPU and this syscall doesn't guarantee its | ||
| 153 | results anyway. | ||
| 154 | We do this here because otherwise user space would do it on | ||
| 155 | its own in a likely inferior way (no access to jiffies). | ||
| 156 | If you don't like it, pass NULL. */ | ||
| 157 | if (tcache && tcache->t0 == (j = __jiffies)) { | ||
| 158 | p = tcache->t1; | ||
| 159 | } else if (__vgetcpu_mode == VGETCPU_RDTSCP) { | ||
| 160 | /* Load per CPU data from RDTSCP */ | ||
| 161 | rdtscp(dummy, dummy, p); | ||
| 162 | } else { | ||
| 163 | /* Load per CPU data from GDT */ | ||
| 164 | asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); | ||
| 165 | } | ||
| 166 | if (tcache) { | ||
| 167 | tcache->t0 = j; | ||
| 168 | tcache->t1 = p; | ||
| 169 | } | ||
| 170 | if (cpu) | ||
| 171 | *cpu = p & 0xfff; | ||
| 172 | if (node) | ||
| 173 | *node = p >> 12; | ||
| 174 | return 0; | ||
| 133 | } | 175 | } |
| 134 | 176 | ||
| 135 | long __vsyscall(3) venosys_1(void) | 177 | long __vsyscall(3) venosys_1(void) |
| @@ -200,6 +242,43 @@ static ctl_table kernel_root_table2[] = { | |||
| 200 | 242 | ||
| 201 | #endif | 243 | #endif |
| 202 | 244 | ||
| 245 | static void __cpuinit write_rdtscp_cb(void *info) | ||
| 246 | { | ||
| 247 | write_rdtscp_aux((unsigned long)info); | ||
| 248 | } | ||
| 249 | |||
| 250 | void __cpuinit vsyscall_set_cpu(int cpu) | ||
| 251 | { | ||
| 252 | unsigned long *d; | ||
| 253 | unsigned long node = 0; | ||
| 254 | #ifdef CONFIG_NUMA | ||
| 255 | node = cpu_to_node[cpu]; | ||
| 256 | #endif | ||
| 257 | if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) { | ||
| 258 | void *info = (void *)((node << 12) | cpu); | ||
| 259 | /* Can happen on preemptive kernel */ | ||
| 260 | if (get_cpu() == cpu) | ||
| 261 | write_rdtscp_cb(info); | ||
| 262 | #ifdef CONFIG_SMP | ||
| 263 | else { | ||
| 264 | /* the notifier is unfortunately not executed on the | ||
| 265 | target CPU */ | ||
| 266 | smp_call_function_single(cpu,write_rdtscp_cb,info,0,1); | ||
| 267 | } | ||
| 268 | #endif | ||
| 269 | put_cpu(); | ||
| 270 | } | ||
| 271 | |||
| 272 | /* Store cpu number in limit so that it can be loaded quickly | ||
| 273 | in user space in vgetcpu. | ||
| 274 | 12 bits for the CPU and 8 bits for the node. */ | ||
| 275 | d = (unsigned long *)(cpu_gdt(cpu) + GDT_ENTRY_PER_CPU); | ||
| 276 | *d = 0x0f40000000000ULL; | ||
| 277 | *d |= cpu; | ||
| 278 | *d |= (node & 0xf) << 12; | ||
| 279 | *d |= (node >> 4) << 48; | ||
| 280 | } | ||
| 281 | |||
| 203 | static void __init map_vsyscall(void) | 282 | static void __init map_vsyscall(void) |
| 204 | { | 283 | { |
| 205 | extern char __vsyscall_0; | 284 | extern char __vsyscall_0; |
| @@ -214,6 +293,7 @@ static int __init vsyscall_init(void) | |||
| 214 | VSYSCALL_ADDR(__NR_vgettimeofday))); | 293 | VSYSCALL_ADDR(__NR_vgettimeofday))); |
| 215 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); | 294 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); |
| 216 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); | 295 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); |
| 296 | BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu)); | ||
| 217 | map_vsyscall(); | 297 | map_vsyscall(); |
| 218 | #ifdef CONFIG_SYSCTL | 298 | #ifdef CONFIG_SYSCTL |
| 219 | register_sysctl_table(kernel_root_table2, 0); | 299 | register_sysctl_table(kernel_root_table2, 0); |
diff --git a/include/asm-x86_64/segment.h b/include/asm-x86_64/segment.h index d4bed33fb32c..334ddcdd8f92 100644 --- a/include/asm-x86_64/segment.h +++ b/include/asm-x86_64/segment.h | |||
| @@ -20,15 +20,16 @@ | |||
| 20 | #define __USER_CS 0x33 /* 6*8+3 */ | 20 | #define __USER_CS 0x33 /* 6*8+3 */ |
| 21 | #define __USER32_DS __USER_DS | 21 | #define __USER32_DS __USER_DS |
| 22 | 22 | ||
| 23 | #define GDT_ENTRY_TLS 1 | ||
| 24 | #define GDT_ENTRY_TSS 8 /* needs two entries */ | 23 | #define GDT_ENTRY_TSS 8 /* needs two entries */ |
| 25 | #define GDT_ENTRY_LDT 10 /* needs two entries */ | 24 | #define GDT_ENTRY_LDT 10 /* needs two entries */ |
| 26 | #define GDT_ENTRY_TLS_MIN 12 | 25 | #define GDT_ENTRY_TLS_MIN 12 |
| 27 | #define GDT_ENTRY_TLS_MAX 14 | 26 | #define GDT_ENTRY_TLS_MAX 14 |
| 28 | /* 15 free */ | ||
| 29 | 27 | ||
| 30 | #define GDT_ENTRY_TLS_ENTRIES 3 | 28 | #define GDT_ENTRY_TLS_ENTRIES 3 |
| 31 | 29 | ||
| 30 | #define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */ | ||
| 31 | #define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3) | ||
| 32 | |||
| 32 | /* TLS indexes for 64bit - hardcoded in arch_prctl */ | 33 | /* TLS indexes for 64bit - hardcoded in arch_prctl */ |
| 33 | #define FS_TLS 0 | 34 | #define FS_TLS 0 |
| 34 | #define GS_TLS 1 | 35 | #define GS_TLS 1 |
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index 6805e1feb300..d61547fd833b 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h | |||
| @@ -133,13 +133,19 @@ static __inline int logical_smp_processor_id(void) | |||
| 133 | /* we don't want to mark this access volatile - bad code generation */ | 133 | /* we don't want to mark this access volatile - bad code generation */ |
| 134 | return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); | 134 | return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); |
| 135 | } | 135 | } |
| 136 | #endif | ||
| 137 | 136 | ||
| 138 | #ifdef CONFIG_SMP | 137 | #ifdef CONFIG_SMP |
| 139 | #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] | 138 | #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] |
| 140 | #else | 139 | #else |
| 141 | #define cpu_physical_id(cpu) boot_cpu_id | 140 | #define cpu_physical_id(cpu) boot_cpu_id |
| 142 | #endif | 141 | static inline int smp_call_function_single(int cpuid, void (*func) (void *info), |
| 143 | 142 | void *info, int retry, int wait) | |
| 143 | { | ||
| 144 | /* Disable interrupts here? */ | ||
| 145 | func(info); | ||
| 146 | return 0; | ||
| 147 | } | ||
| 148 | #endif /* !CONFIG_SMP */ | ||
| 149 | #endif /* !__ASSEMBLY */ | ||
| 144 | #endif | 150 | #endif |
| 145 | 151 | ||
diff --git a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h index 146b24402a5f..2281e9399b96 100644 --- a/include/asm-x86_64/vsyscall.h +++ b/include/asm-x86_64/vsyscall.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | enum vsyscall_num { | 4 | enum vsyscall_num { |
| 5 | __NR_vgettimeofday, | 5 | __NR_vgettimeofday, |
| 6 | __NR_vtime, | 6 | __NR_vtime, |
| 7 | __NR_vgetcpu, | ||
| 7 | }; | 8 | }; |
| 8 | 9 | ||
| 9 | #define VSYSCALL_START (-10UL << 20) | 10 | #define VSYSCALL_START (-10UL << 20) |
| @@ -15,6 +16,7 @@ enum vsyscall_num { | |||
| 15 | #include <linux/seqlock.h> | 16 | #include <linux/seqlock.h> |
| 16 | 17 | ||
| 17 | #define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16))) | 18 | #define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16))) |
| 19 | #define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16))) | ||
| 18 | #define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16))) | 20 | #define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16))) |
| 19 | #define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16))) | 21 | #define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16))) |
| 20 | #define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16))) | 22 | #define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16))) |
| @@ -26,6 +28,9 @@ enum vsyscall_num { | |||
| 26 | #define VXTIME_HPET 2 | 28 | #define VXTIME_HPET 2 |
| 27 | #define VXTIME_PMTMR 3 | 29 | #define VXTIME_PMTMR 3 |
| 28 | 30 | ||
| 31 | #define VGETCPU_RDTSCP 1 | ||
| 32 | #define VGETCPU_LSL 2 | ||
| 33 | |||
| 29 | struct vxtime_data { | 34 | struct vxtime_data { |
| 30 | long hpet_address; /* HPET base address */ | 35 | long hpet_address; /* HPET base address */ |
| 31 | int last; | 36 | int last; |
| @@ -40,6 +45,7 @@ struct vxtime_data { | |||
| 40 | 45 | ||
| 41 | /* vsyscall space (readonly) */ | 46 | /* vsyscall space (readonly) */ |
| 42 | extern struct vxtime_data __vxtime; | 47 | extern struct vxtime_data __vxtime; |
| 48 | extern int __vgetcpu_mode; | ||
| 43 | extern struct timespec __xtime; | 49 | extern struct timespec __xtime; |
| 44 | extern volatile unsigned long __jiffies; | 50 | extern volatile unsigned long __jiffies; |
| 45 | extern unsigned long __wall_jiffies; | 51 | extern unsigned long __wall_jiffies; |
| @@ -48,6 +54,7 @@ extern seqlock_t __xtime_lock; | |||
| 48 | 54 | ||
| 49 | /* kernel space (writeable) */ | 55 | /* kernel space (writeable) */ |
| 50 | extern struct vxtime_data vxtime; | 56 | extern struct vxtime_data vxtime; |
| 57 | extern int vgetcpu_mode; | ||
| 51 | extern unsigned long wall_jiffies; | 58 | extern unsigned long wall_jiffies; |
| 52 | extern struct timezone sys_tz; | 59 | extern struct timezone sys_tz; |
| 53 | extern int sysctl_vsyscall; | 60 | extern int sysctl_vsyscall; |
| @@ -55,6 +62,8 @@ extern seqlock_t xtime_lock; | |||
| 55 | 62 | ||
| 56 | extern int sysctl_vsyscall; | 63 | extern int sysctl_vsyscall; |
| 57 | 64 | ||
| 65 | extern void vsyscall_set_cpu(int cpu); | ||
| 66 | |||
| 58 | #define ARCH_HAVE_XTIME_LOCK 1 | 67 | #define ARCH_HAVE_XTIME_LOCK 1 |
| 59 | 68 | ||
| 60 | #endif /* __KERNEL__ */ | 69 | #endif /* __KERNEL__ */ |
diff --git a/include/linux/getcpu.h b/include/linux/getcpu.h new file mode 100644 index 000000000000..031ed3780e45 --- /dev/null +++ b/include/linux/getcpu.h | |||
| @@ -0,0 +1,16 @@ | |||
| 1 | #ifndef _LINUX_GETCPU_H | ||
| 2 | #define _LINUX_GETCPU_H 1 | ||
| 3 | |||
| 4 | /* Cache for getcpu() to speed it up. Results might be up to a jiffy | ||
| 5 | out of date, but will be faster. | ||
| 6 | User programs should not refer to the contents of this structure. | ||
| 7 | It is only a cache for vgetcpu(). It might change in future kernels. | ||
| 8 | The user program must store this information per thread (__thread). | ||
| 9 | If you want 100% accurate information, pass NULL instead. */ | ||
| 10 | struct getcpu_cache { | ||
| 11 | unsigned long t0; | ||
| 12 | unsigned long t1; | ||
| 13 | unsigned long res[4]; | ||
| 14 | }; | ||
| 15 | |||
| 16 | #endif | ||
