diff options
Diffstat (limited to 'arch/x86_64/kernel/vsyscall.c')
-rw-r--r-- | arch/x86_64/kernel/vsyscall.c | 98 |
1 file changed, 90 insertions, 8 deletions
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index f603037df162..ac48c3857ddb 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/seqlock.h> | 26 | #include <linux/seqlock.h> |
27 | #include <linux/jiffies.h> | 27 | #include <linux/jiffies.h> |
28 | #include <linux/sysctl.h> | 28 | #include <linux/sysctl.h> |
29 | #include <linux/getcpu.h> | ||
29 | 30 | ||
30 | #include <asm/vsyscall.h> | 31 | #include <asm/vsyscall.h> |
31 | #include <asm/pgtable.h> | 32 | #include <asm/pgtable.h> |
@@ -33,11 +34,15 @@ | |||
33 | #include <asm/fixmap.h> | 34 | #include <asm/fixmap.h> |
34 | #include <asm/errno.h> | 35 | #include <asm/errno.h> |
35 | #include <asm/io.h> | 36 | #include <asm/io.h> |
37 | #include <asm/segment.h> | ||
38 | #include <asm/desc.h> | ||
39 | #include <asm/topology.h> | ||
36 | 40 | ||
37 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) | 41 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) |
38 | 42 | ||
39 | int __sysctl_vsyscall __section_sysctl_vsyscall = 1; | 43 | int __sysctl_vsyscall __section_sysctl_vsyscall = 1; |
40 | seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; | 44 | seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; |
45 | int __vgetcpu_mode __section_vgetcpu_mode; | ||
41 | 46 | ||
42 | #include <asm/unistd.h> | 47 | #include <asm/unistd.h> |
43 | 48 | ||
@@ -72,7 +77,8 @@ static __always_inline void do_vgettimeofday(struct timeval * tv) | |||
72 | __vxtime.tsc_quot) >> 32; | 77 | __vxtime.tsc_quot) >> 32; |
73 | /* See comment in x86_64 do_gettimeofday. */ | 78 | /* See comment in x86_64 do_gettimeofday. */ |
74 | } else { | 79 | } else { |
75 | usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) - | 80 | usec += ((readl((void __iomem *) |
81 | fix_to_virt(VSYSCALL_HPET) + 0xf0) - | ||
76 | __vxtime.last) * __vxtime.quot) >> 32; | 82 | __vxtime.last) * __vxtime.quot) >> 32; |
77 | } | 83 | } |
78 | } while (read_seqretry(&__xtime_lock, sequence)); | 84 | } while (read_seqretry(&__xtime_lock, sequence)); |
@@ -127,9 +133,46 @@ time_t __vsyscall(1) vtime(time_t *t) | |||
127 | return __xtime.tv_sec; | 133 | return __xtime.tv_sec; |
128 | } | 134 | } |
129 | 135 | ||
130 | long __vsyscall(2) venosys_0(void) | 136 | /* Fast way to get current CPU and node. |
137 | This helps to do per node and per CPU caches in user space. | ||
138 | The result is not guaranteed without CPU affinity, but usually | ||
139 | works out because the scheduler tries to keep a thread on the same | ||
140 | CPU. | ||
141 | |||
142 | tcache must point to a two element sized long array. | ||
143 | All arguments can be NULL. */ | ||
144 | long __vsyscall(2) | ||
145 | vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) | ||
131 | { | 146 | { |
132 | return -ENOSYS; | 147 | unsigned int dummy, p; |
148 | unsigned long j = 0; | ||
149 | |||
150 | /* Fast cache - only recompute value once per jiffies and avoid | ||
151 | relatively costly rdtscp/cpuid otherwise. | ||
152 | This works because the scheduler usually keeps the process | ||
153 | on the same CPU and this syscall doesn't guarantee its | ||
154 | results anyways. | ||
155 | We do this here because otherwise user space would do it on | ||
156 | its own in a likely inferior way (no access to jiffies). | ||
157 | If you don't like it pass NULL. */ | ||
158 | if (tcache && tcache->t0 == (j = __jiffies)) { | ||
159 | p = tcache->t1; | ||
160 | } else if (__vgetcpu_mode == VGETCPU_RDTSCP) { | ||
161 | /* Load per CPU data from RDTSCP */ | ||
162 | rdtscp(dummy, dummy, p); | ||
163 | } else { | ||
164 | /* Load per CPU data from GDT */ | ||
165 | asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); | ||
166 | } | ||
167 | if (tcache) { | ||
168 | tcache->t0 = j; | ||
169 | tcache->t1 = p; | ||
170 | } | ||
171 | if (cpu) | ||
172 | *cpu = p & 0xfff; | ||
173 | if (node) | ||
174 | *node = p >> 12; | ||
175 | return 0; | ||
133 | } | 176 | } |
134 | 177 | ||
135 | long __vsyscall(3) venosys_1(void) | 178 | long __vsyscall(3) venosys_1(void) |
@@ -149,7 +192,8 @@ static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp, | |||
149 | void __user *buffer, size_t *lenp, loff_t *ppos) | 192 | void __user *buffer, size_t *lenp, loff_t *ppos) |
150 | { | 193 | { |
151 | extern u16 vsysc1, vsysc2; | 194 | extern u16 vsysc1, vsysc2; |
152 | u16 *map1, *map2; | 195 | u16 __iomem *map1; |
196 | u16 __iomem *map2; | ||
153 | int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); | 197 | int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); |
154 | if (!write) | 198 | if (!write) |
155 | return ret; | 199 | return ret; |
@@ -164,11 +208,11 @@ static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp, | |||
164 | goto out; | 208 | goto out; |
165 | } | 209 | } |
166 | if (!sysctl_vsyscall) { | 210 | if (!sysctl_vsyscall) { |
167 | *map1 = SYSCALL; | 211 | writew(SYSCALL, map1); |
168 | *map2 = SYSCALL; | 212 | writew(SYSCALL, map2); |
169 | } else { | 213 | } else { |
170 | *map1 = NOP2; | 214 | writew(NOP2, map1); |
171 | *map2 = NOP2; | 215 | writew(NOP2, map2); |
172 | } | 216 | } |
173 | iounmap(map2); | 217 | iounmap(map2); |
174 | out: | 218 | out: |
@@ -200,6 +244,43 @@ static ctl_table kernel_root_table2[] = { | |||
200 | 244 | ||
201 | #endif | 245 | #endif |
202 | 246 | ||
247 | static void __cpuinit write_rdtscp_cb(void *info) | ||
248 | { | ||
249 | write_rdtscp_aux((unsigned long)info); | ||
250 | } | ||
251 | |||
252 | void __cpuinit vsyscall_set_cpu(int cpu) | ||
253 | { | ||
254 | unsigned long *d; | ||
255 | unsigned long node = 0; | ||
256 | #ifdef CONFIG_NUMA | ||
257 | node = cpu_to_node[cpu]; | ||
258 | #endif | ||
259 | if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) { | ||
260 | void *info = (void *)((node << 12) | cpu); | ||
261 | /* Can happen on preemptive kernel */ | ||
262 | if (get_cpu() == cpu) | ||
263 | write_rdtscp_cb(info); | ||
264 | #ifdef CONFIG_SMP | ||
265 | else { | ||
266 | /* the notifier is unfortunately not executed on the | ||
267 | target CPU */ | ||
268 | smp_call_function_single(cpu,write_rdtscp_cb,info,0,1); | ||
269 | } | ||
270 | #endif | ||
271 | put_cpu(); | ||
272 | } | ||
273 | |||
274 | /* Store cpu number in limit so that it can be loaded quickly | ||
275 | in user space in vgetcpu. | ||
276 | 12 bits for the CPU and 8 bits for the node. */ | ||
277 | d = (unsigned long *)(cpu_gdt(cpu) + GDT_ENTRY_PER_CPU); | ||
278 | *d = 0x0f40000000000ULL; | ||
279 | *d |= cpu; | ||
280 | *d |= (node & 0xf) << 12; | ||
281 | *d |= (node >> 4) << 48; | ||
282 | } | ||
283 | |||
203 | static void __init map_vsyscall(void) | 284 | static void __init map_vsyscall(void) |
204 | { | 285 | { |
205 | extern char __vsyscall_0; | 286 | extern char __vsyscall_0; |
@@ -214,6 +295,7 @@ static int __init vsyscall_init(void) | |||
214 | VSYSCALL_ADDR(__NR_vgettimeofday))); | 295 | VSYSCALL_ADDR(__NR_vgettimeofday))); |
215 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); | 296 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); |
216 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); | 297 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); |
298 | BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu)); | ||
217 | map_vsyscall(); | 299 | map_vsyscall(); |
218 | #ifdef CONFIG_SYSCTL | 300 | #ifdef CONFIG_SYSCTL |
219 | register_sysctl_table(kernel_root_table2, 0); | 301 | register_sysctl_table(kernel_root_table2, 0); |