Diffstat (limited to 'arch/x86/kernel/process_64.c')
-rw-r--r--	arch/x86/kernel/process_64.c	197
1 file changed, 98 insertions(+), 99 deletions(-)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 238193822e23..4c4d8b3f046e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -3,7 +3,7 @@
  *
  * Pentium III FXSR, SSE support
  * Gareth Hughes <gareth@valinux.com>, May 2000
  *
  * X86-64 port
  * Andi Kleen.
  *
@@ -19,19 +19,19 @@
 #include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
+#include <linux/fs.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/fs.h>
 #include <linux/elfcore.h>
 #include <linux/smp.h>
 #include <linux/slab.h>
 #include <linux/user.h>
-#include <linux/module.h>
 #include <linux/a.out.h>
 #include <linux/interrupt.h>
+#include <linux/utsname.h>
 #include <linux/delay.h>
+#include <linux/module.h>
 #include <linux/ptrace.h>
-#include <linux/utsname.h>
 #include <linux/random.h>
 #include <linux/notifier.h>
 #include <linux/kprobes.h>
@@ -129,54 +129,12 @@ static void default_idle(void)
  * to poll the ->need_resched flag instead of waiting for the
  * cross-CPU IPI to arrive. Use this option with caution.
  */
-static void poll_idle (void)
+static void poll_idle(void)
 {
 	local_irq_enable();
 	cpu_relax();
 }
 
-static void do_nothing(void *unused)
-{
-}
-
-void cpu_idle_wait(void)
-{
-	unsigned int cpu, this_cpu = get_cpu();
-	cpumask_t map, tmp = current->cpus_allowed;
-
-	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
-	put_cpu();
-
-	cpus_clear(map);
-	for_each_online_cpu(cpu) {
-		per_cpu(cpu_idle_state, cpu) = 1;
-		cpu_set(cpu, map);
-	}
-
-	__get_cpu_var(cpu_idle_state) = 0;
-
-	wmb();
-	do {
-		ssleep(1);
-		for_each_online_cpu(cpu) {
-			if (cpu_isset(cpu, map) &&
-					!per_cpu(cpu_idle_state, cpu))
-				cpu_clear(cpu, map);
-		}
-		cpus_and(map, map, cpu_online_map);
-		/*
-		 * We waited 1 sec, if a CPU still did not call idle
-		 * it may be because it is in idle and not waking up
-		 * because it has nothing to do.
-		 * Give all the remaining CPUS a kick.
-		 */
-		smp_call_function_mask(map, do_nothing, 0, 0);
-	} while (!cpus_empty(map));
-
-	set_cpus_allowed(current, tmp);
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
 #ifdef CONFIG_HOTPLUG_CPU
 DECLARE_PER_CPU(int, cpu_state);
 
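Note: the hunk above only reflows poll_idle()'s signature; the real change is the relocation of cpu_idle_wait() (re-added below cpu_idle() in the next hunk). The comment explains the idea behind idle=poll: the CPU never halts, so a remote CPU's need_resched write is seen by polling rather than by a wake-up IPI. A minimal illustrative sketch of the overall polling-idle pattern (the enclosing loop lives in cpu_idle() and is not part of this hunk):

	static void poll_idle_sketch(void)
	{
		local_irq_enable();		/* keep interrupts enabled while spinning */
		while (!need_resched())		/* poll the flag instead of executing hlt */
			cpu_relax();		/* PAUSE hint; eases SMT sibling contention */
	}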
@@ -247,6 +205,47 @@ void cpu_idle(void)
 	}
 }
 
+static void do_nothing(void *unused)
+{
+}
+
+void cpu_idle_wait(void)
+{
+	unsigned int cpu, this_cpu = get_cpu();
+	cpumask_t map, tmp = current->cpus_allowed;
+
+	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
+	put_cpu();
+
+	cpus_clear(map);
+	for_each_online_cpu(cpu) {
+		per_cpu(cpu_idle_state, cpu) = 1;
+		cpu_set(cpu, map);
+	}
+
+	__get_cpu_var(cpu_idle_state) = 0;
+
+	wmb();
+	do {
+		ssleep(1);
+		for_each_online_cpu(cpu) {
+			if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+				cpu_clear(cpu, map);
+		}
+		cpus_and(map, map, cpu_online_map);
+		/*
+		 * We waited 1 sec, if a CPU still did not call idle
+		 * it may be because it is in idle and not waking up
+		 * because it has nothing to do.
+		 * Give all the remaining CPUS a kick.
+		 */
+		smp_call_function_mask(map, do_nothing, 0, 0);
+	} while (!cpus_empty(map));
+
+	set_cpus_allowed(current, tmp);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
 /*
  * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
  * which can obviate IPI to trigger checking of need_resched.
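Note: cpu_idle_wait() is a rendezvous. It flags every online CPU's cpu_idle_state, waits for each idle loop to clear its own flag, and after one second kicks any stragglers with a do_nothing IPI so that a CPU parked in mwait/hlt falls back through its idle routine. The exported symbol is typically used by code that swaps the idle handler; a hedged usage sketch (the helper name is assumed for illustration, not part of this patch):

	void set_idle_handler(void (*new_idle)(void))
	{
		pm_idle = new_idle;	/* publish the new idle routine */
		cpu_idle_wait();	/* ensure no CPU can still be inside the old one */
	}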
@@ -300,7 +299,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 	}
 }
 
-static int __init idle_setup (char *str)
+static int __init idle_setup(char *str)
 {
 	if (!strcmp(str, "poll")) {
 		printk("using polling idle threads.\n");
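Note: idle_setup() is wired to the "idle=" kernel command-line parameter via the early_param() line visible in the next hunk, so booting with

	idle=poll

selects the polling loop shown earlier on every CPU; the printk in this hunk confirms the choice at boot.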
@@ -315,13 +314,13 @@ static int __init idle_setup (char *str)
 }
 early_param("idle", idle_setup);
 
 /* Prints also some state that isn't saved in the pt_regs */
 void __show_regs(struct pt_regs * regs)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
 	unsigned long d0, d1, d2, d3, d6, d7;
-	unsigned int fsindex,gsindex;
-	unsigned int ds,cs,es;
+	unsigned int fsindex, gsindex;
+	unsigned int ds, cs, es;
 
 	printk("\n");
 	print_modules();
@@ -390,7 +389,7 @@ void exit_thread(void)
 	struct task_struct *me = current;
 	struct thread_struct *t = &me->thread;
 
 	if (me->thread.io_bitmap_ptr) {
 		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
 
 		kfree(t->io_bitmap_ptr);
@@ -426,7 +425,7 @@ void flush_thread(void)
 	tsk->thread.debugreg3 = 0;
 	tsk->thread.debugreg6 = 0;
 	tsk->thread.debugreg7 = 0;
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 	/*
 	 * Forget coprocessor state..
 	 */
@@ -449,7 +448,7 @@ void release_thread(struct task_struct *dead_task)
 
 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
 {
 	struct user_desc ud = {
 		.base_addr = addr,
 		.limit = 0xfffff,
 		.seg_32bit = 1,
@@ -458,8 +457,8 @@ static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
 	};
 	struct n_desc_struct *desc = (void *)t->thread.tls_array;
 	desc += tls;
 	desc->a = LDT_entry_a(&ud);
 	desc->b = LDT_entry_b(&ud);
 }
 
 static inline u32 read_32bit_tls(struct task_struct *t, int tls)
@@ -516,7 +515,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
 		       IO_BITMAP_BYTES);
 		set_tsk_thread_flag(p, TIF_IO_BITMAP);
 	}
 
 	/*
 	 * Set a new TLS for the child thread?
@@ -544,11 +543,29 @@ out:
 /*
  * This special macro can be used to load a debugging register
  */
-#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
+#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
+
+/*
+ * Capture the user space registers if the task is not running (in user space)
+ */
+int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
+{
+	struct pt_regs *pp, ptregs;
+
+	pp = task_pt_regs(tsk);
+
+	ptregs = *pp;
+	ptregs.cs &= 0xffff;
+	ptregs.ss &= 0xffff;
+
+	elf_core_copy_regs(regs, &ptregs);
+
+	return 1;
+}
 
 static inline void __switch_to_xtra(struct task_struct *prev_p,
 				    struct task_struct *next_p,
 				    struct tss_struct *tss)
 {
 	struct thread_struct *prev, *next;
 
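Note: dump_task_regs(), moved up next to loaddebug() by this hunk, feeds the ELF core-dump path: it copies the stopped task's saved pt_regs, masks cs/ss down to their 16-bit selector values (their pt_regs slots are 64 bits wide), and hands the result to elf_core_copy_regs(). A hedged sketch of a caller (the helper name is assumed for illustration):

	static int fill_thread_regs(struct task_struct *t, elf_gregset_t *gregs)
	{
		/* dump_task_regs() returns 1 on success in this kernel */
		if (!dump_task_regs(t, gregs))
			return -EIO;
		return 0;
	}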
@@ -586,7 +603,7 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
 /*
  * switch_to(x,y) should switch tasks from x to y.
  *
  * This could still be optimized:
  * - fold all the options into a flag word and test it with a single test.
  * - could test fs/gs bitsliced
  *
@@ -597,7 +614,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	struct thread_struct *prev = &prev_p->thread,
 				 *next = &next_p->thread;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 
 	/* we're going to use this soon, after a few expensive things */
@@ -700,7 +717,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 /*
  * sys_execve() executes a new program.
  */
 asmlinkage
 long sys_execve(char __user *name, char __user * __user *argv,
 		char __user * __user *envp, struct pt_regs regs)
 {
@@ -721,12 +738,12 @@ void set_personality_64bit(void)
 	/* inherit personality from parent */
 
 	/* Make sure to be in 64bit mode */
 	clear_thread_flag(TIF_IA32);
 
 	/* TBD: overwrites user setup. Should have two bits.
 	   But 64bit processes have always behaved this way,
 	   so it's not too bad. The main problem is just that
 	   32bit childs are affected again. */
 	current->personality &= ~READ_IMPLIES_EXEC;
 }
 
@@ -819,19 +836,19 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		/* Not strictly needed for fs, but do it for symmetry
 		   with gs */
 		if (addr >= TASK_SIZE_OF(task))
 			return -EPERM;
 		cpu = get_cpu();
 		/* handle small bases via the GDT because that's faster to
 		   switch. */
 		if (addr <= 0xffffffff) {
 			set_32bit_tls(task, FS_TLS, addr);
 			if (doit) {
 				load_TLS(&task->thread, cpu);
 				asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
 			}
 			task->thread.fsindex = FS_TLS_SEL;
 			task->thread.fs = 0;
 		} else {
 			task->thread.fsindex = 0;
 			task->thread.fs = addr;
 			if (doit) {
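Note: the ARCH_SET_FS path above splits on the base address: bases that fit in 32 bits go through a GDT TLS slot (cheaper to reload on context switch), while larger bases are written via the MSR. From userspace this interface is reached through the arch_prctl() syscall; a hedged example for this era's kernels (error handling elided, direct syscall assumed since glibc had no wrapper):

	#include <asm/prctl.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static void set_fs_base(unsigned long base)
	{
		/* small bases land in the GDT, large ones in MSR_FS_BASE */
		syscall(SYS_arch_prctl, ARCH_SET_FS, base);
	}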
@@ -843,24 +860,24 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		}
 		put_cpu();
 		break;
 	case ARCH_GET_FS: {
 		unsigned long base;
 		if (task->thread.fsindex == FS_TLS_SEL)
 			base = read_32bit_tls(task, FS_TLS);
 		else if (doit)
 			rdmsrl(MSR_FS_BASE, base);
 		else
 			base = task->thread.fs;
 		ret = put_user(base, (unsigned long __user *)addr);
 		break;
 	}
 	case ARCH_GET_GS: {
 		unsigned long base;
 		unsigned gsindex;
 		if (task->thread.gsindex == GS_TLS_SEL)
 			base = read_32bit_tls(task, GS_TLS);
 		else if (doit) {
 			asm("movl %%gs,%0" : "=r" (gsindex));
 			if (gsindex)
 				rdmsrl(MSR_KERNEL_GS_BASE, base);
 			else
@@ -868,39 +885,21 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		}
 		else
 			base = task->thread.gs;
 		ret = put_user(base, (unsigned long __user *)addr);
 		break;
 	}
 
 	default:
 		ret = -EINVAL;
 		break;
 	}
 
 	return ret;
 }
 
 long sys_arch_prctl(int code, unsigned long addr)
 {
 	return do_arch_prctl(current, code, addr);
-}
-
-/*
- * Capture the user space registers if the task is not running (in user space)
- */
-int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
-{
-	struct pt_regs *pp, ptregs;
-
-	pp = task_pt_regs(tsk);
-
-	ptregs = *pp;
-	ptregs.cs &= 0xffff;
-	ptregs.ss &= 0xffff;
-
-	elf_core_copy_regs(regs, &ptregs);
-
-	return 1;
-}
 }
 
 unsigned long arch_align_stack(unsigned long sp)