aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- arch/x86/kernel/Makefile | 2
-rw-r--r-- arch/x86/kernel/cpu/common_64.c | 277
-rw-r--r-- arch/x86/kernel/setup64.c | 287
3 files changed, 277 insertions, 289 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9c9ce75e3ae4..0a1987b4acc4 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -22,7 +22,7 @@ obj-y += setup_$(BITS).o i8259.o irqinit_$(BITS).o setup.o
22obj-$(CONFIG_X86_32) += probe_roms_32.o 22obj-$(CONFIG_X86_32) += probe_roms_32.o
23obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 23obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
24obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o 24obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
25obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o 25obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
26obj-y += bootflag.o e820.o 26obj-y += bootflag.o e820.o
27obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o 27obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
28obj-y += alternative.o i8253.o pci-nommu.o 28obj-y += alternative.o i8253.o pci-nommu.o
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index 48ba79961583..9fb5b7caaa89 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -1,10 +1,17 @@
1#include <linux/init.h> 1#include <linux/init.h>
2#include <linux/kernel.h>
3#include <linux/sched.h>
4#include <linux/string.h>
5#include <linux/bootmem.h>
6#include <linux/bitops.h>
7#include <linux/module.h>
8#include <linux/kgdb.h>
9#include <linux/topology.h>
2#include <linux/string.h> 10#include <linux/string.h>
3#include <linux/delay.h> 11#include <linux/delay.h>
4#include <linux/smp.h> 12#include <linux/smp.h>
5#include <linux/module.h> 13#include <linux/module.h>
6#include <linux/percpu.h> 14#include <linux/percpu.h>
7#include <linux/bootmem.h>
8#include <asm/processor.h> 15#include <asm/processor.h>
9#include <asm/i387.h> 16#include <asm/i387.h>
10#include <asm/msr.h> 17#include <asm/msr.h>
@@ -19,6 +26,15 @@
19#include <asm/apic.h> 26#include <asm/apic.h>
20#include <mach_apic.h> 27#include <mach_apic.h>
21#endif 28#endif
29#include <asm/pda.h>
30#include <asm/pgtable.h>
31#include <asm/processor.h>
32#include <asm/desc.h>
33#include <asm/atomic.h>
34#include <asm/proto.h>
35#include <asm/sections.h>
36#include <asm/setup.h>
37#include <asm/genapic.h>
22 38
23#include "cpu.h" 39#include "cpu.h"
24 40
@@ -404,3 +420,262 @@ static __init int setup_disablecpuid(char *arg)
404 return 1; 420 return 1;
405} 421}
406__setup("clearcpuid=", setup_disablecpuid); 422__setup("clearcpuid=", setup_disablecpuid);
423
/*
 * boot_params lives in __initdata unless CONFIG_DEBUG_BOOT_PARAMS is
 * defined, in which case it is an ordinary global.
 */
424#ifndef CONFIG_DEBUG_BOOT_PARAMS
425struct boot_params __initdata boot_params;
426#else
427struct boot_params boot_params;
428#endif
429
/*
 * Mask of CPUs that have been through cpu_init(); cpu_init() does a
 * test-and-set on its own bit and panics if the bit was already set.
 */
430cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
431
/* Table of per-CPU PDA pointers, exported to modules. */
432struct x8664_pda **_cpu_pda __read_mostly;
433EXPORT_SYMBOL(_cpu_pda);
434
/*
 * IDT descriptor (limit 256 * 16 - 1, base idt_table) that cpu_init()
 * hands to load_idt().
 */
435struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
436
/* IRQ stack storage that pda_init() assigns to CPU 0. */
437char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
438
/*
 * Starts with every bit set; _PAGE_NX is cleared by "noexec=off" and by
 * check_efer().
 */
439unsigned long __supported_pte_mask __read_mostly = ~0UL;
440EXPORT_SYMBOL_GPL(__supported_pte_mask);
441
/* Set to 1 by "noexec=off", reset to 0 by "noexec=on"; read by check_efer(). */
442static int do_not_nx __cpuinitdata;
443
444/* noexec=on|off
445Control non executable mappings for 64bit processes.
446
447on Enable(default)
448off Disable
449*/
450static int __init nonx_setup(char *str)
451{
452 if (!str)
453 return -EINVAL;
454 if (!strncmp(str, "on", 2)) {
455 __supported_pte_mask |= _PAGE_NX;
456 do_not_nx = 0;
457 } else if (!strncmp(str, "off", 3)) {
458 do_not_nx = 1;
459 __supported_pte_mask &= ~_PAGE_NX;
460 }
461 return 0;
462}
463early_param("noexec", nonx_setup);
464
465int force_personality32;
466
467/* noexec32=on|off
468Control non executable heap for 32bit processes.
469To control the stack too use noexec=off
470
471on PROT_READ does not imply PROT_EXEC for 32bit processes (default)
472off PROT_READ implies PROT_EXEC
473*/
474static int __init nonx32_setup(char *str)
475{
476 if (!strcmp(str, "on"))
477 force_personality32 &= ~READ_IMPLIES_EXEC;
478 else if (!strcmp(str, "off"))
479 force_personality32 |= READ_IMPLIES_EXEC;
480 return 1;
481}
482__setup("noexec32=", nonx32_setup);
483
/*
 * pda_init() - set up the per-CPU data area (PDA) of @cpu.
 * @cpu: number of the CPU whose PDA, obtained through cpu_pda(), is filled in.
 *
 * Loads 0 into the %fs and %gs selectors, writes the PDA address to
 * MSR_GS_BASE, fills in the basic PDA fields and assigns an IRQ stack:
 * boot_cpu_stack for CPU 0, freshly allocated pages for every other CPU.
 * Panics if that allocation fails.
 */
484void pda_init(int cpu)
485{
486 struct x8664_pda *pda = cpu_pda(cpu);
487
488 /* Set up data that may be needed in __get_free_pages early */
489 asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
490 /* Memory clobbers used to order PDA accesses */
491 mb();
492 wrmsrl(MSR_GS_BASE, pda);
493 mb();
494
495 pda->cpunumber = cpu;
496 pda->irqcount = -1;
497 pda->kernelstack = (unsigned long)stack_thread_info() -
498 PDA_STACKOFFSET + THREAD_SIZE;
499 pda->active_mm = &init_mm;
500 pda->mmu_state = 0;
501
502 if (cpu == 0) {
503 /* others are initialized in smpboot.c */
504 pda->pcurrent = &init_task;
505 pda->irqstackptr = boot_cpu_stack;
506 } else {
507 pda->irqstackptr = (char *)
508 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
509 if (!pda->irqstackptr)
510 panic("cannot allocate irqstack for cpu %d", cpu);
511
/* Only fill in the node when it is still 0 and a real node is known. */
512 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
513 pda->nodenumber = cpu_to_node(cpu);
514 }
515
/* Leave the pointer IRQSTACKSIZE-64 bytes above the base of the stack area. */
516 pda->irqstackptr += IRQSTACKSIZE-64;
517}
518
/*
 * Exception stack storage for CPU 0, placed in .bss.page_aligned.
 * cpu_init() walks through it one stack at a time; other CPUs allocate
 * their stacks with __get_free_pages() instead.
 */
519char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
520 DEBUG_STKSZ]
521__attribute__((section(".bss.page_aligned")));
522
/* Entry point that syscall_init() writes to MSR_CSTAR; defined elsewhere. */
523extern asmlinkage void ignore_sysret(void);
524
/*
 * syscall_init() - program the MSRs used for system call entry.
 *
 * Writes MSR_STAR (__USER32_CS shifted to bit 48, __KERNEL_CS shifted to
 * bit 32), MSR_LSTAR (system_call) and MSR_CSTAR (ignore_sysret), then
 * MSR_SYSCALL_MASK. With CONFIG_IA32_EMULATION, syscall32_cpu_init() runs
 * after the CSTAR write.
 * NOTE(review): presumably syscall32_cpu_init() installs the real compat
 * entry point over ignore_sysret - confirm against its definition.
 */
525/* May not be marked __init: used by software suspend */
526void syscall_init(void)
527{
528 /*
529 * LSTAR and STAR live in a bit strange symbiosis.
530 * They both write to the same internal register. STAR allows setting
531 * CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
532 */
533 wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
534 wrmsrl(MSR_LSTAR, system_call);
535 wrmsrl(MSR_CSTAR, ignore_sysret);
536
537#ifdef CONFIG_IA32_EMULATION
538 syscall32_cpu_init();
539#endif
540
541 /* Flags to clear on syscall */
542 wrmsrl(MSR_SYSCALL_MASK,
543 X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
544}
545
546void __cpuinit check_efer(void)
547{
548 unsigned long efer;
549
550 rdmsrl(MSR_EFER, efer);
551 if (!(efer & EFER_NX) || do_not_nx)
552 __supported_pte_mask &= ~_PAGE_NX;
553}
554
/* Flags value captured by cpu_init() through raw_local_save_flags(). */
555unsigned long kernel_eflags;
556
557/*
558 * Copies of the original ist values from the tss are only accessed during
559 * debugging, no special alignment required.
 * cpu_init() stores each IST entry here at the same time as in the TSS.
560 */
561DEFINE_PER_CPU(struct orig_ist, orig_ist);
562
563/*
564 * cpu_init() initializes state that is per-CPU. Some data is already
565 * initialized (naturally) in the bootstrap process, such as the GDT
566 * and IDT. We reload them nevertheless, this function acts as a
567 * 'CPU state barrier', nothing should get across.
568 * A lot of state is already set up in PDA init.
 *
 * In order, the function:
 *  - runs pda_init() on every CPU except 0 (CPU 0 instead picks up
 *    boot_exception_stacks as its exception stack storage);
 *  - marks the CPU in cpu_initialized, panicking if it was already marked;
 *  - clears VME, PVI, TSD and DE in CR4;
 *  - reloads GDT and IDT, zeroes the current task's TLS array, calls
 *    syscall_init(), zeroes MSR_FS_BASE and MSR_KERNEL_GS_BASE and calls
 *    check_efer();
 *  - fills the IST entries of the per-CPU TSS and sets the I/O bitmap to
 *    all ones;
 *  - attaches init_mm as the active mm and loads sp0, TSS, TR and LDT;
 *  - clears the debug registers (unless kgdb took them over), calls
 *    fpu_init(), saves the flags into kernel_eflags and, on UV systems,
 *    calls uv_cpu_init().
569 */
570void __cpuinit cpu_init(void)
571{
572 int cpu = stack_smp_processor_id();
573 struct tss_struct *t = &per_cpu(init_tss, cpu);
574 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
575 unsigned long v;
576 char *estacks = NULL;
577 struct task_struct *me;
578 int i;
579
580 /* CPU 0 is initialised in head64.c */
581 if (cpu != 0)
582 pda_init(cpu);
583 else
584 estacks = boot_exception_stacks;
585
586 me = current;
587
588 if (cpu_test_and_set(cpu, cpu_initialized))
589 panic("CPU#%d already initialized!\n", cpu);
590
591 printk(KERN_INFO "Initializing CPU#%d\n", cpu);
592
593 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
594
595 /*
596 * Initialize the per-CPU GDT with the boot GDT,
597 * and set up the GDT descriptor:
598 */
599
600 switch_to_new_gdt();
601 load_idt((const struct desc_ptr *)&idt_descr);
602
603 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
604 syscall_init();
605
606 wrmsrl(MSR_FS_BASE, 0);
607 wrmsrl(MSR_KERNEL_GS_BASE, 0);
608 barrier();
609
610 check_efer();
611
612 /*
613 * set up and load the per-CPU TSS
614 */
615 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
/* Every stack uses EXCEPTION_STACK_ORDER except the debug stack. */
616 static const unsigned int order[N_EXCEPTION_STACKS] = {
617 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
618 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
619 };
/* Non-boot CPUs allocate each stack; CPU 0 keeps walking its static area. */
620 if (cpu) {
621 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
622 if (!estacks)
/* NOTE(review): v is unsigned long but is printed with %ld. */
623 panic("Cannot allocate exception stack %ld %d\n",
624 v, cpu);
625 }
/* Move to the end of this stack; that end address is what the IST records. */
626 estacks += PAGE_SIZE << order[v];
627 orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks;
628 }
629
630 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
631 /*
632 * <= is required because the CPU will access up to
633 * 8 bits beyond the end of the IO permission bitmap.
634 */
635 for (i = 0; i <= IO_BITMAP_LONGS; i++)
636 t->io_bitmap[i] = ~0UL;
637
/* Take a reference on init_mm and run on it as a lazy-TLB mm. */
638 atomic_inc(&init_mm.mm_count);
639 me->active_mm = &init_mm;
/* The task executing cpu_init() must not have an mm of its own. */
640 if (me->mm)
641 BUG();
642 enter_lazy_tlb(&init_mm, me);
643
644 load_sp0(t, &current->thread);
645 set_tss_desc(cpu, t);
646 load_TR_desc();
647 load_LDT(&init_mm.context);
648
649#ifdef CONFIG_KGDB
650 /*
651 * If the kgdb is connected no debug regs should be altered. This
652 * is only applicable when KGDB and a KGDB I/O module are built
653 * into the kernel and you are using early debugging with
654 * kgdbwait. KGDB will control the kernel HW breakpoint registers.
655 */
656 if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
657 arch_kgdb_ops.correct_hw_break();
658 else {
659#endif
660 /*
661 * Clear all 6 debug registers:
662 */
663
664 set_debugreg(0UL, 0);
665 set_debugreg(0UL, 1);
666 set_debugreg(0UL, 2);
667 set_debugreg(0UL, 3);
668 set_debugreg(0UL, 6);
669 set_debugreg(0UL, 7);
670#ifdef CONFIG_KGDB
671 /* If the kgdb is connected no debug regs should be altered. */
672 }
673#endif
674
675 fpu_init();
676
677 raw_local_save_flags(kernel_eflags);
678
679 if (is_uv_system())
680 uv_cpu_init();
681}
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
deleted file mode 100644
index 151d3155ddf6..000000000000
--- a/arch/x86/kernel/setup64.c
+++ /dev/null
@@ -1,287 +0,0 @@
1/*
2 * X86-64 specific CPU setup.
3 * Copyright (C) 1995 Linus Torvalds
4 * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen.
5 * See setup.c for older changelog.
6 */
7#include <linux/init.h>
8#include <linux/kernel.h>
9#include <linux/sched.h>
10#include <linux/string.h>
11#include <linux/bootmem.h>
12#include <linux/bitops.h>
13#include <linux/module.h>
14#include <linux/kgdb.h>
15#include <linux/topology.h>
16#include <asm/pda.h>
17#include <asm/pgtable.h>
18#include <asm/processor.h>
19#include <asm/desc.h>
20#include <asm/atomic.h>
21#include <asm/mmu_context.h>
22#include <asm/smp.h>
23#include <asm/i387.h>
24#include <asm/percpu.h>
25#include <asm/proto.h>
26#include <asm/sections.h>
27#include <asm/setup.h>
28#include <asm/genapic.h>
29
/*
 * boot_params lives in __initdata unless CONFIG_DEBUG_BOOT_PARAMS is
 * defined, in which case it is an ordinary global.
 */
30#ifndef CONFIG_DEBUG_BOOT_PARAMS
31struct boot_params __initdata boot_params;
32#else
33struct boot_params boot_params;
34#endif
35
/*
 * Mask of CPUs that have been through cpu_init(); cpu_init() does a
 * test-and-set on its own bit and panics if the bit was already set.
 */
36cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
37
/* Table of per-CPU PDA pointers, exported to modules. */
38struct x8664_pda **_cpu_pda __read_mostly;
39EXPORT_SYMBOL(_cpu_pda);
40
/*
 * IDT descriptor (limit 256 * 16 - 1, base idt_table) that cpu_init()
 * hands to load_idt().
 */
41struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
42
/* IRQ stack storage that pda_init() assigns to CPU 0. */
43char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
44
/*
 * Starts with every bit set; _PAGE_NX is cleared by "noexec=off" and by
 * check_efer().
 */
45unsigned long __supported_pte_mask __read_mostly = ~0UL;
46EXPORT_SYMBOL_GPL(__supported_pte_mask);
47
/* Set to 1 by "noexec=off", reset to 0 by "noexec=on"; read by check_efer(). */
48static int do_not_nx __cpuinitdata = 0;
49
50/* noexec=on|off
51Control non executable mappings for 64bit processes.
52
53on Enable(default)
54off Disable
55*/
56static int __init nonx_setup(char *str)
57{
58 if (!str)
59 return -EINVAL;
60 if (!strncmp(str, "on", 2)) {
61 __supported_pte_mask |= _PAGE_NX;
62 do_not_nx = 0;
63 } else if (!strncmp(str, "off", 3)) {
64 do_not_nx = 1;
65 __supported_pte_mask &= ~_PAGE_NX;
66 }
67 return 0;
68}
69early_param("noexec", nonx_setup);
70
71int force_personality32 = 0;
72
73/* noexec32=on|off
74Control non executable heap for 32bit processes.
75To control the stack too use noexec=off
76
77on PROT_READ does not imply PROT_EXEC for 32bit processes (default)
78off PROT_READ implies PROT_EXEC
79*/
80static int __init nonx32_setup(char *str)
81{
82 if (!strcmp(str, "on"))
83 force_personality32 &= ~READ_IMPLIES_EXEC;
84 else if (!strcmp(str, "off"))
85 force_personality32 |= READ_IMPLIES_EXEC;
86 return 1;
87}
88__setup("noexec32=", nonx32_setup);
89
/*
 * pda_init() - set up the per-CPU data area (PDA) of @cpu.
 * @cpu: number of the CPU whose PDA, obtained through cpu_pda(), is filled in.
 *
 * Loads 0 into the %fs and %gs selectors, writes the PDA address to
 * MSR_GS_BASE, fills in the basic PDA fields and assigns an IRQ stack:
 * boot_cpu_stack for CPU 0, freshly allocated pages for every other CPU.
 * Panics if that allocation fails.
 */
90void pda_init(int cpu)
91{
92 struct x8664_pda *pda = cpu_pda(cpu);
93
94 /* Set up data that may be needed in __get_free_pages early */
95 asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
96 /* Memory clobbers used to order PDA accesses */
97 mb();
98 wrmsrl(MSR_GS_BASE, pda);
99 mb();
100
101 pda->cpunumber = cpu;
102 pda->irqcount = -1;
103 pda->kernelstack =
104 (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE;
105 pda->active_mm = &init_mm;
106 pda->mmu_state = 0;
107
108 if (cpu == 0) {
109 /* others are initialized in smpboot.c */
110 pda->pcurrent = &init_task;
111 pda->irqstackptr = boot_cpu_stack;
112 } else {
113 pda->irqstackptr = (char *)
114 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
115 if (!pda->irqstackptr)
116 panic("cannot allocate irqstack for cpu %d", cpu);
117
/* Only fill in the node when it is still 0 and a real node is known. */
118 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
119 pda->nodenumber = cpu_to_node(cpu);
120 }
121
/* Leave the pointer IRQSTACKSIZE-64 bytes above the base of the stack area. */
122 pda->irqstackptr += IRQSTACKSIZE-64;
123}
124
/*
 * Exception stack storage for CPU 0, placed in .bss.page_aligned.
 * cpu_init() walks through it one stack at a time; other CPUs allocate
 * their stacks with __get_free_pages() instead.
 */
125char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]
126__attribute__((section(".bss.page_aligned")));
127
/* Entry point that syscall_init() writes to MSR_CSTAR; defined elsewhere. */
128extern asmlinkage void ignore_sysret(void);
129
/*
 * syscall_init() - program the MSRs used for system call entry.
 *
 * Writes MSR_STAR (__USER32_CS shifted to bit 48, __KERNEL_CS shifted to
 * bit 32), MSR_LSTAR (system_call) and MSR_CSTAR (ignore_sysret), then
 * MSR_SYSCALL_MASK. With CONFIG_IA32_EMULATION, syscall32_cpu_init() runs
 * after the CSTAR write.
 * NOTE(review): presumably syscall32_cpu_init() installs the real compat
 * entry point over ignore_sysret - confirm against its definition.
 */
130/* May not be marked __init: used by software suspend */
131void syscall_init(void)
132{
133 /*
134 * LSTAR and STAR live in a bit strange symbiosis.
135 * They both write to the same internal register. STAR allows setting CS/DS
136 * but only a 32bit target. LSTAR sets the 64bit rip.
137 */
138 wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
139 wrmsrl(MSR_LSTAR, system_call);
140 wrmsrl(MSR_CSTAR, ignore_sysret);
141
142#ifdef CONFIG_IA32_EMULATION
143 syscall32_cpu_init ();
144#endif
145
146 /* Flags to clear on syscall */
147 wrmsrl(MSR_SYSCALL_MASK,
148 X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
149}
150
151void __cpuinit check_efer(void)
152{
153 unsigned long efer;
154
155 rdmsrl(MSR_EFER, efer);
156 if (!(efer & EFER_NX) || do_not_nx) {
157 __supported_pte_mask &= ~_PAGE_NX;
158 }
159}
160
/* Flags value captured by cpu_init() through raw_local_save_flags(). */
161unsigned long kernel_eflags;
162
163/*
164 * Copies of the original ist values from the tss are only accessed during
165 * debugging, no special alignment required.
 * cpu_init() stores each IST entry here at the same time as in the TSS.
166 */
167DEFINE_PER_CPU(struct orig_ist, orig_ist);
168
169/*
170 * cpu_init() initializes state that is per-CPU. Some data is already
171 * initialized (naturally) in the bootstrap process, such as the GDT
172 * and IDT. We reload them nevertheless, this function acts as a
173 * 'CPU state barrier', nothing should get across.
174 * A lot of state is already set up in PDA init.
 *
 * In order, the function:
 *  - runs pda_init() on every CPU except 0 (CPU 0 instead picks up
 *    boot_exception_stacks as its exception stack storage);
 *  - marks the CPU in cpu_initialized, panicking if it was already marked;
 *  - clears VME, PVI, TSD and DE in CR4;
 *  - reloads GDT and IDT, zeroes the current task's TLS array, calls
 *    syscall_init(), zeroes MSR_FS_BASE and MSR_KERNEL_GS_BASE and calls
 *    check_efer();
 *  - fills the IST entries of the per-CPU TSS and sets the I/O bitmap to
 *    all ones;
 *  - attaches init_mm as the active mm and loads sp0, TSS, TR and LDT;
 *  - clears the debug registers (unless kgdb took them over), calls
 *    fpu_init(), saves the flags into kernel_eflags and, on UV systems,
 *    calls uv_cpu_init().
175 */
176void __cpuinit cpu_init (void)
177{
178 int cpu = stack_smp_processor_id();
179 struct tss_struct *t = &per_cpu(init_tss, cpu);
180 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
181 unsigned long v;
182 char *estacks = NULL;
183 struct task_struct *me;
184 int i;
185
186 /* CPU 0 is initialised in head64.c */
187 if (cpu != 0) {
188 pda_init(cpu);
189 } else
190 estacks = boot_exception_stacks;
191
192 me = current;
193
194 if (cpu_test_and_set(cpu, cpu_initialized))
195 panic("CPU#%d already initialized!\n", cpu);
196
/* NOTE(review): this printk carries no KERN_ log level. */
197 printk("Initializing CPU#%d\n", cpu);
198
199 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
200
201 /*
202 * Initialize the per-CPU GDT with the boot GDT,
203 * and set up the GDT descriptor:
204 */
205
206 switch_to_new_gdt();
207 load_idt((const struct desc_ptr *)&idt_descr);
208
209 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
210 syscall_init();
211
212 wrmsrl(MSR_FS_BASE, 0);
213 wrmsrl(MSR_KERNEL_GS_BASE, 0);
214 barrier();
215
216 check_efer();
217
218 /*
219 * set up and load the per-CPU TSS
220 */
221 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
/* Every stack uses EXCEPTION_STACK_ORDER except the debug stack. */
222 static const unsigned int order[N_EXCEPTION_STACKS] = {
223 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
224 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
225 };
/* Non-boot CPUs allocate each stack; CPU 0 keeps walking its static area. */
226 if (cpu) {
227 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
228 if (!estacks)
/* NOTE(review): v is unsigned long but is printed with %ld. */
229 panic("Cannot allocate exception stack %ld %d\n",
230 v, cpu);
231 }
/* Move to the end of this stack; that end address is what the IST records. */
232 estacks += PAGE_SIZE << order[v];
233 orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks;
234 }
235
236 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
237 /*
238 * <= is required because the CPU will access up to
239 * 8 bits beyond the end of the IO permission bitmap.
240 */
241 for (i = 0; i <= IO_BITMAP_LONGS; i++)
242 t->io_bitmap[i] = ~0UL;
243
/* Take a reference on init_mm and run on it as a lazy-TLB mm. */
244 atomic_inc(&init_mm.mm_count);
245 me->active_mm = &init_mm;
/* The task executing cpu_init() must not have an mm of its own. */
246 if (me->mm)
247 BUG();
248 enter_lazy_tlb(&init_mm, me);
249
250 load_sp0(t, &current->thread);
251 set_tss_desc(cpu, t);
252 load_TR_desc();
253 load_LDT(&init_mm.context);
254
255#ifdef CONFIG_KGDB
256 /*
257 * If the kgdb is connected no debug regs should be altered. This
258 * is only applicable when KGDB and a KGDB I/O module are built
259 * into the kernel and you are using early debugging with
260 * kgdbwait. KGDB will control the kernel HW breakpoint registers.
261 */
262 if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
263 arch_kgdb_ops.correct_hw_break();
264 else {
265#endif
266 /*
267 * Clear all 6 debug registers:
268 */
269
270 set_debugreg(0UL, 0);
271 set_debugreg(0UL, 1);
272 set_debugreg(0UL, 2);
273 set_debugreg(0UL, 3);
274 set_debugreg(0UL, 6);
275 set_debugreg(0UL, 7);
276#ifdef CONFIG_KGDB
277 /* If the kgdb is connected no debug regs should be altered. */
278 }
279#endif
280
281 fpu_init();
282
283 raw_local_save_flags(kernel_eflags);
284
285 if (is_uv_system())
286 uv_cpu_init();
287}