aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/Kconfig62
-rw-r--r--arch/x86_64/Makefile4
-rw-r--r--arch/x86_64/boot/Makefile2
-rw-r--r--arch/x86_64/boot/compressed/Makefile12
-rw-r--r--arch/x86_64/boot/compressed/head.S339
-rw-r--r--arch/x86_64/boot/compressed/misc.c247
-rw-r--r--arch/x86_64/boot/compressed/vmlinux.lds44
-rw-r--r--arch/x86_64/boot/compressed/vmlinux.scr9
-rw-r--r--arch/x86_64/boot/setup.S85
-rw-r--r--arch/x86_64/boot/video.S2043
-rw-r--r--arch/x86_64/defconfig183
-rw-r--r--arch/x86_64/ia32/ia32_binfmt.c10
-rw-r--r--arch/x86_64/ia32/ia32entry.S4
-rw-r--r--arch/x86_64/ia32/syscall32.c1
-rw-r--r--arch/x86_64/kernel/Makefile7
-rw-r--r--arch/x86_64/kernel/acpi/sleep.c24
-rw-r--r--arch/x86_64/kernel/acpi/wakeup.S286
-rw-r--r--arch/x86_64/kernel/aperture.c5
-rw-r--r--arch/x86_64/kernel/apic.c35
-rw-r--r--arch/x86_64/kernel/asm-offsets.c10
-rw-r--r--arch/x86_64/kernel/bugs.c21
-rw-r--r--arch/x86_64/kernel/cpufreq/Kconfig19
-rw-r--r--arch/x86_64/kernel/e820.c31
-rw-r--r--arch/x86_64/kernel/early-quirks.c13
-rw-r--r--arch/x86_64/kernel/early_printk.c5
-rw-r--r--arch/x86_64/kernel/entry.S5
-rw-r--r--arch/x86_64/kernel/functionlist1284
-rw-r--r--arch/x86_64/kernel/genapic.c104
-rw-r--r--arch/x86_64/kernel/genapic_cluster.c137
-rw-r--r--arch/x86_64/kernel/genapic_flat.c25
-rw-r--r--arch/x86_64/kernel/head.S340
-rw-r--r--arch/x86_64/kernel/head64.c41
-rw-r--r--arch/x86_64/kernel/io_apic.c35
-rw-r--r--arch/x86_64/kernel/ioport.c1
-rw-r--r--arch/x86_64/kernel/machine_kexec.c16
-rw-r--r--arch/x86_64/kernel/mce.c32
-rw-r--r--arch/x86_64/kernel/mpparse.c2
-rw-r--r--arch/x86_64/kernel/nmi.c678
-rw-r--r--arch/x86_64/kernel/pci-calgary.c2
-rw-r--r--arch/x86_64/kernel/pci-gart.c2
-rw-r--r--arch/x86_64/kernel/pci-nommu.c2
-rw-r--r--arch/x86_64/kernel/pci-swiotlb.c2
-rw-r--r--arch/x86_64/kernel/process.c12
-rw-r--r--arch/x86_64/kernel/setup.c26
-rw-r--r--arch/x86_64/kernel/setup64.c5
-rw-r--r--arch/x86_64/kernel/signal.c6
-rw-r--r--arch/x86_64/kernel/smp.c28
-rw-r--r--arch/x86_64/kernel/smpboot.c47
-rw-r--r--arch/x86_64/kernel/suspend.c19
-rw-r--r--arch/x86_64/kernel/suspend_asm.S7
-rw-r--r--arch/x86_64/kernel/sys_x86_64.c3
-rw-r--r--arch/x86_64/kernel/syscall.c1
-rw-r--r--arch/x86_64/kernel/time.c71
-rw-r--r--arch/x86_64/kernel/trampoline.S123
-rw-r--r--arch/x86_64/kernel/traps.c34
-rw-r--r--arch/x86_64/kernel/tsc.c17
-rw-r--r--arch/x86_64/kernel/tsc_sync.c4
-rw-r--r--arch/x86_64/kernel/verify_cpu.S119
-rw-r--r--arch/x86_64/kernel/vmlinux.lds.S20
-rw-r--r--arch/x86_64/kernel/vsyscall.c68
-rw-r--r--arch/x86_64/mm/fault.c5
-rw-r--r--arch/x86_64/mm/init.c161
-rw-r--r--arch/x86_64/mm/ioremap.c9
-rw-r--r--arch/x86_64/mm/k8topology.c9
-rw-r--r--arch/x86_64/mm/numa.c306
-rw-r--r--arch/x86_64/mm/pageattr.c16
-rw-r--r--arch/x86_64/mm/srat.c8
67 files changed, 1818 insertions, 5515 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 56eb14c98475..145bb824b2a8 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -415,13 +415,13 @@ config OUT_OF_LINE_PFN_TO_PAGE
415 depends on DISCONTIGMEM 415 depends on DISCONTIGMEM
416 416
417config NR_CPUS 417config NR_CPUS
418 int "Maximum number of CPUs (2-256)" 418 int "Maximum number of CPUs (2-255)"
419 range 2 255 419 range 2 255
420 depends on SMP 420 depends on SMP
421 default "8" 421 default "8"
422 help 422 help
423 This allows you to specify the maximum number of CPUs which this 423 This allows you to specify the maximum number of CPUs which this
424 kernel will support. Current maximum is 256 CPUs due to 424 kernel will support. Current maximum is 255 CPUs due to
425 APIC addressing limits. Less depending on the hardware. 425 APIC addressing limits. Less depending on the hardware.
426 426
427 This is purely to save memory - each supported CPU requires 427 This is purely to save memory - each supported CPU requires
@@ -565,23 +565,56 @@ config CRASH_DUMP
565 PHYSICAL_START. 565 PHYSICAL_START.
566 For more details see Documentation/kdump/kdump.txt 566 For more details see Documentation/kdump/kdump.txt
567 567
568config RELOCATABLE
569 bool "Build a relocatable kernel(EXPERIMENTAL)"
570 depends on EXPERIMENTAL
571 help
572 Builds a relocatable kernel. This enables loading and running
573 a kernel binary from a different physical address than it has
574 been compiled for.
575
576 One use is for the kexec on panic case where the recovery kernel
577 must live at a different physical address than the primary
578 kernel.
579
580 Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address
581 it has been loaded at and compile time physical address
582 (CONFIG_PHYSICAL_START) is ignored.
583
568config PHYSICAL_START 584config PHYSICAL_START
569 hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) 585 hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
570 default "0x1000000" if CRASH_DUMP
571 default "0x200000" 586 default "0x200000"
572 help 587 help
573 This gives the physical address where the kernel is loaded. Normally 588 This gives the physical address where the kernel is loaded. It
574 for regular kernels this value is 0x200000 (2MB). But in the case 589 should be aligned to 2MB boundary.
575 of kexec on panic the fail safe kernel needs to run at a different 590
576 address than the panic-ed kernel. This option is used to set the load 591 If kernel is not relocatable (CONFIG_RELOCATABLE=n) then
577 address for kernels used to capture crash dump on being kexec'ed 592 bzImage will decompress itself to above physical address and
578 after panic. The default value for crash dump kernels is 593 run from there. Otherwise, bzImage will run from the address where
579 0x1000000 (16MB). This can also be set based on the "X" value as 594 it has been loaded by the boot loader and will ignore above physical
595 address.
596
597 In normal kdump cases one does not have to set/change this option
598 as now bzImage can be compiled as a completely relocatable image
599 (CONFIG_RELOCATABLE=y) and be used to load and run from a different
600 address. This option is mainly useful for the folks who don't want
601 to use a bzImage for capturing the crash dump and want to use a
602 vmlinux instead.
603
604 So if you are using bzImage for capturing the crash dump, leave
605 the value here unchanged to 0x200000 and set CONFIG_RELOCATABLE=y.
606 Otherwise if you plan to use vmlinux for capturing the crash dump
607 change this value to start of the reserved region (Typically 16MB
608 0x1000000). In other words, it can be set based on the "X" value as
580 specified in the "crashkernel=YM@XM" command line boot parameter 609 specified in the "crashkernel=YM@XM" command line boot parameter
581 passed to the panic-ed kernel. Typically this parameter is set as 610 passed to the panic-ed kernel. Typically this parameter is set as
582 crashkernel=64M@16M. Please take a look at 611 crashkernel=64M@16M. Please take a look at
583 Documentation/kdump/kdump.txt for more details about crash dumps. 612 Documentation/kdump/kdump.txt for more details about crash dumps.
584 613
614 Usage of bzImage for capturing the crash dump is advantageous as
615 one does not have to build two kernels. Same kernel can be used
616 as production kernel and capture kernel.
617
585 Don't change this unless you know what you are doing. 618 Don't change this unless you know what you are doing.
586 619
587config SECCOMP 620config SECCOMP
@@ -627,14 +660,6 @@ config CC_STACKPROTECTOR_ALL
627 660
628source kernel/Kconfig.hz 661source kernel/Kconfig.hz
629 662
630config REORDER
631 bool "Function reordering"
632 default n
633 help
634 This option enables the toolchain to reorder functions for a more
635 optimal TLB usage. If you have pretty much any version of binutils,
636 this can increase your kernel build time by roughly one minute.
637
638config K8_NB 663config K8_NB
639 def_bool y 664 def_bool y
640 depends on AGP_AMD64 || IOMMU || (PCI && NUMA) 665 depends on AGP_AMD64 || IOMMU || (PCI && NUMA)
@@ -676,6 +701,7 @@ menu "Bus options (PCI etc.)"
676 701
677config PCI 702config PCI
678 bool "PCI support" 703 bool "PCI support"
704 select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC)
679 705
680# x86-64 doesn't support PCI BIOS access from long mode so always go direct. 706# x86-64 doesn't support PCI BIOS access from long mode so always go direct.
681config PCI_DIRECT 707config PCI_DIRECT
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 2941a915d4ef..29617ae3926d 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -40,10 +40,6 @@ cflags-y += -m64
40cflags-y += -mno-red-zone 40cflags-y += -mno-red-zone
41cflags-y += -mcmodel=kernel 41cflags-y += -mcmodel=kernel
42cflags-y += -pipe 42cflags-y += -pipe
43cflags-kernel-$(CONFIG_REORDER) += -ffunction-sections
44# this makes reading assembly source easier, but produces worse code
45# actually it makes the kernel smaller too.
46cflags-y += -fno-reorder-blocks
47cflags-y += -Wno-sign-compare 43cflags-y += -Wno-sign-compare
48cflags-y += -fno-asynchronous-unwind-tables 44cflags-y += -fno-asynchronous-unwind-tables
49ifneq ($(CONFIG_DEBUG_INFO),y) 45ifneq ($(CONFIG_DEBUG_INFO),y)
diff --git a/arch/x86_64/boot/Makefile b/arch/x86_64/boot/Makefile
index deb063e7762d..ee6f6505f95f 100644
--- a/arch/x86_64/boot/Makefile
+++ b/arch/x86_64/boot/Makefile
@@ -36,7 +36,7 @@ subdir- := compressed/ #Let make clean descend in compressed/
36# --------------------------------------------------------------------------- 36# ---------------------------------------------------------------------------
37 37
38$(obj)/bzImage: IMAGE_OFFSET := 0x100000 38$(obj)/bzImage: IMAGE_OFFSET := 0x100000
39$(obj)/bzImage: EXTRA_AFLAGS := -traditional $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__ 39$(obj)/bzImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
40$(obj)/bzImage: BUILDFLAGS := -b 40$(obj)/bzImage: BUILDFLAGS := -b
41 41
42quiet_cmd_image = BUILD $@ 42quiet_cmd_image = BUILD $@
diff --git a/arch/x86_64/boot/compressed/Makefile b/arch/x86_64/boot/compressed/Makefile
index e70fa6e1da08..705a3e33d7e1 100644
--- a/arch/x86_64/boot/compressed/Makefile
+++ b/arch/x86_64/boot/compressed/Makefile
@@ -8,16 +8,14 @@
8 8
9targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o 9targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o
10EXTRA_AFLAGS := -traditional 10EXTRA_AFLAGS := -traditional
11AFLAGS := $(subst -m64,-m32,$(AFLAGS))
12 11
13# cannot use EXTRA_CFLAGS because base CFLAGS contains -mkernel which conflicts with 12# cannot use EXTRA_CFLAGS because base CFLAGS contains -mkernel which conflicts with
14# -m32 13# -m32
15CFLAGS := -m32 -D__KERNEL__ -Iinclude -O2 -fno-strict-aliasing 14CFLAGS := -m64 -D__KERNEL__ -Iinclude -O2 -fno-strict-aliasing -fPIC -mcmodel=small -fno-builtin
16LDFLAGS := -m elf_i386 15LDFLAGS := -m elf_x86_64
17 16
18LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup_32 -m elf_i386 17LDFLAGS_vmlinux := -T
19 18$(obj)/vmlinux: $(src)/vmlinux.lds $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
20$(obj)/vmlinux: $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
21 $(call if_changed,ld) 19 $(call if_changed,ld)
22 @: 20 @:
23 21
@@ -27,7 +25,7 @@ $(obj)/vmlinux.bin: vmlinux FORCE
27$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE 25$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
28 $(call if_changed,gzip) 26 $(call if_changed,gzip)
29 27
30LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T 28LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
31 29
32$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE 30$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
33 $(call if_changed,ld) 31 $(call if_changed,ld)
diff --git a/arch/x86_64/boot/compressed/head.S b/arch/x86_64/boot/compressed/head.S
index 6f55565e4d42..f9d5692a0106 100644
--- a/arch/x86_64/boot/compressed/head.S
+++ b/arch/x86_64/boot/compressed/head.S
@@ -26,116 +26,279 @@
26 26
27#include <linux/linkage.h> 27#include <linux/linkage.h>
28#include <asm/segment.h> 28#include <asm/segment.h>
29#include <asm/pgtable.h>
29#include <asm/page.h> 30#include <asm/page.h>
31#include <asm/msr.h>
30 32
33.section ".text.head"
31 .code32 34 .code32
32 .globl startup_32 35 .globl startup_32
33 36
34startup_32: 37startup_32:
35 cld 38 cld
36 cli 39 cli
37 movl $(__KERNEL_DS),%eax 40 movl $(__KERNEL_DS), %eax
38 movl %eax,%ds 41 movl %eax, %ds
39 movl %eax,%es 42 movl %eax, %es
40 movl %eax,%fs 43 movl %eax, %ss
41 movl %eax,%gs 44
42 45/* Calculate the delta between where we were compiled to run
43 lss stack_start,%esp 46 * at and where we were actually loaded at. This can only be done
44 xorl %eax,%eax 47 * with a short local call on x86. Nothing else will tell us what
451: incl %eax # check that A20 really IS enabled 48 * address we are running at. The reserved chunk of the real-mode
46 movl %eax,0x000000 # loop forever if it isn't 49 * data at 0x34-0x3f are used as the stack for this calculation.
47 cmpl %eax,0x100000 50 * Only 4 bytes are needed.
48 je 1b 51 */
52 leal 0x40(%esi), %esp
53 call 1f
541: popl %ebp
55 subl $1b, %ebp
56
57/* setup a stack and make sure cpu supports long mode. */
58 movl $user_stack_end, %eax
59 addl %ebp, %eax
60 movl %eax, %esp
61
62 call verify_cpu
63 testl %eax, %eax
64 jnz no_longmode
65
66/* Compute the delta between where we were compiled to run at
67 * and where the code will actually run at.
68 */
69/* %ebp contains the address we are loaded at by the boot loader and %ebx
70 * contains the address where we should move the kernel image temporarily
71 * for safe in-place decompression.
72 */
73
74#ifdef CONFIG_RELOCATABLE
75 movl %ebp, %ebx
76 addl $(LARGE_PAGE_SIZE -1), %ebx
77 andl $LARGE_PAGE_MASK, %ebx
78#else
79 movl $CONFIG_PHYSICAL_START, %ebx
80#endif
81
82 /* Replace the compressed data size with the uncompressed size */
83 subl input_len(%ebp), %ebx
84 movl output_len(%ebp), %eax
85 addl %eax, %ebx
86 /* Add 8 bytes for every 32K input block */
87 shrl $12, %eax
88 addl %eax, %ebx
89 /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
90 addl $(32768 + 18 + 4095), %ebx
91 andl $~4095, %ebx
49 92
50/* 93/*
51 * Initialize eflags. Some BIOS's leave bits like NT set. This would 94 * Prepare for entering 64 bit mode
52 * confuse the debugger if this code is traced.
53 * XXX - best to initialize before switching to protected mode.
54 */ 95 */
55 pushl $0 96
56 popfl 97 /* Load new GDT with the 64bit segments using 32bit descriptor */
98 leal gdt(%ebp), %eax
99 movl %eax, gdt+2(%ebp)
100 lgdt gdt(%ebp)
101
102 /* Enable PAE mode */
103 xorl %eax, %eax
104 orl $(1 << 5), %eax
105 movl %eax, %cr4
106
107 /*
108 * Build early 4G boot pagetable
109 */
110 /* Initialize Page tables to 0*/
111 leal pgtable(%ebx), %edi
112 xorl %eax, %eax
113 movl $((4096*6)/4), %ecx
114 rep stosl
115
116 /* Build Level 4 */
117 leal pgtable + 0(%ebx), %edi
118 leal 0x1007 (%edi), %eax
119 movl %eax, 0(%edi)
120
121 /* Build Level 3 */
122 leal pgtable + 0x1000(%ebx), %edi
123 leal 0x1007(%edi), %eax
124 movl $4, %ecx
1251: movl %eax, 0x00(%edi)
126 addl $0x00001000, %eax
127 addl $8, %edi
128 decl %ecx
129 jnz 1b
130
131 /* Build Level 2 */
132 leal pgtable + 0x2000(%ebx), %edi
133 movl $0x00000183, %eax
134 movl $2048, %ecx
1351: movl %eax, 0(%edi)
136 addl $0x00200000, %eax
137 addl $8, %edi
138 decl %ecx
139 jnz 1b
140
141 /* Enable the boot page tables */
142 leal pgtable(%ebx), %eax
143 movl %eax, %cr3
144
145 /* Enable Long mode in EFER (Extended Feature Enable Register) */
146 movl $MSR_EFER, %ecx
147 rdmsr
148 btsl $_EFER_LME, %eax
149 wrmsr
150
151 /* Setup for the jump to 64bit mode
152 *
153 * When the jump is performed we will be in long mode but
154 * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
155 * (and in turn EFER.LMA = 1). To jump into 64bit mode we use
156 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
157 * We place all of the values on our mini stack so lret can
158 * be used to perform that far jump.
159 */
160 pushl $__KERNEL_CS
161 leal startup_64(%ebp), %eax
162 pushl %eax
163
164 /* Enter paged protected Mode, activating Long Mode */
165 movl $0x80000001, %eax /* Enable Paging and Protected mode */
166 movl %eax, %cr0
167
168 /* Jump from 32bit compatibility mode into 64bit mode. */
169 lret
170
171no_longmode:
172 /* This isn't an x86-64 CPU so hang */
1731:
174 hlt
175 jmp 1b
176
177#include "../../kernel/verify_cpu.S"
178
179 /* Be careful here startup_64 needs to be at a predictable
180 * address so I can export it in an ELF header. Bootloaders
181 * should look at the ELF header to find this address, as
182 * it may change in the future.
183 */
184 .code64
185 .org 0x200
186ENTRY(startup_64)
187 /* We come here either from startup_32 or directly from a
188 * 64bit bootloader. If we come here from a bootloader we depend on
189 * an identity mapped page table being provied that maps our
190 * entire text+data+bss and hopefully all of memory.
191 */
192
193 /* Setup data segments. */
194 xorl %eax, %eax
195 movl %eax, %ds
196 movl %eax, %es
197 movl %eax, %ss
198
199 /* Compute the decompressed kernel start address. It is where
200 * we were loaded at aligned to a 2M boundary. %rbp contains the
201 * decompressed kernel start address.
202 *
203 * If it is a relocatable kernel then decompress and run the kernel
204 * from load address aligned to 2MB addr, otherwise decompress and
205 * run the kernel from CONFIG_PHYSICAL_START
206 */
207
208 /* Start with the delta to where the kernel will run at. */
209#ifdef CONFIG_RELOCATABLE
210 leaq startup_32(%rip) /* - $startup_32 */, %rbp
211 addq $(LARGE_PAGE_SIZE - 1), %rbp
212 andq $LARGE_PAGE_MASK, %rbp
213 movq %rbp, %rbx
214#else
215 movq $CONFIG_PHYSICAL_START, %rbp
216 movq %rbp, %rbx
217#endif
218
219 /* Replace the compressed data size with the uncompressed size */
220 movl input_len(%rip), %eax
221 subq %rax, %rbx
222 movl output_len(%rip), %eax
223 addq %rax, %rbx
224 /* Add 8 bytes for every 32K input block */
225 shrq $12, %rax
226 addq %rax, %rbx
227 /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
228 addq $(32768 + 18 + 4095), %rbx
229 andq $~4095, %rbx
230
231/* Copy the compressed kernel to the end of our buffer
232 * where decompression in place becomes safe.
233 */
234 leaq _end(%rip), %r8
235 leaq _end(%rbx), %r9
236 movq $_end /* - $startup_32 */, %rcx
2371: subq $8, %r8
238 subq $8, %r9
239 movq 0(%r8), %rax
240 movq %rax, 0(%r9)
241 subq $8, %rcx
242 jnz 1b
243
244/*
245 * Jump to the relocated address.
246 */
247 leaq relocated(%rbx), %rax
248 jmp *%rax
249
250.section ".text"
251relocated:
252
57/* 253/*
58 * Clear BSS 254 * Clear BSS
59 */ 255 */
60 xorl %eax,%eax 256 xorq %rax, %rax
61 movl $_edata,%edi 257 leaq _edata(%rbx), %rdi
62 movl $_end,%ecx 258 leaq _end(%rbx), %rcx
63 subl %edi,%ecx 259 subq %rdi, %rcx
64 cld 260 cld
65 rep 261 rep
66 stosb 262 stosb
263
264 /* Setup the stack */
265 leaq user_stack_end(%rip), %rsp
266
267 /* zero EFLAGS after setting rsp */
268 pushq $0
269 popfq
270
67/* 271/*
68 * Do the decompression, and jump to the new kernel.. 272 * Do the decompression, and jump to the new kernel..
69 */ 273 */
70 subl $16,%esp # place for structure on the stack 274 pushq %rsi # Save the real mode argument
71 movl %esp,%eax 275 movq %rsi, %rdi # real mode address
72 pushl %esi # real mode pointer as second arg 276 leaq _heap(%rip), %rsi # _heap
73 pushl %eax # address of structure as first arg 277 leaq input_data(%rip), %rdx # input_data
74 call decompress_kernel 278 movl input_len(%rip), %eax
75 orl %eax,%eax 279 movq %rax, %rcx # input_len
76 jnz 3f 280 movq %rbp, %r8 # output
77 addl $8,%esp 281 call decompress_kernel
78 xorl %ebx,%ebx 282 popq %rsi
79 ljmp $(__KERNEL_CS), $__PHYSICAL_START
80 283
81/*
82 * We come here, if we were loaded high.
83 * We need to move the move-in-place routine down to 0x1000
84 * and then start it with the buffer addresses in registers,
85 * which we got from the stack.
86 */
873:
88 movl %esi,%ebx
89 movl $move_routine_start,%esi
90 movl $0x1000,%edi
91 movl $move_routine_end,%ecx
92 subl %esi,%ecx
93 addl $3,%ecx
94 shrl $2,%ecx
95 cld
96 rep
97 movsl
98
99 popl %esi # discard the address
100 addl $4,%esp # real mode pointer
101 popl %esi # low_buffer_start
102 popl %ecx # lcount
103 popl %edx # high_buffer_start
104 popl %eax # hcount
105 movl $__PHYSICAL_START,%edi
106 cli # make sure we don't get interrupted
107 ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine
108 284
109/* 285/*
110 * Routine (template) for moving the decompressed kernel in place, 286 * Jump to the decompressed kernel.
111 * if we were high loaded. This _must_ PIC-code !
112 */ 287 */
113move_routine_start: 288 jmp *%rbp
114 movl %ecx,%ebp
115 shrl $2,%ecx
116 rep
117 movsl
118 movl %ebp,%ecx
119 andl $3,%ecx
120 rep
121 movsb
122 movl %edx,%esi
123 movl %eax,%ecx # NOTE: rep movsb won't move if %ecx == 0
124 addl $3,%ecx
125 shrl $2,%ecx
126 rep
127 movsl
128 movl %ebx,%esi # Restore setup pointer
129 xorl %ebx,%ebx
130 ljmp $(__KERNEL_CS), $__PHYSICAL_START
131move_routine_end:
132 289
133 290 .data
134/* Stack for uncompression */ 291gdt:
135 .align 32 292 .word gdt_end - gdt
136user_stack: 293 .long gdt
294 .word 0
295 .quad 0x0000000000000000 /* NULL descriptor */
296 .quad 0x00af9a000000ffff /* __KERNEL_CS */
297 .quad 0x00cf92000000ffff /* __KERNEL_DS */
298gdt_end:
299 .bss
300/* Stack for uncompression */
301 .balign 4
302user_stack:
137 .fill 4096,4,0 303 .fill 4096,4,0
138stack_start: 304user_stack_end:
139 .long user_stack+4096
140 .word __KERNEL_DS
141
diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c
index 3755b2e394d0..f932b0e89096 100644
--- a/arch/x86_64/boot/compressed/misc.c
+++ b/arch/x86_64/boot/compressed/misc.c
@@ -9,10 +9,95 @@
9 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 9 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
10 */ 10 */
11 11
12#define _LINUX_STRING_H_ 1
13#define __LINUX_BITMAP_H 1
14
15#include <linux/linkage.h>
12#include <linux/screen_info.h> 16#include <linux/screen_info.h>
13#include <asm/io.h> 17#include <asm/io.h>
14#include <asm/page.h> 18#include <asm/page.h>
15 19
20/* WARNING!!
21 * This code is compiled with -fPIC and it is relocated dynamically
22 * at run time, but no relocation processing is performed.
23 * This means that it is not safe to place pointers in static structures.
24 */
25
26/*
27 * Getting to provable safe in place decompression is hard.
28 * Worst case behaviours need to be analized.
29 * Background information:
30 *
31 * The file layout is:
32 * magic[2]
33 * method[1]
34 * flags[1]
35 * timestamp[4]
36 * extraflags[1]
37 * os[1]
38 * compressed data blocks[N]
39 * crc[4] orig_len[4]
40 *
41 * resulting in 18 bytes of non compressed data overhead.
42 *
43 * Files divided into blocks
44 * 1 bit (last block flag)
45 * 2 bits (block type)
46 *
47 * 1 block occurs every 32K -1 bytes or when 50% compression has been achieved.
48 * The smallest block type encoding is always used.
49 *
50 * stored:
51 * 32 bits length in bytes.
52 *
53 * fixed:
54 * magic fixed tree.
55 * symbols.
56 *
57 * dynamic:
58 * dynamic tree encoding.
59 * symbols.
60 *
61 *
62 * The buffer for decompression in place is the length of the
63 * uncompressed data, plus a small amount extra to keep the algorithm safe.
64 * The compressed data is placed at the end of the buffer. The output
65 * pointer is placed at the start of the buffer and the input pointer
66 * is placed where the compressed data starts. Problems will occur
67 * when the output pointer overruns the input pointer.
68 *
69 * The output pointer can only overrun the input pointer if the input
70 * pointer is moving faster than the output pointer. A condition only
71 * triggered by data whose compressed form is larger than the uncompressed
72 * form.
73 *
74 * The worst case at the block level is a growth of the compressed data
75 * of 5 bytes per 32767 bytes.
76 *
77 * The worst case internal to a compressed block is very hard to figure.
78 * The worst case can at least be bounded by having one bit that represents
79 * 32764 bytes and then all of the rest of the bytes representing the very
80 * very last byte.
81 *
82 * All of which is enough to compute an amount of extra data that is required
83 * to be safe. To avoid problems at the block level allocating 5 extra bytes
84 * per 32767 bytes of data is sufficient. To avoid problems internal to a block
85 * adding an extra 32767 bytes (the worst case uncompressed block size) is
86 * sufficient, to ensure that in the worst case the decompressed data for
87 * block will stop the byte before the compressed data for a block begins.
88 * To avoid problems with the compressed data's meta information an extra 18
89 * bytes are needed. Leading to the formula:
90 *
91 * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size.
92 *
93 * Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
94 * Adding 32768 instead of 32767 just makes for round numbers.
95 * Adding the decompressor_size is necessary as it must live after all
96 * of the data as well. Last I measured the decompressor is about 14K.
97 * 10K of actual data and 4K of bss.
98 *
99 */
100
16/* 101/*
17 * gzip declarations 102 * gzip declarations
18 */ 103 */
@@ -28,15 +113,20 @@ typedef unsigned char uch;
28typedef unsigned short ush; 113typedef unsigned short ush;
29typedef unsigned long ulg; 114typedef unsigned long ulg;
30 115
31#define WSIZE 0x8000 /* Window size must be at least 32k, */ 116#define WSIZE 0x80000000 /* Window size must be at least 32k,
32 /* and a power of two */ 117 * and a power of two
118 * We don't actually have a window just
119 * a huge output buffer so I report
120 * a 2G windows size, as that should
121 * always be larger than our output buffer.
122 */
33 123
34static uch *inbuf; /* input buffer */ 124static uch *inbuf; /* input buffer */
35static uch window[WSIZE]; /* Sliding window buffer */ 125static uch *window; /* Sliding window buffer, (and final output buffer) */
36 126
37static unsigned insize = 0; /* valid bytes in inbuf */ 127static unsigned insize; /* valid bytes in inbuf */
38static unsigned inptr = 0; /* index of next byte to be processed in inbuf */ 128static unsigned inptr; /* index of next byte to be processed in inbuf */
39static unsigned outcnt = 0; /* bytes in output buffer */ 129static unsigned outcnt; /* bytes in output buffer */
40 130
41/* gzip flag byte */ 131/* gzip flag byte */
42#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */ 132#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */
@@ -87,8 +177,6 @@ extern unsigned char input_data[];
87extern int input_len; 177extern int input_len;
88 178
89static long bytes_out = 0; 179static long bytes_out = 0;
90static uch *output_data;
91static unsigned long output_ptr = 0;
92 180
93static void *malloc(int size); 181static void *malloc(int size);
94static void free(void *where); 182static void free(void *where);
@@ -98,17 +186,10 @@ static void *memcpy(void *dest, const void *src, unsigned n);
98 186
99static void putstr(const char *); 187static void putstr(const char *);
100 188
101extern int end; 189static long free_mem_ptr;
102static long free_mem_ptr = (long)&end;
103static long free_mem_end_ptr; 190static long free_mem_end_ptr;
104 191
105#define INPLACE_MOVE_ROUTINE 0x1000 192#define HEAP_SIZE 0x7000
106#define LOW_BUFFER_START 0x2000
107#define LOW_BUFFER_MAX 0x90000
108#define HEAP_SIZE 0x3000
109static unsigned int low_buffer_end, low_buffer_size;
110static int high_loaded =0;
111static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/;
112 193
113static char *vidmem = (char *)0xb8000; 194static char *vidmem = (char *)0xb8000;
114static int vidport; 195static int vidport;
@@ -218,58 +299,31 @@ static void* memcpy(void* dest, const void* src, unsigned n)
218 */ 299 */
219static int fill_inbuf(void) 300static int fill_inbuf(void)
220{ 301{
221 if (insize != 0) { 302 error("ran out of input data");
222 error("ran out of input data"); 303 return 0;
223 }
224
225 inbuf = input_data;
226 insize = input_len;
227 inptr = 1;
228 return inbuf[0];
229} 304}
230 305
231/* =========================================================================== 306/* ===========================================================================
232 * Write the output window window[0..outcnt-1] and update crc and bytes_out. 307 * Write the output window window[0..outcnt-1] and update crc and bytes_out.
233 * (Used for the decompressed data only.) 308 * (Used for the decompressed data only.)
234 */ 309 */
235static void flush_window_low(void)
236{
237 ulg c = crc; /* temporary variable */
238 unsigned n;
239 uch *in, *out, ch;
240
241 in = window;
242 out = &output_data[output_ptr];
243 for (n = 0; n < outcnt; n++) {
244 ch = *out++ = *in++;
245 c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
246 }
247 crc = c;
248 bytes_out += (ulg)outcnt;
249 output_ptr += (ulg)outcnt;
250 outcnt = 0;
251}
252
253static void flush_window_high(void)
254{
255 ulg c = crc; /* temporary variable */
256 unsigned n;
257 uch *in, ch;
258 in = window;
259 for (n = 0; n < outcnt; n++) {
260 ch = *output_data++ = *in++;
261 if ((ulg)output_data == low_buffer_end) output_data=high_buffer_start;
262 c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
263 }
264 crc = c;
265 bytes_out += (ulg)outcnt;
266 outcnt = 0;
267}
268
269static void flush_window(void) 310static void flush_window(void)
270{ 311{
271 if (high_loaded) flush_window_high(); 312 /* With my window equal to my output buffer
272 else flush_window_low(); 313 * I only need to compute the crc here.
314 */
315 ulg c = crc; /* temporary variable */
316 unsigned n;
317 uch *in, ch;
318
319 in = window;
320 for (n = 0; n < outcnt; n++) {
321 ch = *in++;
322 c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
323 }
324 crc = c;
325 bytes_out += (ulg)outcnt;
326 outcnt = 0;
273} 327}
274 328
275static void error(char *x) 329static void error(char *x)
@@ -281,57 +335,8 @@ static void error(char *x)
281 while(1); /* Halt */ 335 while(1); /* Halt */
282} 336}
283 337
284static void setup_normal_output_buffer(void) 338asmlinkage void decompress_kernel(void *rmode, unsigned long heap,
285{ 339 uch *input_data, unsigned long input_len, uch *output)
286#ifdef STANDARD_MEMORY_BIOS_CALL
287 if (RM_EXT_MEM_K < 1024) error("Less than 2MB of memory");
288#else
289 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
290#endif
291 output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
292 free_mem_end_ptr = (long)real_mode;
293}
294
295struct moveparams {
296 uch *low_buffer_start; int lcount;
297 uch *high_buffer_start; int hcount;
298};
299
300static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
301{
302 high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE);
303#ifdef STANDARD_MEMORY_BIOS_CALL
304 if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
305#else
306 if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
307#endif
308 mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
309 low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
310 ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
311 low_buffer_size = low_buffer_end - LOW_BUFFER_START;
312 high_loaded = 1;
313 free_mem_end_ptr = (long)high_buffer_start;
314 if ( (__PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) {
315 high_buffer_start = (uch *)(__PHYSICAL_START + low_buffer_size);
316 mv->hcount = 0; /* say: we need not to move high_buffer */
317 }
318 else mv->hcount = -1;
319 mv->high_buffer_start = high_buffer_start;
320}
321
322static void close_output_buffer_if_we_run_high(struct moveparams *mv)
323{
324 if (bytes_out > low_buffer_size) {
325 mv->lcount = low_buffer_size;
326 if (mv->hcount)
327 mv->hcount = bytes_out - low_buffer_size;
328 } else {
329 mv->lcount = bytes_out;
330 mv->hcount = 0;
331 }
332}
333
334int decompress_kernel(struct moveparams *mv, void *rmode)
335{ 340{
336 real_mode = rmode; 341 real_mode = rmode;
337 342
@@ -346,13 +351,21 @@ int decompress_kernel(struct moveparams *mv, void *rmode)
346 lines = RM_SCREEN_INFO.orig_video_lines; 351 lines = RM_SCREEN_INFO.orig_video_lines;
347 cols = RM_SCREEN_INFO.orig_video_cols; 352 cols = RM_SCREEN_INFO.orig_video_cols;
348 353
349 if (free_mem_ptr < 0x100000) setup_normal_output_buffer(); 354 window = output; /* Output buffer (Normally at 1M) */
350 else setup_output_buffer_if_we_run_high(mv); 355 free_mem_ptr = heap; /* Heap */
356 free_mem_end_ptr = heap + HEAP_SIZE;
357 inbuf = input_data; /* Input buffer */
358 insize = input_len;
359 inptr = 0;
360
361 if ((ulg)output & (__KERNEL_ALIGN - 1))
362 error("Destination address not 2M aligned");
363 if ((ulg)output >= 0xffffffffffUL)
364 error("Destination address too large");
351 365
352 makecrc(); 366 makecrc();
353 putstr(".\nDecompressing Linux..."); 367 putstr(".\nDecompressing Linux...");
354 gunzip(); 368 gunzip();
355 putstr("done.\nBooting the kernel.\n"); 369 putstr("done.\nBooting the kernel.\n");
356 if (high_loaded) close_output_buffer_if_we_run_high(mv); 370 return;
357 return high_loaded;
358} 371}
diff --git a/arch/x86_64/boot/compressed/vmlinux.lds b/arch/x86_64/boot/compressed/vmlinux.lds
new file mode 100644
index 000000000000..94c13e557fb4
--- /dev/null
+++ b/arch/x86_64/boot/compressed/vmlinux.lds
@@ -0,0 +1,44 @@
1OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
2OUTPUT_ARCH(i386:x86-64)
3ENTRY(startup_64)
4SECTIONS
5{
6 /* Be careful: parts of head.S assume startup_32 is at
7 * address 0.
8 */
9 . = 0;
10 .text : {
11 _head = . ;
12 *(.text.head)
13 _ehead = . ;
14 *(.text.compressed)
15 _text = .; /* Text */
16 *(.text)
17 *(.text.*)
18 _etext = . ;
19 }
20 .rodata : {
21 _rodata = . ;
22 *(.rodata) /* read-only data */
23 *(.rodata.*)
24 _erodata = . ;
25 }
26 .data : {
27 _data = . ;
28 *(.data)
29 *(.data.*)
30 _edata = . ;
31 }
32 .bss : {
33 _bss = . ;
34 *(.bss)
35 *(.bss.*)
36 *(COMMON)
37 . = ALIGN(8);
38 _end = . ;
39 . = ALIGN(4096);
40 pgtable = . ;
41 . = . + 4096 * 6;
42 _heap = .;
43 }
44}
diff --git a/arch/x86_64/boot/compressed/vmlinux.scr b/arch/x86_64/boot/compressed/vmlinux.scr
index 1ed9d791f863..bd1429ce193e 100644
--- a/arch/x86_64/boot/compressed/vmlinux.scr
+++ b/arch/x86_64/boot/compressed/vmlinux.scr
@@ -1,9 +1,10 @@
1SECTIONS 1SECTIONS
2{ 2{
3 .data : { 3 .text.compressed : {
4 input_len = .; 4 input_len = .;
5 LONG(input_data_end - input_data) input_data = .; 5 LONG(input_data_end - input_data) input_data = .;
6 *(.data) 6 *(.data)
7 input_data_end = .; 7 output_len = . - 4;
8 input_data_end = .;
8 } 9 }
9} 10}
diff --git a/arch/x86_64/boot/setup.S b/arch/x86_64/boot/setup.S
index 770940cc0108..e9e33f949697 100644
--- a/arch/x86_64/boot/setup.S
+++ b/arch/x86_64/boot/setup.S
@@ -51,6 +51,7 @@
51#include <asm/boot.h> 51#include <asm/boot.h>
52#include <asm/e820.h> 52#include <asm/e820.h>
53#include <asm/page.h> 53#include <asm/page.h>
54#include <asm/setup.h>
54 55
55/* Signature words to ensure LILO loaded us right */ 56/* Signature words to ensure LILO loaded us right */
56#define SIG1 0xAA55 57#define SIG1 0xAA55
@@ -80,7 +81,7 @@ start:
80# This is the setup header, and it must start at %cs:2 (old 0x9020:2) 81# This is the setup header, and it must start at %cs:2 (old 0x9020:2)
81 82
82 .ascii "HdrS" # header signature 83 .ascii "HdrS" # header signature
83 .word 0x0204 # header version number (>= 0x0105) 84 .word 0x0206 # header version number (>= 0x0105)
84 # or else old loadlin-1.5 will fail) 85 # or else old loadlin-1.5 will fail)
85realmode_swtch: .word 0, 0 # default_switch, SETUPSEG 86realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
86start_sys_seg: .word SYSSEG 87start_sys_seg: .word SYSSEG
@@ -155,7 +156,20 @@ cmd_line_ptr: .long 0 # (Header version 0x0202 or later)
155 # low memory 0x10000 or higher. 156 # low memory 0x10000 or higher.
156 157
157ramdisk_max: .long 0xffffffff 158ramdisk_max: .long 0xffffffff
158 159kernel_alignment: .long 0x200000 # physical addr alignment required for
160 # protected mode relocatable kernel
161#ifdef CONFIG_RELOCATABLE
162relocatable_kernel: .byte 1
163#else
164relocatable_kernel: .byte 0
165#endif
166pad2: .byte 0
167pad3: .word 0
168
169cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
170 #added with boot protocol
171 #version 2.06
172
159trampoline: call start_of_setup 173trampoline: call start_of_setup
160 .align 16 174 .align 16
161 # The offset at this point is 0x240 175 # The offset at this point is 0x240
@@ -290,64 +304,10 @@ loader_ok:
290 movw %cs,%ax 304 movw %cs,%ax
291 movw %ax,%ds 305 movw %ax,%ds
292 306
293 /* minimum CPUID flags for x86-64 */ 307 call verify_cpu
294 /* see http://www.x86-64.org/lists/discuss/msg02971.html */ 308 testl %eax,%eax
295#define SSE_MASK ((1<<25)|(1<<26)) 309 jz sse_ok
296#define REQUIRED_MASK1 ((1<<0)|(1<<3)|(1<<4)|(1<<5)|(1<<6)|(1<<8)|\ 310
297 (1<<13)|(1<<15)|(1<<24))
298#define REQUIRED_MASK2 (1<<29)
299
300 pushfl /* standard way to check for cpuid */
301 popl %eax
302 movl %eax,%ebx
303 xorl $0x200000,%eax
304 pushl %eax
305 popfl
306 pushfl
307 popl %eax
308 cmpl %eax,%ebx
309 jz no_longmode /* cpu has no cpuid */
310 movl $0x0,%eax
311 cpuid
312 cmpl $0x1,%eax
313 jb no_longmode /* no cpuid 1 */
314 xor %di,%di
315 cmpl $0x68747541,%ebx /* AuthenticAMD */
316 jnz noamd
317 cmpl $0x69746e65,%edx
318 jnz noamd
319 cmpl $0x444d4163,%ecx
320 jnz noamd
321 mov $1,%di /* cpu is from AMD */
322noamd:
323 movl $0x1,%eax
324 cpuid
325 andl $REQUIRED_MASK1,%edx
326 xorl $REQUIRED_MASK1,%edx
327 jnz no_longmode
328 movl $0x80000000,%eax
329 cpuid
330 cmpl $0x80000001,%eax
331 jb no_longmode /* no extended cpuid */
332 movl $0x80000001,%eax
333 cpuid
334 andl $REQUIRED_MASK2,%edx
335 xorl $REQUIRED_MASK2,%edx
336 jnz no_longmode
337sse_test:
338 movl $1,%eax
339 cpuid
340 andl $SSE_MASK,%edx
341 cmpl $SSE_MASK,%edx
342 je sse_ok
343 test %di,%di
344 jz no_longmode /* only try to force SSE on AMD */
345 movl $0xc0010015,%ecx /* HWCR */
346 rdmsr
347 btr $15,%eax /* enable SSE */
348 wrmsr
349 xor %di,%di /* don't loop */
350 jmp sse_test /* try again */
351no_longmode: 311no_longmode:
352 call beep 312 call beep
353 lea long_mode_panic,%si 313 lea long_mode_panic,%si
@@ -357,7 +317,8 @@ no_longmode_loop:
357long_mode_panic: 317long_mode_panic:
358 .string "Your CPU does not support long mode. Use a 32bit distribution." 318 .string "Your CPU does not support long mode. Use a 32bit distribution."
359 .byte 0 319 .byte 0
360 320
321#include "../kernel/verify_cpu.S"
361sse_ok: 322sse_ok:
362 popw %ds 323 popw %ds
363 324
@@ -846,7 +807,7 @@ gdt_48:
846 807
847# Include video setup & detection code 808# Include video setup & detection code
848 809
849#include "video.S" 810#include "../../i386/boot/video.S"
850 811
851# Setup signature -- must be last 812# Setup signature -- must be last
852setup_sig1: .word SIG1 813setup_sig1: .word SIG1
diff --git a/arch/x86_64/boot/video.S b/arch/x86_64/boot/video.S
deleted file mode 100644
index 6090516c9c7f..000000000000
--- a/arch/x86_64/boot/video.S
+++ /dev/null
@@ -1,2043 +0,0 @@
1/* video.S
2 *
3 * Display adapter & video mode setup, version 2.13 (14-May-99)
4 *
5 * Copyright (C) 1995 -- 1998 Martin Mares <mj@ucw.cz>
6 * Based on the original setup.S code (C) Linus Torvalds and Mats Anderson
7 *
8 * Rewritten to use GNU 'as' by Chris Noe <stiker@northlink.com> May 1999
9 *
10 * For further information, look at Documentation/svga.txt.
11 *
12 */
13
14/* Enable autodetection of SVGA adapters and modes. */
15#undef CONFIG_VIDEO_SVGA
16
17/* Enable autodetection of VESA modes */
18#define CONFIG_VIDEO_VESA
19
20/* Enable compacting of mode table */
21#define CONFIG_VIDEO_COMPACT
22
23/* Retain screen contents when switching modes */
24#define CONFIG_VIDEO_RETAIN
25
26/* Enable local mode list */
27#undef CONFIG_VIDEO_LOCAL
28
29/* Force 400 scan lines for standard modes (hack to fix bad BIOS behaviour) */
30#undef CONFIG_VIDEO_400_HACK
31
32/* Hack that lets you force specific BIOS mode ID and specific dimensions */
33#undef CONFIG_VIDEO_GFX_HACK
34#define VIDEO_GFX_BIOS_AX 0x4f02 /* 800x600 on ThinkPad */
35#define VIDEO_GFX_BIOS_BX 0x0102
36#define VIDEO_GFX_DUMMY_RESOLUTION 0x6425 /* 100x37 */
37
38/* This code uses an extended set of video mode numbers. These include:
39 * Aliases for standard modes
40 * NORMAL_VGA (-1)
41 * EXTENDED_VGA (-2)
42 * ASK_VGA (-3)
43 * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
44 * of compatibility when extending the table. These are between 0x00 and 0xff.
45 */
46#define VIDEO_FIRST_MENU 0x0000
47
48/* Standard BIOS video modes (BIOS number + 0x0100) */
49#define VIDEO_FIRST_BIOS 0x0100
50
51/* VESA BIOS video modes (VESA number + 0x0200) */
52#define VIDEO_FIRST_VESA 0x0200
53
54/* Video7 special modes (BIOS number + 0x0900) */
55#define VIDEO_FIRST_V7 0x0900
56
57/* Special video modes */
58#define VIDEO_FIRST_SPECIAL 0x0f00
59#define VIDEO_80x25 0x0f00
60#define VIDEO_8POINT 0x0f01
61#define VIDEO_80x43 0x0f02
62#define VIDEO_80x28 0x0f03
63#define VIDEO_CURRENT_MODE 0x0f04
64#define VIDEO_80x30 0x0f05
65#define VIDEO_80x34 0x0f06
66#define VIDEO_80x60 0x0f07
67#define VIDEO_GFX_HACK 0x0f08
68#define VIDEO_LAST_SPECIAL 0x0f09
69
70/* Video modes given by resolution */
71#define VIDEO_FIRST_RESOLUTION 0x1000
72
73/* The "recalculate timings" flag */
74#define VIDEO_RECALC 0x8000
75
76/* Positions of various video parameters passed to the kernel */
77/* (see also include/linux/tty.h) */
78#define PARAM_CURSOR_POS 0x00
79#define PARAM_VIDEO_PAGE 0x04
80#define PARAM_VIDEO_MODE 0x06
81#define PARAM_VIDEO_COLS 0x07
82#define PARAM_VIDEO_EGA_BX 0x0a
83#define PARAM_VIDEO_LINES 0x0e
84#define PARAM_HAVE_VGA 0x0f
85#define PARAM_FONT_POINTS 0x10
86
87#define PARAM_LFB_WIDTH 0x12
88#define PARAM_LFB_HEIGHT 0x14
89#define PARAM_LFB_DEPTH 0x16
90#define PARAM_LFB_BASE 0x18
91#define PARAM_LFB_SIZE 0x1c
92#define PARAM_LFB_LINELENGTH 0x24
93#define PARAM_LFB_COLORS 0x26
94#define PARAM_VESAPM_SEG 0x2e
95#define PARAM_VESAPM_OFF 0x30
96#define PARAM_LFB_PAGES 0x32
97#define PARAM_VESA_ATTRIB 0x34
98#define PARAM_CAPABILITIES 0x36
99
100/* Define DO_STORE according to CONFIG_VIDEO_RETAIN */
101#ifdef CONFIG_VIDEO_RETAIN
102#define DO_STORE call store_screen
103#else
104#define DO_STORE
105#endif /* CONFIG_VIDEO_RETAIN */
106
107# This is the main entry point called by setup.S
108# %ds *must* be pointing to the bootsector
109video: pushw %ds # We use different segments
110 pushw %ds # FS contains original DS
111 popw %fs
112 pushw %cs # DS is equal to CS
113 popw %ds
114 pushw %cs # ES is equal to CS
115 popw %es
116 xorw %ax, %ax
117 movw %ax, %gs # GS is zero
118 cld
119 call basic_detect # Basic adapter type testing (EGA/VGA/MDA/CGA)
120#ifdef CONFIG_VIDEO_SELECT
121 movw %fs:(0x01fa), %ax # User selected video mode
122 cmpw $ASK_VGA, %ax # Bring up the menu
123 jz vid2
124
125 call mode_set # Set the mode
126 jc vid1
127
128 leaw badmdt, %si # Invalid mode ID
129 call prtstr
130vid2: call mode_menu
131vid1:
132#ifdef CONFIG_VIDEO_RETAIN
133 call restore_screen # Restore screen contents
134#endif /* CONFIG_VIDEO_RETAIN */
135 call store_edid
136#endif /* CONFIG_VIDEO_SELECT */
137 call mode_params # Store mode parameters
138 popw %ds # Restore original DS
139 ret
140
141# Detect if we have CGA, MDA, EGA or VGA and pass it to the kernel.
142basic_detect:
143 movb $0, %fs:(PARAM_HAVE_VGA)
144 movb $0x12, %ah # Check EGA/VGA
145 movb $0x10, %bl
146 int $0x10
147 movw %bx, %fs:(PARAM_VIDEO_EGA_BX) # Identifies EGA to the kernel
148 cmpb $0x10, %bl # No, it's a CGA/MDA/HGA card.
149 je basret
150
151 incb adapter
152 movw $0x1a00, %ax # Check EGA or VGA?
153 int $0x10
154 cmpb $0x1a, %al # 1a means VGA...
155 jne basret # anything else is EGA.
156
157 incb %fs:(PARAM_HAVE_VGA) # We've detected a VGA
158 incb adapter
159basret: ret
160
161# Store the video mode parameters for later usage by the kernel.
162# This is done by asking the BIOS except for the rows/columns
163# parameters in the default 80x25 mode -- these are set directly,
164# because some very obscure BIOSes supply insane values.
165mode_params:
166#ifdef CONFIG_VIDEO_SELECT
167 cmpb $0, graphic_mode
168 jnz mopar_gr
169#endif
170 movb $0x03, %ah # Read cursor position
171 xorb %bh, %bh
172 int $0x10
173 movw %dx, %fs:(PARAM_CURSOR_POS)
174 movb $0x0f, %ah # Read page/mode/width
175 int $0x10
176 movw %bx, %fs:(PARAM_VIDEO_PAGE)
177 movw %ax, %fs:(PARAM_VIDEO_MODE) # Video mode and screen width
178 cmpb $0x7, %al # MDA/HGA => segment differs
179 jnz mopar0
180
181 movw $0xb000, video_segment
182mopar0: movw %gs:(0x485), %ax # Font size
183 movw %ax, %fs:(PARAM_FONT_POINTS) # (valid only on EGA/VGA)
184 movw force_size, %ax # Forced size?
185 orw %ax, %ax
186 jz mopar1
187
188 movb %ah, %fs:(PARAM_VIDEO_COLS)
189 movb %al, %fs:(PARAM_VIDEO_LINES)
190 ret
191
192mopar1: movb $25, %al
193 cmpb $0, adapter # If we are on CGA/MDA/HGA, the
194 jz mopar2 # screen must have 25 lines.
195
196 movb %gs:(0x484), %al # On EGA/VGA, use the EGA+ BIOS
197 incb %al # location of max lines.
198mopar2: movb %al, %fs:(PARAM_VIDEO_LINES)
199 ret
200
201#ifdef CONFIG_VIDEO_SELECT
202# Fetching of VESA frame buffer parameters
203mopar_gr:
204 leaw modelist+1024, %di
205 movb $0x23, %fs:(PARAM_HAVE_VGA)
206 movw 16(%di), %ax
207 movw %ax, %fs:(PARAM_LFB_LINELENGTH)
208 movw 18(%di), %ax
209 movw %ax, %fs:(PARAM_LFB_WIDTH)
210 movw 20(%di), %ax
211 movw %ax, %fs:(PARAM_LFB_HEIGHT)
212 movb 25(%di), %al
213 movb $0, %ah
214 movw %ax, %fs:(PARAM_LFB_DEPTH)
215 movb 29(%di), %al
216 movb $0, %ah
217 movw %ax, %fs:(PARAM_LFB_PAGES)
218 movl 40(%di), %eax
219 movl %eax, %fs:(PARAM_LFB_BASE)
220 movl 31(%di), %eax
221 movl %eax, %fs:(PARAM_LFB_COLORS)
222 movl 35(%di), %eax
223 movl %eax, %fs:(PARAM_LFB_COLORS+4)
224 movw 0(%di), %ax
225 movw %ax, %fs:(PARAM_VESA_ATTRIB)
226
227# get video mem size
228 leaw modelist+1024, %di
229 movw $0x4f00, %ax
230 int $0x10
231 xorl %eax, %eax
232 movw 18(%di), %ax
233 movl %eax, %fs:(PARAM_LFB_SIZE)
234
235# store mode capabilities
236 movl 10(%di), %eax
237 movl %eax, %fs:(PARAM_CAPABILITIES)
238
239# switching the DAC to 8-bit is for <= 8 bpp only
240 movw %fs:(PARAM_LFB_DEPTH), %ax
241 cmpw $8, %ax
242 jg dac_done
243
244# get DAC switching capability
245 xorl %eax, %eax
246 movb 10(%di), %al
247 testb $1, %al
248 jz dac_set
249
250# attempt to switch DAC to 8-bit
251 movw $0x4f08, %ax
252 movw $0x0800, %bx
253 int $0x10
254 cmpw $0x004f, %ax
255 jne dac_set
256 movb %bh, dac_size # store actual DAC size
257
258dac_set:
259# set color size to DAC size
260 movb dac_size, %al
261 movb %al, %fs:(PARAM_LFB_COLORS+0)
262 movb %al, %fs:(PARAM_LFB_COLORS+2)
263 movb %al, %fs:(PARAM_LFB_COLORS+4)
264 movb %al, %fs:(PARAM_LFB_COLORS+6)
265
266# set color offsets to 0
267 movb $0, %fs:(PARAM_LFB_COLORS+1)
268 movb $0, %fs:(PARAM_LFB_COLORS+3)
269 movb $0, %fs:(PARAM_LFB_COLORS+5)
270 movb $0, %fs:(PARAM_LFB_COLORS+7)
271
272dac_done:
273# get protected mode interface information
274 movw $0x4f0a, %ax
275 xorw %bx, %bx
276 xorw %di, %di
277 int $0x10
278 cmp $0x004f, %ax
279 jnz no_pm
280
281 movw %es, %fs:(PARAM_VESAPM_SEG)
282 movw %di, %fs:(PARAM_VESAPM_OFF)
283no_pm: ret
284
285# The video mode menu: prompt, then wait for ENTER (list modes) or SPACE (keep default)
286mode_menu:
287 leaw keymsg, %si # "Return/Space/Timeout" message
288 call prtstr
289 call flush
290nokey: call getkt # result is tested in AL below
291
292 cmpb $0x0d, %al # ENTER ?
293 je listm # yes - manual mode selection
294
295 cmpb $0x20, %al # SPACE ?
296 je defmd1 # yes - return without choosing a mode
297
298 call beep # any other key: beep and ask again
299 jmp nokey
300
301defmd1: ret # No mode chosen? Default 80x25
302
303listm: call mode_table # List mode table
304listm0: leaw name_bann, %si # Print adapter name
305 call prtstr
306 movw card_name, %si # card_name set? print it followed by svga_name
307 orw %si, %si
308 jnz an2
309
310 movb adapter, %al # else pick a generic name from the adapter type
311 leaw old_name, %si
312 orb %al, %al # adapter == 0
313 jz an1
314
315 leaw ega_name, %si
316 decb %al # adapter == 1
317 jz an1
318
319 leaw vga_name, %si # anything else
320 jmp an1
321
322an2: call prtstr
323 leaw svga_name, %si
324an1: call prtstr
325 leaw listhdr, %si # Table header
326 call prtstr
327 movb $0x30, %dl # DL holds mode number (starts at ASCII '0')
328 leaw modelist, %si
329lm1: cmpw $ASK_VGA, (%si) # End?
330 jz lm2
331
332 movb %dl, %al # Menu selection number
333 call prtchr
334 call prtsp2
335 lodsw # AX = mode ID, SI now at record byte 2
336 call prthw # Mode ID
337 call prtsp2
338 movb 0x1(%si), %al # record byte 3
339 call prtdec # Columns
340 movb $0x78, %al # the letter 'x'
341 call prtchr
342 lodsw # AL = record byte 2, SI now at next record
343 call prtdec # Rows
344 movb $0x0d, %al # New line
345 call prtchr
346 movb $0x0a, %al
347 call prtchr
348 incb %dl # Next character
349 cmpb $0x3a, %dl # Past ASCII '9'?
350 jnz lm1
351
352 movb $0x61, %dl # Continue numbering with ASCII 'a'
353 jmp lm1
354
355lm2: leaw prompt, %si # Mode prompt
356 call prtstr
357 leaw edit_buf, %di # Editor buffer
358lm3: call getkey
359 cmpb $0x0d, %al # Enter?
360 jz lment
361
362 cmpb $0x08, %al # Backspace?
363 jz lmbs
364
365 cmpb $0x20, %al # Printable?
366 jc lm3
367
368 cmpw $edit_buf+4, %di # Enough space?
369 jz lm3
370
371 stosb
372 call prtchr
373 jmp lm3
374
375lmbs: cmpw $edit_buf, %di # Backspace
376 jz lm3
377
378 decw %di
379 movb $0x08, %al
380 call prtchr
381 call prtspc
382 movb $0x08, %al
383 call prtchr
384 jmp lm3
385
386lment: movb $0, (%di)
387 leaw crlft, %si
388 call prtstr
389 leaw edit_buf, %si
390 cmpb $0, (%si) # Empty string = default mode
391 jz lmdef
392
393 cmpb $0, 1(%si) # One character = menu selection
394 jz mnusel
395
396 cmpw $0x6373, (%si) # "scan" => mode scanning
397 jnz lmhx
398
399 cmpw $0x6e61, 2(%si)
400 jz lmscan
401
402lmhx: xorw %bx, %bx # Else => mode ID in hex
403lmhex: lodsb
404 orb %al, %al
405 jz lmuse1
406
407 subb $0x30, %al
408 jc lmbad
409
410 cmpb $10, %al
411 jc lmhx1
412
413 subb $7, %al
414 andb $0xdf, %al
415 cmpb $10, %al
416 jc lmbad
417
418 cmpb $16, %al
419 jnc lmbad
420
421lmhx1: shlw $4, %bx
422 orb %al, %bl
423 jmp lmhex
424
425lmuse1: movw %bx, %ax
426 jmp lmuse
427
428mnusel: lodsb # Menu selection
429 xorb %ah, %ah
430 subb $0x30, %al
431 jc lmbad
432
433 cmpb $10, %al
434 jc lmuse
435
436 cmpb $0x61-0x30, %al
437 jc lmbad
438
439 subb $0x61-0x30-10, %al
440 cmpb $36, %al
441 jnc lmbad
442
443lmuse: call mode_set
444 jc lmdef
445
446lmbad: leaw unknt, %si
447 call prtstr
448 jmp lm2
449lmscan: cmpb $0, adapter # Scanning only on EGA/VGA
450 jz lmbad
451
452 movw $0, mt_end # Scanning of modes is
453 movb $1, scanning # done as new autodetection.
454 call mode_table
455 jmp listm0
456lmdef: ret
457
458# Additional parts of mode_set... (relative jumps, you know)
459setv7: # Video7 extended modes
460 DO_STORE
461 subb $VIDEO_FIRST_V7>>8, %bh
462 movw $0x6f05, %ax
463 int $0x10
464 stc
465 ret
466
467_setrec: jmp setrec # Ugly...
468_set_80x25: jmp set_80x25
469
470# Aliases for backward compatibility.
471setalias:
472 movw $VIDEO_80x25, %ax
473 incw %bx
474 jz mode_set
475
476 movb $VIDEO_8POINT-VIDEO_FIRST_SPECIAL, %al
477 incw %bx
478 jnz setbad # Fall-through!
479
480# Setting of user mode (AX=mode ID) => CF=success
481mode_set:
482 movw %ax, %fs:(0x01fa) # Store mode for use in acpi_wakeup.S
483 movw %ax, %bx
484 cmpb $0xff, %ah
485 jz setalias
486
487 testb $VIDEO_RECALC>>8, %ah
488 jnz _setrec
489
490 cmpb $VIDEO_FIRST_RESOLUTION>>8, %ah
491 jnc setres
492
493 cmpb $VIDEO_FIRST_SPECIAL>>8, %ah
494 jz setspc
495
496 cmpb $VIDEO_FIRST_V7>>8, %ah
497 jz setv7
498
499 cmpb $VIDEO_FIRST_VESA>>8, %ah
500 jnc check_vesa
501
502 orb %ah, %ah
503 jz setmenu
504
505 decb %ah
506 jz setbios
507
508setbad: clc
509 movb $0, do_restore # The screen needn't be restored
510 ret
511
512setvesa:
513 DO_STORE
514 subb $VIDEO_FIRST_VESA>>8, %bh
515 movw $0x4f02, %ax # VESA BIOS mode set call
516 int $0x10
517 cmpw $0x004f, %ax # AL=4f if implemented
518 jnz setbad # AH=0 if OK
519
520 stc
521 ret
522
523setbios:
524 DO_STORE
525 int $0x10 # Standard BIOS mode set call
526 pushw %bx
527 movb $0x0f, %ah # Check if really set
528 int $0x10
529 popw %bx
530 cmpb %bl, %al
531 jnz setbad
532
533 stc
534 ret
535
536setspc: xorb %bh, %bh # Set special mode
537 cmpb $VIDEO_LAST_SPECIAL-VIDEO_FIRST_SPECIAL, %bl
538 jnc setbad
539
540 addw %bx, %bx
541 jmp *spec_inits(%bx)
542
543setmenu:
544 orb %al, %al # 80x25 is an exception
545 jz _set_80x25
546
547 pushw %bx # Set mode chosen from menu
548 call mode_table # Build the mode table
549 popw %ax
550 shlw $2, %ax
551 addw %ax, %si
552 cmpw %di, %si
553 jnc setbad
554
555 movw (%si), %ax # Fetch mode ID
556_m_s: jmp mode_set
557
558setres: pushw %bx # Set mode chosen by resolution
559 call mode_table
560 popw %bx
561 xchgb %bl, %bh
562setr1: lodsw
563 cmpw $ASK_VGA, %ax # End of the list?
564 jz setbad
565
566 lodsw
567 cmpw %bx, %ax
568 jnz setr1
569
570 movw -4(%si), %ax # Fetch mode ID
571 jmp _m_s
572
573check_vesa:
574#ifdef CONFIG_FIRMWARE_EDID
575 leaw modelist+1024, %di
576 movw $0x4f00, %ax
577 int $0x10
578 cmpw $0x004f, %ax
579 jnz setbad
580
581 movw 4(%di), %ax
582 movw %ax, vbe_version
583#endif
584 leaw modelist+1024, %di
585 subb $VIDEO_FIRST_VESA>>8, %bh
586 movw %bx, %cx # Get mode information structure
587 movw $0x4f01, %ax
588 int $0x10
589 addb $VIDEO_FIRST_VESA>>8, %bh
590 cmpw $0x004f, %ax
591 jnz setbad
592
593 movb (%di), %al # Check capabilities.
594 andb $0x19, %al
595 cmpb $0x09, %al
596 jz setvesa # This is a text mode
597
598 movb (%di), %al # Check capabilities.
599 andb $0x99, %al
600 cmpb $0x99, %al
601 jnz _setbad # Doh! No linear frame buffer.
602
603 subb $VIDEO_FIRST_VESA>>8, %bh
604 orw $0x4000, %bx # Use linear frame buffer
605 movw $0x4f02, %ax # VESA BIOS mode set call
606 int $0x10
607 cmpw $0x004f, %ax # AL=4f if implemented
608 jnz _setbad # AH=0 if OK
609
610 movb $1, graphic_mode # flag graphic mode
611 movb $0, do_restore # no screen restore
612 stc
613 ret
614
615_setbad: jmp setbad # Ugly...
616
617# Recalculate vertical display end registers -- this fixes various
618# inconsistencies of extended modes on many adapters. Called when
619# the VIDEO_RECALC flag is set in the mode ID.
620
621setrec: subb $VIDEO_RECALC>>8, %ah # Set the base mode
622 call mode_set
623 jnc rct3
624
625 movw %gs:(0x485), %ax # Font size in pixels
626 movb %gs:(0x484), %bl # Number of rows
627 incb %bl
628 mulb %bl # Number of visible
629 decw %ax # scan lines - 1
630 movw $0x3d4, %dx
631 movw %ax, %bx
632 movb $0x12, %al # Lower 8 bits
633 movb %bl, %ah
634 outw %ax, %dx
635 movb $0x07, %al # Bits 8 and 9 in the overflow register
636 call inidx
637 xchgb %al, %ah
638 andb $0xbd, %ah
639 shrb %bh
640 jnc rct1
641 orb $0x02, %ah
642rct1: shrb %bh
643 jnc rct2
644 orb $0x40, %ah
645rct2: movb $0x07, %al
646 outw %ax, %dx
647 stc
648rct3: ret
649
650# Table of routines for setting of the special modes.
651spec_inits:
652 .word set_80x25
653 .word set_8pixel
654 .word set_80x43
655 .word set_80x28
656 .word set_current
657 .word set_80x30
658 .word set_80x34
659 .word set_80x60
660 .word set_gfx
661
662# Set the 80x25 mode. If already set, do nothing.
663set_80x25:
664 movw $0x5019, force_size # Override possibly broken BIOS
665use_80x25:
666#ifdef CONFIG_VIDEO_400_HACK
667 movw $0x1202, %ax # Force 400 scan lines
668 movb $0x30, %bl
669 int $0x10
670#else
671 movb $0x0f, %ah # Get current mode ID
672 int $0x10
673 cmpw $0x5007, %ax # Mode 7 (80x25 mono) is the only one available
674 jz st80 # on CGA/MDA/HGA and is also available on EGAM
675
676 cmpw $0x5003, %ax # Unknown mode, force 80x25 color
677 jnz force3
678
679st80: cmpb $0, adapter # CGA/MDA/HGA => mode 3/7 is always 80x25
680 jz set80
681
682 movb %gs:(0x0484), %al # This is EGA+ -- beware of 80x50 etc.
683 orb %al, %al # Some buggy BIOS'es set 0 rows
684 jz set80
685
686 cmpb $24, %al # It's hopefully correct
687 jz set80
688#endif /* CONFIG_VIDEO_400_HACK */
689force3: DO_STORE
690 movw $0x0003, %ax # Forced set
691 int $0x10
692set80: stc
693 ret
694
695# Set the 80x50/80x43 8-pixel mode. Simple BIOS calls.
696set_8pixel:
697 DO_STORE
698 call use_80x25 # The base is 80x25
699set_8pt:
700 movw $0x1112, %ax # Use 8x8 font
701 xorb %bl, %bl
702 int $0x10
703 movw $0x1200, %ax # Use alternate print screen
704 movb $0x20, %bl
705 int $0x10
706 movw $0x1201, %ax # Turn off cursor emulation
707 movb $0x34, %bl
708 int $0x10
709 movb $0x01, %ah # Define cursor scan lines 6-7
710 movw $0x0607, %cx
711 int $0x10
712set_current:
713 stc
714 ret
715
716# Set the 80x28 mode. This mode works on all VGA's, because it's a standard
717# 80x25 mode with 14-point fonts instead of 16-point.
718set_80x28:
719 DO_STORE
720 call use_80x25 # The base is 80x25
721set14: movw $0x1111, %ax # Use 9x14 font
722 xorb %bl, %bl
723 int $0x10
724 movb $0x01, %ah # Define cursor scan lines 11-12
725 movw $0x0b0c, %cx
726 int $0x10
727 stc
728 ret
729
730# Set the 80x43 mode. This mode works on all VGA's.
731# It's a 350-scanline mode with 8-pixel font.
732set_80x43:
733 DO_STORE
734 movw $0x1201, %ax # Set 350 scans
735 movb $0x30, %bl
736 int $0x10
737 movw $0x0003, %ax # Reset video mode
738 int $0x10
739 jmp set_8pt # Use 8-pixel font
740
741# Set the 80x30 mode (all VGA's). 480 scanlines, 16-pixel font.
742set_80x30:
743 call use_80x25 # Start with real 80x25
744 DO_STORE
745 movw $0x3cc, %dx # Get CRTC port
746 inb %dx, %al
747 movb $0xd4, %dl
748 rorb %al # Mono or color?
749 jc set48a
750
751 movb $0xb4, %dl
752set48a: movw $0x0c11, %ax # Vertical sync end (also unlocks CR0-7)
753 call outidx
754 movw $0x0b06, %ax # Vertical total
755 call outidx
756 movw $0x3e07, %ax # (Vertical) overflow
757 call outidx
758 movw $0xea10, %ax # Vertical sync start
759 call outidx
760 movw $0xdf12, %ax # Vertical display end
761 call outidx
762 movw $0xe715, %ax # Vertical blank start
763 call outidx
764 movw $0x0416, %ax # Vertical blank end
765 call outidx
766 pushw %dx
767 movb $0xcc, %dl # Misc output register (read)
768 inb %dx, %al
769 movb $0xc2, %dl # (write)
770 andb $0x0d, %al # Preserve clock select bits and color bit
771 orb $0xe2, %al # Set correct sync polarity
772 outb %al, %dx
773 popw %dx
774 movw $0x501e, force_size
775 stc # That's all.
776 ret
777
778# Set the 80x34 mode (all VGA's). 480 scans, 14-pixel font.
779set_80x34:
780 call set_80x30 # Set 480 scans
781 call set14 # And 14-pt font
782 movw $0xdb12, %ax # VGA vertical display end
783 movw $0x5022, force_size
784setvde: call outidx
785 stc
786 ret
787
788# Set the 80x60 mode (all VGA's). 480 scans, 8-pixel font.
789set_80x60:
790 call set_80x30 # Set 480 scans
791 call set_8pt # And 8-pt font
792 movw $0xdf12, %ax # VGA vertical display end
793 movw $0x503c, force_size
794 jmp setvde
795
796# Special hack for ThinkPad graphics
797set_gfx:
798#ifdef CONFIG_VIDEO_GFX_HACK
799 movw $VIDEO_GFX_BIOS_AX, %ax
800 movw $VIDEO_GFX_BIOS_BX, %bx
801 int $0x10
802 movw $VIDEO_GFX_DUMMY_RESOLUTION, force_size
803 stc
804#endif
805 ret
806
807#ifdef CONFIG_VIDEO_RETAIN
808
809# Store screen contents to temporary buffer.
810store_screen:
811 cmpb $0, do_restore # Already stored?
812 jnz stsr
813
814 testb $CAN_USE_HEAP, loadflags # Have we space for storing?
815 jz stsr
816
817 pushw %ax
818 pushw %bx
819 pushw force_size # Don't force specific size
820 movw $0, force_size
821 call mode_params # Obtain params of current mode
822 popw force_size
823 movb %fs:(PARAM_VIDEO_LINES), %ah
824 movb %fs:(PARAM_VIDEO_COLS), %al
825 movw %ax, %bx # BX=dimensions
826 mulb %ah
827 movw %ax, %cx # CX=number of characters
828 addw %ax, %ax # Calculate image size
829 addw $modelist+1024+4, %ax
830 cmpw heap_end_ptr, %ax
831 jnc sts1 # Unfortunately, out of memory
832
833 movw %fs:(PARAM_CURSOR_POS), %ax # Store mode params
834 leaw modelist+1024, %di
835 stosw
836 movw %bx, %ax
837 stosw
838 pushw %ds # Store the screen
839 movw video_segment, %ds
840 xorw %si, %si
841 rep
842 movsw
843 popw %ds
844 incb do_restore # Screen will be restored later
845sts1: popw %bx
846 popw %ax
847stsr: ret
848
849# Restore screen contents from temporary buffer.
850restore_screen:
851 cmpb $0, do_restore # Has the screen been stored?
852 jz res1
853
854 call mode_params # Get parameters of current mode
855 movb %fs:(PARAM_VIDEO_LINES), %cl
856 movb %fs:(PARAM_VIDEO_COLS), %ch
857 leaw modelist+1024, %si # Screen buffer
858 lodsw # Set cursor position
859 movw %ax, %dx
860 cmpb %cl, %dh
861 jc res2
862
863 movb %cl, %dh
864 decb %dh
865res2: cmpb %ch, %dl
866 jc res3
867
868 movb %ch, %dl
869 decb %dl
870res3: movb $0x02, %ah
871 movb $0x00, %bh
872 int $0x10
873 lodsw # Display size
874 movb %ah, %dl # DL=number of lines
875 movb $0, %ah # BX=phys. length of orig. line
876 movw %ax, %bx
877 cmpb %cl, %dl # Too many?
878 jc res4
879
880 pushw %ax
881 movb %dl, %al
882 subb %cl, %al
883 mulb %bl
884 addw %ax, %si
885 addw %ax, %si
886 popw %ax
887 movb %cl, %dl
888res4: cmpb %ch, %al # Too wide?
889 jc res5
890
891 movb %ch, %al # AX=width of src. line
892res5: movb $0, %cl
893 xchgb %ch, %cl
894 movw %cx, %bp # BP=width of dest. line
895 pushw %es
896 movw video_segment, %es
897 xorw %di, %di # Move the data
898 addw %bx, %bx # Convert BX and BP to _bytes_
899 addw %bp, %bp
900res6: pushw %si
901 pushw %di
902 movw %ax, %cx
903 rep
904 movsw
905 popw %di
906 popw %si
907 addw %bp, %di
908 addw %bx, %si
909 decb %dl
910 jnz res6
911
912 popw %es # Done
913res1: ret
914#endif /* CONFIG_VIDEO_RETAIN */
915
916# Write to indexed VGA register (AL=index, AH=data, DX=index reg. port)
917outidx: outb %al, %dx
918 pushw %ax
919 movb %ah, %al
920 incw %dx
921 outb %al, %dx
922 decw %dx
923 popw %ax
924 ret
925
926# Build the table of video modes (stored after the setup.S code at the
927# `modelist' label. Each video mode record looks like:
928# .word MODE-ID (our special mode ID (see above))
929# .byte rows (number of rows)
930# .byte columns (number of columns)
931# Returns address of the end of the table in DI, the end is marked
932# with a ASK_VGA ID.
933mode_table:
934 movw mt_end, %di # Already filled?
935 orw %di, %di
936 jnz mtab1x
937
938 leaw modelist, %di # Store standard modes:
939 movl $VIDEO_80x25 + 0x50190000, %eax # The 80x25 mode (ALL)
940 stosl
941 movb adapter, %al # CGA/MDA/HGA -- no more modes
942 orb %al, %al
943 jz mtabe
944
945 decb %al
946 jnz mtabv
947
948 movl $VIDEO_8POINT + 0x502b0000, %eax # The 80x43 EGA mode
949 stosl
950 jmp mtabe
951
952mtab1x: jmp mtab1
953
954mtabv: leaw vga_modes, %si # All modes for std VGA
955 movw $vga_modes_end-vga_modes, %cx
956 rep # I'm unable to use movsw as I don't know how to store a half
957 movsb # of the expression above to cx without using explicit shr.
958
959 cmpb $0, scanning # Mode scan requested?
960 jz mscan1
961
962 call mode_scan
963mscan1:
964
965#ifdef CONFIG_VIDEO_LOCAL
966 call local_modes
967#endif /* CONFIG_VIDEO_LOCAL */
968
969#ifdef CONFIG_VIDEO_VESA
970 call vesa_modes # Detect VESA VGA modes
971#endif /* CONFIG_VIDEO_VESA */
972
973#ifdef CONFIG_VIDEO_SVGA
974 cmpb $0, scanning # Bypass when scanning
975 jnz mscan2
976
977 call svga_modes # Detect SVGA cards & modes
978mscan2:
979#endif /* CONFIG_VIDEO_SVGA */
980
981mtabe:
982
983#ifdef CONFIG_VIDEO_COMPACT
984 leaw modelist, %si
985 movw %di, %dx
986 movw %si, %di
987cmt1: cmpw %dx, %si # Scan all modes
988 jz cmt2
989
990 leaw modelist, %bx # Find in previous entries
991 movw 2(%si), %cx
992cmt3: cmpw %bx, %si
993 jz cmt4
994
995 cmpw 2(%bx), %cx # Found => don't copy this entry
996 jz cmt5
997
998 addw $4, %bx
999 jmp cmt3
1000
1001cmt4: movsl # Copy entry
1002 jmp cmt1
1003
1004cmt5: addw $4, %si # Skip entry
1005 jmp cmt1
1006
1007cmt2:
1008#endif /* CONFIG_VIDEO_COMPACT */
1009
1010 movw $ASK_VGA, (%di) # End marker
1011 movw %di, mt_end
1012mtab1: leaw modelist, %si # SI=mode list, DI=list end
1013ret0: ret
1014
1015# Modes usable on all standard VGAs
# Each entry is two words: the mode ID, then the dimensions with the row
# count in the low byte and the column count in the high byte (0x5032 is
# 80 columns by 50 rows). The span vga_modes..vga_modes_end is copied
# byte-wise by the rep/movsb at label mtabv.
1016vga_modes:
1017 .word VIDEO_8POINT
1018 .word 0x5032 # 80x50
1019 .word VIDEO_80x43
1020 .word 0x502b # 80x43
1021 .word VIDEO_80x28
1022 .word 0x501c # 80x28
1023 .word VIDEO_80x30
1024 .word 0x501e # 80x30
1025 .word VIDEO_80x34
1026 .word 0x5022 # 80x34
1027 .word VIDEO_80x60
1028 .word 0x503c # 80x60
1029#ifdef CONFIG_VIDEO_GFX_HACK
1030 .word VIDEO_GFX_HACK
1031 .word VIDEO_GFX_DUMMY_RESOLUTION
1032#endif
1033
1034vga_modes_end:
1035# Detect VESA modes.
1036
1037#ifdef CONFIG_VIDEO_VESA
# Append VESA text modes to the mode table whose current end is in DI.
# Runs only when adapter == 2. Each accepted mode becomes a 4-byte entry:
# mode number word, rows byte, columns byte. BP keeps the table end seen on
# entry so that a failed scan can throw away everything added here.
1038vesa_modes:
1039 cmpb $2, adapter # VGA only
1040 jnz ret0
1041
1042 movw %di, %bp # BP=original mode table end
1043 addw $0x200, %di # Buffer space
1044 movw $0x4f00, %ax # VESA Get card info call
1045 int $0x10
1046 movw %bp, %di
1047 cmpw $0x004f, %ax # Successful?
1048 jnz ret0
1049
1050 cmpw $0x4556, 0x200(%di) # Info block bytes 0-1 must read "VE"
1051 jnz ret0
1052
1053 cmpw $0x4153, 0x202(%di) # ... and bytes 2-3 must read "SA"
1054 jnz ret0
1055
1056 movw $vesa_name, card_name # Set name to "VESA VGA"
1057 pushw %gs
1058 lgsw 0x20e(%di), %si # GS:SI=mode list
1059 movw $128, %cx # Iteration limit
1060vesa1:
1061# gas version 2.9.1, using BFD version 2.9.1.0.23 buggers the next inst.
1062# XXX: lodsw %gs:(%si), %ax # Get next mode in the list
1063 gs; lodsw
1064 cmpw $0xffff, %ax # End of the table?
1065 jz vesar
1066
1067 cmpw $0x0080, %ax # Check validity of mode ID
1068 jc vesa2
1069
1070 orb %ah, %ah # Valid IDs: 0x0000-0x007f/0x0100-0x07ff
1071 jz vesan # Certain BIOSes report 0x80-0xff!
1072
1073 cmpw $0x0800, %ax
1074 jnc vesae # IDs of 0x0800 and above abort the whole scan
1075
1076vesa2: pushw %cx
1077 movw %ax, %cx # Get mode information structure
1078 movw $0x4f01, %ax
1079 int $0x10
1080 movw %cx, %bx # BX=mode number
1081 addb $VIDEO_FIRST_VESA>>8, %bh
1082 popw %cx
1083 cmpw $0x004f, %ax
1084 jnz vesan # Don't report errors (buggy BIOSES)
1085
1086 movb (%di), %al # Check capabilities. We require
1087 andb $0x19, %al # a color text mode.
1088 cmpb $0x09, %al # Bits 0 and 3 set, bit 4 clear
1089 jnz vesan
1090
1091 cmpw $0xb800, 8(%di) # Standard video memory address required
1092 jnz vesan
1093
1094 testb $2, (%di) # Mode characteristics supplied?
1095 movw %bx, (%di) # Store mode number
1096 jz vesa3 # Still the testb result: movw leaves flags alone
1097
1098 xorw %dx, %dx
1099 movw 0x12(%di), %bx # Width
1100 orb %bh, %bh # Must fit in one byte
1101 jnz vesan
1102
1103 movb %bl, 0x3(%di)
1104 movw 0x14(%di), %ax # Height
1105 orb %ah, %ah # Must fit in one byte
1106 jnz vesan
1107
1108 movb %al, 2(%di)
1109 mulb %bl # AX = height * width
1110 cmpw $8193, %ax # Small enough for Linux console driver?
1111 jnc vesan
1112
1113 jmp vesaok
1114
1115vesa3: subw $0x8108, %bx # This mode has no detailed info specified,
1116 jc vesan # so it must be a standard VESA mode.
1117
1118 cmpw $5, %bx
1119 jnc vesan
1120
# NOTE(review): BX (0..4 here) is used as an unscaled byte offset although
# the table entries below are two bytes each -- confirm this is intended.
1121 movw vesa_text_mode_table(%bx), %ax
1122 movw %ax, 2(%di)
1123vesaok: addw $4, %di # The mode is valid. Store it.
1124vesan: loop vesa1 # Next mode. Limit exceeded => error
1125vesae: leaw vesaer, %si
1126 call prtstr
1127 movw %bp, %di # Discard already found modes.
1128vesar: popw %gs
1129 ret
1130
1131# Dimensions of standard VESA text modes
# Each entry is a rows byte followed by a columns byte.
1132vesa_text_mode_table:
1133 .byte 60, 80 # 0108
1134 .byte 25, 132 # 0109
1135 .byte 43, 132 # 010A
1136 .byte 50, 132 # 010B
1137 .byte 60, 132 # 010C
1138#endif /* CONFIG_VIDEO_VESA */
1139
1140# Scan for video modes. A bit dirty, but should work.
# Tries BIOS modes 0x00-0x7f in turn (CL counts up until its sign bit gets
# set) and stores, through stosw/stosb at ES:DI, one 4-byte entry for each
# mode that passes the register checks below: ID word 0x01xx, rows byte,
# columns byte. Switches back to mode 3 before returning.
1141mode_scan:
1142 movw $0x0100, %cx # Start with mode 0
1143scm1: movb $0, %ah # Test the mode
1144 movb %cl, %al
1145 int $0x10
1146 movb $0x0f, %ah # Ask the BIOS which mode is active now
1147 int $0x10
1148 cmpb %cl, %al
1149 jnz scm2 # Mode not set
1150
1151 movw $0x3c0, %dx # Test if it's a text mode
1152 movb $0x10, %al # Mode bits
1153 call inidx
1154 andb $0x03, %al
1155 jnz scm2
1156
1157 movb $0xce, %dl # Another set of mode bits
1158 movb $0x06, %al
1159 call inidx
1160 shrb %al
1161 jc scm2 # Bit 0 set => skip this mode
1162
1163 movb $0xd4, %dl # Cursor location
1164 movb $0x0f, %al
1165 call inidx
1166 orb %al, %al
1167 jnz scm2
1168
1169 movw %cx, %ax # Ok, store the mode
1170 stosw
1171 movb %gs:(0x484), %al # Number of rows
1172 incb %al # Stored value is one less than the row count
1173 stosb
1174 movw %gs:(0x44a), %ax # Number of columns
1175 stosb # Only the low byte is kept
1176scm2: incb %cl
1177 jns scm1 # Loop until CL reaches 0x80
1178
1179 movw $0x0003, %ax # Return back to mode 3
1180 int $0x10
1181 ret
1182
# tstidx: one 16-bit OUT (AL to port DX, AH to port DX+1), then falls
#         through into inidx to read the same indexed register back.
# inidx:  writes index AL to port DX and returns the byte read from port
#         DX+1 in AL. DX is the same on return as on entry.
1183tstidx: outw %ax, %dx # OUT DX,AX and inidx
1184inidx: outb %al, %dx # Read from indexed VGA register
1185 incw %dx # AL=index, DX=index reg port -> AL=data
1186 inb %dx, %al
1187 decw %dx
1188 ret
1189
1190# Try to detect type of SVGA card and supply (usually approximate) video
1191# mode table for it.
1192
1193#ifdef CONFIG_VIDEO_SVGA
# Walks svga_table. For every card the test routine is called with
# ES=0xc000 and BP holding the address of the mode table of that card; a
# test reports "not this card" by clearing BP. SI, DI and ES are saved
# around the call. For the first card that matches, its modes are appended
# at ES:DI as (svga_prefix<<8 | mode byte) followed by the two dimension
# bytes, and card_name is set to the string that follows the mode table.
1194svga_modes:
1195 leaw svga_table, %si # Test all known SVGA adapters
1196dosvga: lodsw
1197 movw %ax, %bp # Default mode table
1198 orw %ax, %ax
1199 jz didsv1 # Zero word ends svga_table: nothing matched
1200
1201 lodsw # Pointer to test routine
1202 pushw %si
1203 pushw %di
1204 pushw %es
1205 movw $0xc000, %bx
1206 movw %bx, %es
1207 call *%ax # Call test routine
1208 popw %es
1209 popw %di
1210 popw %si
1211 orw %bp, %bp
1212 jz dosvga # BP cleared by the test: try the next card
1213
1214 movw %bp, %si # Found, copy the modes
1215 movb svga_prefix, %ah
1216cpsvga: lodsb
1217 orb %al, %al
1218 jz didsv # Zero byte ends the mode table
1219
1220 stosw # Mode ID: prefix in the high byte, mode byte low
1221 movsw # Two dimension bytes
1222 jmp cpsvga
1223
1224didsv: movw %si, card_name # Store pointer to card name
1225didsv1: ret
1226
1227# Table of all known SVGA cards. For each card, we store a pointer to
1228# a table of video modes supported by the card and a pointer to a routine
1229# used for testing of presence of the card. The video mode table is always
1230# followed by the name of the card or the chipset.
# The cards are tried in the order listed and the scan in svga_modes stops
# at the first test that leaves BP non-zero. The list ends with a zero word.
1231svga_table:
1232 .word ati_md, ati_test
1233 .word oak_md, oak_test
1234 .word paradise_md, paradise_test
1235 .word realtek_md, realtek_test
1236 .word s3_md, s3_test
1237 .word chips_md, chips_test
1238 .word video7_md, video7_test
1239 .word cirrus5_md, cirrus5_test
1240 .word cirrus6_md, cirrus6_test
1241 .word cirrus1_md, cirrus1_test
1242 .word ahead_md, ahead_test
1243 .word everex_md, everex_test
1244 .word genoa_md, genoa_test
1245 .word trident_md, trident_test
1246 .word tseng_md, tseng_test
1247 .word 0
1248
1249# Test routines and mode tables:
1250
1251# S3 - The test algorithm was taken from the SuperProbe package
1252# for XFree86 1.2.1. Report bugs to Christoph.Niemann@linux.org
# Probes CRT register 0x35 through index port 0x3d4. With 0x00 written to
# register 0x38 the low nibble of 0x35 must not be freely writable; with
# 0x48 written to register 0x38 it must be. BH and BL hold the saved
# contents of registers 0x38 and 0x35, which are written back before
# returning. BP is cleared when the card is not recognised.
1253s3_test:
1254 movw $0x0f35, %cx # we store some constants in cl/ch
1255 movw $0x03d4, %dx
1256 movb $0x38, %al
1257 call inidx
1258 movb %al, %bh # store current CRT-register 0x38
1259 movw $0x0038, %ax
1260 call outidx # disable writing to special regs
1261 movb %cl, %al # check whether we can write special reg 0x35
1262 call inidx
1263 movb %al, %bl # save the current value of CRT reg 0x35
1264 andb $0xf0, %al # clear bits 0-3
1265 movb %al, %ah
1266 movb %cl, %al # and write it to CRT reg 0x35
1267 call outidx
1268 call inidx # now read it back
1269 andb %ch, %al # clear the upper 4 bits
1270 jz s3_2 # the first test failed. But we have a
1271
1272 movb %bl, %ah # second chance
1273 movb %cl, %al
1274 call outidx
1275 jmp s3_1 # do the other tests
1276
1277s3_2: movw %cx, %ax # load ah with 0xf and al with 0x35
1278 orb %bl, %ah # set the upper 4 bits of ah with the orig value
1279 call outidx # write ...
1280 call inidx # ... and reread
1281 andb %ch, %al # turn off the upper 4 bits (mask is CH=0x0f, not CL=0x35)
1282 pushw %ax
1283 movb %bl, %ah # restore old value in register 0x35
1284 movb %cl, %al
1285 call outidx
1286 popw %ax
1287 cmpb %ch, %al # setting lower 4 bits was successful => bad
1288 je no_s3 # writing is allowed => this is not an S3
1289
1290s3_1: movw $0x4838, %ax # allow writing to special regs by putting
1291 call outidx # magic number into CRT-register 0x38
1292 movb %cl, %al # check whether we can write special reg 0x35
1293 call inidx
1294 movb %al, %bl
1295 andb $0xf0, %al
1296 movb %al, %ah
1297 movb %cl, %al
1298 call outidx
1299 call inidx
1300 andb %ch, %al
1301 jnz no_s3 # no, we can't write => no S3
1302
1303 movw %cx, %ax
1304 orb %bl, %ah
1305 call outidx
1306 call inidx
1307 andb %ch, %al
1308 pushw %ax
1309 movb %bl, %ah # restore old value in register 0x35
1310 movb %cl, %al
1311 call outidx
1312 popw %ax
1313 cmpb %ch, %al
1314 jne no_s31 # writing not possible => no S3
1315 movb $0x30, %al
1316 call inidx # now get the S3 id ...
1317 leaw idS3, %di
1318 movw $0x10, %cx
# NOTE(review): scasb compares AL against ES:DI and svga_modes calls this
# routine with ES=0xc000 -- confirm that the idS3 table is what gets scanned.
# NOTE(review): a match (ZF=1) takes the no_s31 failure exit -- confirm.
1319 repne
1320 scasb
1321 je no_s31
1322
1323 movb %bh, %ah
1324 movb $0x38, %al
1325 jmp s3rest
1326
1327no_s3: movb $0x35, %al # restore CRT register 0x35
1328 movb %bl, %ah
1329 call outidx
1330no_s31: xorw %bp, %bp # Detection failed
1331s3rest: movb %bh, %ah
1332 movb $0x38, %al # restore old value of CRT register 0x38
1333 jmp outidx
1334
1335idS3: .byte 0x81, 0x82, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95
1336 .byte 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa8, 0xb0
1337
# Mode table: mode byte, rows, columns per entry; a zero byte ends the
# list and the card name follows.
1338s3_md: .byte 0x54, 0x2b, 0x84
1339 .byte 0x55, 0x19, 0x84
1340 .byte 0
1341 .ascii "S3"
1342 .byte 0
1343
1344# ATI cards.
# Matches when the 9 bytes at ES:0x31 equal the idati string (svga_modes
# calls the test with ES=0xc000); otherwise BP is cleared.
1345ati_test:
1346 leaw idati, %si
1347 movw $0x31, %di
1348 movw $0x09, %cx
1349 repe
1350 cmpsb # DS:SI against ES:DI, stops at the first mismatch
1351 je atiok
1352
1353 xorw %bp, %bp
1354atiok: ret
1355
1356idati: .ascii "761295520"
1357
# Mode table: mode byte, rows, columns per entry; a zero byte ends the
# list and the card name follows.
1358ati_md: .byte 0x23, 0x19, 0x84
1359 .byte 0x33, 0x2c, 0x84
1360 .byte 0x22, 0x1e, 0x64
1361 .byte 0x21, 0x19, 0x64
1362 .byte 0x58, 0x21, 0x50
1363 .byte 0x5b, 0x1e, 0x50
1364 .byte 0
1365 .ascii "ATI"
1366 .byte 0
1367
1368# AHEAD
# Writes 0x20 to indexed register 0x0f at port 0x3ce and reads port 0x3cf
# back; the card matches when the value read is 0x20 or 0x21, otherwise BP
# is cleared.
1369ahead_test:
1370 movw $0x200f, %ax
1371 movw $0x3ce, %dx
1372 outw %ax, %dx
1373 incw %dx
1374 inb %dx, %al
1375 cmpb $0x20, %al
1376 je isahed
1377
1378 cmpb $0x21, %al
1379 je isahed
1380
1381 xorw %bp, %bp
1382isahed: ret
1383
# Mode table: mode byte, rows, columns per entry; a zero byte ends the
# list and the card name follows.
1384ahead_md:
1385 .byte 0x22, 0x2c, 0x84
1386 .byte 0x23, 0x19, 0x84
1387 .byte 0x24, 0x1c, 0x84
1388 .byte 0x2f, 0x32, 0xa0
1389 .byte 0x32, 0x22, 0x50
1390 .byte 0x34, 0x42, 0x50
1391 .byte 0
1392 .ascii "Ahead"
1393 .byte 0
1394
1395# Chips & Tech.
# Sets bit 4 of port 0x3c3, samples port 0x104 into BL, clears bit 4 of
# port 0x3c3 again and matches when the sampled byte is 0xa5; otherwise BP
# is cleared.
1396chips_test:
1397 movw $0x3c3, %dx
1398 inb %dx, %al
1399 orb $0x10, %al
1400 outb %al, %dx
1401 movw $0x104, %dx
1402 inb %dx, %al
1403 movb %al, %bl
1404 movw $0x3c3, %dx
1405 inb %dx, %al
1406 andb $0xef, %al
1407 outb %al, %dx
1408 cmpb $0xa5, %bl
1409 je cantok
1410
1411 xorw %bp, %bp
1412cantok: ret
1413
# Mode table: mode byte, rows, columns per entry; a zero byte ends the
# list and the card name follows.
1414chips_md:
1415 .byte 0x60, 0x19, 0x84
1416 .byte 0x61, 0x32, 0x84
1417 .byte 0
1418 .ascii "Chips & Technologies"
1419 .byte 0
1420
1421# Cirrus Logic 5X0
# Saves indexed register 0x0c of port 0x3d4 in BL and zeroes it, reads
# register 0x1f into BH, writes the nibble-swapped BH to register 6 of port
# 0x3c4 and expects to read 0 back, then writes BH itself and expects to
# read 1. Register 0x0c is written back from BL on both exits; BP is
# cleared when either expectation fails.
1422cirrus1_test:
1423 movw $0x3d4, %dx
1424 movb $0x0c, %al
1425 outb %al, %dx
1426 incw %dx
1427 inb %dx, %al
1428 movb %al, %bl
1429 xorb %al, %al
1430 outb %al, %dx
1431 decw %dx
1432 movb $0x1f, %al
1433 outb %al, %dx
1434 incw %dx
1435 inb %dx, %al
1436 movb %al, %bh
1437 xorb %ah, %ah
1438 shlb $4, %al # Low nibble of BH moved to the high nibble
1439 movw %ax, %cx
1440 movb %bh, %al
1441 shrb $4, %al # High nibble of BH moved to the low nibble
1442 addw %ax, %cx # CL = BH with its nibbles swapped
1443 shlw $8, %cx # Move it to CH (data byte) ...
1444 addw $6, %cx # ... and put index 6 in CL
1445 movw %cx, %ax
1446 movw $0x3c4, %dx
1447 outw %ax, %dx
1448 incw %dx
1449 inb %dx, %al
1450 andb %al, %al
1451 jnz nocirr
1452
1453 movb %bh, %al
1454 outb %al, %dx
1455 inb %dx, %al
1456 cmpb $0x01, %al
1457 je iscirr
1458
1459nocirr: xorw %bp, %bp
1460iscirr: movw $0x3d4, %dx
1461 movb %bl, %al
1462 xorb %ah, %ah
1463 shlw $8, %ax # Saved value in AH ...
1464 addw $0x0c, %ax # ... index 0x0c in AL
1465 outw %ax, %dx
1466 ret
1467
# Mode table: mode byte, rows, columns per entry; a zero byte ends the
# list and the card name follows.
1468cirrus1_md:
1469 .byte 0x1f, 0x19, 0x84
1470 .byte 0x20, 0x2c, 0x84
1471 .byte 0x22, 0x1e, 0x84
1472 .byte 0x31, 0x25, 0x64
1473 .byte 0
1474 .ascii "Cirrus Logic 5X0"
1475 .byte 0
1476
1477# Cirrus Logic 54XX
# Uses indexed registers at port 0x3c4. Register 6 (saved in BL) must read
# 0x0f after writing 0 and 0x12 after writing 0x12. The low six bits of
# register 0x1e (saved in BH) must then read back as all zeroes and as all
# ones after being written that way. Both registers are written back
# before returning; BP is cleared when any check fails.
1478cirrus5_test:
1479 movw $0x3c4, %dx
1480 movb $6, %al
1481 call inidx
1482 movb %al, %bl # BL=backup
1483 movw $6, %ax
1484 call tstidx
1485 cmpb $0x0f, %al
1486 jne c5fail
1487
1488 movw $0x1206, %ax
1489 call tstidx
1490 cmpb $0x12, %al
1491 jne c5fail
1492
1493 movb $0x1e, %al
1494 call inidx
1495 movb %al, %bh
1496 movb %bh, %ah
1497 andb $0xc0, %ah
1498 movb $0x1e, %al
1499 call tstidx
1500 andb $0x3f, %al
1501 jne c5xx
1502
1503 movb $0x1e, %al
1504 movb %bh, %ah
1505 orb $0x3f, %ah
1506 call tstidx
1507 xorb $0x3f, %al
1508 andb $0x3f, %al
1509c5xx: pushf # Keep ZF from the last check across the restore
1510 movb $0x1e, %al
1511 movb %bh, %ah
1512 outw %ax, %dx
1513 popf
1514 je c5done
1515
1516c5fail: xorw %bp, %bp
1517c5done: movb $6, %al
1518 movb %bl, %ah
1519 outw %ax, %dx
1520 ret
1521
# Mode table: mode byte, rows, columns per entry; a zero byte ends the
# list and the card name follows.
1522cirrus5_md:
1523 .byte 0x14, 0x19, 0x84
1524 .byte 0x54, 0x2b, 0x84
1525 .byte 0
1526 .ascii "Cirrus Logic 54XX"
1527 .byte 0
1528
1529# Cirrus Logic 64XX -- no known extra modes, but must be identified, because
1530# it's misidentified by the Ahead test.
# Uses indexed registers at port 0x3ce. Register 0x0a (saved in BL) must
# read 0 after writing 0xce and 1 after writing 0xec; the high nibble of
# register 0xaa must then be 4, 5, 7 or 8. Register 0x0a is written back
# before returning; BP is cleared when any check fails.
1531cirrus6_test:
1532 movw $0x3ce, %dx
1533 movb $0x0a, %al
1534 call inidx
1535 movb %al, %bl # BL=backup
1536 movw $0xce0a, %ax
1537 call tstidx
1538 orb %al, %al
1539 jne c2fail
1540
1541 movw $0xec0a, %ax
1542 call tstidx
1543 cmpb $0x01, %al
1544 jne c2fail
1545
1546 movb $0xaa, %al
1547 call inidx # 4X, 5X, 7X and 8X are valid 64XX chip ID's.
1548 shrb $4, %al
1549 subb $4, %al # Zero for 4X
1550 jz c6done
1551
1552 decb %al # Zero for 5X
1553 jz c6done
1554
1555 subb $2, %al # Zero for 7X
1556 jz c6done
1557
1558 decb %al # Zero for 8X
1559 jz c6done
1560
1561c2fail: xorw %bp, %bp
1562c6done: movb $0x0a, %al
1563 movb %bl, %ah
1564 outw %ax, %dx
1565 ret
1566
# Empty mode table (terminator only) followed by the card name.
1567cirrus6_md:
1568 .byte 0
1569 .ascii "Cirrus Logic 64XX"
1570 .byte 0
1571
1572# Everex / Trident
# Issues int 0x10 with AX=0x7000, BX=0. The card matches when AL comes back
# as 0x70, otherwise BP is cleared. When DX>>4 is 0x678 or 0x236 the mode
# table in BP is replaced by trident_md.
1573everex_test:
1574 movw $0x7000, %ax
1575 xorw %bx, %bx
1576 int $0x10
1577 cmpb $0x70, %al
1578 jne noevrx
1579
1580 shrw $4, %dx
1581 cmpw $0x678, %dx
1582 je evtrid
1583
1584 cmpw $0x236, %dx
1585 jne evrxok
1586
1587evtrid: leaw trident_md, %bp
1588evrxok: ret
1589
1590noevrx: xorw %bp, %bp
1591 ret
1592
# Mode table: mode byte, rows, columns per entry; a zero byte ends the
# list and the card name follows.
1593everex_md:
1594 .byte 0x03, 0x22, 0x50
1595 .byte 0x04, 0x3c, 0x50
1596 .byte 0x07, 0x2b, 0x64
1597 .byte 0x08, 0x4b, 0x64
1598 .byte 0x0a, 0x19, 0x84
1599 .byte 0x0b, 0x2c, 0x84
1600 .byte 0x16, 0x1e, 0x50
1601 .byte 0x18, 0x1b, 0x64
1602 .byte 0x21, 0x40, 0xa0
1603 .byte 0x40, 0x1e, 0x84
1604 .byte 0
1605 .ascii "Everex/Trident"
1606 .byte 0
1607
1608# Genoa.
# The byte at ES:0x37 is taken as an offset; up to four bytes at that
# offset in ES are compared with idgenoa, where a zero byte in idgenoa is
# not compared at all. The card matches when CX has counted down to zero,
# otherwise BP is cleared.
1609genoa_test:
1610 leaw idgenoa, %si # Check Genoa 'clues'
1611 xorw %ax, %ax
1612 movb %es:(0x37), %al
1613 movw %ax, %di
1614 movw $0x04, %cx
1615 decw %si # Pre-decrement: the loop increments first
1616 decw %di
1617l1: incw %si
1618 incw %di
1619 movb (%si), %al
1620 testb %al, %al
1621 jz l2 # Zero pattern byte: ZF is set, counts as equal
1622
1623 cmpb %es:(%di), %al
1624l2: loope l1 # Decrement CX, repeat while CX != 0 and ZF set
# NOTE(review): a mismatch on the fourth byte also leaves CX=0 and is
# therefore accepted below -- confirm this is intended.
1625 orw %cx, %cx
1626 je isgen
1627
1628 xorw %bp, %bp
1629isgen: ret
1630
1631idgenoa: .byte 0x77, 0x00, 0x99, 0x66
1632
# Mode table: mode byte, rows, columns per entry; a zero byte ends the
# list and the card name follows.
1633genoa_md:
1634 .byte 0x58, 0x20, 0x50
1635 .byte 0x5a, 0x2a, 0x64
1636 .byte 0x60, 0x19, 0x84
1637 .byte 0x61, 0x1d, 0x84
1638 .byte 0x62, 0x20, 0x84
1639 .byte 0x63, 0x2c, 0x84
1640 .byte 0x64, 0x3c, 0x84
1641 .byte 0x6b, 0x4f, 0x64
1642 .byte 0x72, 0x3c, 0x50
1643 .byte 0x74, 0x42, 0x50
1644 .byte 0x78, 0x4b, 0x64
1645 .byte 0
1646 .ascii "Genoa"
1647 .byte 0
1648
1649# OAK
# Matches when the 8 bytes at ES:0x08 equal the idoakvga string (svga_modes
# calls the test with ES=0xc000); otherwise BP is cleared.
1650oak_test:
1651 leaw idoakvga, %si
1652 movw $0x08, %di
1653 movw $0x08, %cx
1654 repe
1655 cmpsb # DS:SI against ES:DI, stops at the first mismatch
1656 je isoak
1657
1658 xorw %bp, %bp
1659isoak: ret
1660
1661idoakvga: .ascii "OAK VGA "
1662
# Mode table: mode byte, rows, columns per entry; a zero byte ends the
# list and the card name follows.
1663oak_md: .byte 0x4e, 0x3c, 0x50
1664 .byte 0x4f, 0x3c, 0x84
1665 .byte 0x50, 0x19, 0x84
1666 .byte 0x51, 0x2b, 0x84
1667 .byte 0
1668 .ascii "OAK"
1669 .byte 0
1670
1671# WD Paradise.
# Matches when the 4 bytes at ES:0x7d equal the idparadise string
# (svga_modes calls the test with ES=0xc000); otherwise BP is cleared.
1672paradise_test:
1673 leaw idparadise, %si
1674 movw $0x7d, %di
1675 movw $0x04, %cx
1676 repe
1677 cmpsb # DS:SI against ES:DI, stops at the first mismatch
1678 je ispara
1679
1680 xorw %bp, %bp
1681ispara: ret
1682
1683idparadise: .ascii "VGA="
1684
# Mode table: mode byte, rows, columns per entry; a zero byte ends the
# list and the card name follows.
1685paradise_md:
1686 .byte 0x41, 0x22, 0x50
1687 .byte 0x47, 0x1c, 0x84
1688 .byte 0x55, 0x19, 0x84
1689 .byte 0x54, 0x2c, 0x84
1690 .byte 0
1691 .ascii "Paradise"
1692 .byte 0
1693
1694# Trident.
# Selects indexed register 0x0e at port 0x3c4, remembers its value, writes
# 0 to it and reads it back. The remembered value is then written back
# with bit 1 inverted. The card matches when the low nibble of the value
# read after writing 0 equals 2; otherwise BP is cleared.
1695trident_test:
1696 movw $0x3c4, %dx
1697 movb $0x0e, %al
1698 outb %al, %dx
1699 incw %dx
1700 inb %dx, %al
1701 xchgb %al, %ah # AH = value found in the register
1702 xorb %al, %al
1703 outb %al, %dx
1704 inb %dx, %al
1705 xchgb %ah, %al # AL = value found first, AH = value after writing 0
1706 movb %al, %bl # Strange thing ... in the book this wasn't
1707 andb $0x02, %bl # necessary but it worked on my card which
1708 jz setb2 # is a trident. Without it the screen goes
1709 # blurred ...
1710 andb $0xfd, %al
1711 jmp clrb2
1712
1713setb2: orb $0x02, %al
1714clrb2: outb %al, %dx
1715 andb $0x0f, %ah
1716 cmpb $0x02, %ah
1717 je istrid
1718
1719 xorw %bp, %bp
1720istrid: ret
1721
# Mode table: mode byte, rows, columns per entry; a zero byte ends the
# list and the card name follows.
1722trident_md:
1723 .byte 0x50, 0x1e, 0x50
1724 .byte 0x51, 0x2b, 0x50
1725 .byte 0x52, 0x3c, 0x50
1726 .byte 0x57, 0x19, 0x84
1727 .byte 0x58, 0x1e, 0x84
1728 .byte 0x59, 0x2b, 0x84
1729 .byte 0x5a, 0x3c, 0x84
1730 .byte 0
1731 .ascii "Trident"
1732 .byte 0
1733
1734# Tseng.
# Saves the byte at port 0x3cd in BL, writes 0x55, reads the port back into
# AH and writes the saved byte back. The card matches when 0x55 was read
# back; otherwise BP is cleared. The isnot label is also the failure exit
# of video7_test.
1735tseng_test:
1736 movw $0x3cd, %dx
1737 inb %dx, %al # Could things be this simple ! :-)
1738 movb %al, %bl
1739 movb $0x55, %al
1740 outb %al, %dx
1741 inb %dx, %al
1742 movb %al, %ah
1743 movb %bl, %al
1744 outb %al, %dx
1745 cmpb $0x55, %ah
1746 je istsen
1747
1748isnot: xorw %bp, %bp
1749istsen: ret
1750
# Mode table: mode byte, rows, columns per entry; a zero byte ends the
# list and the card name follows.
1751tseng_md:
1752 .byte 0x26, 0x3c, 0x50
1753 .byte 0x2a, 0x28, 0x64
1754 .byte 0x23, 0x19, 0x84
1755 .byte 0x24, 0x1c, 0x84
1756 .byte 0x22, 0x2c, 0x84
1757 .byte 0x21, 0x3c, 0x84
1758 .byte 0
1759 .ascii "Tseng"
1760 .byte 0
1761
1762# Video7.
# Bit 0 of port 0x3cc selects the index port: 0x3b4 when clear, 0x3d4 when
# set. Indexed register 0x0c is saved in BL and overwritten with 0x55,
# register 0x1f is read into BH, and register 0x0c is written back. The
# card matches when BH equals 0x55 xor 0xea; in that case svga_prefix is
# changed to VIDEO_FIRST_V7>>8. On failure the routine jumps to isnot
# (inside tseng_test), which clears BP.
1763video7_test:
1764 movw $0x3cc, %dx
1765 inb %dx, %al
1766 movw $0x3b4, %dx
1767 andb $0x01, %al
1768 jz even7
1769
1770 movw $0x3d4, %dx
1771even7: movb $0x0c, %al
1772 outb %al, %dx
1773 incw %dx
1774 inb %dx, %al
1775 movb %al, %bl
1776 movb $0x55, %al
1777 outb %al, %dx
1778 inb %dx, %al
1779 decw %dx
1780 movb $0x1f, %al
1781 outb %al, %dx
1782 incw %dx
1783 inb %dx, %al
1784 movb %al, %bh
1785 decw %dx
1786 movb $0x0c, %al
1787 outb %al, %dx
1788 incw %dx
1789 movb %bl, %al
1790 outb %al, %dx
1791 movb $0x55, %al
1792 xorb $0xea, %al
1793 cmpb %bh, %al
1794 jne isnot
1795
1796 movb $VIDEO_FIRST_V7>>8, svga_prefix # Use special mode switching
1797 ret
1798
# Mode table: mode byte, rows, columns per entry; a zero byte ends the
# list and the card name follows.
1799video7_md:
1800 .byte 0x40, 0x2b, 0x50
1801 .byte 0x43, 0x3c, 0x50
1802 .byte 0x44, 0x3c, 0x64
1803 .byte 0x41, 0x19, 0x84
1804 .byte 0x42, 0x2c, 0x84
1805 .byte 0x45, 0x1c, 0x84
1806 .byte 0
1807 .ascii "Video 7"
1808 .byte 0
1809
1810# Realtek VGA
# Matches when the 11 bytes at ES:0x45 equal the idrtvga string (svga_modes
# calls the test with ES=0xc000); otherwise BP is cleared.
1811realtek_test:
1812 leaw idrtvga, %si
1813 movw $0x45, %di
1814 movw $0x0b, %cx
1815 repe
1816 cmpsb # DS:SI against ES:DI, stops at the first mismatch
1817 je isrt
1818
1819 xorw %bp, %bp
1820isrt: ret
1821
1822idrtvga: .ascii "REALTEK VGA"
1823
# Mode table: mode byte, rows, columns per entry; a zero byte ends the
# list and the card name follows.
1824realtek_md:
1825 .byte 0x1a, 0x3c, 0x50
1826 .byte 0x1b, 0x19, 0x84
1827 .byte 0x1c, 0x1e, 0x84
1828 .byte 0x1d, 0x2b, 0x84
1829 .byte 0x1e, 0x3c, 0x84
1830 .byte 0
1831 .ascii "REALTEK"
1832 .byte 0
1833
1834#endif /* CONFIG_VIDEO_SVGA */
1835
1836# User-defined local mode table (VGA only)
1837#ifdef CONFIG_VIDEO_LOCAL
# Copies the entries of local_mode_table to ES:DI, four bytes at a time,
# until a zero mode ID word is read.
1838local_modes:
1839 leaw local_mode_table, %si
1840locm1: lodsw # Mode ID
1841 orw %ax, %ax
1842 jz locm2 # Zero word ends the table
1843
1844 stosw
1845 movsw # Two dimension bytes
1846 jmp locm1
1847
1848locm2: ret
1849
1850# This is the table of local video modes which can be supplied manually
1851# by the user. Each entry consists of mode ID (word) and dimensions
1852# (a byte for the row count followed by a byte for the column count, as
1853# in the 40x25 example below). These modes are placed before all SVGA and
1854# VESA modes and override them if table compacting is enabled. The table
1855# must end with a zero word followed by NUL-terminated video adapter name.
1856local_mode_table:
1857 .word 0x0100 # Example: 40x25
1858 .byte 25,40
1859 .word 0
1860 .ascii "Local"
1861 .byte 0
1862#endif /* CONFIG_VIDEO_LOCAL */
1863
1864# Read a key and return the ASCII code in al, scan code in ah
# Calls int 0x16 with AH=0. Also used as the tail of getkt.
1865getkey: xorb %ah, %ah
1866 int $0x16
1867 ret
1868
1869# Read a key with a timeout of 30 seconds.
1870# The hardware clock is used to get the time.
# The deadline kept in CL is the value gettime returns in AL plus 30,
# reduced by 60 when it reaches 60 (presumably AL is the seconds field;
# gettime is defined elsewhere -- TODO confirm). The loop polls int 0x16
# with AH=1 and ends through getkey when a key is waiting, or returns a
# space once gettime reports the deadline value.
1871getkt: call gettime
1872 addb $30, %al # Wait 30 seconds
1873 cmpb $60, %al
1874 jl lminute
1875
1876 subb $60, %al
1877lminute:
1878 movb %al, %cl
1879again: movb $0x01, %ah
1880 int $0x16
1881 jnz getkey # key pressed, so get it
1882
1883 call gettime
1884 cmpb %cl, %al
1885 jne again
1886
1887 movb $0x20, %al # timeout, return `space'
1888 ret
1889
1890# Flush the keyboard buffer
# Repeats int 0x16 AH=1 (check) followed by AH=0 (read) until the check
# returns with ZF set.
1891flush: movb $0x01, %ah
1892 int $0x16
1893 jz empty
1894
1895 xorb %ah, %ah
1896 int $0x16
1897 jmp flush
1898
1899empty: ret
1900
1901# Print hexadecimal number.
# prthw prints AX as four digits: it calls prthb for AH and then falls
#       through into prthb for AL.
# prthb prints AL as two digits: it calls prthn for the high nibble and
#       then falls through into prthn for the low nibble.
# prthn turns the value 0-15 in AL into an ASCII digit (0-9, A-F) and
#       jumps to prtchr, which is defined elsewhere.
1902prthw: pushw %ax
1903 movb %ah, %al
1904 call prthb
1905 popw %ax
1906prthb: pushw %ax
1907 shrb $4, %al
1908 call prthn
1909 popw %ax
1910 andb $0x0f, %al
1911prthn: cmpb $0x0a, %al
1912 jc prth1 # Below 10: plain decimal digit
1913
1914 addb $0x07, %al # Skip from the digits to the capital letters
1915prth1: addb $0x30, %al
1916 jmp prtchr
1917
1918# Print decimal number in al
# Divides AL by 10 with AH cleared. A quotient above 9 is printed by a
# recursive call, otherwise as a single digit; the remainder digit is
# printed last. AX and CX are saved and restored, so the remainder in AH
# is still there after the recursive call returns.
1919prtdec: pushw %ax
1920 pushw %cx
1921 xorb %ah, %ah
1922 movb $0x0a, %cl
1923 idivb %cl # AL = quotient, AH = remainder
1924 cmpb $0x09, %al
1925 jbe lt100
1926
1927 call prtdec
1928 jmp skip10
1929
1930lt100: addb $0x30, %al
1931 call prtchr
1932skip10: movb %ah, %al
1933 addb $0x30, %al
1934 call prtchr
1935 popw %cx
1936 popw %ax
1937 ret
1938
# With CONFIG_FIRMWARE_EDID: fills the 128 bytes at FS:0x140 with 0x13
# and, when vbe_version is at least 0x0200 and the capability call
# returns AX=0x004f, issues the second int 0x10 AX=0x4f15 call with
# BX=1 and ES:DI pointing at that buffer. All registers pushed on entry
# are popped again before returning. Without the option this is a
# bare ret.
1939store_edid:
1940#ifdef CONFIG_FIRMWARE_EDID
1941 pushw %es # just save all registers
1942 pushw %ax
1943 pushw %bx
1944 pushw %cx
1945 pushw %dx
1946 pushw %di
1947
1948 pushw %fs
1949 popw %es # ES = FS for the stosl below
1950
1951 movl $0x13131313, %eax # memset block with 0x13
1952 movw $32, %cx # 32 dwords = 128 bytes
1953 movw $0x140, %di
1954 cld
1955 rep
1956 stosl
1957
1958 cmpw $0x0200, vbe_version # only do EDID on >= VBE2.0
1959 jl no_edid
1960
1961 pushw %es # save ES
1962 xorw %di, %di # Report Capability
1963 pushw %di
1964 popw %es # ES:DI must be 0:0
1965 movw $0x4f15, %ax
1966 xorw %bx, %bx
1967 xorw %cx, %cx
1968 int $0x10
1969 popw %es # restore ES
1970
1971 cmpb $0x00, %ah # call successful
1972 jne no_edid
1973
1974 cmpb $0x4f, %al # function supported
1975 jne no_edid
1976
1977 movw $0x4f15, %ax # do VBE/DDC
1978 movw $0x01, %bx
1979 movw $0x00, %cx
1980 movw $0x01, %dx
1981 movw $0x140, %di
1982 int $0x10
1983
1984no_edid:
1985 popw %di # restore all registers
1986 popw %dx
1987 popw %cx
1988 popw %bx
1989 popw %ax
1990 popw %es
1991#endif
1992 ret
1993
1994# VIDEO_SELECT-only variables
1995mt_end: .word 0 # End of video mode table if built
1996edit_buf: .space 6 # Line editor buffer
1997card_name: .word 0 # Pointer to adapter name
1998scanning: .byte 0 # Performing mode scan
1999do_restore: .byte 0 # Screen contents altered during mode change
2000svga_prefix: .byte VIDEO_FIRST_BIOS>>8 # Default prefix for BIOS modes
2001graphic_mode: .byte 0 # Graphic mode with a linear frame buffer
2002dac_size: .byte 6 # DAC bit depth
2003vbe_version: .word 0 # VBE bios version
2004
2005# Status messages
2006keymsg: .ascii "Press <RETURN> to see video modes available, "
2007 .ascii "<SPACE> to continue or wait 30 secs"
2008 .byte 0x0d, 0x0a, 0
2009
2010listhdr: .byte 0x0d, 0x0a
2011 .ascii "Mode: COLSxROWS:"
2012
2013crlft: .byte 0x0d, 0x0a, 0
2014
2015prompt: .byte 0x0d, 0x0a
2016 .asciz "Enter mode number or `scan': "
2017
2018unknt: .asciz "Unknown mode ID. Try again."
2019
2020badmdt: .ascii "You passed an undefined mode number."
2021 .byte 0x0d, 0x0a, 0
2022
2023vesaer: .ascii "Error: Scanning of VESA modes failed. Please "
2024 .ascii "report to <mj@ucw.cz>."
2025 .byte 0x0d, 0x0a, 0
2026
2027old_name: .asciz "CGA/MDA/HGA"
2028
2029ega_name: .asciz "EGA"
2030
2031svga_name: .ascii " "
2032
2033vga_name: .asciz "VGA"
2034
2035vesa_name: .asciz "VESA"
2036
2037name_bann: .asciz "Video adapter: "
2038#endif /* CONFIG_VIDEO_SELECT */
2039
2040# Other variables:
2041adapter: .byte 0 # Video adapter: 0=CGA/MDA/HGA,1=EGA,2=VGA
2042video_segment: .word 0xb800 # Video memory segment
2043force_size: .word 0 # Use this size instead of the one in BIOS vars
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index b26378815b91..941a7e3aa5fb 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.21-rc3 3# Linux kernel version: 2.6.21-git3
4# Wed Mar 7 15:29:47 2007 4# Tue May 1 07:30:48 2007
5# 5#
6CONFIG_X86_64=y 6CONFIG_X86_64=y
7CONFIG_64BIT=y 7CONFIG_64BIT=y
@@ -118,11 +118,11 @@ CONFIG_X86_PC=y
118# CONFIG_X86_VSMP is not set 118# CONFIG_X86_VSMP is not set
119# CONFIG_MK8 is not set 119# CONFIG_MK8 is not set
120# CONFIG_MPSC is not set 120# CONFIG_MPSC is not set
121# CONFIG_MCORE2 is not set 121CONFIG_MCORE2=y
122CONFIG_GENERIC_CPU=y 122# CONFIG_GENERIC_CPU is not set
123CONFIG_X86_L1_CACHE_BYTES=128 123CONFIG_X86_L1_CACHE_BYTES=64
124CONFIG_X86_L1_CACHE_SHIFT=7 124CONFIG_X86_L1_CACHE_SHIFT=6
125CONFIG_X86_INTERNODE_CACHE_BYTES=128 125CONFIG_X86_INTERNODE_CACHE_BYTES=64
126CONFIG_X86_TSC=y 126CONFIG_X86_TSC=y
127CONFIG_X86_GOOD_APIC=y 127CONFIG_X86_GOOD_APIC=y
128# CONFIG_MICROCODE is not set 128# CONFIG_MICROCODE is not set
@@ -174,6 +174,7 @@ CONFIG_X86_MCE_INTEL=y
174CONFIG_X86_MCE_AMD=y 174CONFIG_X86_MCE_AMD=y
175# CONFIG_KEXEC is not set 175# CONFIG_KEXEC is not set
176# CONFIG_CRASH_DUMP is not set 176# CONFIG_CRASH_DUMP is not set
177# CONFIG_RELOCATABLE is not set
177CONFIG_PHYSICAL_START=0x200000 178CONFIG_PHYSICAL_START=0x200000
178CONFIG_SECCOMP=y 179CONFIG_SECCOMP=y
179# CONFIG_CC_STACKPROTECTOR is not set 180# CONFIG_CC_STACKPROTECTOR is not set
@@ -182,7 +183,6 @@ CONFIG_HZ_250=y
182# CONFIG_HZ_300 is not set 183# CONFIG_HZ_300 is not set
183# CONFIG_HZ_1000 is not set 184# CONFIG_HZ_1000 is not set
184CONFIG_HZ=250 185CONFIG_HZ=250
185# CONFIG_REORDER is not set
186CONFIG_K8_NB=y 186CONFIG_K8_NB=y
187CONFIG_GENERIC_HARDIRQS=y 187CONFIG_GENERIC_HARDIRQS=y
188CONFIG_GENERIC_IRQ_PROBE=y 188CONFIG_GENERIC_IRQ_PROBE=y
@@ -218,7 +218,6 @@ CONFIG_ACPI_HOTPLUG_CPU=y
218CONFIG_ACPI_THERMAL=y 218CONFIG_ACPI_THERMAL=y
219CONFIG_ACPI_NUMA=y 219CONFIG_ACPI_NUMA=y
220# CONFIG_ACPI_ASUS is not set 220# CONFIG_ACPI_ASUS is not set
221# CONFIG_ACPI_IBM is not set
222# CONFIG_ACPI_TOSHIBA is not set 221# CONFIG_ACPI_TOSHIBA is not set
223CONFIG_ACPI_BLACKLIST_YEAR=0 222CONFIG_ACPI_BLACKLIST_YEAR=0
224# CONFIG_ACPI_DEBUG is not set 223# CONFIG_ACPI_DEBUG is not set
@@ -243,7 +242,7 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
243# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set 242# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
244CONFIG_CPU_FREQ_GOV_USERSPACE=y 243CONFIG_CPU_FREQ_GOV_USERSPACE=y
245CONFIG_CPU_FREQ_GOV_ONDEMAND=y 244CONFIG_CPU_FREQ_GOV_ONDEMAND=y
246# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set 245CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
247 246
248# 247#
249# CPUFreq processor drivers 248# CPUFreq processor drivers
@@ -299,7 +298,6 @@ CONFIG_NET=y
299# 298#
300# Networking options 299# Networking options
301# 300#
302# CONFIG_NETDEBUG is not set
303CONFIG_PACKET=y 301CONFIG_PACKET=y
304# CONFIG_PACKET_MMAP is not set 302# CONFIG_PACKET_MMAP is not set
305CONFIG_UNIX=y 303CONFIG_UNIX=y
@@ -334,6 +332,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
334CONFIG_IPV6=y 332CONFIG_IPV6=y
335# CONFIG_IPV6_PRIVACY is not set 333# CONFIG_IPV6_PRIVACY is not set
336# CONFIG_IPV6_ROUTER_PREF is not set 334# CONFIG_IPV6_ROUTER_PREF is not set
335# CONFIG_IPV6_OPTIMISTIC_DAD is not set
337# CONFIG_INET6_AH is not set 336# CONFIG_INET6_AH is not set
338# CONFIG_INET6_ESP is not set 337# CONFIG_INET6_ESP is not set
339# CONFIG_INET6_IPCOMP is not set 338# CONFIG_INET6_IPCOMP is not set
@@ -389,6 +388,13 @@ CONFIG_IPV6_SIT=y
389# CONFIG_HAMRADIO is not set 388# CONFIG_HAMRADIO is not set
390# CONFIG_IRDA is not set 389# CONFIG_IRDA is not set
391# CONFIG_BT is not set 390# CONFIG_BT is not set
391# CONFIG_AF_RXRPC is not set
392
393#
394# Wireless
395#
396# CONFIG_CFG80211 is not set
397# CONFIG_WIRELESS_EXT is not set
392# CONFIG_IEEE80211 is not set 398# CONFIG_IEEE80211 is not set
393 399
394# 400#
@@ -409,10 +415,6 @@ CONFIG_FW_LOADER=y
409# Connector - unified userspace <-> kernelspace linker 415# Connector - unified userspace <-> kernelspace linker
410# 416#
411# CONFIG_CONNECTOR is not set 417# CONFIG_CONNECTOR is not set
412
413#
414# Memory Technology Devices (MTD)
415#
416# CONFIG_MTD is not set 418# CONFIG_MTD is not set
417 419
418# 420#
@@ -459,6 +461,7 @@ CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
459# CONFIG_SGI_IOC4 is not set 461# CONFIG_SGI_IOC4 is not set
460# CONFIG_TIFM_CORE is not set 462# CONFIG_TIFM_CORE is not set
461# CONFIG_SONY_LAPTOP is not set 463# CONFIG_SONY_LAPTOP is not set
464# CONFIG_THINKPAD_ACPI is not set
462 465
463# 466#
464# ATA/ATAPI/MFM/RLL support 467# ATA/ATAPI/MFM/RLL support
@@ -494,7 +497,6 @@ CONFIG_BLK_DEV_IDEPCI=y
494# CONFIG_BLK_DEV_RZ1000 is not set 497# CONFIG_BLK_DEV_RZ1000 is not set
495CONFIG_BLK_DEV_IDEDMA_PCI=y 498CONFIG_BLK_DEV_IDEDMA_PCI=y
496# CONFIG_BLK_DEV_IDEDMA_FORCED is not set 499# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
497CONFIG_IDEDMA_PCI_AUTO=y
498# CONFIG_IDEDMA_ONLYDISK is not set 500# CONFIG_IDEDMA_ONLYDISK is not set
499# CONFIG_BLK_DEV_AEC62XX is not set 501# CONFIG_BLK_DEV_AEC62XX is not set
500# CONFIG_BLK_DEV_ALI15X3 is not set 502# CONFIG_BLK_DEV_ALI15X3 is not set
@@ -525,7 +527,6 @@ CONFIG_BLK_DEV_PDC202XX_NEW=y
525# CONFIG_IDE_ARM is not set 527# CONFIG_IDE_ARM is not set
526CONFIG_BLK_DEV_IDEDMA=y 528CONFIG_BLK_DEV_IDEDMA=y
527# CONFIG_IDEDMA_IVB is not set 529# CONFIG_IDEDMA_IVB is not set
528CONFIG_IDEDMA_AUTO=y
529# CONFIG_BLK_DEV_HD is not set 530# CONFIG_BLK_DEV_HD is not set
530 531
531# 532#
@@ -584,11 +585,9 @@ CONFIG_AIC79XX_DEBUG_MASK=0
584# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set 585# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
585# CONFIG_SCSI_AIC94XX is not set 586# CONFIG_SCSI_AIC94XX is not set
586# CONFIG_SCSI_ARCMSR is not set 587# CONFIG_SCSI_ARCMSR is not set
587CONFIG_MEGARAID_NEWGEN=y 588# CONFIG_MEGARAID_NEWGEN is not set
588CONFIG_MEGARAID_MM=y
589CONFIG_MEGARAID_MAILBOX=y
590# CONFIG_MEGARAID_LEGACY is not set 589# CONFIG_MEGARAID_LEGACY is not set
591CONFIG_MEGARAID_SAS=y 590# CONFIG_MEGARAID_SAS is not set
592# CONFIG_SCSI_HPTIOP is not set 591# CONFIG_SCSI_HPTIOP is not set
593# CONFIG_SCSI_BUSLOGIC is not set 592# CONFIG_SCSI_BUSLOGIC is not set
594# CONFIG_SCSI_DMX3191D is not set 593# CONFIG_SCSI_DMX3191D is not set
@@ -608,6 +607,7 @@ CONFIG_MEGARAID_SAS=y
608# CONFIG_SCSI_DC395x is not set 607# CONFIG_SCSI_DC395x is not set
609# CONFIG_SCSI_DC390T is not set 608# CONFIG_SCSI_DC390T is not set
610# CONFIG_SCSI_DEBUG is not set 609# CONFIG_SCSI_DEBUG is not set
610# CONFIG_SCSI_ESP_CORE is not set
611# CONFIG_SCSI_SRP is not set 611# CONFIG_SCSI_SRP is not set
612 612
613# 613#
@@ -636,6 +636,7 @@ CONFIG_SATA_ACPI=y
636# CONFIG_PATA_AMD is not set 636# CONFIG_PATA_AMD is not set
637# CONFIG_PATA_ARTOP is not set 637# CONFIG_PATA_ARTOP is not set
638# CONFIG_PATA_ATIIXP is not set 638# CONFIG_PATA_ATIIXP is not set
639# CONFIG_PATA_CMD640_PCI is not set
639# CONFIG_PATA_CMD64X is not set 640# CONFIG_PATA_CMD64X is not set
640# CONFIG_PATA_CS5520 is not set 641# CONFIG_PATA_CS5520 is not set
641# CONFIG_PATA_CS5530 is not set 642# CONFIG_PATA_CS5530 is not set
@@ -687,7 +688,7 @@ CONFIG_BLK_DEV_DM=y
687CONFIG_FUSION=y 688CONFIG_FUSION=y
688CONFIG_FUSION_SPI=y 689CONFIG_FUSION_SPI=y
689# CONFIG_FUSION_FC is not set 690# CONFIG_FUSION_FC is not set
690CONFIG_FUSION_SAS=y 691# CONFIG_FUSION_SAS is not set
691CONFIG_FUSION_MAX_SGE=128 692CONFIG_FUSION_MAX_SGE=128
692# CONFIG_FUSION_CTL is not set 693# CONFIG_FUSION_CTL is not set
693 694
@@ -700,19 +701,22 @@ CONFIG_IEEE1394=y
700# Subsystem Options 701# Subsystem Options
701# 702#
702# CONFIG_IEEE1394_VERBOSEDEBUG is not set 703# CONFIG_IEEE1394_VERBOSEDEBUG is not set
703# CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set
704 704
705# 705#
706# Device Drivers 706# Controllers
707#
708
709#
710# Texas Instruments PCILynx requires I2C
707# 711#
708# CONFIG_IEEE1394_PCILYNX is not set
709CONFIG_IEEE1394_OHCI1394=y 712CONFIG_IEEE1394_OHCI1394=y
710 713
711# 714#
712# Protocol Drivers 715# Protocols
713# 716#
714# CONFIG_IEEE1394_VIDEO1394 is not set 717# CONFIG_IEEE1394_VIDEO1394 is not set
715# CONFIG_IEEE1394_SBP2 is not set 718# CONFIG_IEEE1394_SBP2 is not set
719# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set
716# CONFIG_IEEE1394_ETH1394 is not set 720# CONFIG_IEEE1394_ETH1394 is not set
717# CONFIG_IEEE1394_DV1394 is not set 721# CONFIG_IEEE1394_DV1394 is not set
718CONFIG_IEEE1394_RAWIO=y 722CONFIG_IEEE1394_RAWIO=y
@@ -775,7 +779,8 @@ CONFIG_TULIP=y
775# CONFIG_HP100 is not set 779# CONFIG_HP100 is not set
776CONFIG_NET_PCI=y 780CONFIG_NET_PCI=y
777# CONFIG_PCNET32 is not set 781# CONFIG_PCNET32 is not set
778# CONFIG_AMD8111_ETH is not set 782CONFIG_AMD8111_ETH=y
783# CONFIG_AMD8111E_NAPI is not set
779# CONFIG_ADAPTEC_STARFIRE is not set 784# CONFIG_ADAPTEC_STARFIRE is not set
780CONFIG_B44=y 785CONFIG_B44=y
781CONFIG_FORCEDETH=y 786CONFIG_FORCEDETH=y
@@ -837,9 +842,10 @@ CONFIG_S2IO=m
837# CONFIG_TR is not set 842# CONFIG_TR is not set
838 843
839# 844#
840# Wireless LAN (non-hamradio) 845# Wireless LAN
841# 846#
842# CONFIG_NET_RADIO is not set 847# CONFIG_WLAN_PRE80211 is not set
848# CONFIG_WLAN_80211 is not set
843 849
844# 850#
845# Wan interfaces 851# Wan interfaces
@@ -853,7 +859,6 @@ CONFIG_S2IO=m
853# CONFIG_SHAPER is not set 859# CONFIG_SHAPER is not set
854CONFIG_NETCONSOLE=y 860CONFIG_NETCONSOLE=y
855CONFIG_NETPOLL=y 861CONFIG_NETPOLL=y
856# CONFIG_NETPOLL_RX is not set
857# CONFIG_NETPOLL_TRAP is not set 862# CONFIG_NETPOLL_TRAP is not set
858CONFIG_NET_POLL_CONTROLLER=y 863CONFIG_NET_POLL_CONTROLLER=y
859 864
@@ -987,57 +992,7 @@ CONFIG_HPET_MMAP=y
987# 992#
988# I2C support 993# I2C support
989# 994#
990CONFIG_I2C=m 995# CONFIG_I2C is not set
991CONFIG_I2C_CHARDEV=m
992
993#
994# I2C Algorithms
995#
996# CONFIG_I2C_ALGOBIT is not set
997# CONFIG_I2C_ALGOPCF is not set
998# CONFIG_I2C_ALGOPCA is not set
999
1000#
1001# I2C Hardware Bus support
1002#
1003# CONFIG_I2C_ALI1535 is not set
1004# CONFIG_I2C_ALI1563 is not set
1005# CONFIG_I2C_ALI15X3 is not set
1006# CONFIG_I2C_AMD756 is not set
1007# CONFIG_I2C_AMD8111 is not set
1008# CONFIG_I2C_I801 is not set
1009# CONFIG_I2C_I810 is not set
1010# CONFIG_I2C_PIIX4 is not set
1011CONFIG_I2C_ISA=m
1012# CONFIG_I2C_NFORCE2 is not set
1013# CONFIG_I2C_OCORES is not set
1014# CONFIG_I2C_PARPORT_LIGHT is not set
1015# CONFIG_I2C_PASEMI is not set
1016# CONFIG_I2C_PROSAVAGE is not set
1017# CONFIG_I2C_SAVAGE4 is not set
1018# CONFIG_I2C_SIS5595 is not set
1019# CONFIG_I2C_SIS630 is not set
1020# CONFIG_I2C_SIS96X is not set
1021# CONFIG_I2C_STUB is not set
1022# CONFIG_I2C_VIA is not set
1023# CONFIG_I2C_VIAPRO is not set
1024# CONFIG_I2C_VOODOO3 is not set
1025# CONFIG_I2C_PCA_ISA is not set
1026
1027#
1028# Miscellaneous I2C Chip support
1029#
1030# CONFIG_SENSORS_DS1337 is not set
1031# CONFIG_SENSORS_DS1374 is not set
1032# CONFIG_SENSORS_EEPROM is not set
1033# CONFIG_SENSORS_PCF8574 is not set
1034# CONFIG_SENSORS_PCA9539 is not set
1035# CONFIG_SENSORS_PCF8591 is not set
1036# CONFIG_SENSORS_MAX6875 is not set
1037# CONFIG_I2C_DEBUG_CORE is not set
1038# CONFIG_I2C_DEBUG_ALGO is not set
1039# CONFIG_I2C_DEBUG_BUS is not set
1040# CONFIG_I2C_DEBUG_CHIP is not set
1041 996
1042# 997#
1043# SPI support 998# SPI support
@@ -1053,54 +1008,8 @@ CONFIG_I2C_ISA=m
1053# 1008#
1054# Hardware Monitoring support 1009# Hardware Monitoring support
1055# 1010#
1056CONFIG_HWMON=y 1011# CONFIG_HWMON is not set
1057# CONFIG_HWMON_VID is not set 1012# CONFIG_HWMON_VID is not set
1058# CONFIG_SENSORS_ABITUGURU is not set
1059# CONFIG_SENSORS_ADM1021 is not set
1060# CONFIG_SENSORS_ADM1025 is not set
1061# CONFIG_SENSORS_ADM1026 is not set
1062# CONFIG_SENSORS_ADM1029 is not set
1063# CONFIG_SENSORS_ADM1031 is not set
1064# CONFIG_SENSORS_ADM9240 is not set
1065# CONFIG_SENSORS_K8TEMP is not set
1066# CONFIG_SENSORS_ASB100 is not set
1067# CONFIG_SENSORS_ATXP1 is not set
1068# CONFIG_SENSORS_DS1621 is not set
1069# CONFIG_SENSORS_F71805F is not set
1070# CONFIG_SENSORS_FSCHER is not set
1071# CONFIG_SENSORS_FSCPOS is not set
1072# CONFIG_SENSORS_GL518SM is not set
1073# CONFIG_SENSORS_GL520SM is not set
1074# CONFIG_SENSORS_IT87 is not set
1075# CONFIG_SENSORS_LM63 is not set
1076# CONFIG_SENSORS_LM75 is not set
1077# CONFIG_SENSORS_LM77 is not set
1078# CONFIG_SENSORS_LM78 is not set
1079# CONFIG_SENSORS_LM80 is not set
1080# CONFIG_SENSORS_LM83 is not set
1081# CONFIG_SENSORS_LM85 is not set
1082# CONFIG_SENSORS_LM87 is not set
1083# CONFIG_SENSORS_LM90 is not set
1084# CONFIG_SENSORS_LM92 is not set
1085# CONFIG_SENSORS_MAX1619 is not set
1086# CONFIG_SENSORS_PC87360 is not set
1087# CONFIG_SENSORS_PC87427 is not set
1088# CONFIG_SENSORS_SIS5595 is not set
1089# CONFIG_SENSORS_SMSC47M1 is not set
1090# CONFIG_SENSORS_SMSC47M192 is not set
1091CONFIG_SENSORS_SMSC47B397=m
1092# CONFIG_SENSORS_VIA686A is not set
1093# CONFIG_SENSORS_VT1211 is not set
1094# CONFIG_SENSORS_VT8231 is not set
1095# CONFIG_SENSORS_W83781D is not set
1096# CONFIG_SENSORS_W83791D is not set
1097# CONFIG_SENSORS_W83792D is not set
1098# CONFIG_SENSORS_W83793 is not set
1099# CONFIG_SENSORS_W83L785TS is not set
1100# CONFIG_SENSORS_W83627HF is not set
1101# CONFIG_SENSORS_W83627EHF is not set
1102# CONFIG_SENSORS_HDAPS is not set
1103# CONFIG_HWMON_DEBUG_CHIP is not set
1104 1013
1105# 1014#
1106# Multifunction device drivers 1015# Multifunction device drivers
@@ -1147,8 +1056,9 @@ CONFIG_SOUND=y
1147# Open Sound System 1056# Open Sound System
1148# 1057#
1149CONFIG_SOUND_PRIME=y 1058CONFIG_SOUND_PRIME=y
1150# CONFIG_OBSOLETE_OSS is not set 1059CONFIG_OBSOLETE_OSS=y
1151# CONFIG_SOUND_BT878 is not set 1060# CONFIG_SOUND_BT878 is not set
1061# CONFIG_SOUND_ES1371 is not set
1152CONFIG_SOUND_ICH=y 1062CONFIG_SOUND_ICH=y
1153# CONFIG_SOUND_TRIDENT is not set 1063# CONFIG_SOUND_TRIDENT is not set
1154# CONFIG_SOUND_MSNDCLAS is not set 1064# CONFIG_SOUND_MSNDCLAS is not set
@@ -1163,6 +1073,14 @@ CONFIG_HID=y
1163# CONFIG_HID_DEBUG is not set 1073# CONFIG_HID_DEBUG is not set
1164 1074
1165# 1075#
1076# USB Input Devices
1077#
1078CONFIG_USB_HID=y
1079# CONFIG_USB_HIDINPUT_POWERBOOK is not set
1080# CONFIG_HID_FF is not set
1081# CONFIG_USB_HIDDEV is not set
1082
1083#
1166# USB support 1084# USB support
1167# 1085#
1168CONFIG_USB_ARCH_HAS_HCD=y 1086CONFIG_USB_ARCH_HAS_HCD=y
@@ -1175,6 +1093,7 @@ CONFIG_USB=y
1175# Miscellaneous USB options 1093# Miscellaneous USB options
1176# 1094#
1177CONFIG_USB_DEVICEFS=y 1095CONFIG_USB_DEVICEFS=y
1096# CONFIG_USB_DEVICE_CLASS is not set
1178# CONFIG_USB_DYNAMIC_MINORS is not set 1097# CONFIG_USB_DYNAMIC_MINORS is not set
1179# CONFIG_USB_SUSPEND is not set 1098# CONFIG_USB_SUSPEND is not set
1180# CONFIG_USB_OTG is not set 1099# CONFIG_USB_OTG is not set
@@ -1225,10 +1144,6 @@ CONFIG_USB_STORAGE=y
1225# 1144#
1226# USB Input Devices 1145# USB Input Devices
1227# 1146#
1228CONFIG_USB_HID=y
1229# CONFIG_USB_HIDINPUT_POWERBOOK is not set
1230# CONFIG_HID_FF is not set
1231# CONFIG_USB_HIDDEV is not set
1232# CONFIG_USB_AIPTEK is not set 1147# CONFIG_USB_AIPTEK is not set
1233# CONFIG_USB_WACOM is not set 1148# CONFIG_USB_WACOM is not set
1234# CONFIG_USB_ACECAD is not set 1149# CONFIG_USB_ACECAD is not set
@@ -1556,7 +1471,7 @@ CONFIG_DEBUG_KERNEL=y
1556CONFIG_LOG_BUF_SHIFT=18 1471CONFIG_LOG_BUF_SHIFT=18
1557CONFIG_DETECT_SOFTLOCKUP=y 1472CONFIG_DETECT_SOFTLOCKUP=y
1558# CONFIG_SCHEDSTATS is not set 1473# CONFIG_SCHEDSTATS is not set
1559# CONFIG_TIMER_STATS is not set 1474CONFIG_TIMER_STATS=y
1560# CONFIG_DEBUG_SLAB is not set 1475# CONFIG_DEBUG_SLAB is not set
1561# CONFIG_DEBUG_RT_MUTEXES is not set 1476# CONFIG_DEBUG_RT_MUTEXES is not set
1562# CONFIG_RT_MUTEX_TESTER is not set 1477# CONFIG_RT_MUTEX_TESTER is not set
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index 071100ea1251..185399baaf6d 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -5,6 +5,11 @@
5 * This tricks binfmt_elf.c into loading 32bit binaries using lots 5 * This tricks binfmt_elf.c into loading 32bit binaries using lots
6 * of ugly preprocessor tricks. Talk about very very poor man's inheritance. 6 * of ugly preprocessor tricks. Talk about very very poor man's inheritance.
7 */ 7 */
8#define __ASM_X86_64_ELF_H 1
9
10#undef ELF_CLASS
11#define ELF_CLASS ELFCLASS32
12
8#include <linux/types.h> 13#include <linux/types.h>
9#include <linux/stddef.h> 14#include <linux/stddef.h>
10#include <linux/rwsem.h> 15#include <linux/rwsem.h>
@@ -50,9 +55,6 @@ struct elf_phdr;
50#undef ELF_ARCH 55#undef ELF_ARCH
51#define ELF_ARCH EM_386 56#define ELF_ARCH EM_386
52 57
53#undef ELF_CLASS
54#define ELF_CLASS ELFCLASS32
55
56#define ELF_DATA ELFDATA2LSB 58#define ELF_DATA ELFDATA2LSB
57 59
58#define USE_ELF_CORE_DUMP 1 60#define USE_ELF_CORE_DUMP 1
@@ -136,7 +138,7 @@ struct elf_prpsinfo
136 138
137#define user user32 139#define user user32
138 140
139#define __ASM_X86_64_ELF_H 1 141#undef elf_read_implies_exec
140#define elf_read_implies_exec(ex, executable_stack) (executable_stack != EXSTACK_DISABLE_X) 142#define elf_read_implies_exec(ex, executable_stack) (executable_stack != EXSTACK_DISABLE_X)
141//#include <asm/ia32.h> 143//#include <asm/ia32.h>
142#include <linux/elf.h> 144#include <linux/elf.h>
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index 796df6992f62..c48087db6f75 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -481,11 +481,7 @@ ia32_sys_call_table:
481 .quad sys_symlink 481 .quad sys_symlink
482 .quad sys_lstat 482 .quad sys_lstat
483 .quad sys_readlink /* 85 */ 483 .quad sys_readlink /* 85 */
484#ifdef CONFIG_IA32_AOUT
485 .quad sys_uselib 484 .quad sys_uselib
486#else
487 .quad quiet_ni_syscall
488#endif
489 .quad sys_swapon 485 .quad sys_swapon
490 .quad sys_reboot 486 .quad sys_reboot
491 .quad compat_sys_old_readdir 487 .quad compat_sys_old_readdir
diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
index 568ff0df89e7..fc4419ff0355 100644
--- a/arch/x86_64/ia32/syscall32.c
+++ b/arch/x86_64/ia32/syscall32.c
@@ -13,6 +13,7 @@
13#include <asm/proto.h> 13#include <asm/proto.h>
14#include <asm/tlbflush.h> 14#include <asm/tlbflush.h>
15#include <asm/ia32_unistd.h> 15#include <asm/ia32_unistd.h>
16#include <asm/vsyscall32.h>
16 17
17extern unsigned char syscall32_syscall[], syscall32_syscall_end[]; 18extern unsigned char syscall32_syscall[], syscall32_syscall_end[];
18extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[]; 19extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index bb47e86f3d02..4d94c51803d8 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -8,7 +8,8 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ 8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
9 x8664_ksyms.o i387.o syscall.o vsyscall.o \ 9 x8664_ksyms.o i387.o syscall.o vsyscall.o \
10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ 10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
11 pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o 11 pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o bugs.o \
12 perfctr-watchdog.o
12 13
13obj-$(CONFIG_STACKTRACE) += stacktrace.o 14obj-$(CONFIG_STACKTRACE) += stacktrace.o
14obj-$(CONFIG_X86_MCE) += mce.o therm_throt.o 15obj-$(CONFIG_X86_MCE) += mce.o therm_throt.o
@@ -21,8 +22,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
21obj-$(CONFIG_X86_CPUID) += cpuid.o 22obj-$(CONFIG_X86_CPUID) += cpuid.o
22obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o tsc_sync.o 23obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o tsc_sync.o
23obj-y += apic.o nmi.o 24obj-y += apic.o nmi.o
24obj-y += io_apic.o mpparse.o \ 25obj-y += io_apic.o mpparse.o genapic.o genapic_flat.o
25 genapic.o genapic_cluster.o genapic_flat.o
26obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o 26obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
27obj-$(CONFIG_CRASH_DUMP) += crash_dump.o 27obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
28obj-$(CONFIG_PM) += suspend.o 28obj-$(CONFIG_PM) += suspend.o
@@ -58,3 +58,4 @@ i8237-y += ../../i386/kernel/i8237.o
58msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o 58msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
59alternative-y += ../../i386/kernel/alternative.o 59alternative-y += ../../i386/kernel/alternative.o
60pcspeaker-y += ../../i386/kernel/pcspeaker.o 60pcspeaker-y += ../../i386/kernel/pcspeaker.o
61perfctr-watchdog-y += ../../i386/kernel/cpu/perfctr-watchdog.o
diff --git a/arch/x86_64/kernel/acpi/sleep.c b/arch/x86_64/kernel/acpi/sleep.c
index e1548fbe95ae..195b7034a148 100644
--- a/arch/x86_64/kernel/acpi/sleep.c
+++ b/arch/x86_64/kernel/acpi/sleep.c
@@ -60,19 +60,6 @@ extern char wakeup_start, wakeup_end;
60 60
61extern unsigned long acpi_copy_wakeup_routine(unsigned long); 61extern unsigned long acpi_copy_wakeup_routine(unsigned long);
62 62
63static pgd_t low_ptr;
64
65static void init_low_mapping(void)
66{
67 pgd_t *slot0 = pgd_offset(current->mm, 0UL);
68 low_ptr = *slot0;
69 /* FIXME: We're playing with the current task's page tables here, which
70 * is potentially dangerous on SMP systems.
71 */
72 set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET));
73 local_flush_tlb();
74}
75
76/** 63/**
77 * acpi_save_state_mem - save kernel state 64 * acpi_save_state_mem - save kernel state
78 * 65 *
@@ -81,8 +68,6 @@ static void init_low_mapping(void)
81 */ 68 */
82int acpi_save_state_mem(void) 69int acpi_save_state_mem(void)
83{ 70{
84 init_low_mapping();
85
86 memcpy((void *)acpi_wakeup_address, &wakeup_start, 71 memcpy((void *)acpi_wakeup_address, &wakeup_start,
87 &wakeup_end - &wakeup_start); 72 &wakeup_end - &wakeup_start);
88 acpi_copy_wakeup_routine(acpi_wakeup_address); 73 acpi_copy_wakeup_routine(acpi_wakeup_address);
@@ -95,8 +80,6 @@ int acpi_save_state_mem(void)
95 */ 80 */
96void acpi_restore_state_mem(void) 81void acpi_restore_state_mem(void)
97{ 82{
98 set_pgd(pgd_offset(current->mm, 0UL), low_ptr);
99 local_flush_tlb();
100} 83}
101 84
102/** 85/**
@@ -109,10 +92,11 @@ void acpi_restore_state_mem(void)
109 */ 92 */
110void __init acpi_reserve_bootmem(void) 93void __init acpi_reserve_bootmem(void)
111{ 94{
112 acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); 95 acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2);
113 if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) 96 if ((&wakeup_end - &wakeup_start) > (PAGE_SIZE*2))
114 printk(KERN_CRIT 97 printk(KERN_CRIT
115 "ACPI: Wakeup code way too big, will crash on attempt to suspend\n"); 98 "ACPI: Wakeup code way too big, will crash on attempt"
99 " to suspend\n");
116} 100}
117 101
118static int __init acpi_sleep_setup(char *str) 102static int __init acpi_sleep_setup(char *str)
diff --git a/arch/x86_64/kernel/acpi/wakeup.S b/arch/x86_64/kernel/acpi/wakeup.S
index 185faa911db5..8550a6ffa275 100644
--- a/arch/x86_64/kernel/acpi/wakeup.S
+++ b/arch/x86_64/kernel/acpi/wakeup.S
@@ -1,6 +1,7 @@
1.text 1.text
2#include <linux/linkage.h> 2#include <linux/linkage.h>
3#include <asm/segment.h> 3#include <asm/segment.h>
4#include <asm/pgtable.h>
4#include <asm/page.h> 5#include <asm/page.h>
5#include <asm/msr.h> 6#include <asm/msr.h>
6 7
@@ -30,22 +31,28 @@ wakeup_code:
30 cld 31 cld
31 # setup data segment 32 # setup data segment
32 movw %cs, %ax 33 movw %cs, %ax
33 movw %ax, %ds # Make ds:0 point to wakeup_start 34 movw %ax, %ds # Make ds:0 point to wakeup_start
34 movw %ax, %ss 35 movw %ax, %ss
35 mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board 36 # Private stack is needed for ASUS board
37 mov $(wakeup_stack - wakeup_code), %sp
36 38
37 pushl $0 # Kill any dangerous flags 39 pushl $0 # Kill any dangerous flags
38 popfl 40 popfl
39 41
40 movl real_magic - wakeup_code, %eax 42 movl real_magic - wakeup_code, %eax
41 cmpl $0x12345678, %eax 43 cmpl $0x12345678, %eax
42 jne bogus_real_magic 44 jne bogus_real_magic
43 45
46 call verify_cpu # Verify the cpu supports long
47 # mode
48 testl %eax, %eax
49 jnz no_longmode
50
44 testl $1, video_flags - wakeup_code 51 testl $1, video_flags - wakeup_code
45 jz 1f 52 jz 1f
46 lcall $0xc000,$3 53 lcall $0xc000,$3
47 movw %cs, %ax 54 movw %cs, %ax
48 movw %ax, %ds # Bios might have played with that 55 movw %ax, %ds # Bios might have played with that
49 movw %ax, %ss 56 movw %ax, %ss
501: 571:
51 58
@@ -61,12 +68,15 @@ wakeup_code:
61 68
62 movb $0xa2, %al ; outb %al, $0x80 69 movb $0xa2, %al ; outb %al, $0x80
63 70
64 lidt %ds:idt_48a - wakeup_code 71 mov %ds, %ax # Find 32bit wakeup_code addr
65 xorl %eax, %eax 72 movzx %ax, %esi # (Convert %ds:gdt to a linear ptr)
66 movw %ds, %ax # (Convert %ds:gdt to a linear ptr) 73 shll $4, %esi
67 shll $4, %eax 74 # Fix up the vectors
68 addl $(gdta - wakeup_code), %eax 75 addl %esi, wakeup_32_vector - wakeup_code
69 movl %eax, gdt_48a +2 - wakeup_code 76 addl %esi, wakeup_long64_vector - wakeup_code
77 addl %esi, gdt_48a + 2 - wakeup_code # Fixup the gdt pointer
78
79 lidtl %ds:idt_48a - wakeup_code
70 lgdtl %ds:gdt_48a - wakeup_code # load gdt with whatever is 80 lgdtl %ds:gdt_48a - wakeup_code # load gdt with whatever is
71 # appropriate 81 # appropriate
72 82
@@ -75,86 +85,63 @@ wakeup_code:
75 jmp 1f 85 jmp 1f
761: 861:
77 87
78 .byte 0x66, 0xea # prefix + jmpi-opcode 88 ljmpl *(wakeup_32_vector - wakeup_code)
79 .long wakeup_32 - __START_KERNEL_map 89
80 .word __KERNEL_CS 90 .balign 4
91wakeup_32_vector:
92 .long wakeup_32 - wakeup_code
93 .word __KERNEL32_CS, 0
81 94
82 .code32 95 .code32
83wakeup_32: 96wakeup_32:
84# Running in this code, but at low address; paging is not yet turned on. 97# Running in this code, but at low address; paging is not yet turned on.
85 movb $0xa5, %al ; outb %al, $0x80 98 movb $0xa5, %al ; outb %al, $0x80
86 99
87 /* Check if extended functions are implemented */ 100 movl $__KERNEL_DS, %eax
88 movl $0x80000000, %eax 101 movl %eax, %ds
89 cpuid
90 cmpl $0x80000000, %eax
91 jbe bogus_cpu
92 wbinvd
93 mov $0x80000001, %eax
94 cpuid
95 btl $29, %edx
96 jnc bogus_cpu
97 movl %edx,%edi
98
99 movw $__KERNEL_DS, %ax
100 movw %ax, %ds
101 movw %ax, %es
102 movw %ax, %fs
103 movw %ax, %gs
104
105 movw $__KERNEL_DS, %ax
106 movw %ax, %ss
107 102
108 mov $(wakeup_stack - __START_KERNEL_map), %esp 103 movw $0x0e00 + 'i', %ds:(0xb8012)
109 movl saved_magic - __START_KERNEL_map, %eax 104 movb $0xa8, %al ; outb %al, $0x80;
110 cmpl $0x9abcdef0, %eax
111 jne bogus_32_magic
112 105
113 /* 106 /*
114 * Prepare for entering 64bits mode 107 * Prepare for entering 64bits mode
115 */ 108 */
116 109
117 /* Enable PAE mode and PGE */ 110 /* Enable PAE */
118 xorl %eax, %eax 111 xorl %eax, %eax
119 btsl $5, %eax 112 btsl $5, %eax
120 btsl $7, %eax
121 movl %eax, %cr4 113 movl %eax, %cr4
122 114
123 /* Setup early boot stage 4 level pagetables */ 115 /* Setup early boot stage 4 level pagetables */
124 movl $(wakeup_level4_pgt - __START_KERNEL_map), %eax 116 leal (wakeup_level4_pgt - wakeup_code)(%esi), %eax
125 movl %eax, %cr3 117 movl %eax, %cr3
126 118
127 /* Setup EFER (Extended Feature Enable Register) */ 119 /* Check if nx is implemented */
128 movl $MSR_EFER, %ecx 120 movl $0x80000001, %eax
129 rdmsr 121 cpuid
130 /* Fool rdmsr and reset %eax to avoid dependences */ 122 movl %edx,%edi
131 xorl %eax, %eax 123
132 /* Enable Long Mode */ 124 /* Enable Long Mode */
125 xorl %eax, %eax
133 btsl $_EFER_LME, %eax 126 btsl $_EFER_LME, %eax
134 /* Enable System Call */
135 btsl $_EFER_SCE, %eax
136 127
137 /* No Execute supported? */ 128 /* No Execute supported? */
138 btl $20,%edi 129 btl $20,%edi
139 jnc 1f 130 jnc 1f
140 btsl $_EFER_NX, %eax 131 btsl $_EFER_NX, %eax
1411:
142 132
143 /* Make changes effective */ 133 /* Make changes effective */
1341: movl $MSR_EFER, %ecx
135 xorl %edx, %edx
144 wrmsr 136 wrmsr
145 wbinvd
146 137
147 xorl %eax, %eax 138 xorl %eax, %eax
148 btsl $31, %eax /* Enable paging and in turn activate Long Mode */ 139 btsl $31, %eax /* Enable paging and in turn activate Long Mode */
149 btsl $0, %eax /* Enable protected mode */ 140 btsl $0, %eax /* Enable protected mode */
150 btsl $1, %eax /* Enable MP */
151 btsl $4, %eax /* Enable ET */
152 btsl $5, %eax /* Enable NE */
153 btsl $16, %eax /* Enable WP */
154 btsl $18, %eax /* Enable AM */
155 141
156 /* Make changes effective */ 142 /* Make changes effective */
157 movl %eax, %cr0 143 movl %eax, %cr0
144
158 /* At this point: 145 /* At this point:
159 CR4.PAE must be 1 146 CR4.PAE must be 1
160 CS.L must be 0 147 CS.L must be 0
@@ -162,11 +149,6 @@ wakeup_32:
162 Next instruction must be a branch 149 Next instruction must be a branch
163 This must be on identity-mapped page 150 This must be on identity-mapped page
164 */ 151 */
165 jmp reach_compatibility_mode
166reach_compatibility_mode:
167 movw $0x0e00 + 'i', %ds:(0xb8012)
168 movb $0xa8, %al ; outb %al, $0x80;
169
170 /* 152 /*
171 * At this point we're in long mode but in 32bit compatibility mode 153 * At this point we're in long mode but in 32bit compatibility mode
172 * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn 154 * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
@@ -174,24 +156,19 @@ reach_compatibility_mode:
174 * the new gdt/idt that has __KERNEL_CS with CS.L = 1. 156 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
175 */ 157 */
176 158
177 movw $0x0e00 + 'n', %ds:(0xb8014)
178 movb $0xa9, %al ; outb %al, $0x80
179
180 /* Load new GDT with the 64bit segment using 32bit descriptor */
181 movl $(pGDT32 - __START_KERNEL_map), %eax
182 lgdt (%eax)
183
184 movl $(wakeup_jumpvector - __START_KERNEL_map), %eax
185 /* Finally jump in 64bit mode */ 159 /* Finally jump in 64bit mode */
186 ljmp *(%eax) 160 ljmp *(wakeup_long64_vector - wakeup_code)(%esi)
187 161
188wakeup_jumpvector: 162 .balign 4
189 .long wakeup_long64 - __START_KERNEL_map 163wakeup_long64_vector:
190 .word __KERNEL_CS 164 .long wakeup_long64 - wakeup_code
165 .word __KERNEL_CS, 0
191 166
192.code64 167.code64
193 168
194 /* Hooray, we are in Long 64-bit mode (but still running in low memory) */ 169 /* Hooray, we are in Long 64-bit mode (but still running in
170 * low memory)
171 */
195wakeup_long64: 172wakeup_long64:
196 /* 173 /*
197 * We must switch to a new descriptor in kernel space for the GDT 174 * We must switch to a new descriptor in kernel space for the GDT
@@ -199,7 +176,15 @@ wakeup_long64:
199 * addresses where we're currently running on. We have to do that here 176 * addresses where we're currently running on. We have to do that here
200 * because in 32bit we couldn't load a 64bit linear address. 177 * because in 32bit we couldn't load a 64bit linear address.
201 */ 178 */
202 lgdt cpu_gdt_descr - __START_KERNEL_map 179 lgdt cpu_gdt_descr
180
181 movw $0x0e00 + 'n', %ds:(0xb8014)
182 movb $0xa9, %al ; outb %al, $0x80
183
184 movq saved_magic, %rax
185 movq $0x123456789abcdef0, %rdx
186 cmpq %rdx, %rax
187 jne bogus_64_magic
203 188
204 movw $0x0e00 + 'u', %ds:(0xb8016) 189 movw $0x0e00 + 'u', %ds:(0xb8016)
205 190
@@ -211,75 +196,58 @@ wakeup_long64:
211 movw %ax, %es 196 movw %ax, %es
212 movw %ax, %fs 197 movw %ax, %fs
213 movw %ax, %gs 198 movw %ax, %gs
214 movq saved_esp, %rsp 199 movq saved_rsp, %rsp
215 200
216 movw $0x0e00 + 'x', %ds:(0xb8018) 201 movw $0x0e00 + 'x', %ds:(0xb8018)
217 movq saved_ebx, %rbx 202 movq saved_rbx, %rbx
218 movq saved_edi, %rdi 203 movq saved_rdi, %rdi
219 movq saved_esi, %rsi 204 movq saved_rsi, %rsi
220 movq saved_ebp, %rbp 205 movq saved_rbp, %rbp
221 206
222 movw $0x0e00 + '!', %ds:(0xb801a) 207 movw $0x0e00 + '!', %ds:(0xb801a)
223 movq saved_eip, %rax 208 movq saved_rip, %rax
224 jmp *%rax 209 jmp *%rax
225 210
226.code32 211.code32
227 212
228 .align 64 213 .align 64
229gdta: 214gdta:
215 /* It's good to keep the gdt in sync with the one in trampoline.S */
230 .word 0, 0, 0, 0 # dummy 216 .word 0, 0, 0, 0 # dummy
231 217 /* ??? Why do I need the accessed bit set in order for this to work? */
232 .word 0, 0, 0, 0 # unused 218 .quad 0x00cf9b000000ffff # __KERNEL32_CS
233 219 .quad 0x00af9b000000ffff # __KERNEL_CS
234 .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb) 220 .quad 0x00cf93000000ffff # __KERNEL_DS
235 .word 0 # base address = 0
236 .word 0x9B00 # code read/exec. ??? Why I need 0x9B00 (as opposed to 0x9A00 in order for this to work?)
237 .word 0x00CF # granularity = 4096, 386
238 # (+5th nibble of limit)
239
240 .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb)
241 .word 0 # base address = 0
242 .word 0x9200 # data read/write
243 .word 0x00CF # granularity = 4096, 386
244 # (+5th nibble of limit)
245# this is 64bit descriptor for code
246 .word 0xFFFF
247 .word 0
248 .word 0x9A00 # code read/exec
249 .word 0x00AF # as above, but it is long mode and with D=0
250 221
251idt_48a: 222idt_48a:
252 .word 0 # idt limit = 0 223 .word 0 # idt limit = 0
253 .word 0, 0 # idt base = 0L 224 .word 0, 0 # idt base = 0L
254 225
255gdt_48a: 226gdt_48a:
256 .word 0x8000 # gdt limit=2048, 227 .word 0x800 # gdt limit=2048,
257 # 256 GDT entries 228 # 256 GDT entries
258 .word 0, 0 # gdt base (filled in later) 229 .long gdta - wakeup_code # gdt base (relocated later)
259
260 230
261real_save_gdt: .word 0
262 .quad 0
263real_magic: .quad 0 231real_magic: .quad 0
264video_mode: .quad 0 232video_mode: .quad 0
265video_flags: .quad 0 233video_flags: .quad 0
266 234
235.code16
267bogus_real_magic: 236bogus_real_magic:
268 movb $0xba,%al ; outb %al,$0x80 237 movb $0xba,%al ; outb %al,$0x80
269 jmp bogus_real_magic 238 jmp bogus_real_magic
270 239
271bogus_32_magic: 240.code64
241bogus_64_magic:
272 movb $0xb3,%al ; outb %al,$0x80 242 movb $0xb3,%al ; outb %al,$0x80
273 jmp bogus_32_magic 243 jmp bogus_64_magic
274 244
275bogus_31_magic: 245.code16
276 movb $0xb1,%al ; outb %al,$0x80 246no_longmode:
277 jmp bogus_31_magic 247 movb $0xbc,%al ; outb %al,$0x80
278 248 jmp no_longmode
279bogus_cpu:
280 movb $0xbc,%al ; outb %al,$0x80
281 jmp bogus_cpu
282 249
250#include "../verify_cpu.S"
283 251
284/* This code uses an extended set of video mode numbers. These include: 252/* This code uses an extended set of video mode numbers. These include:
285 * Aliases for standard modes 253 * Aliases for standard modes
@@ -301,6 +269,7 @@ bogus_cpu:
301#define VIDEO_FIRST_V7 0x0900 269#define VIDEO_FIRST_V7 0x0900
302 270
303# Setting of user mode (AX=mode ID) => CF=success 271# Setting of user mode (AX=mode ID) => CF=success
272.code16
304mode_seta: 273mode_seta:
305 movw %ax, %bx 274 movw %ax, %bx
306#if 0 275#if 0
@@ -346,21 +315,18 @@ check_vesaa:
346 315
347_setbada: jmp setbada 316_setbada: jmp setbada
348 317
349 .code64
350bogus_magic:
351 movw $0x0e00 + 'B', %ds:(0xb8018)
352 jmp bogus_magic
353
354bogus_magic2:
355 movw $0x0e00 + '2', %ds:(0xb8018)
356 jmp bogus_magic2
357
358
359wakeup_stack_begin: # Stack grows down 318wakeup_stack_begin: # Stack grows down
360 319
361.org 0xff0 320.org 0xff0
362wakeup_stack: # Just below end of page 321wakeup_stack: # Just below end of page
363 322
323.org 0x1000
324ENTRY(wakeup_level4_pgt)
325 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
326 .fill 510,8,0
327 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
328 .quad level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
329
364ENTRY(wakeup_end) 330ENTRY(wakeup_end)
365 331
366## 332##
@@ -373,28 +339,11 @@ ENTRY(wakeup_end)
373# 339#
374# Returned address is location of code in low memory (past data and stack) 340# Returned address is location of code in low memory (past data and stack)
375# 341#
342 .code64
376ENTRY(acpi_copy_wakeup_routine) 343ENTRY(acpi_copy_wakeup_routine)
377 pushq %rax 344 pushq %rax
378 pushq %rcx
379 pushq %rdx 345 pushq %rdx
380 346
381 sgdt saved_gdt
382 sidt saved_idt
383 sldt saved_ldt
384 str saved_tss
385
386 movq %cr3, %rdx
387 movq %rdx, saved_cr3
388 movq %cr4, %rdx
389 movq %rdx, saved_cr4
390 movq %cr0, %rdx
391 movq %rdx, saved_cr0
392 sgdt real_save_gdt - wakeup_start (,%rdi)
393 movl $MSR_EFER, %ecx
394 rdmsr
395 movl %eax, saved_efer
396 movl %edx, saved_efer2
397
398 movl saved_video_mode, %edx 347 movl saved_video_mode, %edx
399 movl %edx, video_mode - wakeup_start (,%rdi) 348 movl %edx, video_mode - wakeup_start (,%rdi)
400 movl acpi_video_flags, %edx 349 movl acpi_video_flags, %edx
@@ -403,21 +352,13 @@ ENTRY(acpi_copy_wakeup_routine)
403 movq $0x123456789abcdef0, %rdx 352 movq $0x123456789abcdef0, %rdx
404 movq %rdx, saved_magic 353 movq %rdx, saved_magic
405 354
406 movl saved_magic - __START_KERNEL_map, %eax 355 movq saved_magic, %rax
407 cmpl $0x9abcdef0, %eax 356 movq $0x123456789abcdef0, %rdx
408 jne bogus_32_magic 357 cmpq %rdx, %rax
409 358 jne bogus_64_magic
410 # make sure %cr4 is set correctly (features, etc)
411 movl saved_cr4 - __START_KERNEL_map, %eax
412 movq %rax, %cr4
413 359
414 movl saved_cr0 - __START_KERNEL_map, %eax
415 movq %rax, %cr0
416 jmp 1f # Flush pipelines
4171:
418 # restore the regs we used 360 # restore the regs we used
419 popq %rdx 361 popq %rdx
420 popq %rcx
421 popq %rax 362 popq %rax
422ENTRY(do_suspend_lowlevel_s4bios) 363ENTRY(do_suspend_lowlevel_s4bios)
423 ret 364 ret
@@ -450,13 +391,13 @@ do_suspend_lowlevel:
450 movq %r15, saved_context_r15(%rip) 391 movq %r15, saved_context_r15(%rip)
451 pushfq ; popq saved_context_eflags(%rip) 392 pushfq ; popq saved_context_eflags(%rip)
452 393
453 movq $.L97, saved_eip(%rip) 394 movq $.L97, saved_rip(%rip)
454 395
455 movq %rsp,saved_esp 396 movq %rsp,saved_rsp
456 movq %rbp,saved_ebp 397 movq %rbp,saved_rbp
457 movq %rbx,saved_ebx 398 movq %rbx,saved_rbx
458 movq %rdi,saved_edi 399 movq %rdi,saved_rdi
459 movq %rsi,saved_esi 400 movq %rsi,saved_rsi
460 401
461 addq $8, %rsp 402 addq $8, %rsp
462 movl $3, %edi 403 movl $3, %edi
@@ -503,25 +444,12 @@ do_suspend_lowlevel:
503 444
504.data 445.data
505ALIGN 446ALIGN
506ENTRY(saved_ebp) .quad 0 447ENTRY(saved_rbp) .quad 0
507ENTRY(saved_esi) .quad 0 448ENTRY(saved_rsi) .quad 0
508ENTRY(saved_edi) .quad 0 449ENTRY(saved_rdi) .quad 0
509ENTRY(saved_ebx) .quad 0 450ENTRY(saved_rbx) .quad 0
510 451
511ENTRY(saved_eip) .quad 0 452ENTRY(saved_rip) .quad 0
512ENTRY(saved_esp) .quad 0 453ENTRY(saved_rsp) .quad 0
513 454
514ENTRY(saved_magic) .quad 0 455ENTRY(saved_magic) .quad 0
515
516ALIGN
517# saved registers
518saved_gdt: .quad 0,0
519saved_idt: .quad 0,0
520saved_ldt: .quad 0
521saved_tss: .quad 0
522
523saved_cr0: .quad 0
524saved_cr3: .quad 0
525saved_cr4: .quad 0
526saved_efer: .quad 0
527saved_efer2: .quad 0
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index b487396c4c5b..a52af5820592 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -51,7 +51,6 @@ static void __init insert_aperture_resource(u32 aper_base, u32 aper_size)
51 51
52static u32 __init allocate_aperture(void) 52static u32 __init allocate_aperture(void)
53{ 53{
54 pg_data_t *nd0 = NODE_DATA(0);
55 u32 aper_size; 54 u32 aper_size;
56 void *p; 55 void *p;
57 56
@@ -65,12 +64,12 @@ static u32 __init allocate_aperture(void)
65 * Unfortunately we cannot move it up because that would make the 64 * Unfortunately we cannot move it up because that would make the
66 * IOMMU useless. 65 * IOMMU useless.
67 */ 66 */
68 p = __alloc_bootmem_node(nd0, aper_size, aper_size, 0); 67 p = __alloc_bootmem_nopanic(aper_size, aper_size, 0);
69 if (!p || __pa(p)+aper_size > 0xffffffff) { 68 if (!p || __pa(p)+aper_size > 0xffffffff) {
70 printk("Cannot allocate aperture memory hole (%p,%uK)\n", 69 printk("Cannot allocate aperture memory hole (%p,%uK)\n",
71 p, aper_size>>10); 70 p, aper_size>>10);
72 if (p) 71 if (p)
73 free_bootmem_node(nd0, __pa(p), aper_size); 72 free_bootmem(__pa(p), aper_size);
74 return 0; 73 return 0;
75 } 74 }
76 printk("Mapping aperture over %d KB of RAM @ %lx\n", 75 printk("Mapping aperture over %d KB of RAM @ %lx\n",
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index bd3e45d47c37..d198f7d82e5a 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -68,6 +68,28 @@ int using_apic_timer __read_mostly = 0;
68 68
69static void apic_pm_activate(void); 69static void apic_pm_activate(void);
70 70
71void apic_wait_icr_idle(void)
72{
73 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
74 cpu_relax();
75}
76
77unsigned int safe_apic_wait_icr_idle(void)
78{
79 unsigned int send_status;
80 int timeout;
81
82 timeout = 0;
83 do {
84 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
85 if (!send_status)
86 break;
87 udelay(100);
88 } while (timeout++ < 1000);
89
90 return send_status;
91}
92
71void enable_NMI_through_LVT0 (void * dummy) 93void enable_NMI_through_LVT0 (void * dummy)
72{ 94{
73 unsigned int v; 95 unsigned int v;
@@ -817,14 +839,15 @@ static void setup_APIC_timer(unsigned int clocks)
817 839
818static int __init calibrate_APIC_clock(void) 840static int __init calibrate_APIC_clock(void)
819{ 841{
820 int apic, apic_start, tsc, tsc_start; 842 unsigned apic, apic_start;
843 unsigned long tsc, tsc_start;
821 int result; 844 int result;
822 /* 845 /*
823 * Put whatever arbitrary (but long enough) timeout 846 * Put whatever arbitrary (but long enough) timeout
824 * value into the APIC clock, we just want to get the 847 * value into the APIC clock, we just want to get the
825 * counter running for calibration. 848 * counter running for calibration.
826 */ 849 */
827 __setup_APIC_LVTT(1000000000); 850 __setup_APIC_LVTT(4000000000);
828 851
829 apic_start = apic_read(APIC_TMCCT); 852 apic_start = apic_read(APIC_TMCCT);
830#ifdef CONFIG_X86_PM_TIMER 853#ifdef CONFIG_X86_PM_TIMER
@@ -835,15 +858,15 @@ static int __init calibrate_APIC_clock(void)
835 } else 858 } else
836#endif 859#endif
837 { 860 {
838 rdtscl(tsc_start); 861 rdtscll(tsc_start);
839 862
840 do { 863 do {
841 apic = apic_read(APIC_TMCCT); 864 apic = apic_read(APIC_TMCCT);
842 rdtscl(tsc); 865 rdtscll(tsc);
843 } while ((tsc - tsc_start) < TICK_COUNT && 866 } while ((tsc - tsc_start) < TICK_COUNT &&
844 (apic - apic_start) < TICK_COUNT); 867 (apic_start - apic) < TICK_COUNT);
845 868
846 result = (apic_start - apic) * 1000L * cpu_khz / 869 result = (apic_start - apic) * 1000L * tsc_khz /
847 (tsc - tsc_start); 870 (tsc - tsc_start);
848 } 871 }
849 printk("result %d\n", result); 872 printk("result %d\n", result);
diff --git a/arch/x86_64/kernel/asm-offsets.c b/arch/x86_64/kernel/asm-offsets.c
index 96687e2beb2c..778953bc636c 100644
--- a/arch/x86_64/kernel/asm-offsets.c
+++ b/arch/x86_64/kernel/asm-offsets.c
@@ -21,6 +21,14 @@
21 21
22#define BLANK() asm volatile("\n->" : : ) 22#define BLANK() asm volatile("\n->" : : )
23 23
24#define __NO_STUBS 1
25#undef __SYSCALL
26#undef _ASM_X86_64_UNISTD_H_
27#define __SYSCALL(nr, sym) [nr] = 1,
28static char syscalls[] = {
29#include <asm/unistd.h>
30};
31
24int main(void) 32int main(void)
25{ 33{
26#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry)) 34#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry))
@@ -71,5 +79,7 @@ int main(void)
71 DEFINE(TSS_ist, offsetof(struct tss_struct, ist)); 79 DEFINE(TSS_ist, offsetof(struct tss_struct, ist));
72 BLANK(); 80 BLANK();
73 DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx)); 81 DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
82 BLANK();
83 DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
74 return 0; 84 return 0;
75} 85}
diff --git a/arch/x86_64/kernel/bugs.c b/arch/x86_64/kernel/bugs.c
new file mode 100644
index 000000000000..12b585b5345d
--- /dev/null
+++ b/arch/x86_64/kernel/bugs.c
@@ -0,0 +1,21 @@
1/*
2 * arch/x86_64/kernel/bugs.c
3 *
4 * Copyright (C) 1994 Linus Torvalds
5 * Copyright (C) 2000 SuSE
6 */
7
8#include <linux/kernel.h>
9#include <linux/init.h>
10#include <asm/alternative.h>
11#include <asm/processor.h>
12
13void __init check_bugs(void)
14{
15 identify_cpu(&boot_cpu_data);
16#if !defined(CONFIG_SMP)
17 printk("CPU: ");
18 print_cpu_info(&boot_cpu_data);
19#endif
20 alternative_instructions();
21}
diff --git a/arch/x86_64/kernel/cpufreq/Kconfig b/arch/x86_64/kernel/cpufreq/Kconfig
index 40acb67fb882..c0749d2479f5 100644
--- a/arch/x86_64/kernel/cpufreq/Kconfig
+++ b/arch/x86_64/kernel/cpufreq/Kconfig
@@ -16,6 +16,9 @@ config X86_POWERNOW_K8
16 help 16 help
17 This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors. 17 This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors.
18 18
19 To compile this driver as a module, choose M here: the
20 module will be called powernow-k8.
21
19 For details, take a look at <file:Documentation/cpu-freq/>. 22 For details, take a look at <file:Documentation/cpu-freq/>.
20 23
21 If in doubt, say N. 24 If in doubt, say N.
@@ -38,6 +41,9 @@ config X86_SPEEDSTEP_CENTRINO
38 mobile CPUs. This means Intel Pentium M (Centrino) CPUs 41 mobile CPUs. This means Intel Pentium M (Centrino) CPUs
39 or 64bit enabled Intel Xeons. 42 or 64bit enabled Intel Xeons.
40 43
44 To compile this driver as a module, choose M here: the
45 module will be called speedstep-centrino.
46
41 For details, take a look at <file:Documentation/cpu-freq/>. 47 For details, take a look at <file:Documentation/cpu-freq/>.
42 48
43 If in doubt, say N. 49 If in doubt, say N.
@@ -55,6 +61,9 @@ config X86_ACPI_CPUFREQ
55 Processor Performance States. 61 Processor Performance States.
56 This driver also supports Intel Enhanced Speedstep. 62 This driver also supports Intel Enhanced Speedstep.
57 63
64 To compile this driver as a module, choose M here: the
65 module will be called acpi-cpufreq.
66
58 For details, take a look at <file:Documentation/cpu-freq/>. 67 For details, take a look at <file:Documentation/cpu-freq/>.
59 68
60 If in doubt, say N. 69 If in doubt, say N.
@@ -62,7 +71,7 @@ config X86_ACPI_CPUFREQ
62comment "shared options" 71comment "shared options"
63 72
64config X86_ACPI_CPUFREQ_PROC_INTF 73config X86_ACPI_CPUFREQ_PROC_INTF
65 bool "/proc/acpi/processor/../performance interface (deprecated)" 74 bool "/proc/acpi/processor/../performance interface (deprecated)"
66 depends on PROC_FS 75 depends on PROC_FS
67 depends on X86_ACPI_CPUFREQ || X86_SPEEDSTEP_CENTRINO_ACPI || X86_POWERNOW_K8_ACPI 76 depends on X86_ACPI_CPUFREQ || X86_SPEEDSTEP_CENTRINO_ACPI || X86_POWERNOW_K8_ACPI
68 help 77 help
@@ -86,16 +95,18 @@ config X86_P4_CLOCKMOD
86 slowdowns and noticeable latencies. Normally Speedstep should be used 95 slowdowns and noticeable latencies. Normally Speedstep should be used
87 instead. 96 instead.
88 97
98 To compile this driver as a module, choose M here: the
99 module will be called p4-clockmod.
100
89 For details, take a look at <file:Documentation/cpu-freq/>. 101 For details, take a look at <file:Documentation/cpu-freq/>.
90 102
91 Unless you are absolutely sure say N. 103 Unless you are absolutely sure say N.
92 104
93 105
94config X86_SPEEDSTEP_LIB 106config X86_SPEEDSTEP_LIB
95 tristate 107 tristate
96 default X86_P4_CLOCKMOD 108 default X86_P4_CLOCKMOD
97 109
98endif 110endif
99 111
100endmenu 112endmenu
101
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index a490fabfcf47..13c6c37610e0 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -17,6 +17,8 @@
17#include <linux/kexec.h> 17#include <linux/kexec.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/mm.h> 19#include <linux/mm.h>
20#include <linux/suspend.h>
21#include <linux/pfn.h>
20 22
21#include <asm/pgtable.h> 23#include <asm/pgtable.h>
22#include <asm/page.h> 24#include <asm/page.h>
@@ -25,7 +27,7 @@
25#include <asm/bootsetup.h> 27#include <asm/bootsetup.h>
26#include <asm/sections.h> 28#include <asm/sections.h>
27 29
28struct e820map e820 __initdata; 30struct e820map e820;
29 31
30/* 32/*
31 * PFN of last memory page. 33 * PFN of last memory page.
@@ -98,7 +100,7 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
98 * This function checks if any part of the range <start,end> is mapped 100 * This function checks if any part of the range <start,end> is mapped
99 * with type. 101 * with type.
100 */ 102 */
101int __meminit 103int
102e820_any_mapped(unsigned long start, unsigned long end, unsigned type) 104e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
103{ 105{
104 int i; 106 int i;
@@ -112,6 +114,7 @@ e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
112 } 114 }
113 return 0; 115 return 0;
114} 116}
117EXPORT_SYMBOL_GPL(e820_any_mapped);
115 118
116/* 119/*
117 * This function checks if the entire range <start,end> is mapped with type. 120 * This function checks if the entire range <start,end> is mapped with type.
@@ -255,22 +258,6 @@ void __init e820_reserve_resources(void)
255 } 258 }
256} 259}
257 260
258/* Mark pages corresponding to given address range as nosave */
259static void __init
260e820_mark_nosave_range(unsigned long start, unsigned long end)
261{
262 unsigned long pfn, max_pfn;
263
264 if (start >= end)
265 return;
266
267 printk("Nosave address range: %016lx - %016lx\n", start, end);
268 max_pfn = end >> PAGE_SHIFT;
269 for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++)
270 if (pfn_valid(pfn))
271 SetPageNosave(pfn_to_page(pfn));
272}
273
274/* 261/*
275 * Find the ranges of physical addresses that do not correspond to 262 * Find the ranges of physical addresses that do not correspond to
276 * e820 RAM areas and mark the corresponding pages as nosave for software 263 * e820 RAM areas and mark the corresponding pages as nosave for software
@@ -289,13 +276,13 @@ void __init e820_mark_nosave_regions(void)
289 struct e820entry *ei = &e820.map[i]; 276 struct e820entry *ei = &e820.map[i];
290 277
291 if (paddr < ei->addr) 278 if (paddr < ei->addr)
292 e820_mark_nosave_range(paddr, 279 register_nosave_region(PFN_DOWN(paddr),
293 round_up(ei->addr, PAGE_SIZE)); 280 PFN_UP(ei->addr));
294 281
295 paddr = round_down(ei->addr + ei->size, PAGE_SIZE); 282 paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
296 if (ei->type != E820_RAM) 283 if (ei->type != E820_RAM)
297 e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE), 284 register_nosave_region(PFN_UP(ei->addr),
298 paddr); 285 PFN_DOWN(paddr));
299 286
300 if (paddr >= (end_pfn << PAGE_SHIFT)) 287 if (paddr >= (end_pfn << PAGE_SHIFT))
301 break; 288 break;
diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c
index fede55a53995..990d9c218a5d 100644
--- a/arch/x86_64/kernel/early-quirks.c
+++ b/arch/x86_64/kernel/early-quirks.c
@@ -71,18 +71,6 @@ static void __init ati_bugs(void)
71 } 71 }
72} 72}
73 73
74static void intel_bugs(void)
75{
76 u16 device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID);
77
78#ifdef CONFIG_SMP
79 if (device == PCI_DEVICE_ID_INTEL_E7320_MCH ||
80 device == PCI_DEVICE_ID_INTEL_E7520_MCH ||
81 device == PCI_DEVICE_ID_INTEL_E7525_MCH)
82 quirk_intel_irqbalance();
83#endif
84}
85
86struct chipset { 74struct chipset {
87 u16 vendor; 75 u16 vendor;
88 void (*f)(void); 76 void (*f)(void);
@@ -92,7 +80,6 @@ static struct chipset early_qrk[] __initdata = {
92 { PCI_VENDOR_ID_NVIDIA, nvidia_bugs }, 80 { PCI_VENDOR_ID_NVIDIA, nvidia_bugs },
93 { PCI_VENDOR_ID_VIA, via_bugs }, 81 { PCI_VENDOR_ID_VIA, via_bugs },
94 { PCI_VENDOR_ID_ATI, ati_bugs }, 82 { PCI_VENDOR_ID_ATI, ati_bugs },
95 { PCI_VENDOR_ID_INTEL, intel_bugs},
96 {} 83 {}
97}; 84};
98 85
diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c
index 47b6d90349da..92213d2b7c11 100644
--- a/arch/x86_64/kernel/early_printk.c
+++ b/arch/x86_64/kernel/early_printk.c
@@ -11,11 +11,10 @@
11 11
12#ifdef __i386__ 12#ifdef __i386__
13#include <asm/setup.h> 13#include <asm/setup.h>
14#define VGABASE (__ISA_IO_base + 0xb8000)
15#else 14#else
16#include <asm/bootsetup.h> 15#include <asm/bootsetup.h>
17#define VGABASE ((void __iomem *)0xffffffff800b8000UL)
18#endif 16#endif
17#define VGABASE (__ISA_IO_base + 0xb8000)
19 18
20static int max_ypos = 25, max_xpos = 80; 19static int max_ypos = 25, max_xpos = 80;
21static int current_ypos = 25, current_xpos = 0; 20static int current_ypos = 25, current_xpos = 0;
@@ -176,7 +175,7 @@ static noinline long simnow(long cmd, long a, long b, long c)
176 return ret; 175 return ret;
177} 176}
178 177
179void __init simnow_init(char *str) 178static void __init simnow_init(char *str)
180{ 179{
181 char *fn = "klog"; 180 char *fn = "klog";
182 if (*str == '=') 181 if (*str == '=')
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index ed4350ced3d0..fa984b53e7e6 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -701,6 +701,7 @@ END(spurious_interrupt)
701 CFI_ADJUST_CFA_OFFSET 8 701 CFI_ADJUST_CFA_OFFSET 8
702 pushq %rax /* push real oldrax to the rdi slot */ 702 pushq %rax /* push real oldrax to the rdi slot */
703 CFI_ADJUST_CFA_OFFSET 8 703 CFI_ADJUST_CFA_OFFSET 8
704 CFI_REL_OFFSET rax,0
704 leaq \sym(%rip),%rax 705 leaq \sym(%rip),%rax
705 jmp error_entry 706 jmp error_entry
706 CFI_ENDPROC 707 CFI_ENDPROC
@@ -710,6 +711,7 @@ END(spurious_interrupt)
710 XCPT_FRAME 711 XCPT_FRAME
711 pushq %rax 712 pushq %rax
712 CFI_ADJUST_CFA_OFFSET 8 713 CFI_ADJUST_CFA_OFFSET 8
714 CFI_REL_OFFSET rax,0
713 leaq \sym(%rip),%rax 715 leaq \sym(%rip),%rax
714 jmp error_entry 716 jmp error_entry
715 CFI_ENDPROC 717 CFI_ENDPROC
@@ -817,6 +819,7 @@ paranoid_schedule\trace:
817 */ 819 */
818KPROBE_ENTRY(error_entry) 820KPROBE_ENTRY(error_entry)
819 _frame RDI 821 _frame RDI
822 CFI_REL_OFFSET rax,0
820 /* rdi slot contains rax, oldrax contains error code */ 823 /* rdi slot contains rax, oldrax contains error code */
821 cld 824 cld
822 subq $14*8,%rsp 825 subq $14*8,%rsp
@@ -824,6 +827,7 @@ KPROBE_ENTRY(error_entry)
824 movq %rsi,13*8(%rsp) 827 movq %rsi,13*8(%rsp)
825 CFI_REL_OFFSET rsi,RSI 828 CFI_REL_OFFSET rsi,RSI
826 movq 14*8(%rsp),%rsi /* load rax from rdi slot */ 829 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
830 CFI_REGISTER rax,rsi
827 movq %rdx,12*8(%rsp) 831 movq %rdx,12*8(%rsp)
828 CFI_REL_OFFSET rdx,RDX 832 CFI_REL_OFFSET rdx,RDX
829 movq %rcx,11*8(%rsp) 833 movq %rcx,11*8(%rsp)
@@ -857,6 +861,7 @@ error_swapgs:
857 swapgs 861 swapgs
858error_sti: 862error_sti:
859 movq %rdi,RDI(%rsp) 863 movq %rdi,RDI(%rsp)
864 CFI_REL_OFFSET rdi,RDI
860 movq %rsp,%rdi 865 movq %rsp,%rdi
861 movq ORIG_RAX(%rsp),%rsi /* get error code */ 866 movq ORIG_RAX(%rsp),%rsi /* get error code */
862 movq $-1,ORIG_RAX(%rsp) 867 movq $-1,ORIG_RAX(%rsp)
diff --git a/arch/x86_64/kernel/functionlist b/arch/x86_64/kernel/functionlist
deleted file mode 100644
index 7ae18ec12454..000000000000
--- a/arch/x86_64/kernel/functionlist
+++ /dev/null
@@ -1,1284 +0,0 @@
1*(.text.flush_thread)
2*(.text.check_poison_obj)
3*(.text.copy_page)
4*(.text.__set_personality)
5*(.text.gart_map_sg)
6*(.text.kmem_cache_free)
7*(.text.find_get_page)
8*(.text._raw_spin_lock)
9*(.text.ide_outb)
10*(.text.unmap_vmas)
11*(.text.copy_page_range)
12*(.text.kprobe_handler)
13*(.text.__handle_mm_fault)
14*(.text.__d_lookup)
15*(.text.copy_user_generic)
16*(.text.__link_path_walk)
17*(.text.get_page_from_freelist)
18*(.text.kmem_cache_alloc)
19*(.text.drive_cmd_intr)
20*(.text.ia32_setup_sigcontext)
21*(.text.huge_pte_offset)
22*(.text.do_page_fault)
23*(.text.page_remove_rmap)
24*(.text.release_pages)
25*(.text.ide_end_request)
26*(.text.__mutex_lock_slowpath)
27*(.text.__find_get_block)
28*(.text.kfree)
29*(.text.vfs_read)
30*(.text._raw_spin_unlock)
31*(.text.free_hot_cold_page)
32*(.text.fget_light)
33*(.text.schedule)
34*(.text.memcmp)
35*(.text.touch_atime)
36*(.text.__might_sleep)
37*(.text.__down_read_trylock)
38*(.text.arch_pick_mmap_layout)
39*(.text.find_vma)
40*(.text.__make_request)
41*(.text.do_generic_mapping_read)
42*(.text.mutex_lock_interruptible)
43*(.text.__generic_file_aio_read)
44*(.text._atomic_dec_and_lock)
45*(.text.__wake_up_bit)
46*(.text.add_to_page_cache)
47*(.text.cache_alloc_debugcheck_after)
48*(.text.vm_normal_page)
49*(.text.mutex_debug_check_no_locks_freed)
50*(.text.net_rx_action)
51*(.text.__find_first_zero_bit)
52*(.text.put_page)
53*(.text._raw_read_lock)
54*(.text.__delay)
55*(.text.dnotify_parent)
56*(.text.do_path_lookup)
57*(.text.do_sync_read)
58*(.text.do_lookup)
59*(.text.bit_waitqueue)
60*(.text.file_read_actor)
61*(.text.strncpy_from_user)
62*(.text.__pagevec_lru_add_active)
63*(.text.fget)
64*(.text.dput)
65*(.text.__strnlen_user)
66*(.text.inotify_inode_queue_event)
67*(.text.rw_verify_area)
68*(.text.ide_intr)
69*(.text.inotify_dentry_parent_queue_event)
70*(.text.permission)
71*(.text.memscan)
72*(.text.hpet_rtc_interrupt)
73*(.text.do_mmap_pgoff)
74*(.text.current_fs_time)
75*(.text.vfs_getattr)
76*(.text.kmem_flagcheck)
77*(.text.mark_page_accessed)
78*(.text.free_pages_and_swap_cache)
79*(.text.generic_fillattr)
80*(.text.__block_prepare_write)
81*(.text.__set_page_dirty_nobuffers)
82*(.text.link_path_walk)
83*(.text.find_get_pages_tag)
84*(.text.ide_do_request)
85*(.text.__alloc_pages)
86*(.text.generic_permission)
87*(.text.mod_page_state_offset)
88*(.text.free_pgd_range)
89*(.text.generic_file_buffered_write)
90*(.text.number)
91*(.text.ide_do_rw_disk)
92*(.text.__brelse)
93*(.text.__mod_page_state_offset)
94*(.text.rotate_reclaimable_page)
95*(.text.find_vma_prepare)
96*(.text.find_vma_prev)
97*(.text.lru_cache_add_active)
98*(.text.__kmalloc_track_caller)
99*(.text.smp_invalidate_interrupt)
100*(.text.handle_IRQ_event)
101*(.text.__find_get_block_slow)
102*(.text.do_wp_page)
103*(.text.do_select)
104*(.text.set_user_nice)
105*(.text.sys_read)
106*(.text.do_munmap)
107*(.text.csum_partial)
108*(.text.__do_softirq)
109*(.text.may_open)
110*(.text.getname)
111*(.text.get_empty_filp)
112*(.text.__fput)
113*(.text.remove_mapping)
114*(.text.filp_ctor)
115*(.text.poison_obj)
116*(.text.unmap_region)
117*(.text.test_set_page_writeback)
118*(.text.__do_page_cache_readahead)
119*(.text.sock_def_readable)
120*(.text.ide_outl)
121*(.text.shrink_zone)
122*(.text.rb_insert_color)
123*(.text.get_request)
124*(.text.sys_pread64)
125*(.text.spin_bug)
126*(.text.ide_outsl)
127*(.text.mask_and_ack_8259A)
128*(.text.filemap_nopage)
129*(.text.page_add_file_rmap)
130*(.text.find_lock_page)
131*(.text.tcp_poll)
132*(.text.__mark_inode_dirty)
133*(.text.file_ra_state_init)
134*(.text.generic_file_llseek)
135*(.text.__pagevec_lru_add)
136*(.text.page_cache_readahead)
137*(.text.n_tty_receive_buf)
138*(.text.zonelist_policy)
139*(.text.vma_adjust)
140*(.text.test_clear_page_dirty)
141*(.text.sync_buffer)
142*(.text.do_exit)
143*(.text.__bitmap_weight)
144*(.text.alloc_pages_current)
145*(.text.get_unused_fd)
146*(.text.zone_watermark_ok)
147*(.text.cpuset_update_task_memory_state)
148*(.text.__bitmap_empty)
149*(.text.sys_munmap)
150*(.text.__inode_dir_notify)
151*(.text.__generic_file_aio_write_nolock)
152*(.text.__pte_alloc)
153*(.text.sys_select)
154*(.text.vm_acct_memory)
155*(.text.vfs_write)
156*(.text.__lru_add_drain)
157*(.text.prio_tree_insert)
158*(.text.generic_file_aio_read)
159*(.text.vma_merge)
160*(.text.block_write_full_page)
161*(.text.__page_set_anon_rmap)
162*(.text.apic_timer_interrupt)
163*(.text.release_console_sem)
164*(.text.sys_write)
165*(.text.sys_brk)
166*(.text.dup_mm)
167*(.text.read_current_timer)
168*(.text.ll_rw_block)
169*(.text.blk_rq_map_sg)
170*(.text.dbg_userword)
171*(.text.__block_commit_write)
172*(.text.cache_grow)
173*(.text.copy_strings)
174*(.text.release_task)
175*(.text.do_sync_write)
176*(.text.unlock_page)
177*(.text.load_elf_binary)
178*(.text.__follow_mount)
179*(.text.__getblk)
180*(.text.do_sys_open)
181*(.text.current_kernel_time)
182*(.text.call_rcu)
183*(.text.write_chan)
184*(.text.vsnprintf)
185*(.text.dummy_inode_setsecurity)
186*(.text.submit_bh)
187*(.text.poll_freewait)
188*(.text.bio_alloc_bioset)
189*(.text.skb_clone)
190*(.text.page_waitqueue)
191*(.text.__mutex_lock_interruptible_slowpath)
192*(.text.get_index)
193*(.text.csum_partial_copy_generic)
194*(.text.bad_range)
195*(.text.remove_vma)
196*(.text.cp_new_stat)
197*(.text.alloc_arraycache)
198*(.text.test_clear_page_writeback)
199*(.text.strsep)
200*(.text.open_namei)
201*(.text._raw_read_unlock)
202*(.text.get_vma_policy)
203*(.text.__down_write_trylock)
204*(.text.find_get_pages)
205*(.text.tcp_rcv_established)
206*(.text.generic_make_request)
207*(.text.__block_write_full_page)
208*(.text.cfq_set_request)
209*(.text.sys_inotify_init)
210*(.text.split_vma)
211*(.text.__mod_timer)
212*(.text.get_options)
213*(.text.vma_link)
214*(.text.mpage_writepages)
215*(.text.truncate_complete_page)
216*(.text.tcp_recvmsg)
217*(.text.sigprocmask)
218*(.text.filemap_populate)
219*(.text.sys_close)
220*(.text.inotify_dev_queue_event)
221*(.text.do_task_stat)
222*(.text.__dentry_open)
223*(.text.unlink_file_vma)
224*(.text.__pollwait)
225*(.text.packet_rcv_spkt)
226*(.text.drop_buffers)
227*(.text.free_pgtables)
228*(.text.generic_file_direct_write)
229*(.text.copy_process)
230*(.text.netif_receive_skb)
231*(.text.dnotify_flush)
232*(.text.print_bad_pte)
233*(.text.anon_vma_unlink)
234*(.text.sys_mprotect)
235*(.text.sync_sb_inodes)
236*(.text.find_inode_fast)
237*(.text.dummy_inode_readlink)
238*(.text.putname)
239*(.text.init_smp_flush)
240*(.text.dbg_redzone2)
241*(.text.sk_run_filter)
242*(.text.may_expand_vm)
243*(.text.generic_file_aio_write)
244*(.text.find_next_zero_bit)
245*(.text.file_kill)
246*(.text.audit_getname)
247*(.text.arch_unmap_area_topdown)
248*(.text.alloc_page_vma)
249*(.text.tcp_transmit_skb)
250*(.text.rb_next)
251*(.text.dbg_redzone1)
252*(.text.generic_file_mmap)
253*(.text.vfs_fstat)
254*(.text.sys_time)
255*(.text.page_lock_anon_vma)
256*(.text.get_unmapped_area)
257*(.text.remote_llseek)
258*(.text.__up_read)
259*(.text.fd_install)
260*(.text.eventpoll_init_file)
261*(.text.dma_alloc_coherent)
262*(.text.create_empty_buffers)
263*(.text.__mutex_unlock_slowpath)
264*(.text.dup_fd)
265*(.text.d_alloc)
266*(.text.tty_ldisc_try)
267*(.text.sys_stime)
268*(.text.__rb_rotate_right)
269*(.text.d_validate)
270*(.text.rb_erase)
271*(.text.path_release)
272*(.text.memmove)
273*(.text.invalidate_complete_page)
274*(.text.clear_inode)
275*(.text.cache_estimate)
276*(.text.alloc_buffer_head)
277*(.text.smp_call_function_interrupt)
278*(.text.flush_tlb_others)
279*(.text.file_move)
280*(.text.balance_dirty_pages_ratelimited)
281*(.text.vma_prio_tree_add)
282*(.text.timespec_trunc)
283*(.text.mempool_alloc)
284*(.text.iget_locked)
285*(.text.d_alloc_root)
286*(.text.cpuset_populate_dir)
287*(.text.anon_vma_prepare)
288*(.text.sys_newstat)
289*(.text.alloc_page_interleave)
290*(.text.__path_lookup_intent_open)
291*(.text.__pagevec_free)
292*(.text.inode_init_once)
293*(.text.free_vfsmnt)
294*(.text.__user_walk_fd)
295*(.text.cfq_idle_slice_timer)
296*(.text.sys_mmap)
297*(.text.sys_llseek)
298*(.text.prio_tree_remove)
299*(.text.filp_close)
300*(.text.file_permission)
301*(.text.vma_prio_tree_remove)
302*(.text.tcp_ack)
303*(.text.nameidata_to_filp)
304*(.text.sys_lseek)
305*(.text.percpu_counter_mod)
306*(.text.igrab)
307*(.text.__bread)
308*(.text.alloc_inode)
309*(.text.filldir)
310*(.text.__rb_rotate_left)
311*(.text.irq_affinity_write_proc)
312*(.text.init_request_from_bio)
313*(.text.find_or_create_page)
314*(.text.tty_poll)
315*(.text.tcp_sendmsg)
316*(.text.ide_wait_stat)
317*(.text.free_buffer_head)
318*(.text.flush_signal_handlers)
319*(.text.tcp_v4_rcv)
320*(.text.nr_blockdev_pages)
321*(.text.locks_remove_flock)
322*(.text.__iowrite32_copy)
323*(.text.do_filp_open)
324*(.text.try_to_release_page)
325*(.text.page_add_new_anon_rmap)
326*(.text.kmem_cache_size)
327*(.text.eth_type_trans)
328*(.text.try_to_free_buffers)
329*(.text.schedule_tail)
330*(.text.proc_lookup)
331*(.text.no_llseek)
332*(.text.kfree_skbmem)
333*(.text.do_wait)
334*(.text.do_mpage_readpage)
335*(.text.vfs_stat_fd)
336*(.text.tty_write)
337*(.text.705)
338*(.text.sync_page)
339*(.text.__remove_shared_vm_struct)
340*(.text.__kfree_skb)
341*(.text.sock_poll)
342*(.text.get_request_wait)
343*(.text.do_sigaction)
344*(.text.do_brk)
345*(.text.tcp_event_data_recv)
346*(.text.read_chan)
347*(.text.pipe_writev)
348*(.text.__emul_lookup_dentry)
349*(.text.rtc_get_rtc_time)
350*(.text.print_objinfo)
351*(.text.file_update_time)
352*(.text.do_signal)
353*(.text.disable_8259A_irq)
354*(.text.blk_queue_bounce)
355*(.text.__anon_vma_link)
356*(.text.__vma_link)
357*(.text.vfs_rename)
358*(.text.sys_newlstat)
359*(.text.sys_newfstat)
360*(.text.sys_mknod)
361*(.text.__show_regs)
362*(.text.iput)
363*(.text.get_signal_to_deliver)
364*(.text.flush_tlb_page)
365*(.text.debug_mutex_wake_waiter)
366*(.text.copy_thread)
367*(.text.clear_page_dirty_for_io)
368*(.text.buffer_io_error)
369*(.text.vfs_permission)
370*(.text.truncate_inode_pages_range)
371*(.text.sys_recvfrom)
372*(.text.remove_suid)
373*(.text.mark_buffer_dirty)
374*(.text.local_bh_enable)
375*(.text.get_zeroed_page)
376*(.text.get_vmalloc_info)
377*(.text.flush_old_exec)
378*(.text.dummy_inode_permission)
379*(.text.__bio_add_page)
380*(.text.prio_tree_replace)
381*(.text.notify_change)
382*(.text.mntput_no_expire)
383*(.text.fput)
384*(.text.__end_that_request_first)
385*(.text.wake_up_bit)
386*(.text.unuse_mm)
387*(.text.shrink_icache_memory)
388*(.text.sched_balance_self)
389*(.text.__pmd_alloc)
390*(.text.pipe_poll)
391*(.text.normal_poll)
392*(.text.__free_pages)
393*(.text.follow_mount)
394*(.text.cdrom_start_packet_command)
395*(.text.blk_recount_segments)
396*(.text.bio_put)
397*(.text.__alloc_skb)
398*(.text.__wake_up)
399*(.text.vm_stat_account)
400*(.text.sys_fcntl)
401*(.text.sys_fadvise64)
402*(.text._raw_write_unlock)
403*(.text.__pud_alloc)
404*(.text.alloc_page_buffers)
405*(.text.vfs_llseek)
406*(.text.sockfd_lookup)
407*(.text._raw_write_lock)
408*(.text.put_compound_page)
409*(.text.prune_dcache)
410*(.text.pipe_readv)
411*(.text.mempool_free)
412*(.text.make_ahead_window)
413*(.text.lru_add_drain)
414*(.text.constant_test_bit)
415*(.text.__clear_user)
416*(.text.arch_unmap_area)
417*(.text.anon_vma_link)
418*(.text.sys_chroot)
419*(.text.setup_arg_pages)
420*(.text.radix_tree_preload)
421*(.text.init_rwsem)
422*(.text.generic_osync_inode)
423*(.text.generic_delete_inode)
424*(.text.do_sys_poll)
425*(.text.dev_queue_xmit)
426*(.text.default_llseek)
427*(.text.__writeback_single_inode)
428*(.text.vfs_ioctl)
429*(.text.__up_write)
430*(.text.unix_poll)
431*(.text.sys_rt_sigprocmask)
432*(.text.sock_recvmsg)
433*(.text.recalc_bh_state)
434*(.text.__put_unused_fd)
435*(.text.process_backlog)
436*(.text.locks_remove_posix)
437*(.text.lease_modify)
438*(.text.expand_files)
439*(.text.end_buffer_read_nobh)
440*(.text.d_splice_alias)
441*(.text.debug_mutex_init_waiter)
442*(.text.copy_from_user)
443*(.text.cap_vm_enough_memory)
444*(.text.show_vfsmnt)
445*(.text.release_sock)
446*(.text.pfifo_fast_enqueue)
447*(.text.half_md4_transform)
448*(.text.fs_may_remount_ro)
449*(.text.do_fork)
450*(.text.copy_hugetlb_page_range)
451*(.text.cache_free_debugcheck)
452*(.text.__tcp_select_window)
453*(.text.task_handoff_register)
454*(.text.sys_open)
455*(.text.strlcpy)
456*(.text.skb_copy_datagram_iovec)
457*(.text.set_up_list3s)
458*(.text.release_open_intent)
459*(.text.qdisc_restart)
460*(.text.n_tty_chars_in_buffer)
461*(.text.inode_change_ok)
462*(.text.__downgrade_write)
463*(.text.debug_mutex_unlock)
464*(.text.add_timer_randomness)
465*(.text.sock_common_recvmsg)
466*(.text.set_bh_page)
467*(.text.printk_lock)
468*(.text.path_release_on_umount)
469*(.text.ip_output)
470*(.text.ide_build_dmatable)
471*(.text.__get_user_8)
472*(.text.end_buffer_read_sync)
473*(.text.__d_path)
474*(.text.d_move)
475*(.text.del_timer)
476*(.text.constant_test_bit)
477*(.text.blockable_page_cache_readahead)
478*(.text.tty_read)
479*(.text.sys_readlink)
480*(.text.sys_faccessat)
481*(.text.read_swap_cache_async)
482*(.text.pty_write_room)
483*(.text.page_address_in_vma)
484*(.text.kthread)
485*(.text.cfq_exit_io_context)
486*(.text.__tcp_push_pending_frames)
487*(.text.sys_pipe)
488*(.text.submit_bio)
489*(.text.pid_revalidate)
490*(.text.page_referenced_file)
491*(.text.lock_sock)
492*(.text.get_page_state_node)
493*(.text.generic_block_bmap)
494*(.text.do_setitimer)
495*(.text.dev_queue_xmit_nit)
496*(.text.copy_from_read_buf)
497*(.text.__const_udelay)
498*(.text.console_conditional_schedule)
499*(.text.wake_up_new_task)
500*(.text.wait_for_completion_interruptible)
501*(.text.tcp_rcv_rtt_update)
502*(.text.sys_mlockall)
503*(.text.set_fs_altroot)
504*(.text.schedule_timeout)
505*(.text.nr_free_pagecache_pages)
506*(.text.nf_iterate)
507*(.text.mapping_tagged)
508*(.text.ip_queue_xmit)
509*(.text.ip_local_deliver)
510*(.text.follow_page)
511*(.text.elf_map)
512*(.text.dummy_file_permission)
513*(.text.dispose_list)
514*(.text.dentry_open)
515*(.text.dentry_iput)
516*(.text.bio_alloc)
517*(.text.wait_on_page_bit)
518*(.text.vfs_readdir)
519*(.text.vfs_lstat)
520*(.text.seq_escape)
521*(.text.__posix_lock_file)
522*(.text.mm_release)
523*(.text.kref_put)
524*(.text.ip_rcv)
525*(.text.__iget)
526*(.text.free_pages)
527*(.text.find_mergeable_anon_vma)
528*(.text.find_extend_vma)
529*(.text.dummy_inode_listsecurity)
530*(.text.bio_add_page)
531*(.text.__vm_enough_memory)
532*(.text.vfs_stat)
533*(.text.tty_paranoia_check)
534*(.text.tcp_read_sock)
535*(.text.tcp_data_queue)
536*(.text.sys_uname)
537*(.text.sys_renameat)
538*(.text.__strncpy_from_user)
539*(.text.__mutex_init)
540*(.text.__lookup_hash)
541*(.text.kref_get)
542*(.text.ip_route_input)
543*(.text.__insert_inode_hash)
544*(.text.do_sock_write)
545*(.text.blk_done_softirq)
546*(.text.__wake_up_sync)
547*(.text.__vma_link_rb)
548*(.text.tty_ioctl)
549*(.text.tracesys)
550*(.text.sys_getdents)
551*(.text.sys_dup)
552*(.text.stub_execve)
553*(.text.sha_transform)
554*(.text.radix_tree_tag_clear)
555*(.text.put_unused_fd)
556*(.text.put_files_struct)
557*(.text.mpage_readpages)
558*(.text.may_delete)
559*(.text.kmem_cache_create)
560*(.text.ip_mc_output)
561*(.text.interleave_nodes)
562*(.text.groups_search)
563*(.text.generic_drop_inode)
564*(.text.generic_commit_write)
565*(.text.fcntl_setlk)
566*(.text.exit_mmap)
567*(.text.end_page_writeback)
568*(.text.__d_rehash)
569*(.text.debug_mutex_free_waiter)
570*(.text.csum_ipv6_magic)
571*(.text.count)
572*(.text.cleanup_rbuf)
573*(.text.check_spinlock_acquired_node)
574*(.text.can_vma_merge_after)
575*(.text.bio_endio)
576*(.text.alloc_pidmap)
577*(.text.write_ldt)
578*(.text.vmtruncate_range)
579*(.text.vfs_create)
580*(.text.__user_walk)
581*(.text.update_send_head)
582*(.text.unmap_underlying_metadata)
583*(.text.tty_ldisc_deref)
584*(.text.tcp_setsockopt)
585*(.text.tcp_send_ack)
586*(.text.sys_pause)
587*(.text.sys_gettimeofday)
588*(.text.sync_dirty_buffer)
589*(.text.strncmp)
590*(.text.release_posix_timer)
591*(.text.proc_file_read)
592*(.text.prepare_to_wait)
593*(.text.locks_mandatory_locked)
594*(.text.interruptible_sleep_on_timeout)
595*(.text.inode_sub_bytes)
596*(.text.in_group_p)
597*(.text.hrtimer_try_to_cancel)
598*(.text.filldir64)
599*(.text.fasync_helper)
600*(.text.dummy_sb_pivotroot)
601*(.text.d_lookup)
602*(.text.d_instantiate)
603*(.text.__d_find_alias)
604*(.text.cpu_idle_wait)
605*(.text.cond_resched_lock)
606*(.text.chown_common)
607*(.text.blk_congestion_wait)
608*(.text.activate_page)
609*(.text.unlock_buffer)
610*(.text.tty_wakeup)
611*(.text.tcp_v4_do_rcv)
612*(.text.tcp_current_mss)
613*(.text.sys_openat)
614*(.text.sys_fchdir)
615*(.text.strnlen_user)
616*(.text.strnlen)
617*(.text.strchr)
618*(.text.sock_common_getsockopt)
619*(.text.skb_checksum)
620*(.text.remove_wait_queue)
621*(.text.rb_replace_node)
622*(.text.radix_tree_node_ctor)
623*(.text.pty_chars_in_buffer)
624*(.text.profile_hit)
625*(.text.prio_tree_left)
626*(.text.pgd_clear_bad)
627*(.text.pfifo_fast_dequeue)
628*(.text.page_referenced)
629*(.text.open_exec)
630*(.text.mmput)
631*(.text.mm_init)
632*(.text.__ide_dma_off_quietly)
633*(.text.ide_dma_intr)
634*(.text.hrtimer_start)
635*(.text.get_io_context)
636*(.text.__get_free_pages)
637*(.text.find_first_zero_bit)
638*(.text.file_free_rcu)
639*(.text.dummy_socket_sendmsg)
640*(.text.do_unlinkat)
641*(.text.do_arch_prctl)
642*(.text.destroy_inode)
643*(.text.can_vma_merge_before)
644*(.text.block_sync_page)
645*(.text.block_prepare_write)
646*(.text.bio_init)
647*(.text.arch_ptrace)
648*(.text.wake_up_inode)
649*(.text.wait_on_retry_sync_kiocb)
650*(.text.vma_prio_tree_next)
651*(.text.tcp_rcv_space_adjust)
652*(.text.__tcp_ack_snd_check)
653*(.text.sys_utime)
654*(.text.sys_recvmsg)
655*(.text.sys_mremap)
656*(.text.sys_bdflush)
657*(.text.sleep_on)
658*(.text.set_page_dirty_lock)
659*(.text.seq_path)
660*(.text.schedule_timeout_interruptible)
661*(.text.sched_fork)
662*(.text.rt_run_flush)
663*(.text.profile_munmap)
664*(.text.prepare_binprm)
665*(.text.__pagevec_release_nonlru)
666*(.text.m_show)
667*(.text.lookup_mnt)
668*(.text.__lookup_mnt)
669*(.text.lock_timer_base)
670*(.text.is_subdir)
671*(.text.invalidate_bh_lru)
672*(.text.init_buffer_head)
673*(.text.ifind_fast)
674*(.text.ide_dma_start)
675*(.text.__get_page_state)
676*(.text.flock_to_posix_lock)
677*(.text.__find_symbol)
678*(.text.do_futex)
679*(.text.do_execve)
680*(.text.dirty_writeback_centisecs_handler)
681*(.text.dev_watchdog)
682*(.text.can_share_swap_page)
683*(.text.blkdev_put)
684*(.text.bio_get_nr_vecs)
685*(.text.xfrm_compile_policy)
686*(.text.vma_prio_tree_insert)
687*(.text.vfs_lstat_fd)
688*(.text.__user_path_lookup_open)
689*(.text.thread_return)
690*(.text.tcp_send_delayed_ack)
691*(.text.sock_def_error_report)
692*(.text.shrink_slab)
693*(.text.serial_out)
694*(.text.seq_read)
695*(.text.secure_ip_id)
696*(.text.search_binary_handler)
697*(.text.proc_pid_unhash)
698*(.text.pagevec_lookup)
699*(.text.new_inode)
700*(.text.memcpy_toiovec)
701*(.text.locks_free_lock)
702*(.text.__lock_page)
703*(.text.__lock_buffer)
704*(.text.load_module)
705*(.text.is_bad_inode)
706*(.text.invalidate_inode_buffers)
707*(.text.insert_vm_struct)
708*(.text.inode_setattr)
709*(.text.inode_add_bytes)
710*(.text.ide_read_24)
711*(.text.ide_get_error_location)
712*(.text.ide_do_drive_cmd)
713*(.text.get_locked_pte)
714*(.text.get_filesystem_list)
715*(.text.generic_file_open)
716*(.text.follow_down)
717*(.text.find_next_bit)
718*(.text.__find_first_bit)
719*(.text.exit_mm)
720*(.text.exec_keys)
721*(.text.end_buffer_write_sync)
722*(.text.end_bio_bh_io_sync)
723*(.text.dummy_socket_shutdown)
724*(.text.d_rehash)
725*(.text.d_path)
726*(.text.do_ioctl)
727*(.text.dget_locked)
728*(.text.copy_thread_group_keys)
729*(.text.cdrom_end_request)
730*(.text.cap_bprm_apply_creds)
731*(.text.blk_rq_bio_prep)
732*(.text.__bitmap_intersects)
733*(.text.bio_phys_segments)
734*(.text.bio_free)
735*(.text.arch_get_unmapped_area_topdown)
736*(.text.writeback_in_progress)
737*(.text.vfs_follow_link)
738*(.text.tcp_rcv_state_process)
739*(.text.tcp_check_space)
740*(.text.sys_stat)
741*(.text.sys_rt_sigreturn)
742*(.text.sys_rt_sigaction)
743*(.text.sys_remap_file_pages)
744*(.text.sys_pwrite64)
745*(.text.sys_fchownat)
746*(.text.sys_fchmodat)
747*(.text.strncat)
748*(.text.strlcat)
749*(.text.strcmp)
750*(.text.steal_locks)
751*(.text.sock_create)
752*(.text.sk_stream_rfree)
753*(.text.sk_stream_mem_schedule)
754*(.text.skip_atoi)
755*(.text.sk_alloc)
756*(.text.show_stat)
757*(.text.set_fs_pwd)
758*(.text.set_binfmt)
759*(.text.pty_unthrottle)
760*(.text.proc_symlink)
761*(.text.pipe_release)
762*(.text.pageout)
763*(.text.n_tty_write_wakeup)
764*(.text.n_tty_ioctl)
765*(.text.nr_free_zone_pages)
766*(.text.migration_thread)
767*(.text.mempool_free_slab)
768*(.text.meminfo_read_proc)
769*(.text.max_sane_readahead)
770*(.text.lru_cache_add)
771*(.text.kill_fasync)
772*(.text.kernel_read)
773*(.text.invalidate_mapping_pages)
774*(.text.inode_has_buffers)
775*(.text.init_once)
776*(.text.inet_sendmsg)
777*(.text.idedisk_issue_flush)
778*(.text.generic_file_write)
779*(.text.free_more_memory)
780*(.text.__free_fdtable)
781*(.text.filp_dtor)
782*(.text.exit_sem)
783*(.text.exit_itimers)
784*(.text.error_interrupt)
785*(.text.end_buffer_async_write)
786*(.text.eligible_child)
787*(.text.elf_map)
788*(.text.dump_task_regs)
789*(.text.dummy_task_setscheduler)
790*(.text.dummy_socket_accept)
791*(.text.dummy_file_free_security)
792*(.text.__down_read)
793*(.text.do_sock_read)
794*(.text.do_sigaltstack)
795*(.text.do_mremap)
796*(.text.current_io_context)
797*(.text.cpu_swap_callback)
798*(.text.copy_vma)
799*(.text.cap_bprm_set_security)
800*(.text.blk_insert_request)
801*(.text.bio_map_kern_endio)
802*(.text.bio_hw_segments)
803*(.text.bictcp_cong_avoid)
804*(.text.add_interrupt_randomness)
805*(.text.wait_for_completion)
806*(.text.version_read_proc)
807*(.text.unix_write_space)
808*(.text.tty_ldisc_ref_wait)
809*(.text.tty_ldisc_put)
810*(.text.try_to_wake_up)
811*(.text.tcp_v4_tw_remember_stamp)
812*(.text.tcp_try_undo_dsack)
813*(.text.tcp_may_send_now)
814*(.text.sys_waitid)
815*(.text.sys_sched_getparam)
816*(.text.sys_getppid)
817*(.text.sys_getcwd)
818*(.text.sys_dup2)
819*(.text.sys_chmod)
820*(.text.sys_chdir)
821*(.text.sprintf)
822*(.text.sock_wfree)
823*(.text.sock_aio_write)
824*(.text.skb_drop_fraglist)
825*(.text.skb_dequeue)
826*(.text.set_close_on_exec)
827*(.text.set_brk)
828*(.text.seq_puts)
829*(.text.SELECT_DRIVE)
830*(.text.sched_exec)
831*(.text.return_EIO)
832*(.text.remove_from_page_cache)
833*(.text.rcu_start_batch)
834*(.text.__put_task_struct)
835*(.text.proc_pid_readdir)
836*(.text.proc_get_inode)
837*(.text.prepare_to_wait_exclusive)
838*(.text.pipe_wait)
839*(.text.pipe_new)
840*(.text.pdflush_operation)
841*(.text.__pagevec_release)
842*(.text.pagevec_lookup_tag)
843*(.text.packet_rcv)
844*(.text.n_tty_set_room)
845*(.text.nr_free_pages)
846*(.text.__net_timestamp)
847*(.text.mpage_end_io_read)
848*(.text.mod_timer)
849*(.text.__memcpy)
850*(.text.mb_cache_shrink_fn)
851*(.text.lock_rename)
852*(.text.kstrdup)
853*(.text.is_ignored)
854*(.text.int_very_careful)
855*(.text.inotify_inode_is_dead)
856*(.text.inotify_get_cookie)
857*(.text.inode_get_bytes)
858*(.text.init_timer)
859*(.text.init_dev)
860*(.text.inet_getname)
861*(.text.ide_map_sg)
862*(.text.__ide_dma_end)
863*(.text.hrtimer_get_remaining)
864*(.text.get_task_mm)
865*(.text.get_random_int)
866*(.text.free_pipe_info)
867*(.text.filemap_write_and_wait_range)
868*(.text.exit_thread)
869*(.text.enter_idle)
870*(.text.end_that_request_first)
871*(.text.end_8259A_irq)
872*(.text.dummy_file_alloc_security)
873*(.text.do_group_exit)
874*(.text.debug_mutex_init)
875*(.text.cpuset_exit)
876*(.text.cpu_idle)
877*(.text.copy_semundo)
878*(.text.copy_files)
879*(.text.chrdev_open)
880*(.text.cdrom_transfer_packet_command)
881*(.text.cdrom_mode_sense)
882*(.text.blk_phys_contig_segment)
883*(.text.blk_get_queue)
884*(.text.bio_split)
885*(.text.audit_alloc)
886*(.text.anon_pipe_buf_release)
887*(.text.add_wait_queue_exclusive)
888*(.text.add_wait_queue)
889*(.text.acct_process)
890*(.text.account)
891*(.text.zeromap_page_range)
892*(.text.yield)
893*(.text.writeback_acquire)
894*(.text.worker_thread)
895*(.text.wait_on_page_writeback_range)
896*(.text.__wait_on_buffer)
897*(.text.vscnprintf)
898*(.text.vmalloc_to_pfn)
899*(.text.vgacon_save_screen)
900*(.text.vfs_unlink)
901*(.text.vfs_rmdir)
902*(.text.unregister_md_personality)
903*(.text.unlock_new_inode)
904*(.text.unix_stream_sendmsg)
905*(.text.unix_stream_recvmsg)
906*(.text.unhash_process)
907*(.text.udp_v4_lookup_longway)
908*(.text.tty_ldisc_flush)
909*(.text.tty_ldisc_enable)
910*(.text.tty_hung_up_p)
911*(.text.tty_buffer_free_all)
912*(.text.tso_fragment)
913*(.text.try_to_del_timer_sync)
914*(.text.tcp_v4_err)
915*(.text.tcp_unhash)
916*(.text.tcp_seq_next)
917*(.text.tcp_select_initial_window)
918*(.text.tcp_sacktag_write_queue)
919*(.text.tcp_cwnd_validate)
920*(.text.sys_vhangup)
921*(.text.sys_uselib)
922*(.text.sys_symlink)
923*(.text.sys_signal)
924*(.text.sys_poll)
925*(.text.sys_mount)
926*(.text.sys_kill)
927*(.text.sys_ioctl)
928*(.text.sys_inotify_add_watch)
929*(.text.sys_getuid)
930*(.text.sys_getrlimit)
931*(.text.sys_getitimer)
932*(.text.sys_getgroups)
933*(.text.sys_ftruncate)
934*(.text.sysfs_lookup)
935*(.text.sys_exit_group)
936*(.text.stub_fork)
937*(.text.sscanf)
938*(.text.sock_map_fd)
939*(.text.sock_get_timestamp)
940*(.text.__sock_create)
941*(.text.smp_call_function_single)
942*(.text.sk_stop_timer)
943*(.text.skb_copy_and_csum_datagram)
944*(.text.__skb_checksum_complete)
945*(.text.single_next)
946*(.text.sigqueue_alloc)
947*(.text.shrink_dcache_parent)
948*(.text.select_idle_routine)
949*(.text.run_workqueue)
950*(.text.run_local_timers)
951*(.text.remove_inode_hash)
952*(.text.remove_dquot_ref)
953*(.text.register_binfmt)
954*(.text.read_cache_pages)
955*(.text.rb_last)
956*(.text.pty_open)
957*(.text.proc_root_readdir)
958*(.text.proc_pid_flush)
959*(.text.proc_pident_lookup)
960*(.text.proc_fill_super)
961*(.text.proc_exe_link)
962*(.text.posix_locks_deadlock)
963*(.text.pipe_iov_copy_from_user)
964*(.text.opost)
965*(.text.nf_register_hook)
966*(.text.netif_rx_ni)
967*(.text.m_start)
968*(.text.mpage_writepage)
969*(.text.mm_alloc)
970*(.text.memory_open)
971*(.text.mark_buffer_async_write)
972*(.text.lru_add_drain_all)
973*(.text.locks_init_lock)
974*(.text.locks_delete_lock)
975*(.text.lock_hrtimer_base)
976*(.text.load_script)
977*(.text.__kill_fasync)
978*(.text.ip_mc_sf_allow)
979*(.text.__ioremap)
980*(.text.int_with_check)
981*(.text.int_sqrt)
982*(.text.install_thread_keyring)
983*(.text.init_page_buffers)
984*(.text.inet_sock_destruct)
985*(.text.idle_notifier_register)
986*(.text.ide_execute_command)
987*(.text.ide_end_drive_cmd)
988*(.text.__ide_dma_host_on)
989*(.text.hrtimer_run_queues)
990*(.text.hpet_mask_rtc_irq_bit)
991*(.text.__get_zone_counts)
992*(.text.get_zone_counts)
993*(.text.get_write_access)
994*(.text.get_fs_struct)
995*(.text.get_dirty_limits)
996*(.text.generic_readlink)
997*(.text.free_hot_page)
998*(.text.finish_wait)
999*(.text.find_inode)
1000*(.text.find_first_bit)
1001*(.text.__filemap_fdatawrite_range)
1002*(.text.__filemap_copy_from_user_iovec)
1003*(.text.exit_aio)
1004*(.text.elv_set_request)
1005*(.text.elv_former_request)
1006*(.text.dup_namespace)
1007*(.text.dupfd)
1008*(.text.dummy_socket_getsockopt)
1009*(.text.dummy_sb_post_mountroot)
1010*(.text.dummy_quotactl)
1011*(.text.dummy_inode_rename)
1012*(.text.__do_SAK)
1013*(.text.do_pipe)
1014*(.text.do_fsync)
1015*(.text.d_instantiate_unique)
1016*(.text.d_find_alias)
1017*(.text.deny_write_access)
1018*(.text.dentry_unhash)
1019*(.text.d_delete)
1020*(.text.datagram_poll)
1021*(.text.cpuset_fork)
1022*(.text.cpuid_read)
1023*(.text.copy_namespace)
1024*(.text.cond_resched)
1025*(.text.check_version)
1026*(.text.__change_page_attr)
1027*(.text.cfq_slab_kill)
1028*(.text.cfq_completed_request)
1029*(.text.cdrom_pc_intr)
1030*(.text.cdrom_decode_status)
1031*(.text.cap_capset_check)
1032*(.text.blk_put_request)
1033*(.text.bio_fs_destructor)
1034*(.text.bictcp_min_cwnd)
1035*(.text.alloc_chrdev_region)
1036*(.text.add_element)
1037*(.text.acct_update_integrals)
1038*(.text.write_boundary_block)
1039*(.text.writeback_release)
1040*(.text.writeback_inodes)
1041*(.text.wake_up_state)
1042*(.text.__wake_up_locked)
1043*(.text.wake_futex)
1044*(.text.wait_task_inactive)
1045*(.text.__wait_on_freeing_inode)
1046*(.text.wait_noreap_copyout)
1047*(.text.vmstat_start)
1048*(.text.vgacon_do_font_op)
1049*(.text.vfs_readv)
1050*(.text.vfs_quota_sync)
1051*(.text.update_queue)
1052*(.text.unshare_files)
1053*(.text.unmap_vm_area)
1054*(.text.unix_socketpair)
1055*(.text.unix_release_sock)
1056*(.text.unix_detach_fds)
1057*(.text.unix_create1)
1058*(.text.unix_bind)
1059*(.text.udp_sendmsg)
1060*(.text.udp_rcv)
1061*(.text.udp_queue_rcv_skb)
1062*(.text.uart_write)
1063*(.text.uart_startup)
1064*(.text.uart_open)
1065*(.text.tty_vhangup)
1066*(.text.tty_termios_baud_rate)
1067*(.text.tty_release)
1068*(.text.tty_ldisc_ref)
1069*(.text.throttle_vm_writeout)
1070*(.text.058)
1071*(.text.tcp_xmit_probe_skb)
1072*(.text.tcp_v4_send_check)
1073*(.text.tcp_v4_destroy_sock)
1074*(.text.tcp_sync_mss)
1075*(.text.tcp_snd_test)
1076*(.text.tcp_slow_start)
1077*(.text.tcp_send_fin)
1078*(.text.tcp_rtt_estimator)
1079*(.text.tcp_parse_options)
1080*(.text.tcp_ioctl)
1081*(.text.tcp_init_tso_segs)
1082*(.text.tcp_init_cwnd)
1083*(.text.tcp_getsockopt)
1084*(.text.tcp_fin)
1085*(.text.tcp_connect)
1086*(.text.tcp_cong_avoid)
1087*(.text.__tcp_checksum_complete_user)
1088*(.text.task_dumpable)
1089*(.text.sys_wait4)
1090*(.text.sys_utimes)
1091*(.text.sys_symlinkat)
1092*(.text.sys_socketpair)
1093*(.text.sys_rmdir)
1094*(.text.sys_readahead)
1095*(.text.sys_nanosleep)
1096*(.text.sys_linkat)
1097*(.text.sys_fstat)
1098*(.text.sysfs_readdir)
1099*(.text.sys_execve)
1100*(.text.sysenter_tracesys)
1101*(.text.sys_chown)
1102*(.text.stub_clone)
1103*(.text.strrchr)
1104*(.text.strncpy)
1105*(.text.stopmachine_set_state)
1106*(.text.sock_sendmsg)
1107*(.text.sock_release)
1108*(.text.sock_fasync)
1109*(.text.sock_close)
1110*(.text.sk_stream_write_space)
1111*(.text.sk_reset_timer)
1112*(.text.skb_split)
1113*(.text.skb_recv_datagram)
1114*(.text.skb_queue_tail)
1115*(.text.sk_attach_filter)
1116*(.text.si_swapinfo)
1117*(.text.simple_strtoll)
1118*(.text.set_termios)
1119*(.text.set_task_comm)
1120*(.text.set_shrinker)
1121*(.text.set_normalized_timespec)
1122*(.text.set_brk)
1123*(.text.serial_in)
1124*(.text.seq_printf)
1125*(.text.secure_dccp_sequence_number)
1126*(.text.rwlock_bug)
1127*(.text.rt_hash_code)
1128*(.text.__rta_fill)
1129*(.text.__request_resource)
1130*(.text.relocate_new_kernel)
1131*(.text.release_thread)
1132*(.text.release_mem)
1133*(.text.rb_prev)
1134*(.text.rb_first)
1135*(.text.random_poll)
1136*(.text.__put_super_and_need_restart)
1137*(.text.pty_write)
1138*(.text.ptrace_stop)
1139*(.text.proc_self_readlink)
1140*(.text.proc_root_lookup)
1141*(.text.proc_root_link)
1142*(.text.proc_pid_make_inode)
1143*(.text.proc_pid_attr_write)
1144*(.text.proc_lookupfd)
1145*(.text.proc_delete_inode)
1146*(.text.posix_same_owner)
1147*(.text.posix_block_lock)
1148*(.text.poll_initwait)
1149*(.text.pipe_write)
1150*(.text.pipe_read_fasync)
1151*(.text.pipe_ioctl)
1152*(.text.pdflush)
1153*(.text.pci_user_read_config_dword)
1154*(.text.page_readlink)
1155*(.text.null_lseek)
1156*(.text.nf_hook_slow)
1157*(.text.netlink_sock_destruct)
1158*(.text.netlink_broadcast)
1159*(.text.neigh_resolve_output)
1160*(.text.name_to_int)
1161*(.text.mwait_idle)
1162*(.text.mutex_trylock)
1163*(.text.mutex_debug_check_no_locks_held)
1164*(.text.m_stop)
1165*(.text.mpage_end_io_write)
1166*(.text.mpage_alloc)
1167*(.text.move_page_tables)
1168*(.text.mounts_open)
1169*(.text.__memset)
1170*(.text.memcpy_fromiovec)
1171*(.text.make_8259A_irq)
1172*(.text.lookup_user_key_possessed)
1173*(.text.lookup_create)
1174*(.text.locks_insert_lock)
1175*(.text.locks_alloc_lock)
1176*(.text.kthread_should_stop)
1177*(.text.kswapd)
1178*(.text.kobject_uevent)
1179*(.text.kobject_get_path)
1180*(.text.kobject_get)
1181*(.text.klist_children_put)
1182*(.text.__ip_route_output_key)
1183*(.text.ip_flush_pending_frames)
1184*(.text.ip_compute_csum)
1185*(.text.ip_append_data)
1186*(.text.ioc_set_batching)
1187*(.text.invalidate_inode_pages)
1188*(.text.__invalidate_device)
1189*(.text.install_arg_page)
1190*(.text.in_sched_functions)
1191*(.text.inotify_unmount_inodes)
1192*(.text.init_once)
1193*(.text.init_cdrom_command)
1194*(.text.inet_stream_connect)
1195*(.text.inet_sk_rebuild_header)
1196*(.text.inet_csk_addr2sockaddr)
1197*(.text.inet_create)
1198*(.text.ifind)
1199*(.text.ide_setup_dma)
1200*(.text.ide_outsw)
1201*(.text.ide_fixstring)
1202*(.text.ide_dma_setup)
1203*(.text.ide_cdrom_packet)
1204*(.text.ide_cd_put)
1205*(.text.ide_build_sglist)
1206*(.text.i8259A_shutdown)
1207*(.text.hung_up_tty_ioctl)
1208*(.text.hrtimer_nanosleep)
1209*(.text.hrtimer_init)
1210*(.text.hrtimer_cancel)
1211*(.text.hash_futex)
1212*(.text.group_send_sig_info)
1213*(.text.grab_cache_page_nowait)
1214*(.text.get_wchan)
1215*(.text.get_stack)
1216*(.text.get_page_state)
1217*(.text.getnstimeofday)
1218*(.text.get_node)
1219*(.text.get_kprobe)
1220*(.text.generic_unplug_device)
1221*(.text.free_task)
1222*(.text.frag_show)
1223*(.text.find_next_zero_string)
1224*(.text.filp_open)
1225*(.text.fillonedir)
1226*(.text.exit_io_context)
1227*(.text.exit_idle)
1228*(.text.exact_lock)
1229*(.text.eth_header)
1230*(.text.dummy_unregister_security)
1231*(.text.dummy_socket_post_create)
1232*(.text.dummy_socket_listen)
1233*(.text.dummy_quota_on)
1234*(.text.dummy_inode_follow_link)
1235*(.text.dummy_file_receive)
1236*(.text.dummy_file_mprotect)
1237*(.text.dummy_file_lock)
1238*(.text.dummy_file_ioctl)
1239*(.text.dummy_bprm_post_apply_creds)
1240*(.text.do_writepages)
1241*(.text.__down_interruptible)
1242*(.text.do_notify_resume)
1243*(.text.do_acct_process)
1244*(.text.del_timer_sync)
1245*(.text.default_rebuild_header)
1246*(.text.d_callback)
1247*(.text.dcache_readdir)
1248*(.text.ctrl_dumpfamily)
1249*(.text.cpuset_rmdir)
1250*(.text.copy_strings_kernel)
1251*(.text.con_write_room)
1252*(.text.complete_all)
1253*(.text.collect_sigign_sigcatch)
1254*(.text.clear_user)
1255*(.text.check_unthrottle)
1256*(.text.cdrom_release)
1257*(.text.cdrom_newpc_intr)
1258*(.text.cdrom_ioctl)
1259*(.text.cdrom_check_status)
1260*(.text.cdev_put)
1261*(.text.cdev_add)
1262*(.text.cap_ptrace)
1263*(.text.cap_bprm_secureexec)
1264*(.text.cache_alloc_refill)
1265*(.text.bmap)
1266*(.text.blk_run_queue)
1267*(.text.blk_queue_dma_alignment)
1268*(.text.blk_ordered_req_seq)
1269*(.text.blk_backing_dev_unplug)
1270*(.text.__bitmap_subset)
1271*(.text.__bitmap_and)
1272*(.text.bio_unmap_user)
1273*(.text.__bforget)
1274*(.text.bd_forget)
1275*(.text.bad_pipe_w)
1276*(.text.bad_get_user)
1277*(.text.audit_free)
1278*(.text.anon_vma_ctor)
1279*(.text.anon_pipe_buf_map)
1280*(.text.alloc_sock_iocb)
1281*(.text.alloc_fdset)
1282*(.text.aio_kick_handler)
1283*(.text.__add_entropy_words)
1284*(.text.add_disk_randomness)
diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
index 0b3603adf56d..47496a40e84f 100644
--- a/arch/x86_64/kernel/genapic.c
+++ b/arch/x86_64/kernel/genapic.c
@@ -11,120 +11,54 @@
11#include <linux/threads.h> 11#include <linux/threads.h>
12#include <linux/cpumask.h> 12#include <linux/cpumask.h>
13#include <linux/string.h> 13#include <linux/string.h>
14#include <linux/module.h>
14#include <linux/kernel.h> 15#include <linux/kernel.h>
15#include <linux/ctype.h> 16#include <linux/ctype.h>
16#include <linux/init.h> 17#include <linux/init.h>
17#include <linux/module.h>
18 18
19#include <asm/smp.h> 19#include <asm/smp.h>
20#include <asm/ipi.h> 20#include <asm/ipi.h>
21#include <asm/genapic.h>
21 22
22#if defined(CONFIG_ACPI) 23#ifdef CONFIG_ACPI
23#include <acpi/acpi_bus.h> 24#include <acpi/acpi_bus.h>
24#endif 25#endif
25 26
26/* which logical CPU number maps to which CPU (physical APIC ID) */ 27/* which logical CPU number maps to which CPU (physical APIC ID) */
27u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; 28u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly
29 = { [0 ... NR_CPUS-1] = BAD_APICID };
28EXPORT_SYMBOL(x86_cpu_to_apicid); 30EXPORT_SYMBOL(x86_cpu_to_apicid);
29u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
30 31
31extern struct genapic apic_cluster; 32u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
32extern struct genapic apic_flat;
33extern struct genapic apic_physflat;
34 33
35struct genapic *genapic = &apic_flat; 34struct genapic __read_mostly *genapic = &apic_flat;
36struct genapic *genapic_force;
37 35
38/* 36/*
39 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. 37 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
40 */ 38 */
41void __init clustered_apic_check(void) 39void __init setup_apic_routing(void)
42{ 40{
43 long i; 41#ifdef CONFIG_ACPI
44 u8 clusters, max_cluster;
45 u8 id;
46 u8 cluster_cnt[NUM_APIC_CLUSTERS];
47 int max_apic = 0;
48
49 /* genapic selection can be forced because of certain quirks.
50 */
51 if (genapic_force) {
52 genapic = genapic_force;
53 goto print;
54 }
55
56#if defined(CONFIG_ACPI)
57 /* 42 /*
58 * Some x86_64 machines use physical APIC mode regardless of how many 43 * Quirk: some x86_64 machines can only use physical APIC mode
59 * procs/clusters are present (x86_64 ES7000 is an example). 44 * regardless of how many processors are present (x86_64 ES7000
45 * is an example).
60 */ 46 */
61 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID) 47 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
62 if (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) { 48 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
63 genapic = &apic_cluster;
64 goto print;
65 }
66#endif
67
68 memset(cluster_cnt, 0, sizeof(cluster_cnt));
69 for (i = 0; i < NR_CPUS; i++) {
70 id = bios_cpu_apicid[i];
71 if (id == BAD_APICID)
72 continue;
73 if (id > max_apic)
74 max_apic = id;
75 cluster_cnt[APIC_CLUSTERID(id)]++;
76 }
77
78 /* Don't use clustered mode on AMD platforms. */
79 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
80 genapic = &apic_physflat; 49 genapic = &apic_physflat;
81#ifndef CONFIG_HOTPLUG_CPU 50 else
82 /* In the CPU hotplug case we cannot use broadcast mode
83 because that opens a race when a CPU is removed.
84 Stay at physflat mode in this case.
85 It is bad to do this unconditionally though. Once
86 we have ACPI platform support for CPU hotplug
87 we should detect hotplug capablity from ACPI tables and
88 only do this when really needed. -AK */
89 if (max_apic <= 8)
90 genapic = &apic_flat;
91#endif 51#endif
92 goto print;
93 }
94 52
95 clusters = 0; 53 if (cpus_weight(cpu_possible_map) <= 8)
96 max_cluster = 0;
97
98 for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
99 if (cluster_cnt[i] > 0) {
100 ++clusters;
101 if (cluster_cnt[i] > max_cluster)
102 max_cluster = cluster_cnt[i];
103 }
104 }
105
106 /*
107 * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode,
108 * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical
109 * else physical mode.
110 * (We don't use lowest priority delivery + HW APIC IRQ steering, so
111 * can ignore the clustered logical case and go straight to physical.)
112 */
113 if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) {
114#ifdef CONFIG_HOTPLUG_CPU
115 /* Don't use APIC shortcuts in CPU hotplug to avoid races */
116 genapic = &apic_physflat;
117#else
118 genapic = &apic_flat; 54 genapic = &apic_flat;
119#endif 55 else
120 } else 56 genapic = &apic_physflat;
121 genapic = &apic_cluster;
122 57
123print:
124 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); 58 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
125} 59}
126 60
127/* Same for both flat and clustered. */ 61/* Same for both flat and physical. */
128 62
129void send_IPI_self(int vector) 63void send_IPI_self(int vector)
130{ 64{
diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c
deleted file mode 100644
index 73d76308b955..000000000000
--- a/arch/x86_64/kernel/genapic_cluster.c
+++ /dev/null
@@ -1,137 +0,0 @@
1/*
2 * Copyright 2004 James Cleverdon, IBM.
3 * Subject to the GNU Public License, v.2
4 *
5 * Clustered APIC subarch code. Up to 255 CPUs, physical delivery.
6 * (A more realistic maximum is around 230 CPUs.)
7 *
8 * Hacked for x86-64 by James Cleverdon from i386 architecture code by
9 * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
10 * James Cleverdon.
11 */
12#include <linux/threads.h>
13#include <linux/cpumask.h>
14#include <linux/string.h>
15#include <linux/kernel.h>
16#include <linux/ctype.h>
17#include <linux/init.h>
18#include <asm/smp.h>
19#include <asm/ipi.h>
20
21
22/*
23 * Set up the logical destination ID.
24 *
25 * Intel recommends to set DFR, LDR and TPR before enabling
26 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
27 * document number 292116). So here it goes...
28 */
29static void cluster_init_apic_ldr(void)
30{
31 unsigned long val, id;
32 long i, count;
33 u8 lid;
34 u8 my_id = hard_smp_processor_id();
35 u8 my_cluster = APIC_CLUSTER(my_id);
36
37 /* Create logical APIC IDs by counting CPUs already in cluster. */
38 for (count = 0, i = NR_CPUS; --i >= 0; ) {
39 lid = x86_cpu_to_log_apicid[i];
40 if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster)
41 ++count;
42 }
43 /*
44 * We only have a 4 wide bitmap in cluster mode. There's no way
45 * to get above 60 CPUs and still give each one it's own bit.
46 * But, we're using physical IRQ delivery, so we don't care.
47 * Use bit 3 for the 4th through Nth CPU in each cluster.
48 */
49 if (count >= XAPIC_DEST_CPUS_SHIFT)
50 count = 3;
51 id = my_cluster | (1UL << count);
52 x86_cpu_to_log_apicid[smp_processor_id()] = id;
53 apic_write(APIC_DFR, APIC_DFR_CLUSTER);
54 val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
55 val |= SET_APIC_LOGICAL_ID(id);
56 apic_write(APIC_LDR, val);
57}
58
59/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
60
61static cpumask_t cluster_target_cpus(void)
62{
63 return cpumask_of_cpu(0);
64}
65
66static cpumask_t cluster_vector_allocation_domain(int cpu)
67{
68 cpumask_t domain = CPU_MASK_NONE;
69 cpu_set(cpu, domain);
70 return domain;
71}
72
73static void cluster_send_IPI_mask(cpumask_t mask, int vector)
74{
75 send_IPI_mask_sequence(mask, vector);
76}
77
78static void cluster_send_IPI_allbutself(int vector)
79{
80 cpumask_t mask = cpu_online_map;
81
82 cpu_clear(smp_processor_id(), mask);
83
84 if (!cpus_empty(mask))
85 cluster_send_IPI_mask(mask, vector);
86}
87
88static void cluster_send_IPI_all(int vector)
89{
90 cluster_send_IPI_mask(cpu_online_map, vector);
91}
92
93static int cluster_apic_id_registered(void)
94{
95 return 1;
96}
97
98static unsigned int cluster_cpu_mask_to_apicid(cpumask_t cpumask)
99{
100 int cpu;
101
102 /*
103 * We're using fixed IRQ delivery, can only return one phys APIC ID.
104 * May as well be the first.
105 */
106 cpu = first_cpu(cpumask);
107 if ((unsigned)cpu < NR_CPUS)
108 return x86_cpu_to_apicid[cpu];
109 else
110 return BAD_APICID;
111}
112
113/* cpuid returns the value latched in the HW at reset, not the APIC ID
114 * register's value. For any box whose BIOS changes APIC IDs, like
115 * clustered APIC systems, we must use hard_smp_processor_id.
116 *
117 * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
118 */
119static unsigned int phys_pkg_id(int index_msb)
120{
121 return hard_smp_processor_id() >> index_msb;
122}
123
124struct genapic apic_cluster = {
125 .name = "clustered",
126 .int_delivery_mode = dest_Fixed,
127 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
128 .target_cpus = cluster_target_cpus,
129 .vector_allocation_domain = cluster_vector_allocation_domain,
130 .apic_id_registered = cluster_apic_id_registered,
131 .init_apic_ldr = cluster_init_apic_ldr,
132 .send_IPI_all = cluster_send_IPI_all,
133 .send_IPI_allbutself = cluster_send_IPI_allbutself,
134 .send_IPI_mask = cluster_send_IPI_mask,
135 .cpu_mask_to_apicid = cluster_cpu_mask_to_apicid,
136 .phys_pkg_id = phys_pkg_id,
137};
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
index 7c01db8fa9d1..ecb01eefdd27 100644
--- a/arch/x86_64/kernel/genapic_flat.c
+++ b/arch/x86_64/kernel/genapic_flat.c
@@ -8,6 +8,7 @@
8 * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and 8 * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
9 * James Cleverdon. 9 * James Cleverdon.
10 */ 10 */
11#include <linux/errno.h>
11#include <linux/threads.h> 12#include <linux/threads.h>
12#include <linux/cpumask.h> 13#include <linux/cpumask.h>
13#include <linux/string.h> 14#include <linux/string.h>
@@ -16,6 +17,7 @@
16#include <linux/init.h> 17#include <linux/init.h>
17#include <asm/smp.h> 18#include <asm/smp.h>
18#include <asm/ipi.h> 19#include <asm/ipi.h>
20#include <asm/genapic.h>
19 21
20static cpumask_t flat_target_cpus(void) 22static cpumask_t flat_target_cpus(void)
21{ 23{
@@ -60,31 +62,10 @@ static void flat_init_apic_ldr(void)
60static void flat_send_IPI_mask(cpumask_t cpumask, int vector) 62static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
61{ 63{
62 unsigned long mask = cpus_addr(cpumask)[0]; 64 unsigned long mask = cpus_addr(cpumask)[0];
63 unsigned long cfg;
64 unsigned long flags; 65 unsigned long flags;
65 66
66 local_irq_save(flags); 67 local_irq_save(flags);
67 68 __send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL);
68 /*
69 * Wait for idle.
70 */
71 apic_wait_icr_idle();
72
73 /*
74 * prepare target chip field
75 */
76 cfg = __prepare_ICR2(mask);
77 apic_write(APIC_ICR2, cfg);
78
79 /*
80 * program the ICR
81 */
82 cfg = __prepare_ICR(0, vector, APIC_DEST_LOGICAL);
83
84 /*
85 * Send the IPI. The write to APIC_ICR fires this off.
86 */
87 apic_write(APIC_ICR, cfg);
88 local_irq_restore(flags); 69 local_irq_restore(flags);
89} 70}
90 71
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index 598a4d0351fc..1fab487dee86 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -5,6 +5,7 @@
5 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> 5 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
6 * Copyright (C) 2000 Karsten Keil <kkeil@suse.de> 6 * Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
7 * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de> 7 * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
8 * Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com>
8 */ 9 */
9 10
10 11
@@ -13,97 +14,131 @@
13#include <linux/init.h> 14#include <linux/init.h>
14#include <asm/desc.h> 15#include <asm/desc.h>
15#include <asm/segment.h> 16#include <asm/segment.h>
17#include <asm/pgtable.h>
16#include <asm/page.h> 18#include <asm/page.h>
17#include <asm/msr.h> 19#include <asm/msr.h>
18#include <asm/cache.h> 20#include <asm/cache.h>
19 21
20/* we are not able to switch in one step to the final KERNEL ADRESS SPACE 22/* we are not able to switch in one step to the final KERNEL ADRESS SPACE
21 * because we need identity-mapped pages on setup so define __START_KERNEL to 23 * because we need identity-mapped pages.
22 * 0x100000 for this stage 24 *
23 *
24 */ 25 */
25 26
26 .text 27 .text
27 .section .bootstrap.text 28 .section .bootstrap.text
28 .code32 29 .code64
29 .globl startup_32 30 .globl startup_64
30/* %bx: 1 if coming from smp trampoline on secondary cpu */ 31startup_64:
31startup_32: 32
32
33 /* 33 /*
34 * At this point the CPU runs in 32bit protected mode (CS.D = 1) with 34 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
35 * paging disabled and the point of this file is to switch to 64bit 35 * and someone has loaded an identity mapped page table
36 * long mode with a kernel mapping for kerneland to jump into the 36 * for us. These identity mapped page tables map all of the
37 * kernel virtual addresses. 37 * kernel pages and possibly all of memory.
38 * There is no stack until we set one up. 38 *
39 * %esi holds a physical pointer to real_mode_data.
40 *
41 * We come here either directly from a 64bit bootloader, or from
42 * arch/x86_64/boot/compressed/head.S.
43 *
44 * We only come here initially at boot nothing else comes here.
45 *
46 * Since we may be loaded at an address different from what we were
47 * compiled to run at we first fixup the physical addresses in our page
48 * tables and then reload them.
39 */ 49 */
40 50
41 /* Initialize the %ds segment register */ 51 /* Compute the delta between the address I am compiled to run at and the
42 movl $__KERNEL_DS,%eax 52 * address I am actually running at.
43 movl %eax,%ds
44
45 /* Load new GDT with the 64bit segments using 32bit descriptor */
46 lgdt pGDT32 - __START_KERNEL_map
47
48 /* If the CPU doesn't support CPUID this will double fault.
49 * Unfortunately it is hard to check for CPUID without a stack.
50 */ 53 */
51 54 leaq _text(%rip), %rbp
52 /* Check if extended functions are implemented */ 55 subq $_text - __START_KERNEL_map, %rbp
53 movl $0x80000000, %eax 56
54 cpuid 57 /* Is the address not 2M aligned? */
55 cmpl $0x80000000, %eax 58 movq %rbp, %rax
56 jbe no_long_mode 59 andl $~LARGE_PAGE_MASK, %eax
57 /* Check if long mode is implemented */ 60 testl %eax, %eax
58 mov $0x80000001, %eax 61 jnz bad_address
59 cpuid 62
60 btl $29, %edx 63 /* Is the address too large? */
61 jnc no_long_mode 64 leaq _text(%rip), %rdx
62 65 movq $PGDIR_SIZE, %rax
63 /* 66 cmpq %rax, %rdx
64 * Prepare for entering 64bits mode 67 jae bad_address
68
69 /* Fixup the physical addresses in the page table
65 */ 70 */
71 addq %rbp, init_level4_pgt + 0(%rip)
72 addq %rbp, init_level4_pgt + (258*8)(%rip)
73 addq %rbp, init_level4_pgt + (511*8)(%rip)
74
75 addq %rbp, level3_ident_pgt + 0(%rip)
76 addq %rbp, level3_kernel_pgt + (510*8)(%rip)
77
78 /* Add an Identity mapping if I am above 1G */
79 leaq _text(%rip), %rdi
80 andq $LARGE_PAGE_MASK, %rdi
81
82 movq %rdi, %rax
83 shrq $PUD_SHIFT, %rax
84 andq $(PTRS_PER_PUD - 1), %rax
85 jz ident_complete
86
87 leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
88 leaq level3_ident_pgt(%rip), %rbx
89 movq %rdx, 0(%rbx, %rax, 8)
90
91 movq %rdi, %rax
92 shrq $PMD_SHIFT, %rax
93 andq $(PTRS_PER_PMD - 1), %rax
94 leaq __PAGE_KERNEL_LARGE_EXEC(%rdi), %rdx
95 leaq level2_spare_pgt(%rip), %rbx
96 movq %rdx, 0(%rbx, %rax, 8)
97ident_complete:
98
99 /* Fixup the kernel text+data virtual addresses
100 */
101 leaq level2_kernel_pgt(%rip), %rdi
102 leaq 4096(%rdi), %r8
103 /* See if it is a valid page table entry */
1041: testq $1, 0(%rdi)
105 jz 2f
106 addq %rbp, 0(%rdi)
107 /* Go to the next page */
1082: addq $8, %rdi
109 cmp %r8, %rdi
110 jne 1b
111
112 /* Fixup phys_base */
113 addq %rbp, phys_base(%rip)
66 114
67 /* Enable PAE mode */ 115#ifdef CONFIG_SMP
68 xorl %eax, %eax 116 addq %rbp, trampoline_level4_pgt + 0(%rip)
69 btsl $5, %eax 117 addq %rbp, trampoline_level4_pgt + (511*8)(%rip)
70 movl %eax, %cr4 118#endif
71 119#ifdef CONFIG_ACPI_SLEEP
72 /* Setup early boot stage 4 level pagetables */ 120 addq %rbp, wakeup_level4_pgt + 0(%rip)
73 movl $(boot_level4_pgt - __START_KERNEL_map), %eax 121 addq %rbp, wakeup_level4_pgt + (511*8)(%rip)
74 movl %eax, %cr3 122#endif
75
76 /* Setup EFER (Extended Feature Enable Register) */
77 movl $MSR_EFER, %ecx
78 rdmsr
79
80 /* Enable Long Mode */
81 btsl $_EFER_LME, %eax
82
83 /* Make changes effective */
84 wrmsr
85 123
86 xorl %eax, %eax 124 /* Due to ENTRY(), sometimes the empty space gets filled with
87 btsl $31, %eax /* Enable paging and in turn activate Long Mode */ 125 * zeros. Better take a jmp than relying on empty space being
88 btsl $0, %eax /* Enable protected mode */ 126 * filled with 0x90 (nop)
89 /* Make changes effective */
90 movl %eax, %cr0
91 /*
92 * At this point we're in long mode but in 32bit compatibility mode
93 * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
94 * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use
95 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
96 */ 127 */
97 ljmp $__KERNEL_CS, $(startup_64 - __START_KERNEL_map) 128 jmp secondary_startup_64
98 129ENTRY(secondary_startup_64)
99 .code64 130 /*
100 .org 0x100 131 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
101 .globl startup_64 132 * and someone has loaded a mapped page table.
102startup_64: 133 *
103 /* We come here either from startup_32 134 * %esi holds a physical pointer to real_mode_data.
104 * or directly from a 64bit bootloader. 135 *
105 * Since we may have come directly from a bootloader we 136 * We come here either from startup_64 (using physical addresses)
106 * reload the page tables here. 137 * or from trampoline.S (using virtual addresses).
138 *
139 * Using virtual addresses from trampoline.S removes the need
140 * to have any identity mapped pages in the kernel page table
141 * after the boot processor executes this code.
107 */ 142 */
108 143
109 /* Enable PAE mode and PGE */ 144 /* Enable PAE mode and PGE */
@@ -113,9 +148,15 @@ startup_64:
113 movq %rax, %cr4 148 movq %rax, %cr4
114 149
115 /* Setup early boot stage 4 level pagetables. */ 150 /* Setup early boot stage 4 level pagetables. */
116 movq $(boot_level4_pgt - __START_KERNEL_map), %rax 151 movq $(init_level4_pgt - __START_KERNEL_map), %rax
152 addq phys_base(%rip), %rax
117 movq %rax, %cr3 153 movq %rax, %cr3
118 154
155 /* Ensure I am executing from virtual addresses */
156 movq $1f, %rax
157 jmp *%rax
1581:
159
119 /* Check if nx is implemented */ 160 /* Check if nx is implemented */
120 movl $0x80000001, %eax 161 movl $0x80000001, %eax
121 cpuid 162 cpuid
@@ -124,17 +165,11 @@ startup_64:
124 /* Setup EFER (Extended Feature Enable Register) */ 165 /* Setup EFER (Extended Feature Enable Register) */
125 movl $MSR_EFER, %ecx 166 movl $MSR_EFER, %ecx
126 rdmsr 167 rdmsr
127 168 btsl $_EFER_SCE, %eax /* Enable System Call */
128 /* Enable System Call */ 169 btl $20,%edi /* No Execute supported? */
129 btsl $_EFER_SCE, %eax
130
131 /* No Execute supported? */
132 btl $20,%edi
133 jnc 1f 170 jnc 1f
134 btsl $_EFER_NX, %eax 171 btsl $_EFER_NX, %eax
1351: 1721: wrmsr /* Make changes effective */
136 /* Make changes effective */
137 wrmsr
138 173
139 /* Setup cr0 */ 174 /* Setup cr0 */
140#define CR0_PM 1 /* protected mode */ 175#define CR0_PM 1 /* protected mode */
@@ -161,7 +196,7 @@ startup_64:
161 * addresses where we're currently running on. We have to do that here 196 * addresses where we're currently running on. We have to do that here
162 * because in 32bit we couldn't load a 64bit linear address. 197 * because in 32bit we couldn't load a 64bit linear address.
163 */ 198 */
164 lgdt cpu_gdt_descr 199 lgdt cpu_gdt_descr(%rip)
165 200
166 /* set up data segments. actually 0 would do too */ 201 /* set up data segments. actually 0 would do too */
167 movl $__KERNEL_DS,%eax 202 movl $__KERNEL_DS,%eax
@@ -212,6 +247,9 @@ initial_code:
212init_rsp: 247init_rsp:
213 .quad init_thread_union+THREAD_SIZE-8 248 .quad init_thread_union+THREAD_SIZE-8
214 249
250bad_address:
251 jmp bad_address
252
215ENTRY(early_idt_handler) 253ENTRY(early_idt_handler)
216 cmpl $2,early_recursion_flag(%rip) 254 cmpl $2,early_recursion_flag(%rip)
217 jz 1f 255 jz 1f
@@ -240,110 +278,66 @@ early_idt_msg:
240early_idt_ripmsg: 278early_idt_ripmsg:
241 .asciz "RIP %s\n" 279 .asciz "RIP %s\n"
242 280
243.code32 281.balign PAGE_SIZE
244ENTRY(no_long_mode)
245 /* This isn't an x86-64 CPU so hang */
2461:
247 jmp 1b
248
249.org 0xf00
250 .globl pGDT32
251pGDT32:
252 .word gdt_end-cpu_gdt_table-1
253 .long cpu_gdt_table-__START_KERNEL_map
254
255.org 0xf10
256ljumpvector:
257 .long startup_64-__START_KERNEL_map
258 .word __KERNEL_CS
259 282
260ENTRY(stext)
261ENTRY(_stext)
262
263 $page = 0
264#define NEXT_PAGE(name) \ 283#define NEXT_PAGE(name) \
265 $page = $page + 1; \ 284 .balign PAGE_SIZE; \
266 .org $page * 0x1000; \
267 phys_/**/name = $page * 0x1000 + __PHYSICAL_START; \
268ENTRY(name) 285ENTRY(name)
269 286
287/* Automate the creation of 1 to 1 mapping pmd entries */
288#define PMDS(START, PERM, COUNT) \
289 i = 0 ; \
290 .rept (COUNT) ; \
291 .quad (START) + (i << 21) + (PERM) ; \
292 i = i + 1 ; \
293 .endr
294
295 /*
296 * This default setting generates an ident mapping at address 0x100000
297 * and a mapping for the kernel that precisely maps virtual address
298 * 0xffffffff80000000 to physical address 0x000000. (always using
299 * 2Mbyte large pages provided by PAE mode)
300 */
270NEXT_PAGE(init_level4_pgt) 301NEXT_PAGE(init_level4_pgt)
271 /* This gets initialized in x86_64_start_kernel */ 302 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
272 .fill 512,8,0 303 .fill 257,8,0
304 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
305 .fill 252,8,0
306 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
307 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
273 308
274NEXT_PAGE(level3_ident_pgt) 309NEXT_PAGE(level3_ident_pgt)
275 .quad phys_level2_ident_pgt | 0x007 310 .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
276 .fill 511,8,0 311 .fill 511,8,0
277 312
278NEXT_PAGE(level3_kernel_pgt) 313NEXT_PAGE(level3_kernel_pgt)
279 .fill 510,8,0 314 .fill 510,8,0
280 /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ 315 /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
281 .quad phys_level2_kernel_pgt | 0x007 316 .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
282 .fill 1,8,0 317 .fill 1,8,0
283 318
284NEXT_PAGE(level2_ident_pgt) 319NEXT_PAGE(level2_ident_pgt)
285 /* 40MB for bootup. */ 320 /* Since I easily can, map the first 1G.
286 i = 0 321 * Don't set NX because code runs from these pages.
287 .rept 20 322 */
288 .quad i << 21 | 0x083 323 PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
289 i = i + 1 324
290 .endr
291 /* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */
292 .globl temp_boot_pmds
293temp_boot_pmds:
294 .fill 492,8,0
295
296NEXT_PAGE(level2_kernel_pgt) 325NEXT_PAGE(level2_kernel_pgt)
297 /* 40MB kernel mapping. The kernel code cannot be bigger than that. 326 /* 40MB kernel mapping. The kernel code cannot be bigger than that.
298 When you change this change KERNEL_TEXT_SIZE in page.h too. */ 327 When you change this change KERNEL_TEXT_SIZE in page.h too. */
299 /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */ 328 /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
300 i = 0 329 PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL,
301 .rept 20 330 KERNEL_TEXT_SIZE/PMD_SIZE)
302 .quad i << 21 | 0x183
303 i = i + 1
304 .endr
305 /* Module mapping starts here */ 331 /* Module mapping starts here */
306 .fill 492,8,0 332 .fill (PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0
307 333
308NEXT_PAGE(level3_physmem_pgt) 334NEXT_PAGE(level2_spare_pgt)
309 .quad phys_level2_kernel_pgt | 0x007 /* so that __va works even before pagetable_init */ 335 .fill 512,8,0
310 .fill 511,8,0
311 336
337#undef PMDS
312#undef NEXT_PAGE 338#undef NEXT_PAGE
313 339
314 .data 340 .data
315
316#ifdef CONFIG_ACPI_SLEEP
317 .align PAGE_SIZE
318ENTRY(wakeup_level4_pgt)
319 .quad phys_level3_ident_pgt | 0x007
320 .fill 255,8,0
321 .quad phys_level3_physmem_pgt | 0x007
322 .fill 254,8,0
323 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
324 .quad phys_level3_kernel_pgt | 0x007
325#endif
326
327#ifndef CONFIG_HOTPLUG_CPU
328 __INITDATA
329#endif
330 /*
331 * This default setting generates an ident mapping at address 0x100000
332 * and a mapping for the kernel that precisely maps virtual address
333 * 0xffffffff80000000 to physical address 0x000000. (always using
334 * 2Mbyte large pages provided by PAE mode)
335 */
336 .align PAGE_SIZE
337ENTRY(boot_level4_pgt)
338 .quad phys_level3_ident_pgt | 0x007
339 .fill 255,8,0
340 .quad phys_level3_physmem_pgt | 0x007
341 .fill 254,8,0
342 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
343 .quad phys_level3_kernel_pgt | 0x007
344
345 .data
346
347 .align 16 341 .align 16
348 .globl cpu_gdt_descr 342 .globl cpu_gdt_descr
349cpu_gdt_descr: 343cpu_gdt_descr:
@@ -357,6 +351,10 @@ gdt:
357 .endr 351 .endr
358#endif 352#endif
359 353
354ENTRY(phys_base)
355 /* This must match the first entry in level2_kernel_pgt */
356 .quad 0x0000000000000000
357
360/* We need valid kernel segments for data and code in long mode too 358/* We need valid kernel segments for data and code in long mode too
361 * IRET will check the segment types kkeil 2000/10/28 359 * IRET will check the segment types kkeil 2000/10/28
362 * Also sysret mandates a special GDT layout 360 * Also sysret mandates a special GDT layout
@@ -370,13 +368,13 @@ gdt:
370 368
371ENTRY(cpu_gdt_table) 369ENTRY(cpu_gdt_table)
372 .quad 0x0000000000000000 /* NULL descriptor */ 370 .quad 0x0000000000000000 /* NULL descriptor */
371 .quad 0x00cf9b000000ffff /* __KERNEL32_CS */
372 .quad 0x00af9b000000ffff /* __KERNEL_CS */
373 .quad 0x00cf93000000ffff /* __KERNEL_DS */
374 .quad 0x00cffb000000ffff /* __USER32_CS */
375 .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */
376 .quad 0x00affb000000ffff /* __USER_CS */
373 .quad 0x0 /* unused */ 377 .quad 0x0 /* unused */
374 .quad 0x00af9a000000ffff /* __KERNEL_CS */
375 .quad 0x00cf92000000ffff /* __KERNEL_DS */
376 .quad 0x00cffa000000ffff /* __USER32_CS */
377 .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */
378 .quad 0x00affa000000ffff /* __USER_CS */
379 .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
380 .quad 0,0 /* TSS */ 378 .quad 0,0 /* TSS */
381 .quad 0,0 /* LDT */ 379 .quad 0,0 /* LDT */
382 .quad 0,0,0 /* three TLS descriptors */ 380 .quad 0,0,0 /* three TLS descriptors */
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index 5f197b0a330a..213d90e04755 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -18,8 +18,16 @@
18#include <asm/setup.h> 18#include <asm/setup.h>
19#include <asm/desc.h> 19#include <asm/desc.h>
20#include <asm/pgtable.h> 20#include <asm/pgtable.h>
21#include <asm/tlbflush.h>
21#include <asm/sections.h> 22#include <asm/sections.h>
22 23
24static void __init zap_identity_mappings(void)
25{
26 pgd_t *pgd = pgd_offset_k(0UL);
27 pgd_clear(pgd);
28 __flush_tlb();
29}
30
23/* Don't add a printk in there. printk relies on the PDA which is not initialized 31/* Don't add a printk in there. printk relies on the PDA which is not initialized
24 yet. */ 32 yet. */
25static void __init clear_bss(void) 33static void __init clear_bss(void)
@@ -29,25 +37,24 @@ static void __init clear_bss(void)
29} 37}
30 38
31#define NEW_CL_POINTER 0x228 /* Relative to real mode data */ 39#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
32#define OLD_CL_MAGIC_ADDR 0x90020 40#define OLD_CL_MAGIC_ADDR 0x20
33#define OLD_CL_MAGIC 0xA33F 41#define OLD_CL_MAGIC 0xA33F
34#define OLD_CL_BASE_ADDR 0x90000 42#define OLD_CL_OFFSET 0x22
35#define OLD_CL_OFFSET 0x90022
36 43
37static void __init copy_bootdata(char *real_mode_data) 44static void __init copy_bootdata(char *real_mode_data)
38{ 45{
39 int new_data; 46 unsigned long new_data;
40 char * command_line; 47 char * command_line;
41 48
42 memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE); 49 memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE);
43 new_data = *(int *) (x86_boot_params + NEW_CL_POINTER); 50 new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER);
44 if (!new_data) { 51 if (!new_data) {
45 if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) { 52 if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) {
46 return; 53 return;
47 } 54 }
48 new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET; 55 new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET);
49 } 56 }
50 command_line = (char *) ((u64)(new_data)); 57 command_line = __va(new_data);
51 memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); 58 memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
52} 59}
53 60
@@ -55,26 +62,30 @@ void __init x86_64_start_kernel(char * real_mode_data)
55{ 62{
56 int i; 63 int i;
57 64
65 /*
66 * Make sure kernel is aligned to 2MB address. Catching it at compile
67 * time is better. Change your config file and compile the kernel
68 * for a 2MB aligned address (CONFIG_PHYSICAL_START)
69 */
70 BUILD_BUG_ON(CONFIG_PHYSICAL_START & (__KERNEL_ALIGN - 1));
71
58 /* clear bss before set_intr_gate with early_idt_handler */ 72 /* clear bss before set_intr_gate with early_idt_handler */
59 clear_bss(); 73 clear_bss();
60 74
75 /* Make NULL pointers segfault */
76 zap_identity_mappings();
77
61 for (i = 0; i < IDT_ENTRIES; i++) 78 for (i = 0; i < IDT_ENTRIES; i++)
62 set_intr_gate(i, early_idt_handler); 79 set_intr_gate(i, early_idt_handler);
63 asm volatile("lidt %0" :: "m" (idt_descr)); 80 asm volatile("lidt %0" :: "m" (idt_descr));
64 81
65 early_printk("Kernel alive\n"); 82 early_printk("Kernel alive\n");
66 83
67 /*
68 * switch to init_level4_pgt from boot_level4_pgt
69 */
70 memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t));
71 asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
72
73 for (i = 0; i < NR_CPUS; i++) 84 for (i = 0; i < NR_CPUS; i++)
74 cpu_pda(i) = &boot_cpu_pda[i]; 85 cpu_pda(i) = &boot_cpu_pda[i];
75 86
76 pda_init(0); 87 pda_init(0);
77 copy_bootdata(real_mode_data); 88 copy_bootdata(__va(real_mode_data));
78#ifdef CONFIG_SMP 89#ifdef CONFIG_SMP
79 cpu_set(0, cpu_online_map); 90 cpu_set(0, cpu_online_map);
80#endif 91#endif
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index c6a5bc7e8118..2a2df14dab7e 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -907,10 +907,6 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
907 enable_8259A_irq(0); 907 enable_8259A_irq(0);
908} 908}
909 909
910void __init UNEXPECTED_IO_APIC(void)
911{
912}
913
914void __apicdebuginit print_IO_APIC(void) 910void __apicdebuginit print_IO_APIC(void)
915{ 911{
916 int apic, i; 912 int apic, i;
@@ -946,40 +942,16 @@ void __apicdebuginit print_IO_APIC(void)
946 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); 942 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
947 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); 943 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
948 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 944 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
949 if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
950 UNEXPECTED_IO_APIC();
951 945
952 printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01); 946 printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
953 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); 947 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
954 if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
955 (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
956 (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
957 (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
958 (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
959 (reg_01.bits.entries != 0x2E) &&
960 (reg_01.bits.entries != 0x3F) &&
961 (reg_01.bits.entries != 0x03)
962 )
963 UNEXPECTED_IO_APIC();
964 948
965 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); 949 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
966 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); 950 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
967 if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
968 (reg_01.bits.version != 0x02) && /* 82801BA IO-APICs (ICH2) */
969 (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
970 (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
971 (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
972 (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
973 )
974 UNEXPECTED_IO_APIC();
975 if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
976 UNEXPECTED_IO_APIC();
977 951
978 if (reg_01.bits.version >= 0x10) { 952 if (reg_01.bits.version >= 0x10) {
979 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); 953 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
980 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); 954 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
981 if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
982 UNEXPECTED_IO_APIC();
983 } 955 }
984 956
985 printk(KERN_DEBUG ".... IRQ redirection table:\n"); 957 printk(KERN_DEBUG ".... IRQ redirection table:\n");
@@ -1407,8 +1379,7 @@ static void irq_complete_move(unsigned int irq)
1407 1379
1408 vector = ~get_irq_regs()->orig_rax; 1380 vector = ~get_irq_regs()->orig_rax;
1409 me = smp_processor_id(); 1381 me = smp_processor_id();
1410 if ((vector == cfg->vector) && 1382 if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
1411 cpu_isset(smp_processor_id(), cfg->domain)) {
1412 cpumask_t cleanup_mask; 1383 cpumask_t cleanup_mask;
1413 1384
1414 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 1385 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
@@ -1983,18 +1954,18 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
1983 if (irq < 0) 1954 if (irq < 0)
1984 return irq; 1955 return irq;
1985 1956
1986 set_irq_msi(irq, desc);
1987 ret = msi_compose_msg(dev, irq, &msg); 1957 ret = msi_compose_msg(dev, irq, &msg);
1988 if (ret < 0) { 1958 if (ret < 0) {
1989 destroy_irq(irq); 1959 destroy_irq(irq);
1990 return ret; 1960 return ret;
1991 } 1961 }
1992 1962
1963 set_irq_msi(irq, desc);
1993 write_msi_msg(irq, &msg); 1964 write_msi_msg(irq, &msg);
1994 1965
1995 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 1966 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
1996 1967
1997 return irq; 1968 return 0;
1998} 1969}
1999 1970
2000void arch_teardown_msi_irq(unsigned int irq) 1971void arch_teardown_msi_irq(unsigned int irq)
diff --git a/arch/x86_64/kernel/ioport.c b/arch/x86_64/kernel/ioport.c
index 745b1f0f494e..387d347b0e07 100644
--- a/arch/x86_64/kernel/ioport.c
+++ b/arch/x86_64/kernel/ioport.c
@@ -16,6 +16,7 @@
16#include <linux/stddef.h> 16#include <linux/stddef.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/thread_info.h> 18#include <linux/thread_info.h>
19#include <linux/syscalls.h>
19 20
20/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ 21/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
21static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) 22static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c
index 0497e3bd5bff..c3a554703672 100644
--- a/arch/x86_64/kernel/machine_kexec.c
+++ b/arch/x86_64/kernel/machine_kexec.c
@@ -189,21 +189,21 @@ NORET_TYPE void machine_kexec(struct kimage *image)
189 control_page = page_address(image->control_code_page) + PAGE_SIZE; 189 control_page = page_address(image->control_code_page) + PAGE_SIZE;
190 memcpy(control_page, relocate_kernel, PAGE_SIZE); 190 memcpy(control_page, relocate_kernel, PAGE_SIZE);
191 191
192 page_list[PA_CONTROL_PAGE] = __pa(control_page); 192 page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
193 page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; 193 page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
194 page_list[PA_PGD] = __pa(kexec_pgd); 194 page_list[PA_PGD] = virt_to_phys(&kexec_pgd);
195 page_list[VA_PGD] = (unsigned long)kexec_pgd; 195 page_list[VA_PGD] = (unsigned long)kexec_pgd;
196 page_list[PA_PUD_0] = __pa(kexec_pud0); 196 page_list[PA_PUD_0] = virt_to_phys(&kexec_pud0);
197 page_list[VA_PUD_0] = (unsigned long)kexec_pud0; 197 page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
198 page_list[PA_PMD_0] = __pa(kexec_pmd0); 198 page_list[PA_PMD_0] = virt_to_phys(&kexec_pmd0);
199 page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; 199 page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
200 page_list[PA_PTE_0] = __pa(kexec_pte0); 200 page_list[PA_PTE_0] = virt_to_phys(&kexec_pte0);
201 page_list[VA_PTE_0] = (unsigned long)kexec_pte0; 201 page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
202 page_list[PA_PUD_1] = __pa(kexec_pud1); 202 page_list[PA_PUD_1] = virt_to_phys(&kexec_pud1);
203 page_list[VA_PUD_1] = (unsigned long)kexec_pud1; 203 page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
204 page_list[PA_PMD_1] = __pa(kexec_pmd1); 204 page_list[PA_PMD_1] = virt_to_phys(&kexec_pmd1);
205 page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; 205 page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
206 page_list[PA_PTE_1] = __pa(kexec_pte1); 206 page_list[PA_PTE_1] = virt_to_phys(&kexec_pte1);
207 page_list[VA_PTE_1] = (unsigned long)kexec_pte1; 207 page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
208 208
209 page_list[PA_TABLE_PAGE] = 209 page_list[PA_TABLE_PAGE] =
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 8011a8e1c7d4..fa2672682477 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -323,10 +323,13 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
323#endif /* CONFIG_X86_MCE_INTEL */ 323#endif /* CONFIG_X86_MCE_INTEL */
324 324
325/* 325/*
326 * Periodic polling timer for "silent" machine check errors. 326 * Periodic polling timer for "silent" machine check errors. If the
327 * poller finds an MCE, poll 2x faster. When the poller finds no more
328 * errors, poll 2x slower (up to check_interval seconds).
327 */ 329 */
328 330
329static int check_interval = 5 * 60; /* 5 minutes */ 331static int check_interval = 5 * 60; /* 5 minutes */
332static int next_interval; /* in jiffies */
330static void mcheck_timer(struct work_struct *work); 333static void mcheck_timer(struct work_struct *work);
331static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer); 334static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);
332 335
@@ -339,7 +342,6 @@ static void mcheck_check_cpu(void *info)
339static void mcheck_timer(struct work_struct *work) 342static void mcheck_timer(struct work_struct *work)
340{ 343{
341 on_each_cpu(mcheck_check_cpu, NULL, 1, 1); 344 on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
342 schedule_delayed_work(&mcheck_work, check_interval * HZ);
343 345
344 /* 346 /*
345 * It's ok to read stale data here for notify_user and 347 * It's ok to read stale data here for notify_user and
@@ -349,17 +351,30 @@ static void mcheck_timer(struct work_struct *work)
349 * writes. 351 * writes.
350 */ 352 */
351 if (notify_user && console_logged) { 353 if (notify_user && console_logged) {
354 static unsigned long last_print;
355 unsigned long now = jiffies;
356
357 /* if we logged an MCE, reduce the polling interval */
358 next_interval = max(next_interval/2, HZ/100);
352 notify_user = 0; 359 notify_user = 0;
353 clear_bit(0, &console_logged); 360 clear_bit(0, &console_logged);
354 printk(KERN_INFO "Machine check events logged\n"); 361 if (time_after_eq(now, last_print + (check_interval*HZ))) {
362 last_print = now;
363 printk(KERN_INFO "Machine check events logged\n");
364 }
365 } else {
366 next_interval = min(next_interval*2, check_interval*HZ);
355 } 367 }
368
369 schedule_delayed_work(&mcheck_work, next_interval);
356} 370}
357 371
358 372
359static __init int periodic_mcheck_init(void) 373static __init int periodic_mcheck_init(void)
360{ 374{
361 if (check_interval) 375 next_interval = check_interval * HZ;
362 schedule_delayed_work(&mcheck_work, check_interval*HZ); 376 if (next_interval)
377 schedule_delayed_work(&mcheck_work, next_interval);
363 return 0; 378 return 0;
364} 379}
365__initcall(periodic_mcheck_init); 380__initcall(periodic_mcheck_init);
@@ -597,12 +612,13 @@ static int mce_resume(struct sys_device *dev)
597/* Reinit MCEs after user configuration changes */ 612/* Reinit MCEs after user configuration changes */
598static void mce_restart(void) 613static void mce_restart(void)
599{ 614{
600 if (check_interval) 615 if (next_interval)
601 cancel_delayed_work(&mcheck_work); 616 cancel_delayed_work(&mcheck_work);
602 /* Timer race is harmless here */ 617 /* Timer race is harmless here */
603 on_each_cpu(mce_init, NULL, 1, 1); 618 on_each_cpu(mce_init, NULL, 1, 1);
604 if (check_interval) 619 next_interval = check_interval * HZ;
605 schedule_delayed_work(&mcheck_work, check_interval*HZ); 620 if (next_interval)
621 schedule_delayed_work(&mcheck_work, next_interval);
606} 622}
607 623
608static struct sysdev_class mce_sysclass = { 624static struct sysdev_class mce_sysclass = {
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 455aa0b932f0..d0dc4891599b 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -300,7 +300,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
300 } 300 }
301 } 301 }
302 } 302 }
303 clustered_apic_check(); 303 setup_apic_routing();
304 if (!num_processors) 304 if (!num_processors)
305 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 305 printk(KERN_ERR "MPTABLE: no processors registered!\n");
306 return num_processors; 306 return num_processors;
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index dfab9f167366..6cd2b30e2ffc 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -27,28 +27,11 @@
27#include <asm/proto.h> 27#include <asm/proto.h>
28#include <asm/kdebug.h> 28#include <asm/kdebug.h>
29#include <asm/mce.h> 29#include <asm/mce.h>
30#include <asm/intel_arch_perfmon.h>
31 30
32int unknown_nmi_panic; 31int unknown_nmi_panic;
33int nmi_watchdog_enabled; 32int nmi_watchdog_enabled;
34int panic_on_unrecovered_nmi; 33int panic_on_unrecovered_nmi;
35 34
36/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
37 * evtsel_nmi_owner tracks the ownership of the event selection
38 * - different performance counters/ event selection may be reserved for
39 * different subsystems this reservation system just tries to coordinate
40 * things a little
41 */
42
43/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
44 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
45 */
46#define NMI_MAX_COUNTER_BITS 66
47#define NMI_MAX_COUNTER_LONGS BITS_TO_LONGS(NMI_MAX_COUNTER_BITS)
48
49static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner[NMI_MAX_COUNTER_LONGS]);
50static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[NMI_MAX_COUNTER_LONGS]);
51
52static cpumask_t backtrace_mask = CPU_MASK_NONE; 35static cpumask_t backtrace_mask = CPU_MASK_NONE;
53 36
54/* nmi_active: 37/* nmi_active:
@@ -63,191 +46,11 @@ int panic_on_timeout;
63unsigned int nmi_watchdog = NMI_DEFAULT; 46unsigned int nmi_watchdog = NMI_DEFAULT;
64static unsigned int nmi_hz = HZ; 47static unsigned int nmi_hz = HZ;
65 48
66struct nmi_watchdog_ctlblk { 49static DEFINE_PER_CPU(short, wd_enabled);
67 int enabled;
68 u64 check_bit;
69 unsigned int cccr_msr;
70 unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
71 unsigned int evntsel_msr; /* the MSR to select the events to handle */
72};
73static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
74 50
75/* local prototypes */ 51/* local prototypes */
76static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); 52static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
77 53
78/* converts an msr to an appropriate reservation bit */
79static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
80{
81 /* returns the bit offset of the performance counter register */
82 switch (boot_cpu_data.x86_vendor) {
83 case X86_VENDOR_AMD:
84 return (msr - MSR_K7_PERFCTR0);
85 case X86_VENDOR_INTEL:
86 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
87 return (msr - MSR_ARCH_PERFMON_PERFCTR0);
88 else
89 return (msr - MSR_P4_BPU_PERFCTR0);
90 }
91 return 0;
92}
93
94/* converts an msr to an appropriate reservation bit */
95static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
96{
97 /* returns the bit offset of the event selection register */
98 switch (boot_cpu_data.x86_vendor) {
99 case X86_VENDOR_AMD:
100 return (msr - MSR_K7_EVNTSEL0);
101 case X86_VENDOR_INTEL:
102 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
103 return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
104 else
105 return (msr - MSR_P4_BSU_ESCR0);
106 }
107 return 0;
108}
109
110/* checks for a bit availability (hack for oprofile) */
111int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
112{
113 int cpu;
114 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
115 for_each_possible_cpu (cpu) {
116 if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
117 return 0;
118 }
119 return 1;
120}
121
122/* checks the an msr for availability */
123int avail_to_resrv_perfctr_nmi(unsigned int msr)
124{
125 unsigned int counter;
126 int cpu;
127
128 counter = nmi_perfctr_msr_to_bit(msr);
129 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
130
131 for_each_possible_cpu (cpu) {
132 if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
133 return 0;
134 }
135 return 1;
136}
137
138static int __reserve_perfctr_nmi(int cpu, unsigned int msr)
139{
140 unsigned int counter;
141 if (cpu < 0)
142 cpu = smp_processor_id();
143
144 counter = nmi_perfctr_msr_to_bit(msr);
145 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
146
147 if (!test_and_set_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)))
148 return 1;
149 return 0;
150}
151
152static void __release_perfctr_nmi(int cpu, unsigned int msr)
153{
154 unsigned int counter;
155 if (cpu < 0)
156 cpu = smp_processor_id();
157
158 counter = nmi_perfctr_msr_to_bit(msr);
159 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
160
161 clear_bit(counter, &per_cpu(perfctr_nmi_owner, cpu));
162}
163
164int reserve_perfctr_nmi(unsigned int msr)
165{
166 int cpu, i;
167 for_each_possible_cpu (cpu) {
168 if (!__reserve_perfctr_nmi(cpu, msr)) {
169 for_each_possible_cpu (i) {
170 if (i >= cpu)
171 break;
172 __release_perfctr_nmi(i, msr);
173 }
174 return 0;
175 }
176 }
177 return 1;
178}
179
180void release_perfctr_nmi(unsigned int msr)
181{
182 int cpu;
183 for_each_possible_cpu (cpu)
184 __release_perfctr_nmi(cpu, msr);
185}
186
187int __reserve_evntsel_nmi(int cpu, unsigned int msr)
188{
189 unsigned int counter;
190 if (cpu < 0)
191 cpu = smp_processor_id();
192
193 counter = nmi_evntsel_msr_to_bit(msr);
194 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
195
196 if (!test_and_set_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]))
197 return 1;
198 return 0;
199}
200
201static void __release_evntsel_nmi(int cpu, unsigned int msr)
202{
203 unsigned int counter;
204 if (cpu < 0)
205 cpu = smp_processor_id();
206
207 counter = nmi_evntsel_msr_to_bit(msr);
208 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
209
210 clear_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]);
211}
212
213int reserve_evntsel_nmi(unsigned int msr)
214{
215 int cpu, i;
216 for_each_possible_cpu (cpu) {
217 if (!__reserve_evntsel_nmi(cpu, msr)) {
218 for_each_possible_cpu (i) {
219 if (i >= cpu)
220 break;
221 __release_evntsel_nmi(i, msr);
222 }
223 return 0;
224 }
225 }
226 return 1;
227}
228
229void release_evntsel_nmi(unsigned int msr)
230{
231 int cpu;
232 for_each_possible_cpu (cpu) {
233 __release_evntsel_nmi(cpu, msr);
234 }
235}
236
237static __cpuinit inline int nmi_known_cpu(void)
238{
239 switch (boot_cpu_data.x86_vendor) {
240 case X86_VENDOR_AMD:
241 return boot_cpu_data.x86 == 15 || boot_cpu_data.x86 == 16;
242 case X86_VENDOR_INTEL:
243 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
244 return 1;
245 else
246 return (boot_cpu_data.x86 == 15);
247 }
248 return 0;
249}
250
251/* Run after command line and cpu_init init, but before all other checks */ 54/* Run after command line and cpu_init init, but before all other checks */
252void nmi_watchdog_default(void) 55void nmi_watchdog_default(void)
253{ 56{
@@ -277,23 +80,6 @@ static __init void nmi_cpu_busy(void *data)
277} 80}
278#endif 81#endif
279 82
280static unsigned int adjust_for_32bit_ctr(unsigned int hz)
281{
282 unsigned int retval = hz;
283
284 /*
285 * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
286 * are writable, with higher bits sign extending from bit 31.
287 * So, we can only program the counter with 31 bit values and
288 * 32nd bit should be 1, for 33.. to be 1.
289 * Find the appropriate nmi_hz
290 */
291 if ((((u64)cpu_khz * 1000) / retval) > 0x7fffffffULL) {
292 retval = ((u64)cpu_khz * 1000) / 0x7fffffffUL + 1;
293 }
294 return retval;
295}
296
297int __init check_nmi_watchdog (void) 83int __init check_nmi_watchdog (void)
298{ 84{
299 int *counts; 85 int *counts;
@@ -322,14 +108,14 @@ int __init check_nmi_watchdog (void)
322 mdelay((20*1000)/nmi_hz); // wait 20 ticks 108 mdelay((20*1000)/nmi_hz); // wait 20 ticks
323 109
324 for_each_online_cpu(cpu) { 110 for_each_online_cpu(cpu) {
325 if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled) 111 if (!per_cpu(wd_enabled, cpu))
326 continue; 112 continue;
327 if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) { 113 if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) {
328 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", 114 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
329 cpu, 115 cpu,
330 counts[cpu], 116 counts[cpu],
331 cpu_pda(cpu)->__nmi_count); 117 cpu_pda(cpu)->__nmi_count);
332 per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0; 118 per_cpu(wd_enabled, cpu) = 0;
333 atomic_dec(&nmi_active); 119 atomic_dec(&nmi_active);
334 } 120 }
335 } 121 }
@@ -344,13 +130,8 @@ int __init check_nmi_watchdog (void)
344 130
345 /* now that we know it works we can reduce NMI frequency to 131 /* now that we know it works we can reduce NMI frequency to
346 something more reasonable; makes a difference in some configs */ 132 something more reasonable; makes a difference in some configs */
347 if (nmi_watchdog == NMI_LOCAL_APIC) { 133 if (nmi_watchdog == NMI_LOCAL_APIC)
348 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 134 nmi_hz = lapic_adjust_nmi_hz(1);
349
350 nmi_hz = 1;
351 if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0)
352 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
353 }
354 135
355 kfree(counts); 136 kfree(counts);
356 return 0; 137 return 0;
@@ -379,57 +160,6 @@ int __init setup_nmi_watchdog(char *str)
379 160
380__setup("nmi_watchdog=", setup_nmi_watchdog); 161__setup("nmi_watchdog=", setup_nmi_watchdog);
381 162
382static void disable_lapic_nmi_watchdog(void)
383{
384 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
385
386 if (atomic_read(&nmi_active) <= 0)
387 return;
388
389 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
390
391 BUG_ON(atomic_read(&nmi_active) != 0);
392}
393
394static void enable_lapic_nmi_watchdog(void)
395{
396 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
397
398 /* are we already enabled */
399 if (atomic_read(&nmi_active) != 0)
400 return;
401
402 /* are we lapic aware */
403 if (nmi_known_cpu() <= 0)
404 return;
405
406 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
407 touch_nmi_watchdog();
408}
409
410void disable_timer_nmi_watchdog(void)
411{
412 BUG_ON(nmi_watchdog != NMI_IO_APIC);
413
414 if (atomic_read(&nmi_active) <= 0)
415 return;
416
417 disable_irq(0);
418 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
419
420 BUG_ON(atomic_read(&nmi_active) != 0);
421}
422
423void enable_timer_nmi_watchdog(void)
424{
425 BUG_ON(nmi_watchdog != NMI_IO_APIC);
426
427 if (atomic_read(&nmi_active) == 0) {
428 touch_nmi_watchdog();
429 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
430 enable_irq(0);
431 }
432}
433 163
434static void __acpi_nmi_disable(void *__unused) 164static void __acpi_nmi_disable(void *__unused)
435{ 165{
@@ -515,275 +245,9 @@ late_initcall(init_lapic_nmi_sysfs);
515 245
516#endif /* CONFIG_PM */ 246#endif /* CONFIG_PM */
517 247
518/*
519 * Activate the NMI watchdog via the local APIC.
520 * Original code written by Keith Owens.
521 */
522
523/* Note that these events don't tick when the CPU idles. This means
524 the frequency varies with CPU load. */
525
526#define K7_EVNTSEL_ENABLE (1 << 22)
527#define K7_EVNTSEL_INT (1 << 20)
528#define K7_EVNTSEL_OS (1 << 17)
529#define K7_EVNTSEL_USR (1 << 16)
530#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
531#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
532
533static int setup_k7_watchdog(void)
534{
535 unsigned int perfctr_msr, evntsel_msr;
536 unsigned int evntsel;
537 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
538
539 perfctr_msr = MSR_K7_PERFCTR0;
540 evntsel_msr = MSR_K7_EVNTSEL0;
541 if (!__reserve_perfctr_nmi(-1, perfctr_msr))
542 goto fail;
543
544 if (!__reserve_evntsel_nmi(-1, evntsel_msr))
545 goto fail1;
546
547 /* Simulator may not support it */
548 if (checking_wrmsrl(evntsel_msr, 0UL))
549 goto fail2;
550 wrmsrl(perfctr_msr, 0UL);
551
552 evntsel = K7_EVNTSEL_INT
553 | K7_EVNTSEL_OS
554 | K7_EVNTSEL_USR
555 | K7_NMI_EVENT;
556
557 /* setup the timer */
558 wrmsr(evntsel_msr, evntsel, 0);
559 wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
560 apic_write(APIC_LVTPC, APIC_DM_NMI);
561 evntsel |= K7_EVNTSEL_ENABLE;
562 wrmsr(evntsel_msr, evntsel, 0);
563
564 wd->perfctr_msr = perfctr_msr;
565 wd->evntsel_msr = evntsel_msr;
566 wd->cccr_msr = 0; //unused
567 wd->check_bit = 1ULL<<63;
568 return 1;
569fail2:
570 __release_evntsel_nmi(-1, evntsel_msr);
571fail1:
572 __release_perfctr_nmi(-1, perfctr_msr);
573fail:
574 return 0;
575}
576
577static void stop_k7_watchdog(void)
578{
579 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
580
581 wrmsr(wd->evntsel_msr, 0, 0);
582
583 __release_evntsel_nmi(-1, wd->evntsel_msr);
584 __release_perfctr_nmi(-1, wd->perfctr_msr);
585}
586
587/* Note that these events don't tick when the CPU idles. This means
588 the frequency varies with CPU load. */
589
590#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
591#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
592#define P4_ESCR_OS (1<<3)
593#define P4_ESCR_USR (1<<2)
594#define P4_CCCR_OVF_PMI0 (1<<26)
595#define P4_CCCR_OVF_PMI1 (1<<27)
596#define P4_CCCR_THRESHOLD(N) ((N)<<20)
597#define P4_CCCR_COMPLEMENT (1<<19)
598#define P4_CCCR_COMPARE (1<<18)
599#define P4_CCCR_REQUIRED (3<<16)
600#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
601#define P4_CCCR_ENABLE (1<<12)
602#define P4_CCCR_OVF (1<<31)
603/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
604 CRU_ESCR0 (with any non-null event selector) through a complemented
605 max threshold. [IA32-Vol3, Section 14.9.9] */
606
607static int setup_p4_watchdog(void)
608{
609 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
610 unsigned int evntsel, cccr_val;
611 unsigned int misc_enable, dummy;
612 unsigned int ht_num;
613 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
614
615 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
616 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
617 return 0;
618
619#ifdef CONFIG_SMP
620 /* detect which hyperthread we are on */
621 if (smp_num_siblings == 2) {
622 unsigned int ebx, apicid;
623
624 ebx = cpuid_ebx(1);
625 apicid = (ebx >> 24) & 0xff;
626 ht_num = apicid & 1;
627 } else
628#endif
629 ht_num = 0;
630
631 /* performance counters are shared resources
632 * assign each hyperthread its own set
633 * (re-use the ESCR0 register, seems safe
634 * and keeps the cccr_val the same)
635 */
636 if (!ht_num) {
637 /* logical cpu 0 */
638 perfctr_msr = MSR_P4_IQ_PERFCTR0;
639 evntsel_msr = MSR_P4_CRU_ESCR0;
640 cccr_msr = MSR_P4_IQ_CCCR0;
641 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
642 } else {
643 /* logical cpu 1 */
644 perfctr_msr = MSR_P4_IQ_PERFCTR1;
645 evntsel_msr = MSR_P4_CRU_ESCR0;
646 cccr_msr = MSR_P4_IQ_CCCR1;
647 cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
648 }
649
650 if (!__reserve_perfctr_nmi(-1, perfctr_msr))
651 goto fail;
652
653 if (!__reserve_evntsel_nmi(-1, evntsel_msr))
654 goto fail1;
655
656 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
657 | P4_ESCR_OS
658 | P4_ESCR_USR;
659
660 cccr_val |= P4_CCCR_THRESHOLD(15)
661 | P4_CCCR_COMPLEMENT
662 | P4_CCCR_COMPARE
663 | P4_CCCR_REQUIRED;
664
665 wrmsr(evntsel_msr, evntsel, 0);
666 wrmsr(cccr_msr, cccr_val, 0);
667 wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
668 apic_write(APIC_LVTPC, APIC_DM_NMI);
669 cccr_val |= P4_CCCR_ENABLE;
670 wrmsr(cccr_msr, cccr_val, 0);
671
672 wd->perfctr_msr = perfctr_msr;
673 wd->evntsel_msr = evntsel_msr;
674 wd->cccr_msr = cccr_msr;
675 wd->check_bit = 1ULL<<39;
676 return 1;
677fail1:
678 __release_perfctr_nmi(-1, perfctr_msr);
679fail:
680 return 0;
681}
682
683static void stop_p4_watchdog(void)
684{
685 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
686
687 wrmsr(wd->cccr_msr, 0, 0);
688 wrmsr(wd->evntsel_msr, 0, 0);
689
690 __release_evntsel_nmi(-1, wd->evntsel_msr);
691 __release_perfctr_nmi(-1, wd->perfctr_msr);
692}
693
694#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
695#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
696
697static int setup_intel_arch_watchdog(void)
698{
699 unsigned int ebx;
700 union cpuid10_eax eax;
701 unsigned int unused;
702 unsigned int perfctr_msr, evntsel_msr;
703 unsigned int evntsel;
704 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
705
706 /*
707 * Check whether the Architectural PerfMon supports
708 * Unhalted Core Cycles Event or not.
709 * NOTE: Corresponding bit = 0 in ebx indicates event present.
710 */
711 cpuid(10, &(eax.full), &ebx, &unused, &unused);
712 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
713 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
714 goto fail;
715
716 perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
717 evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
718
719 if (!__reserve_perfctr_nmi(-1, perfctr_msr))
720 goto fail;
721
722 if (!__reserve_evntsel_nmi(-1, evntsel_msr))
723 goto fail1;
724
725 wrmsrl(perfctr_msr, 0UL);
726
727 evntsel = ARCH_PERFMON_EVENTSEL_INT
728 | ARCH_PERFMON_EVENTSEL_OS
729 | ARCH_PERFMON_EVENTSEL_USR
730 | ARCH_PERFMON_NMI_EVENT_SEL
731 | ARCH_PERFMON_NMI_EVENT_UMASK;
732
733 /* setup the timer */
734 wrmsr(evntsel_msr, evntsel, 0);
735
736 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
737 wrmsr(perfctr_msr, (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0);
738
739 apic_write(APIC_LVTPC, APIC_DM_NMI);
740 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
741 wrmsr(evntsel_msr, evntsel, 0);
742
743 wd->perfctr_msr = perfctr_msr;
744 wd->evntsel_msr = evntsel_msr;
745 wd->cccr_msr = 0; //unused
746 wd->check_bit = 1ULL << (eax.split.bit_width - 1);
747 return 1;
748fail1:
749 __release_perfctr_nmi(-1, perfctr_msr);
750fail:
751 return 0;
752}
753
754static void stop_intel_arch_watchdog(void)
755{
756 unsigned int ebx;
757 union cpuid10_eax eax;
758 unsigned int unused;
759 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
760
761 /*
762 * Check whether the Architectural PerfMon supports
763 * Unhalted Core Cycles Event or not.
764 * NOTE: Corresponding bit = 0 in ebx indicates event present.
765 */
766 cpuid(10, &(eax.full), &ebx, &unused, &unused);
767 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
768 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
769 return;
770
771 wrmsr(wd->evntsel_msr, 0, 0);
772
773 __release_evntsel_nmi(-1, wd->evntsel_msr);
774 __release_perfctr_nmi(-1, wd->perfctr_msr);
775}
776
777void setup_apic_nmi_watchdog(void *unused) 248void setup_apic_nmi_watchdog(void *unused)
778{ 249{
779 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 250 if (__get_cpu_var(wd_enabled) == 1)
780
781 /* only support LOCAL and IO APICs for now */
782 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
783 (nmi_watchdog != NMI_IO_APIC))
784 return;
785
786 if (wd->enabled == 1)
787 return; 251 return;
788 252
789 /* cheap hack to support suspend/resume */ 253 /* cheap hack to support suspend/resume */
@@ -791,62 +255,31 @@ void setup_apic_nmi_watchdog(void *unused)
791 if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) 255 if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
792 return; 256 return;
793 257
794 if (nmi_watchdog == NMI_LOCAL_APIC) { 258 switch (nmi_watchdog) {
795 switch (boot_cpu_data.x86_vendor) { 259 case NMI_LOCAL_APIC:
796 case X86_VENDOR_AMD: 260 __get_cpu_var(wd_enabled) = 1;
797 if (strstr(boot_cpu_data.x86_model_id, "Screwdriver")) 261 if (lapic_watchdog_init(nmi_hz) < 0) {
798 return; 262 __get_cpu_var(wd_enabled) = 0;
799 if (!setup_k7_watchdog())
800 return;
801 break;
802 case X86_VENDOR_INTEL:
803 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
804 if (!setup_intel_arch_watchdog())
805 return;
806 break;
807 }
808 if (!setup_p4_watchdog())
809 return;
810 break;
811 default:
812 return; 263 return;
813 } 264 }
265 /* FALL THROUGH */
266 case NMI_IO_APIC:
267 __get_cpu_var(wd_enabled) = 1;
268 atomic_inc(&nmi_active);
814 } 269 }
815 wd->enabled = 1;
816 atomic_inc(&nmi_active);
817} 270}
818 271
819void stop_apic_nmi_watchdog(void *unused) 272void stop_apic_nmi_watchdog(void *unused)
820{ 273{
821 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
822
823 /* only support LOCAL and IO APICs for now */ 274 /* only support LOCAL and IO APICs for now */
824 if ((nmi_watchdog != NMI_LOCAL_APIC) && 275 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
825 (nmi_watchdog != NMI_IO_APIC)) 276 (nmi_watchdog != NMI_IO_APIC))
826 return; 277 return;
827 278 if (__get_cpu_var(wd_enabled) == 0)
828 if (wd->enabled == 0)
829 return; 279 return;
830 280 if (nmi_watchdog == NMI_LOCAL_APIC)
831 if (nmi_watchdog == NMI_LOCAL_APIC) { 281 lapic_watchdog_stop();
832 switch (boot_cpu_data.x86_vendor) { 282 __get_cpu_var(wd_enabled) = 0;
833 case X86_VENDOR_AMD:
834 if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
835 return;
836 stop_k7_watchdog();
837 break;
838 case X86_VENDOR_INTEL:
839 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
840 stop_intel_arch_watchdog();
841 break;
842 }
843 stop_p4_watchdog();
844 break;
845 default:
846 return;
847 }
848 }
849 wd->enabled = 0;
850 atomic_dec(&nmi_active); 283 atomic_dec(&nmi_active);
851} 284}
852 285
@@ -885,9 +318,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
885 int sum; 318 int sum;
886 int touched = 0; 319 int touched = 0;
887 int cpu = smp_processor_id(); 320 int cpu = smp_processor_id();
888 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 321 int rc = 0;
889 u64 dummy;
890 int rc=0;
891 322
892 /* check for other users first */ 323 /* check for other users first */
893 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) 324 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
@@ -934,55 +365,20 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
934 } 365 }
935 366
936 /* see if the nmi watchdog went off */ 367 /* see if the nmi watchdog went off */
937 if (wd->enabled) { 368 if (!__get_cpu_var(wd_enabled))
938 if (nmi_watchdog == NMI_LOCAL_APIC) { 369 return rc;
939 rdmsrl(wd->perfctr_msr, dummy); 370 switch (nmi_watchdog) {
940 if (dummy & wd->check_bit){ 371 case NMI_LOCAL_APIC:
941 /* this wasn't a watchdog timer interrupt */ 372 rc |= lapic_wd_event(nmi_hz);
942 goto done; 373 break;
943 } 374 case NMI_IO_APIC:
944 375 /* don't know how to accurately check for this.
945 /* only Intel uses the cccr msr */ 376 * just assume it was a watchdog timer interrupt
946 if (wd->cccr_msr != 0) { 377 * This matches the old behaviour.
947 /* 378 */
948 * P4 quirks: 379 rc = 1;
949 * - An overflown perfctr will assert its interrupt 380 break;
950 * until the OVF flag in its CCCR is cleared.
951 * - LVTPC is masked on interrupt and must be
952 * unmasked by the LVTPC handler.
953 */
954 rdmsrl(wd->cccr_msr, dummy);
955 dummy &= ~P4_CCCR_OVF;
956 wrmsrl(wd->cccr_msr, dummy);
957 apic_write(APIC_LVTPC, APIC_DM_NMI);
958 /* start the cycle over again */
959 wrmsrl(wd->perfctr_msr,
960 -((u64)cpu_khz * 1000 / nmi_hz));
961 } else if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
962 /*
963 * ArchPerfom/Core Duo needs to re-unmask
964 * the apic vector
965 */
966 apic_write(APIC_LVTPC, APIC_DM_NMI);
967 /* ARCH_PERFMON has 32 bit counter writes */
968 wrmsr(wd->perfctr_msr,
969 (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0);
970 } else {
971 /* start the cycle over again */
972 wrmsrl(wd->perfctr_msr,
973 -((u64)cpu_khz * 1000 / nmi_hz));
974 }
975 rc = 1;
976 } else if (nmi_watchdog == NMI_IO_APIC) {
977 /* don't know how to accurately check for this.
978 * just assume it was a watchdog timer interrupt
979 * This matches the old behaviour.
980 */
981 rc = 1;
982 } else
983 printk(KERN_WARNING "Unknown enabled NMI hardware?!\n");
984 } 381 }
985done:
986 return rc; 382 return rc;
987} 383}
988 384
@@ -1067,12 +463,4 @@ void __trigger_all_cpu_backtrace(void)
1067 463
1068EXPORT_SYMBOL(nmi_active); 464EXPORT_SYMBOL(nmi_active);
1069EXPORT_SYMBOL(nmi_watchdog); 465EXPORT_SYMBOL(nmi_watchdog);
1070EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
1071EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
1072EXPORT_SYMBOL(reserve_perfctr_nmi);
1073EXPORT_SYMBOL(release_perfctr_nmi);
1074EXPORT_SYMBOL(reserve_evntsel_nmi);
1075EXPORT_SYMBOL(release_evntsel_nmi);
1076EXPORT_SYMBOL(disable_timer_nmi_watchdog);
1077EXPORT_SYMBOL(enable_timer_nmi_watchdog);
1078EXPORT_SYMBOL(touch_nmi_watchdog); 466EXPORT_SYMBOL(touch_nmi_watchdog);
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index 04480c3b68f5..5bd20b542c1e 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -507,7 +507,7 @@ error:
507 return ret; 507 return ret;
508} 508}
509 509
510static struct dma_mapping_ops calgary_dma_ops = { 510static const struct dma_mapping_ops calgary_dma_ops = {
511 .alloc_coherent = calgary_alloc_coherent, 511 .alloc_coherent = calgary_alloc_coherent,
512 .map_single = calgary_map_single, 512 .map_single = calgary_map_single,
513 .unmap_single = calgary_unmap_single, 513 .unmap_single = calgary_unmap_single,
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 0bae862e9a55..0a762e10f2be 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -556,7 +556,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
556 556
557extern int agp_amd64_init(void); 557extern int agp_amd64_init(void);
558 558
559static struct dma_mapping_ops gart_dma_ops = { 559static const struct dma_mapping_ops gart_dma_ops = {
560 .mapping_error = NULL, 560 .mapping_error = NULL,
561 .map_single = gart_map_single, 561 .map_single = gart_map_single,
562 .map_simple = gart_map_simple, 562 .map_simple = gart_map_simple,
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c
index df09ab05a1bd..6dade0c867cc 100644
--- a/arch/x86_64/kernel/pci-nommu.c
+++ b/arch/x86_64/kernel/pci-nommu.c
@@ -79,7 +79,7 @@ void nommu_unmap_sg(struct device *dev, struct scatterlist *sg,
79{ 79{
80} 80}
81 81
82struct dma_mapping_ops nommu_dma_ops = { 82const struct dma_mapping_ops nommu_dma_ops = {
83 .map_single = nommu_map_single, 83 .map_single = nommu_map_single,
84 .unmap_single = nommu_unmap_single, 84 .unmap_single = nommu_unmap_single,
85 .map_sg = nommu_map_sg, 85 .map_sg = nommu_map_sg,
diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c
index eb18be5a6569..4b4569abc60c 100644
--- a/arch/x86_64/kernel/pci-swiotlb.c
+++ b/arch/x86_64/kernel/pci-swiotlb.c
@@ -12,7 +12,7 @@
12int swiotlb __read_mostly; 12int swiotlb __read_mostly;
13EXPORT_SYMBOL(swiotlb); 13EXPORT_SYMBOL(swiotlb);
14 14
15struct dma_mapping_ops swiotlb_dma_ops = { 15const struct dma_mapping_ops swiotlb_dma_ops = {
16 .mapping_error = swiotlb_dma_mapping_error, 16 .mapping_error = swiotlb_dma_mapping_error,
17 .alloc_coherent = swiotlb_alloc_coherent, 17 .alloc_coherent = swiotlb_alloc_coherent,
18 .free_coherent = swiotlb_free_coherent, 18 .free_coherent = swiotlb_free_coherent,
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index d8d5ccc245c8..4f21765078b7 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -288,16 +288,18 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
288 288
289static int __init idle_setup (char *str) 289static int __init idle_setup (char *str)
290{ 290{
291 if (!strncmp(str, "poll", 4)) { 291 if (!strcmp(str, "poll")) {
292 printk("using polling idle threads.\n"); 292 printk("using polling idle threads.\n");
293 pm_idle = poll_idle; 293 pm_idle = poll_idle;
294 } 294 } else if (!strcmp(str, "mwait"))
295 force_mwait = 1;
296 else
297 return -1;
295 298
296 boot_option_idle_override = 1; 299 boot_option_idle_override = 1;
297 return 1; 300 return 0;
298} 301}
299 302early_param("idle", idle_setup);
300__setup("idle=", idle_setup);
301 303
302/* Prints also some state that isn't saved in the pt_regs */ 304/* Prints also some state that isn't saved in the pt_regs */
303void __show_regs(struct pt_regs * regs) 305void __show_regs(struct pt_regs * regs)
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 3d98b696881d..db51577bda32 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -79,6 +79,8 @@ int bootloader_type;
79 79
80unsigned long saved_video_mode; 80unsigned long saved_video_mode;
81 81
82int force_mwait __cpuinitdata;
83
82/* 84/*
83 * Early DMI memory 85 * Early DMI memory
84 */ 86 */
@@ -205,10 +207,10 @@ static void discover_ebda(void)
205 * there is a real-mode segmented pointer pointing to the 207 * there is a real-mode segmented pointer pointing to the
206 * 4K EBDA area at 0x40E 208 * 4K EBDA area at 0x40E
207 */ 209 */
208 ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER; 210 ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
209 ebda_addr <<= 4; 211 ebda_addr <<= 4;
210 212
211 ebda_size = *(unsigned short *)(unsigned long)ebda_addr; 213 ebda_size = *(unsigned short *)__va(ebda_addr);
212 214
213 /* Round EBDA up to pages */ 215 /* Round EBDA up to pages */
214 if (ebda_size == 0) 216 if (ebda_size == 0)
@@ -274,8 +276,6 @@ void __init setup_arch(char **cmdline_p)
274 276
275 dmi_scan_machine(); 277 dmi_scan_machine();
276 278
277 zap_low_mappings(0);
278
279#ifdef CONFIG_ACPI 279#ifdef CONFIG_ACPI
280 /* 280 /*
281 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). 281 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
@@ -329,15 +329,8 @@ void __init setup_arch(char **cmdline_p)
329#endif 329#endif
330 330
331#ifdef CONFIG_SMP 331#ifdef CONFIG_SMP
332 /*
333 * But first pinch a few for the stack/trampoline stuff
334 * FIXME: Don't need the extra page at 4K, but need to fix
335 * trampoline before removing it. (see the GDT stuff)
336 */
337 reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);
338
339 /* Reserve SMP trampoline */ 332 /* Reserve SMP trampoline */
340 reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE); 333 reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE);
341#endif 334#endif
342 335
343#ifdef CONFIG_ACPI_SLEEP 336#ifdef CONFIG_ACPI_SLEEP
@@ -612,6 +605,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
612 605
613 /* RDTSC can be speculated around */ 606 /* RDTSC can be speculated around */
614 clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); 607 clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
608
609 /* Family 10 doesn't support C states in MWAIT so don't use it */
610 if (c->x86 == 0x10 && !force_mwait)
611 clear_bit(X86_FEATURE_MWAIT, &c->x86_capability);
615} 612}
616 613
617static void __cpuinit detect_ht(struct cpuinfo_x86 *c) 614static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -987,9 +984,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
987 "stc", 984 "stc",
988 "100mhzsteps", 985 "100mhzsteps",
989 "hwpstate", 986 "hwpstate",
990 NULL, /* tsc invariant mapped to constant_tsc */ 987 "", /* tsc invariant mapped to constant_tsc */
991 NULL, 988 /* nothing */
992 /* nothing */ /* constant_tsc - moved to flags */
993 }; 989 };
994 990
995 991
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 6a70b55f719d..64379a80d763 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -103,9 +103,9 @@ void __init setup_per_cpu_areas(void)
103 if (!NODE_DATA(cpu_to_node(i))) { 103 if (!NODE_DATA(cpu_to_node(i))) {
104 printk("cpu with no node %d, num_online_nodes %d\n", 104 printk("cpu with no node %d, num_online_nodes %d\n",
105 i, num_online_nodes()); 105 i, num_online_nodes());
106 ptr = alloc_bootmem(size); 106 ptr = alloc_bootmem_pages(size);
107 } else { 107 } else {
108 ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size); 108 ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size);
109 } 109 }
110 if (!ptr) 110 if (!ptr)
111 panic("Cannot allocate cpu data for CPU %d\n", i); 111 panic("Cannot allocate cpu data for CPU %d\n", i);
@@ -201,7 +201,6 @@ void __cpuinit cpu_init (void)
201 /* CPU 0 is initialised in head64.c */ 201 /* CPU 0 is initialised in head64.c */
202 if (cpu != 0) { 202 if (cpu != 0) {
203 pda_init(cpu); 203 pda_init(cpu);
204 zap_low_mappings(cpu);
205 } else 204 } else
206 estacks = boot_exception_stacks; 205 estacks = boot_exception_stacks;
207 206
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
index 49ec324cd141..c819625f3316 100644
--- a/arch/x86_64/kernel/signal.c
+++ b/arch/x86_64/kernel/signal.c
@@ -141,7 +141,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
141 goto badframe; 141 goto badframe;
142 142
143#ifdef DEBUG_SIG 143#ifdef DEBUG_SIG
144 printk("%d sigreturn rip:%lx rsp:%lx frame:%p rax:%lx\n",current->pid,regs.rip,regs.rsp,frame,eax); 144 printk("%d sigreturn rip:%lx rsp:%lx frame:%p rax:%lx\n",current->pid,regs->rip,regs->rsp,frame,eax);
145#endif 145#endif
146 146
147 if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->rsp) == -EFAULT) 147 if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->rsp) == -EFAULT)
@@ -301,7 +301,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
301 if (test_thread_flag(TIF_SINGLESTEP)) 301 if (test_thread_flag(TIF_SINGLESTEP))
302 ptrace_notify(SIGTRAP); 302 ptrace_notify(SIGTRAP);
303#ifdef DEBUG_SIG 303#ifdef DEBUG_SIG
304 printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", 304 printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%p\n",
305 current->comm, current->pid, frame, regs->rip, frame->pretcode); 305 current->comm, current->pid, frame, regs->rip, frame->pretcode);
306#endif 306#endif
307 307
@@ -463,7 +463,7 @@ void
463do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) 463do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
464{ 464{
465#ifdef DEBUG_SIG 465#ifdef DEBUG_SIG
466 printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%lx pending:%lx\n", 466 printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%p pending:%x\n",
467 thread_info_flags, regs->rip, regs->rsp, __builtin_return_address(0),signal_pending(current)); 467 thread_info_flags, regs->rip, regs->rsp, __builtin_return_address(0),signal_pending(current));
468#endif 468#endif
469 469
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index af1ec4d23cf8..bd1d123947ce 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -452,42 +452,34 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
452} 452}
453EXPORT_SYMBOL(smp_call_function); 453EXPORT_SYMBOL(smp_call_function);
454 454
455void smp_stop_cpu(void) 455static void stop_this_cpu(void *dummy)
456{ 456{
457 unsigned long flags; 457 local_irq_disable();
458 /* 458 /*
459 * Remove this CPU: 459 * Remove this CPU:
460 */ 460 */
461 cpu_clear(smp_processor_id(), cpu_online_map); 461 cpu_clear(smp_processor_id(), cpu_online_map);
462 local_irq_save(flags);
463 disable_local_APIC(); 462 disable_local_APIC();
464 local_irq_restore(flags);
465}
466
467static void smp_really_stop_cpu(void *dummy)
468{
469 smp_stop_cpu();
470 for (;;) 463 for (;;)
471 halt(); 464 halt();
472} 465}
473 466
474void smp_send_stop(void) 467void smp_send_stop(void)
475{ 468{
476 int nolock = 0; 469 int nolock;
470 unsigned long flags;
471
477 if (reboot_force) 472 if (reboot_force)
478 return; 473 return;
474
479 /* Don't deadlock on the call lock in panic */ 475 /* Don't deadlock on the call lock in panic */
480 if (!spin_trylock(&call_lock)) { 476 nolock = !spin_trylock(&call_lock);
481 /* ignore locking because we have panicked anyways */ 477 local_irq_save(flags);
482 nolock = 1; 478 __smp_call_function(stop_this_cpu, NULL, 0, 0);
483 }
484 __smp_call_function(smp_really_stop_cpu, NULL, 0, 0);
485 if (!nolock) 479 if (!nolock)
486 spin_unlock(&call_lock); 480 spin_unlock(&call_lock);
487
488 local_irq_disable();
489 disable_local_APIC(); 481 disable_local_APIC();
490 local_irq_enable(); 482 local_irq_restore(flags);
491} 483}
492 484
493/* 485/*
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index cd4643a37022..4d9dacfae575 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -60,7 +60,6 @@
60#include <asm/irq.h> 60#include <asm/irq.h>
61#include <asm/hw_irq.h> 61#include <asm/hw_irq.h>
62#include <asm/numa.h> 62#include <asm/numa.h>
63#include <asm/genapic.h>
64 63
65/* Number of siblings per CPU package */ 64/* Number of siblings per CPU package */
66int smp_num_siblings = 1; 65int smp_num_siblings = 1;
@@ -68,7 +67,6 @@ EXPORT_SYMBOL(smp_num_siblings);
68 67
69/* Last level cache ID of each logical CPU */ 68/* Last level cache ID of each logical CPU */
70u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; 69u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
71EXPORT_SYMBOL(cpu_llc_id);
72 70
73/* Bitmask of currently online CPUs */ 71/* Bitmask of currently online CPUs */
74cpumask_t cpu_online_map __read_mostly; 72cpumask_t cpu_online_map __read_mostly;
@@ -392,7 +390,8 @@ static void inquire_remote_apic(int apicid)
392{ 390{
393 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; 391 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
394 char *names[] = { "ID", "VERSION", "SPIV" }; 392 char *names[] = { "ID", "VERSION", "SPIV" };
395 int timeout, status; 393 int timeout;
394 unsigned int status;
396 395
397 printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); 396 printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);
398 397
@@ -402,7 +401,9 @@ static void inquire_remote_apic(int apicid)
402 /* 401 /*
403 * Wait for idle. 402 * Wait for idle.
404 */ 403 */
405 apic_wait_icr_idle(); 404 status = safe_apic_wait_icr_idle();
405 if (status)
406 printk("a previous APIC delivery may have failed\n");
406 407
407 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); 408 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
408 apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]); 409 apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
@@ -430,8 +431,8 @@ static void inquire_remote_apic(int apicid)
430 */ 431 */
431static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip) 432static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
432{ 433{
433 unsigned long send_status = 0, accept_status = 0; 434 unsigned long send_status, accept_status = 0;
434 int maxlvt, timeout, num_starts, j; 435 int maxlvt, num_starts, j;
435 436
436 Dprintk("Asserting INIT.\n"); 437 Dprintk("Asserting INIT.\n");
437 438
@@ -447,12 +448,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
447 | APIC_DM_INIT); 448 | APIC_DM_INIT);
448 449
449 Dprintk("Waiting for send to finish...\n"); 450 Dprintk("Waiting for send to finish...\n");
450 timeout = 0; 451 send_status = safe_apic_wait_icr_idle();
451 do {
452 Dprintk("+");
453 udelay(100);
454 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
455 } while (send_status && (timeout++ < 1000));
456 452
457 mdelay(10); 453 mdelay(10);
458 454
@@ -465,12 +461,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
465 apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); 461 apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
466 462
467 Dprintk("Waiting for send to finish...\n"); 463 Dprintk("Waiting for send to finish...\n");
468 timeout = 0; 464 send_status = safe_apic_wait_icr_idle();
469 do {
470 Dprintk("+");
471 udelay(100);
472 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
473 } while (send_status && (timeout++ < 1000));
474 465
475 mb(); 466 mb();
476 atomic_set(&init_deasserted, 1); 467 atomic_set(&init_deasserted, 1);
@@ -509,12 +500,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
509 Dprintk("Startup point 1.\n"); 500 Dprintk("Startup point 1.\n");
510 501
511 Dprintk("Waiting for send to finish...\n"); 502 Dprintk("Waiting for send to finish...\n");
512 timeout = 0; 503 send_status = safe_apic_wait_icr_idle();
513 do {
514 Dprintk("+");
515 udelay(100);
516 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
517 } while (send_status && (timeout++ < 1000));
518 504
519 /* 505 /*
520 * Give the other CPU some time to accept the IPI. 506 * Give the other CPU some time to accept the IPI.
@@ -945,6 +931,12 @@ int __cpuinit __cpu_up(unsigned int cpu)
945 return -ENOSYS; 931 return -ENOSYS;
946 } 932 }
947 933
934 /*
935 * Save current MTRR state in case it was changed since early boot
936 * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
937 */
938 mtrr_save_state();
939
948 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; 940 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
949 /* Boot it! */ 941 /* Boot it! */
950 err = do_boot_cpu(cpu, apicid); 942 err = do_boot_cpu(cpu, apicid);
@@ -965,13 +957,6 @@ int __cpuinit __cpu_up(unsigned int cpu)
965 957
966 while (!cpu_isset(cpu, cpu_online_map)) 958 while (!cpu_isset(cpu, cpu_online_map))
967 cpu_relax(); 959 cpu_relax();
968
969 if (num_online_cpus() > 8 && genapic == &apic_flat) {
970 printk(KERN_WARNING
971 "flat APIC routing can't be used with > 8 cpus\n");
972 BUG();
973 }
974
975 err = 0; 960 err = 0;
976 961
977 return err; 962 return err;
diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c
index 91f7e678bae7..6a5a98f2a75c 100644
--- a/arch/x86_64/kernel/suspend.c
+++ b/arch/x86_64/kernel/suspend.c
@@ -12,6 +12,10 @@
12#include <asm/proto.h> 12#include <asm/proto.h>
13#include <asm/page.h> 13#include <asm/page.h>
14#include <asm/pgtable.h> 14#include <asm/pgtable.h>
15#include <asm/mtrr.h>
16
17/* References to section boundaries */
18extern const void __nosave_begin, __nosave_end;
15 19
16struct saved_context saved_context; 20struct saved_context saved_context;
17 21
@@ -33,7 +37,6 @@ void __save_processor_state(struct saved_context *ctxt)
33 asm volatile ("str %0" : "=m" (ctxt->tr)); 37 asm volatile ("str %0" : "=m" (ctxt->tr));
34 38
35 /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */ 39 /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */
36 /* EFER should be constant for kernel version, no need to handle it. */
37 /* 40 /*
38 * segment registers 41 * segment registers
39 */ 42 */
@@ -46,10 +49,12 @@ void __save_processor_state(struct saved_context *ctxt)
46 rdmsrl(MSR_FS_BASE, ctxt->fs_base); 49 rdmsrl(MSR_FS_BASE, ctxt->fs_base);
47 rdmsrl(MSR_GS_BASE, ctxt->gs_base); 50 rdmsrl(MSR_GS_BASE, ctxt->gs_base);
48 rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); 51 rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
52 mtrr_save_fixed_ranges(NULL);
49 53
50 /* 54 /*
51 * control registers 55 * control registers
52 */ 56 */
57 rdmsrl(MSR_EFER, ctxt->efer);
53 asm volatile ("movq %%cr0, %0" : "=r" (ctxt->cr0)); 58 asm volatile ("movq %%cr0, %0" : "=r" (ctxt->cr0));
54 asm volatile ("movq %%cr2, %0" : "=r" (ctxt->cr2)); 59 asm volatile ("movq %%cr2, %0" : "=r" (ctxt->cr2));
55 asm volatile ("movq %%cr3, %0" : "=r" (ctxt->cr3)); 60 asm volatile ("movq %%cr3, %0" : "=r" (ctxt->cr3));
@@ -75,6 +80,7 @@ void __restore_processor_state(struct saved_context *ctxt)
75 /* 80 /*
76 * control registers 81 * control registers
77 */ 82 */
83 wrmsrl(MSR_EFER, ctxt->efer);
78 asm volatile ("movq %0, %%cr8" :: "r" (ctxt->cr8)); 84 asm volatile ("movq %0, %%cr8" :: "r" (ctxt->cr8));
79 asm volatile ("movq %0, %%cr4" :: "r" (ctxt->cr4)); 85 asm volatile ("movq %0, %%cr4" :: "r" (ctxt->cr4));
80 asm volatile ("movq %0, %%cr3" :: "r" (ctxt->cr3)); 86 asm volatile ("movq %0, %%cr3" :: "r" (ctxt->cr3));
@@ -219,4 +225,15 @@ int swsusp_arch_resume(void)
219 restore_image(); 225 restore_image();
220 return 0; 226 return 0;
221} 227}
228
229/*
230 * pfn_is_nosave - check if given pfn is in the 'nosave' section
231 */
232
233int pfn_is_nosave(unsigned long pfn)
234{
235 unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
236 unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
237 return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
238}
222#endif /* CONFIG_SOFTWARE_SUSPEND */ 239#endif /* CONFIG_SOFTWARE_SUSPEND */
diff --git a/arch/x86_64/kernel/suspend_asm.S b/arch/x86_64/kernel/suspend_asm.S
index bfbe00763c68..16d183f67bc1 100644
--- a/arch/x86_64/kernel/suspend_asm.S
+++ b/arch/x86_64/kernel/suspend_asm.S
@@ -71,9 +71,10 @@ loop:
71 jmp loop 71 jmp loop
72done: 72done:
73 /* go back to the original page tables */ 73 /* go back to the original page tables */
74 leaq init_level4_pgt(%rip), %rax 74 movq $(init_level4_pgt - __START_KERNEL_map), %rax
75 subq $__START_KERNEL_map, %rax 75 addq phys_base(%rip), %rax
76 movq %rax, %cr3 76 movq %rax, %cr3
77
77 /* Flush TLB, including "global" things (vmalloc) */ 78 /* Flush TLB, including "global" things (vmalloc) */
78 movq mmu_cr4_features(%rip), %rax 79 movq mmu_cr4_features(%rip), %rax
79 movq %rax, %rdx 80 movq %rax, %rdx
diff --git a/arch/x86_64/kernel/sys_x86_64.c b/arch/x86_64/kernel/sys_x86_64.c
index 76bf7c241fe4..f891931eb753 100644
--- a/arch/x86_64/kernel/sys_x86_64.c
+++ b/arch/x86_64/kernel/sys_x86_64.c
@@ -93,6 +93,9 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
93 unsigned long start_addr; 93 unsigned long start_addr;
94 unsigned long begin, end; 94 unsigned long begin, end;
95 95
96 if (flags & MAP_FIXED)
97 return addr;
98
96 find_start_end(flags, &begin, &end); 99 find_start_end(flags, &begin, &end);
97 100
98 if (len > end) 101 if (len > end)
diff --git a/arch/x86_64/kernel/syscall.c b/arch/x86_64/kernel/syscall.c
index 213fd6ab789d..63d592c276cc 100644
--- a/arch/x86_64/kernel/syscall.c
+++ b/arch/x86_64/kernel/syscall.c
@@ -3,6 +3,7 @@
3#include <linux/linkage.h> 3#include <linux/linkage.h>
4#include <linux/sys.h> 4#include <linux/sys.h>
5#include <linux/cache.h> 5#include <linux/cache.h>
6#include <asm/asm-offsets.h>
6 7
7#define __NO_STUBS 8#define __NO_STUBS
8 9
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 75d73a9aa9ff..0652e173813b 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -39,13 +39,11 @@
39#include <asm/proto.h> 39#include <asm/proto.h>
40#include <asm/hpet.h> 40#include <asm/hpet.h>
41#include <asm/sections.h> 41#include <asm/sections.h>
42#include <linux/cpufreq.h>
43#include <linux/hpet.h> 42#include <linux/hpet.h>
44#include <asm/apic.h> 43#include <asm/apic.h>
45#include <asm/hpet.h> 44#include <asm/hpet.h>
46 45#include <asm/mpspec.h>
47extern void i8254_timer_resume(void); 46#include <asm/nmi.h>
48extern int using_apic_timer;
49 47
50static char *timename = NULL; 48static char *timename = NULL;
51 49
@@ -252,6 +250,51 @@ static unsigned long get_cmos_time(void)
252 return mktime(year, mon, day, hour, min, sec); 250 return mktime(year, mon, day, hour, min, sec);
253} 251}
254 252
253/* tsc_calibrate_cpu_khz is used on systems with fixed rate TSCs to determine
254 * processor frequency */
255#define TICK_COUNT 100000000
256static unsigned int __init tsc_calibrate_cpu_khz(void)
257{
258 int tsc_start, tsc_now;
259 int i, no_ctr_free;
260 unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0;
261 unsigned long flags;
262
263 for (i = 0; i < 4; i++)
264 if (avail_to_resrv_perfctr_nmi_bit(i))
265 break;
266 no_ctr_free = (i == 4);
267 if (no_ctr_free) {
268 i = 3;
269 rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
270 wrmsrl(MSR_K7_EVNTSEL3, 0);
271 rdmsrl(MSR_K7_PERFCTR3, pmc3);
272 } else {
273 reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i);
274 reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
275 }
276 local_irq_save(flags);
277 /* start measuring cycles, incrementing from 0 */
278 wrmsrl(MSR_K7_PERFCTR0 + i, 0);
279 wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76);
280 rdtscl(tsc_start);
281 do {
282 rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now);
283 tsc_now = get_cycles_sync();
284 } while ((tsc_now - tsc_start) < TICK_COUNT);
285
286 local_irq_restore(flags);
287 if (no_ctr_free) {
288 wrmsrl(MSR_K7_EVNTSEL3, 0);
289 wrmsrl(MSR_K7_PERFCTR3, pmc3);
290 wrmsrl(MSR_K7_EVNTSEL3, evntsel3);
291 } else {
292 release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
293 release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
294 }
295
296 return pmc_now * tsc_khz / (tsc_now - tsc_start);
297}
255 298
256/* 299/*
257 * pit_calibrate_tsc() uses the speaker output (channel 2) of 300 * pit_calibrate_tsc() uses the speaker output (channel 2) of
@@ -285,7 +328,7 @@ static unsigned int __init pit_calibrate_tsc(void)
285#define PIT_MODE 0x43 328#define PIT_MODE 0x43
286#define PIT_CH0 0x40 329#define PIT_CH0 0x40
287 330
288static void __init __pit_init(int val, u8 mode) 331static void __pit_init(int val, u8 mode)
289{ 332{
290 unsigned long flags; 333 unsigned long flags;
291 334
@@ -301,12 +344,12 @@ void __init pit_init(void)
301 __pit_init(LATCH, 0x34); /* binary, mode 2, LSB/MSB, ch 0 */ 344 __pit_init(LATCH, 0x34); /* binary, mode 2, LSB/MSB, ch 0 */
302} 345}
303 346
304void __init pit_stop_interrupt(void) 347void pit_stop_interrupt(void)
305{ 348{
306 __pit_init(0, 0x30); /* mode 0 */ 349 __pit_init(0, 0x30); /* mode 0 */
307} 350}
308 351
309void __init stop_timer_interrupt(void) 352void stop_timer_interrupt(void)
310{ 353{
311 char *name; 354 char *name;
312 if (hpet_address) { 355 if (hpet_address) {
@@ -339,23 +382,29 @@ void __init time_init(void)
339 if (hpet_use_timer) { 382 if (hpet_use_timer) {
340 /* set tick_nsec to use the proper rate for HPET */ 383 /* set tick_nsec to use the proper rate for HPET */
341 tick_nsec = TICK_NSEC_HPET; 384 tick_nsec = TICK_NSEC_HPET;
342 cpu_khz = hpet_calibrate_tsc(); 385 tsc_khz = hpet_calibrate_tsc();
343 timename = "HPET"; 386 timename = "HPET";
344 } else { 387 } else {
345 pit_init(); 388 pit_init();
346 cpu_khz = pit_calibrate_tsc(); 389 tsc_khz = pit_calibrate_tsc();
347 timename = "PIT"; 390 timename = "PIT";
348 } 391 }
349 392
393 cpu_khz = tsc_khz;
394 if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
395 boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
396 boot_cpu_data.x86 == 16)
397 cpu_khz = tsc_calibrate_cpu_khz();
398
350 if (unsynchronized_tsc()) 399 if (unsynchronized_tsc())
351 mark_tsc_unstable(); 400 mark_tsc_unstable("TSCs unsynchronized");
352 401
353 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) 402 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
354 vgetcpu_mode = VGETCPU_RDTSCP; 403 vgetcpu_mode = VGETCPU_RDTSCP;
355 else 404 else
356 vgetcpu_mode = VGETCPU_LSL; 405 vgetcpu_mode = VGETCPU_LSL;
357 406
358 set_cyc2ns_scale(cpu_khz); 407 set_cyc2ns_scale(tsc_khz);
359 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", 408 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
360 cpu_khz / 1000, cpu_khz % 1000); 409 cpu_khz / 1000, cpu_khz % 1000);
361 init_tsc_clocksource(); 410 init_tsc_clocksource();
diff --git a/arch/x86_64/kernel/trampoline.S b/arch/x86_64/kernel/trampoline.S
index c79b99a9e2f6..e7e2764c461b 100644
--- a/arch/x86_64/kernel/trampoline.S
+++ b/arch/x86_64/kernel/trampoline.S
@@ -3,6 +3,7 @@
3 * Trampoline.S Derived from Setup.S by Linus Torvalds 3 * Trampoline.S Derived from Setup.S by Linus Torvalds
4 * 4 *
5 * 4 Jan 1997 Michael Chastain: changed to gnu as. 5 * 4 Jan 1997 Michael Chastain: changed to gnu as.
6 * 15 Sept 2005 Eric Biederman: 64bit PIC support
6 * 7 *
7 * Entry: CS:IP point to the start of our code, we are 8 * Entry: CS:IP point to the start of our code, we are
8 * in real mode with no stack, but the rest of the 9 * in real mode with no stack, but the rest of the
@@ -17,15 +18,20 @@
17 * and IP is zero. Thus, data addresses need to be absolute 18 * and IP is zero. Thus, data addresses need to be absolute
18 * (no relocation) and are taken with regard to r_base. 19 * (no relocation) and are taken with regard to r_base.
19 * 20 *
21 * With the addition of trampoline_level4_pgt this code can
22 * now enter a 64bit kernel that lives at arbitrary 64bit
23 * physical addresses.
24 *
20 * If you work on this file, check the object module with objdump 25 * If you work on this file, check the object module with objdump
21 * --full-contents --reloc to make sure there are no relocation 26 * --full-contents --reloc to make sure there are no relocation
22 * entries. For the GDT entry we do hand relocation in smpboot.c 27 * entries.
23 * because of 64bit linker limitations.
24 */ 28 */
25 29
26#include <linux/linkage.h> 30#include <linux/linkage.h>
27#include <asm/segment.h> 31#include <asm/pgtable.h>
28#include <asm/page.h> 32#include <asm/page.h>
33#include <asm/msr.h>
34#include <asm/segment.h>
29 35
30.data 36.data
31 37
@@ -33,15 +39,33 @@
33 39
34ENTRY(trampoline_data) 40ENTRY(trampoline_data)
35r_base = . 41r_base = .
42 cli # We should be safe anyway
36 wbinvd 43 wbinvd
37 mov %cs, %ax # Code and data in the same place 44 mov %cs, %ax # Code and data in the same place
38 mov %ax, %ds 45 mov %ax, %ds
46 mov %ax, %es
47 mov %ax, %ss
39 48
40 cli # We should be safe anyway
41 49
42 movl $0xA5A5A5A5, trampoline_data - r_base 50 movl $0xA5A5A5A5, trampoline_data - r_base
43 # write marker for master knows we're running 51 # write marker for master knows we're running
44 52
53 # Setup stack
54 movw $(trampoline_stack_end - r_base), %sp
55
56 call verify_cpu # Verify the cpu supports long mode
57 testl %eax, %eax # Check for return code
58 jnz no_longmode
59
60 mov %cs, %ax
61 movzx %ax, %esi # Find the 32bit trampoline location
62 shll $4, %esi
63
64 # Fixup the vectors
65 addl %esi, startup_32_vector - r_base
66 addl %esi, startup_64_vector - r_base
67 addl %esi, tgdt + 2 - r_base # Fixup the gdt pointer
68
45 /* 69 /*
46 * GDT tables in non default location kernel can be beyond 16MB and 70 * GDT tables in non default location kernel can be beyond 16MB and
47 * lgdt will not be able to load the address as in real mode default 71 * lgdt will not be able to load the address as in real mode default
@@ -49,23 +73,94 @@ r_base = .
49 * to 32 bit. 73 * to 32 bit.
50 */ 74 */
51 75
52 lidtl idt_48 - r_base # load idt with 0, 0 76 lidtl tidt - r_base # load idt with 0, 0
53 lgdtl gdt_48 - r_base # load gdt with whatever is appropriate 77 lgdtl tgdt - r_base # load gdt with whatever is appropriate
54 78
55 xor %ax, %ax 79 xor %ax, %ax
56 inc %ax # protected mode (PE) bit 80 inc %ax # protected mode (PE) bit
57 lmsw %ax # into protected mode 81 lmsw %ax # into protected mode
58 # flaush prefetch and jump to startup_32 in arch/x86_64/kernel/head.S 82
59 ljmpl $__KERNEL32_CS, $(startup_32-__START_KERNEL_map) 83 # flush prefetch and jump to startup_32
84 ljmpl *(startup_32_vector - r_base)
85
86 .code32
87 .balign 4
88startup_32:
89 movl $__KERNEL_DS, %eax # Initialize the %ds segment register
90 movl %eax, %ds
91
92 xorl %eax, %eax
93 btsl $5, %eax # Enable PAE mode
94 movl %eax, %cr4
95
96 # Setup trampoline 4 level pagetables
97 leal (trampoline_level4_pgt - r_base)(%esi), %eax
98 movl %eax, %cr3
99
100 movl $MSR_EFER, %ecx
101 movl $(1 << _EFER_LME), %eax # Enable Long Mode
102 xorl %edx, %edx
103 wrmsr
104
105 xorl %eax, %eax
106 btsl $31, %eax # Enable paging and in turn activate Long Mode
107 btsl $0, %eax # Enable protected mode
108 movl %eax, %cr0
109
110 /*
111 * At this point we're in long mode but in 32bit compatibility mode
112 * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
113 * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use
114 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
115 */
116 ljmp *(startup_64_vector - r_base)(%esi)
117
118 .code64
119 .balign 4
120startup_64:
121 # Now jump into the kernel using virtual addresses
122 movq $secondary_startup_64, %rax
123 jmp *%rax
124
125 .code16
126no_longmode:
127 hlt
128 jmp no_longmode
129#include "verify_cpu.S"
60 130
61 # Careful these need to be in the same 64K segment as the above; 131 # Careful these need to be in the same 64K segment as the above;
62idt_48: 132tidt:
63 .word 0 # idt limit = 0 133 .word 0 # idt limit = 0
64 .word 0, 0 # idt base = 0L 134 .word 0, 0 # idt base = 0L
65 135
66gdt_48: 136 # Duplicate the global descriptor table
67 .short GDT_ENTRIES*8 - 1 # gdt limit 137 # so the kernel can live anywhere
68 .long cpu_gdt_table-__START_KERNEL_map 138 .balign 4
139tgdt:
140 .short tgdt_end - tgdt # gdt limit
141 .long tgdt - r_base
142 .short 0
143 .quad 0x00cf9b000000ffff # __KERNEL32_CS
144 .quad 0x00af9b000000ffff # __KERNEL_CS
145 .quad 0x00cf93000000ffff # __KERNEL_DS
146tgdt_end:
147
148 .balign 4
149startup_32_vector:
150 .long startup_32 - r_base
151 .word __KERNEL32_CS, 0
152
153 .balign 4
154startup_64_vector:
155 .long startup_64 - r_base
156 .word __KERNEL_CS, 0
157
158trampoline_stack:
159 .org 0x1000
160trampoline_stack_end:
161ENTRY(trampoline_level4_pgt)
162 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
163 .fill 510,8,0
164 .quad level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
69 165
70.globl trampoline_end 166ENTRY(trampoline_end)
71trampoline_end:
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 09d2e8a10a49..d76fc32d4599 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -426,8 +426,7 @@ void show_registers(struct pt_regs *regs)
426 const int cpu = smp_processor_id(); 426 const int cpu = smp_processor_id();
427 struct task_struct *cur = cpu_pda(cpu)->pcurrent; 427 struct task_struct *cur = cpu_pda(cpu)->pcurrent;
428 428
429 rsp = regs->rsp; 429 rsp = regs->rsp;
430
431 printk("CPU %d ", cpu); 430 printk("CPU %d ", cpu);
432 __show_regs(regs); 431 __show_regs(regs);
433 printk("Process %s (pid: %d, threadinfo %p, task %p)\n", 432 printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
@@ -438,7 +437,6 @@ void show_registers(struct pt_regs *regs)
438 * time of the fault.. 437 * time of the fault..
439 */ 438 */
440 if (in_kernel) { 439 if (in_kernel) {
441
442 printk("Stack: "); 440 printk("Stack: ");
443 _show_stack(NULL, regs, (unsigned long*)rsp); 441 _show_stack(NULL, regs, (unsigned long*)rsp);
444 442
@@ -581,10 +579,20 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
581{ 579{
582 struct task_struct *tsk = current; 580 struct task_struct *tsk = current;
583 581
584 tsk->thread.error_code = error_code;
585 tsk->thread.trap_no = trapnr;
586
587 if (user_mode(regs)) { 582 if (user_mode(regs)) {
583 /*
584 * We want error_code and trap_no set for userspace
585 * faults and kernelspace faults which result in
586 * die(), but not kernelspace faults which are fixed
587 * up. die() gives the process no chance to handle
588 * the signal and notice the kernel fault information,
589 * so that won't result in polluting the information
590 * about previously queued, but not yet delivered,
591 * faults. See also do_general_protection below.
592 */
593 tsk->thread.error_code = error_code;
594 tsk->thread.trap_no = trapnr;
595
588 if (exception_trace && unhandled_signal(tsk, signr)) 596 if (exception_trace && unhandled_signal(tsk, signr))
589 printk(KERN_INFO 597 printk(KERN_INFO
590 "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", 598 "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
@@ -605,8 +613,11 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
605 fixup = search_exception_tables(regs->rip); 613 fixup = search_exception_tables(regs->rip);
606 if (fixup) 614 if (fixup)
607 regs->rip = fixup->fixup; 615 regs->rip = fixup->fixup;
608 else 616 else {
617 tsk->thread.error_code = error_code;
618 tsk->thread.trap_no = trapnr;
609 die(str, regs, error_code); 619 die(str, regs, error_code);
620 }
610 return; 621 return;
611 } 622 }
612} 623}
@@ -682,10 +693,10 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
682 693
683 conditional_sti(regs); 694 conditional_sti(regs);
684 695
685 tsk->thread.error_code = error_code;
686 tsk->thread.trap_no = 13;
687
688 if (user_mode(regs)) { 696 if (user_mode(regs)) {
697 tsk->thread.error_code = error_code;
698 tsk->thread.trap_no = 13;
699
689 if (exception_trace && unhandled_signal(tsk, SIGSEGV)) 700 if (exception_trace && unhandled_signal(tsk, SIGSEGV))
690 printk(KERN_INFO 701 printk(KERN_INFO
691 "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", 702 "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
@@ -704,6 +715,9 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
704 regs->rip = fixup->fixup; 715 regs->rip = fixup->fixup;
705 return; 716 return;
706 } 717 }
718
719 tsk->thread.error_code = error_code;
720 tsk->thread.trap_no = 13;
707 if (notify_die(DIE_GPF, "general protection fault", regs, 721 if (notify_die(DIE_GPF, "general protection fault", regs,
708 error_code, 13, SIGSEGV) == NOTIFY_STOP) 722 error_code, 13, SIGSEGV) == NOTIFY_STOP)
709 return; 723 return;
diff --git a/arch/x86_64/kernel/tsc.c b/arch/x86_64/kernel/tsc.c
index 1a0edbbffaa0..48f9a8e6aa91 100644
--- a/arch/x86_64/kernel/tsc.c
+++ b/arch/x86_64/kernel/tsc.c
@@ -13,6 +13,8 @@ static int notsc __initdata = 0;
13 13
14unsigned int cpu_khz; /* TSC clocks / usec, not used here */ 14unsigned int cpu_khz; /* TSC clocks / usec, not used here */
15EXPORT_SYMBOL(cpu_khz); 15EXPORT_SYMBOL(cpu_khz);
16unsigned int tsc_khz;
17EXPORT_SYMBOL(tsc_khz);
16 18
17static unsigned int cyc2ns_scale __read_mostly; 19static unsigned int cyc2ns_scale __read_mostly;
18 20
@@ -77,7 +79,7 @@ static void handle_cpufreq_delayed_get(struct work_struct *v)
77static unsigned int ref_freq = 0; 79static unsigned int ref_freq = 0;
78static unsigned long loops_per_jiffy_ref = 0; 80static unsigned long loops_per_jiffy_ref = 0;
79 81
80static unsigned long cpu_khz_ref = 0; 82static unsigned long tsc_khz_ref = 0;
81 83
82static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, 84static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
83 void *data) 85 void *data)
@@ -99,7 +101,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
99 if (!ref_freq) { 101 if (!ref_freq) {
100 ref_freq = freq->old; 102 ref_freq = freq->old;
101 loops_per_jiffy_ref = *lpj; 103 loops_per_jiffy_ref = *lpj;
102 cpu_khz_ref = cpu_khz; 104 tsc_khz_ref = tsc_khz;
103 } 105 }
104 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || 106 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
105 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || 107 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
@@ -107,12 +109,12 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
107 *lpj = 109 *lpj =
108 cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); 110 cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
109 111
110 cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); 112 tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
111 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) 113 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
112 mark_tsc_unstable(); 114 mark_tsc_unstable("cpufreq changes");
113 } 115 }
114 116
115 set_cyc2ns_scale(cpu_khz_ref); 117 set_cyc2ns_scale(tsc_khz_ref);
116 118
117 return 0; 119 return 0;
118} 120}
@@ -197,10 +199,11 @@ static struct clocksource clocksource_tsc = {
197 .vread = vread_tsc, 199 .vread = vread_tsc,
198}; 200};
199 201
200void mark_tsc_unstable(void) 202void mark_tsc_unstable(char *reason)
201{ 203{
202 if (!tsc_unstable) { 204 if (!tsc_unstable) {
203 tsc_unstable = 1; 205 tsc_unstable = 1;
206 printk("Marking TSC unstable due to %s\n", reason);
204 /* Change only the rating, when not registered */ 207 /* Change only the rating, when not registered */
205 if (clocksource_tsc.mult) 208 if (clocksource_tsc.mult)
206 clocksource_change_rating(&clocksource_tsc, 0); 209 clocksource_change_rating(&clocksource_tsc, 0);
@@ -213,7 +216,7 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable);
213void __init init_tsc_clocksource(void) 216void __init init_tsc_clocksource(void)
214{ 217{
215 if (!notsc) { 218 if (!notsc) {
216 clocksource_tsc.mult = clocksource_khz2mult(cpu_khz, 219 clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
217 clocksource_tsc.shift); 220 clocksource_tsc.shift);
218 if (check_tsc_unstable()) 221 if (check_tsc_unstable())
219 clocksource_tsc.rating = 0; 222 clocksource_tsc.rating = 0;
diff --git a/arch/x86_64/kernel/tsc_sync.c b/arch/x86_64/kernel/tsc_sync.c
index 014f0db45dfa..355f5f506c81 100644
--- a/arch/x86_64/kernel/tsc_sync.c
+++ b/arch/x86_64/kernel/tsc_sync.c
@@ -50,7 +50,7 @@ static __cpuinit void check_tsc_warp(void)
50 /* 50 /*
51 * The measurement runs for 20 msecs: 51 * The measurement runs for 20 msecs:
52 */ 52 */
53 end = start + cpu_khz * 20ULL; 53 end = start + tsc_khz * 20ULL;
54 now = start; 54 now = start;
55 55
56 for (i = 0; ; i++) { 56 for (i = 0; ; i++) {
@@ -138,7 +138,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
138 printk("\n"); 138 printk("\n");
139 printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs," 139 printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs,"
140 " turning off TSC clock.\n", max_warp); 140 " turning off TSC clock.\n", max_warp);
141 mark_tsc_unstable(); 141 mark_tsc_unstable("check_tsc_sync_source failed");
142 nr_warps = 0; 142 nr_warps = 0;
143 max_warp = 0; 143 max_warp = 0;
144 last_tsc = 0; 144 last_tsc = 0;
diff --git a/arch/x86_64/kernel/verify_cpu.S b/arch/x86_64/kernel/verify_cpu.S
new file mode 100644
index 000000000000..e035f5948199
--- /dev/null
+++ b/arch/x86_64/kernel/verify_cpu.S
@@ -0,0 +1,119 @@
1/*
2 *
3 * verify_cpu.S - Code for cpu long mode and SSE verification. This
4 * code has been borrowed from boot/setup.S and was introduced by
5 * Andi Kleen.
6 *
7 * Copyright (c) 2007 Andi Kleen (ak@suse.de)
8 * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com)
9 * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com)
10 *
11 * This source code is licensed under the GNU General Public License,
12 * Version 2. See the file COPYING for more details.
13 *
14 * This is common code for verifying whether the CPU supports
15 * long mode and SSE. It is not called directly; instead this
16 * file is included in various places and compiled in that context.
17 * The current users are listed below.
18 *
19 * This file is included by both 16bit and 32bit code.
20 *
21 * arch/x86_64/boot/setup.S : Boot cpu verification (16bit)
22 * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit)
23 * arch/x86_64/kernel/trampoline.S: secondary processor verification (16bit)
24 * arch/x86_64/kernel/acpi/wakeup.S: Verification at resume (16bit)
25 *
26 * verify_cpu returns the status of the cpu check in register %eax.
27 * 0: Success 1: Failure
28 *
29 * The caller needs to check the error code and take the appropriate
30 * action: either display a message or halt.
31 */
32
33#include <asm/cpufeature.h>
34
35verify_cpu:
36 pushfl # Save caller passed flags
37 pushl $0 # Kill any dangerous flags
38 popfl
39
40 /* minimum CPUID flags for x86-64 as defined by AMD */
41#define M(x) (1<<(x))
42#define M2(a,b) M(a)|M(b)
43#define M4(a,b,c,d) M(a)|M(b)|M(c)|M(d)
44
45#define SSE_MASK \
46 (M2(X86_FEATURE_XMM,X86_FEATURE_XMM2))
47#define REQUIRED_MASK1 \
48 (M4(X86_FEATURE_FPU,X86_FEATURE_PSE,X86_FEATURE_TSC,X86_FEATURE_MSR)|\
49 M4(X86_FEATURE_PAE,X86_FEATURE_CX8,X86_FEATURE_PGE,X86_FEATURE_CMOV)|\
50 M(X86_FEATURE_FXSR))
51#define REQUIRED_MASK2 \
52 (M(X86_FEATURE_LM - 32))
53
54 pushfl # standard way to check for cpuid
55 popl %eax
56 movl %eax,%ebx
57 xorl $0x200000,%eax
58 pushl %eax
59 popfl
60 pushfl
61 popl %eax
62 cmpl %eax,%ebx
63 jz verify_cpu_no_longmode # cpu has no cpuid
64
65 movl $0x0,%eax # See if cpuid 1 is implemented
66 cpuid
67 cmpl $0x1,%eax
68 jb verify_cpu_no_longmode # no cpuid 1
69
70 xor %di,%di
71 cmpl $0x68747541,%ebx # AuthenticAMD
72 jnz verify_cpu_noamd
73 cmpl $0x69746e65,%edx
74 jnz verify_cpu_noamd
75 cmpl $0x444d4163,%ecx
76 jnz verify_cpu_noamd
77 mov $1,%di # cpu is from AMD
78
79verify_cpu_noamd:
80 movl $0x1,%eax # Does the cpu have what it takes
81 cpuid
82 andl $REQUIRED_MASK1,%edx
83 xorl $REQUIRED_MASK1,%edx
84 jnz verify_cpu_no_longmode
85
86 movl $0x80000000,%eax # See if extended cpuid is implemented
87 cpuid
88 cmpl $0x80000001,%eax
89 jb verify_cpu_no_longmode # no extended cpuid
90
91 movl $0x80000001,%eax # Does the cpu have what it takes
92 cpuid
93 andl $REQUIRED_MASK2,%edx
94 xorl $REQUIRED_MASK2,%edx
95 jnz verify_cpu_no_longmode
96
97verify_cpu_sse_test:
98 movl $1,%eax
99 cpuid
100 andl $SSE_MASK,%edx
101 cmpl $SSE_MASK,%edx
102 je verify_cpu_sse_ok
103 test %di,%di
104 jz verify_cpu_no_longmode # only try to force SSE on AMD
105 movl $0xc0010015,%ecx # HWCR
106 rdmsr
107 btr $15,%eax # enable SSE
108 wrmsr
109 xor %di,%di # don't loop
110 jmp verify_cpu_sse_test # try again
111
112verify_cpu_no_longmode:
113 popfl # Restore caller passed flags
114 movl $1,%eax
115 ret
116verify_cpu_sse_ok:
117 popfl # Restore caller passed flags
118 xorl %eax, %eax
119 ret
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 5176ecf006ee..88cfa50b424d 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -29,9 +29,7 @@ SECTIONS
29 .text : AT(ADDR(.text) - LOAD_OFFSET) { 29 .text : AT(ADDR(.text) - LOAD_OFFSET) {
30 /* First the code that has to be first for bootstrapping */ 30 /* First the code that has to be first for bootstrapping */
31 *(.bootstrap.text) 31 *(.bootstrap.text)
32 /* Then all the functions that are "hot" in profiles, to group them 32 _stext = .;
33 onto the same hugetlb entry */
34 #include "functionlist"
35 /* Then the rest */ 33 /* Then the rest */
36 *(.text) 34 *(.text)
37 SCHED_TEXT 35 SCHED_TEXT
@@ -50,10 +48,10 @@ SECTIONS
50 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } 48 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
51 __stop___ex_table = .; 49 __stop___ex_table = .;
52 50
53 RODATA
54
55 BUG_TABLE 51 BUG_TABLE
56 52
53 RODATA
54
57 . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ 55 . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */
58 /* Data */ 56 /* Data */
59 .data : AT(ADDR(.data) - LOAD_OFFSET) { 57 .data : AT(ADDR(.data) - LOAD_OFFSET) {
@@ -94,6 +92,12 @@ SECTIONS
94 { *(.vsyscall_gtod_data) } 92 { *(.vsyscall_gtod_data) }
95 vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); 93 vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
96 94
95
96 .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1))
97 { *(.vsyscall_1) }
98 .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2))
99 { *(.vsyscall_2) }
100
97 .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) } 101 .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) }
98 vgetcpu_mode = VVIRT(.vgetcpu_mode); 102 vgetcpu_mode = VVIRT(.vgetcpu_mode);
99 103
@@ -101,10 +105,6 @@ SECTIONS
101 .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) } 105 .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) }
102 jiffies = VVIRT(.jiffies); 106 jiffies = VVIRT(.jiffies);
103 107
104 .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1))
105 { *(.vsyscall_1) }
106 .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2))
107 { *(.vsyscall_2) }
108 .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) 108 .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3))
109 { *(.vsyscall_3) } 109 { *(.vsyscall_3) }
110 110
@@ -194,7 +194,7 @@ SECTIONS
194 __initramfs_end = .; 194 __initramfs_end = .;
195#endif 195#endif
196 196
197 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); 197 . = ALIGN(4096);
198 __per_cpu_start = .; 198 __per_cpu_start = .;
199 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } 199 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
200 __per_cpu_end = .; 200 __per_cpu_end = .;
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index b43c698cf7d3..dc32cef96195 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -45,14 +45,34 @@
45 45
46#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) 46#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
47#define __syscall_clobber "r11","rcx","memory" 47#define __syscall_clobber "r11","rcx","memory"
48#define __pa_vsymbol(x) \
49 ({unsigned long v; \
50 extern char __vsyscall_0; \
51 asm("" : "=r" (v) : "0" (x)); \
52 ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); })
48 53
54/*
55 * vsyscall_gtod_data contains data that is :
56 * - readonly from vsyscalls
57 * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64)
58 * Try to keep this structure as small as possible to avoid cache line ping pongs
59 */
49struct vsyscall_gtod_data_t { 60struct vsyscall_gtod_data_t {
50 seqlock_t lock; 61 seqlock_t lock;
51 int sysctl_enabled; 62
52 struct timeval wall_time_tv; 63 /* open coded 'struct timespec' */
64 time_t wall_time_sec;
65 u32 wall_time_nsec;
66
67 int sysctl_enabled;
53 struct timezone sys_tz; 68 struct timezone sys_tz;
54 cycle_t offset_base; 69 struct { /* extract of a clocksource struct */
55 struct clocksource clock; 70 cycle_t (*vread)(void);
71 cycle_t cycle_last;
72 cycle_t mask;
73 u32 mult;
74 u32 shift;
75 } clock;
56}; 76};
57int __vgetcpu_mode __section_vgetcpu_mode; 77int __vgetcpu_mode __section_vgetcpu_mode;
58 78
@@ -68,9 +88,13 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
68 88
69 write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); 89 write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
70 /* copy vsyscall data */ 90 /* copy vsyscall data */
71 vsyscall_gtod_data.clock = *clock; 91 vsyscall_gtod_data.clock.vread = clock->vread;
72 vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec; 92 vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
73 vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000; 93 vsyscall_gtod_data.clock.mask = clock->mask;
94 vsyscall_gtod_data.clock.mult = clock->mult;
95 vsyscall_gtod_data.clock.shift = clock->shift;
96 vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
97 vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
74 vsyscall_gtod_data.sys_tz = sys_tz; 98 vsyscall_gtod_data.sys_tz = sys_tz;
75 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); 99 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
76} 100}
@@ -105,7 +129,8 @@ static __always_inline long time_syscall(long *t)
105static __always_inline void do_vgettimeofday(struct timeval * tv) 129static __always_inline void do_vgettimeofday(struct timeval * tv)
106{ 130{
107 cycle_t now, base, mask, cycle_delta; 131 cycle_t now, base, mask, cycle_delta;
108 unsigned long seq, mult, shift, nsec_delta; 132 unsigned seq;
133 unsigned long mult, shift, nsec;
109 cycle_t (*vread)(void); 134 cycle_t (*vread)(void);
110 do { 135 do {
111 seq = read_seqbegin(&__vsyscall_gtod_data.lock); 136 seq = read_seqbegin(&__vsyscall_gtod_data.lock);
@@ -121,21 +146,20 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
121 mult = __vsyscall_gtod_data.clock.mult; 146 mult = __vsyscall_gtod_data.clock.mult;
122 shift = __vsyscall_gtod_data.clock.shift; 147 shift = __vsyscall_gtod_data.clock.shift;
123 148
124 *tv = __vsyscall_gtod_data.wall_time_tv; 149 tv->tv_sec = __vsyscall_gtod_data.wall_time_sec;
125 150 nsec = __vsyscall_gtod_data.wall_time_nsec;
126 } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); 151 } while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
127 152
128 /* calculate interval: */ 153 /* calculate interval: */
129 cycle_delta = (now - base) & mask; 154 cycle_delta = (now - base) & mask;
130 /* convert to nsecs: */ 155 /* convert to nsecs: */
131 nsec_delta = (cycle_delta * mult) >> shift; 156 nsec += (cycle_delta * mult) >> shift;
132 157
133 /* convert to usecs and add to timespec: */ 158 while (nsec >= NSEC_PER_SEC) {
134 tv->tv_usec += nsec_delta / NSEC_PER_USEC;
135 while (tv->tv_usec > USEC_PER_SEC) {
136 tv->tv_sec += 1; 159 tv->tv_sec += 1;
137 tv->tv_usec -= USEC_PER_SEC; 160 nsec -= NSEC_PER_SEC;
138 } 161 }
162 tv->tv_usec = nsec / NSEC_PER_USEC;
139} 163}
140 164
141int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) 165int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
@@ -151,11 +175,13 @@ int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
151 * unlikely */ 175 * unlikely */
152time_t __vsyscall(1) vtime(time_t *t) 176time_t __vsyscall(1) vtime(time_t *t)
153{ 177{
178 time_t result;
154 if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) 179 if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
155 return time_syscall(t); 180 return time_syscall(t);
156 else if (t) 181 result = __vsyscall_gtod_data.wall_time_sec;
157 *t = __vsyscall_gtod_data.wall_time_tv.tv_sec; 182 if (t)
158 return __vsyscall_gtod_data.wall_time_tv.tv_sec; 183 *t = result;
184 return result;
159} 185}
160 186
161/* Fast way to get current CPU and node. 187/* Fast way to get current CPU and node.
@@ -224,10 +250,10 @@ static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
224 return ret; 250 return ret;
225 /* gcc has some trouble with __va(__pa()), so just do it this 251 /* gcc has some trouble with __va(__pa()), so just do it this
226 way. */ 252 way. */
227 map1 = ioremap(__pa_symbol(&vsysc1), 2); 253 map1 = ioremap(__pa_vsymbol(&vsysc1), 2);
228 if (!map1) 254 if (!map1)
229 return -ENOMEM; 255 return -ENOMEM;
230 map2 = ioremap(__pa_symbol(&vsysc2), 2); 256 map2 = ioremap(__pa_vsymbol(&vsysc2), 2);
231 if (!map2) { 257 if (!map2) {
232 ret = -ENOMEM; 258 ret = -ENOMEM;
233 goto out; 259 goto out;
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 6ada7231f3ab..de99dba2c515 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -585,7 +585,7 @@ do_sigbus:
585} 585}
586 586
587DEFINE_SPINLOCK(pgd_lock); 587DEFINE_SPINLOCK(pgd_lock);
588struct page *pgd_list; 588LIST_HEAD(pgd_list);
589 589
590void vmalloc_sync_all(void) 590void vmalloc_sync_all(void)
591{ 591{
@@ -605,8 +605,7 @@ void vmalloc_sync_all(void)
605 if (pgd_none(*pgd_ref)) 605 if (pgd_none(*pgd_ref))
606 continue; 606 continue;
607 spin_lock(&pgd_lock); 607 spin_lock(&pgd_lock);
608 for (page = pgd_list; page; 608 list_for_each_entry(page, &pgd_list, lru) {
609 page = (struct page *)page->index) {
610 pgd_t *pgd; 609 pgd_t *pgd;
611 pgd = (pgd_t *)page_address(page) + pgd_index(address); 610 pgd = (pgd_t *)page_address(page) + pgd_index(address);
612 if (pgd_none(*pgd)) 611 if (pgd_none(*pgd))
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index ec31534eb104..c0822683b916 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -22,10 +22,12 @@
22#include <linux/bootmem.h> 22#include <linux/bootmem.h>
23#include <linux/proc_fs.h> 23#include <linux/proc_fs.h>
24#include <linux/pci.h> 24#include <linux/pci.h>
25#include <linux/pfn.h>
25#include <linux/poison.h> 26#include <linux/poison.h>
26#include <linux/dma-mapping.h> 27#include <linux/dma-mapping.h>
27#include <linux/module.h> 28#include <linux/module.h>
28#include <linux/memory_hotplug.h> 29#include <linux/memory_hotplug.h>
30#include <linux/nmi.h>
29 31
30#include <asm/processor.h> 32#include <asm/processor.h>
31#include <asm/system.h> 33#include <asm/system.h>
@@ -46,7 +48,7 @@
46#define Dprintk(x...) 48#define Dprintk(x...)
47#endif 49#endif
48 50
49struct dma_mapping_ops* dma_ops; 51const struct dma_mapping_ops* dma_ops;
50EXPORT_SYMBOL(dma_ops); 52EXPORT_SYMBOL(dma_ops);
51 53
52static unsigned long dma_reserve __initdata; 54static unsigned long dma_reserve __initdata;
@@ -72,6 +74,11 @@ void show_mem(void)
72 74
73 for_each_online_pgdat(pgdat) { 75 for_each_online_pgdat(pgdat) {
74 for (i = 0; i < pgdat->node_spanned_pages; ++i) { 76 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
77 /* this loop can take a while with 256 GB and 4k pages
78 so update the NMI watchdog */
79 if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
80 touch_nmi_watchdog();
81 }
75 page = pfn_to_page(pgdat->node_start_pfn + i); 82 page = pfn_to_page(pgdat->node_start_pfn + i);
76 total++; 83 total++;
77 if (PageReserved(page)) 84 if (PageReserved(page))
@@ -167,23 +174,9 @@ __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
167 174
168unsigned long __initdata table_start, table_end; 175unsigned long __initdata table_start, table_end;
169 176
170extern pmd_t temp_boot_pmds[]; 177static __meminit void *alloc_low_page(unsigned long *phys)
171
172static struct temp_map {
173 pmd_t *pmd;
174 void *address;
175 int allocated;
176} temp_mappings[] __initdata = {
177 { &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) },
178 { &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) },
179 {}
180};
181
182static __meminit void *alloc_low_page(int *index, unsigned long *phys)
183{ 178{
184 struct temp_map *ti; 179 unsigned long pfn = table_end++;
185 int i;
186 unsigned long pfn = table_end++, paddr;
187 void *adr; 180 void *adr;
188 181
189 if (after_bootmem) { 182 if (after_bootmem) {
@@ -194,57 +187,63 @@ static __meminit void *alloc_low_page(int *index, unsigned long *phys)
194 187
195 if (pfn >= end_pfn) 188 if (pfn >= end_pfn)
196 panic("alloc_low_page: ran out of memory"); 189 panic("alloc_low_page: ran out of memory");
197 for (i = 0; temp_mappings[i].allocated; i++) { 190
198 if (!temp_mappings[i].pmd) 191 adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
199 panic("alloc_low_page: ran out of temp mappings");
200 }
201 ti = &temp_mappings[i];
202 paddr = (pfn << PAGE_SHIFT) & PMD_MASK;
203 set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE));
204 ti->allocated = 1;
205 __flush_tlb();
206 adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK);
207 memset(adr, 0, PAGE_SIZE); 192 memset(adr, 0, PAGE_SIZE);
208 *index = i; 193 *phys = pfn * PAGE_SIZE;
209 *phys = pfn * PAGE_SIZE; 194 return adr;
210 return adr; 195}
211}
212 196
213static __meminit void unmap_low_page(int i) 197static __meminit void unmap_low_page(void *adr)
214{ 198{
215 struct temp_map *ti;
216 199
217 if (after_bootmem) 200 if (after_bootmem)
218 return; 201 return;
219 202
220 ti = &temp_mappings[i]; 203 early_iounmap(adr, PAGE_SIZE);
221 set_pmd(ti->pmd, __pmd(0));
222 ti->allocated = 0;
223} 204}
224 205
225/* Must run before zap_low_mappings */ 206/* Must run before zap_low_mappings */
226__init void *early_ioremap(unsigned long addr, unsigned long size) 207__init void *early_ioremap(unsigned long addr, unsigned long size)
227{ 208{
228 unsigned long map = round_down(addr, LARGE_PAGE_SIZE); 209 unsigned long vaddr;
229 210 pmd_t *pmd, *last_pmd;
230 /* actually usually some more */ 211 int i, pmds;
231 if (size >= LARGE_PAGE_SIZE) { 212
232 return NULL; 213 pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
214 vaddr = __START_KERNEL_map;
215 pmd = level2_kernel_pgt;
216 last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
217 for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
218 for (i = 0; i < pmds; i++) {
219 if (pmd_present(pmd[i]))
220 goto next;
221 }
222 vaddr += addr & ~PMD_MASK;
223 addr &= PMD_MASK;
224 for (i = 0; i < pmds; i++, addr += PMD_SIZE)
225 set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
226 __flush_tlb();
227 return (void *)vaddr;
228 next:
229 ;
233 } 230 }
234 set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); 231 printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
235 map += LARGE_PAGE_SIZE; 232 return NULL;
236 set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
237 __flush_tlb();
238 return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1));
239} 233}
240 234
241/* To avoid virtual aliases later */ 235/* To avoid virtual aliases later */
242__init void early_iounmap(void *addr, unsigned long size) 236__init void early_iounmap(void *addr, unsigned long size)
243{ 237{
244 if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address) 238 unsigned long vaddr;
245 printk("early_iounmap: bad address %p\n", addr); 239 pmd_t *pmd;
246 set_pmd(temp_mappings[0].pmd, __pmd(0)); 240 int i, pmds;
247 set_pmd(temp_mappings[1].pmd, __pmd(0)); 241
242 vaddr = (unsigned long)addr;
243 pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
244 pmd = level2_kernel_pgt + pmd_index(vaddr);
245 for (i = 0; i < pmds; i++)
246 pmd_clear(pmd + i);
248 __flush_tlb(); 247 __flush_tlb();
249} 248}
250 249
@@ -289,7 +288,6 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne
289 288
290 289
291 for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) { 290 for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
292 int map;
293 unsigned long pmd_phys; 291 unsigned long pmd_phys;
294 pud_t *pud = pud_page + pud_index(addr); 292 pud_t *pud = pud_page + pud_index(addr);
295 pmd_t *pmd; 293 pmd_t *pmd;
@@ -307,12 +305,12 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne
307 continue; 305 continue;
308 } 306 }
309 307
310 pmd = alloc_low_page(&map, &pmd_phys); 308 pmd = alloc_low_page(&pmd_phys);
311 spin_lock(&init_mm.page_table_lock); 309 spin_lock(&init_mm.page_table_lock);
312 set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); 310 set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
313 phys_pmd_init(pmd, addr, end); 311 phys_pmd_init(pmd, addr, end);
314 spin_unlock(&init_mm.page_table_lock); 312 spin_unlock(&init_mm.page_table_lock);
315 unmap_low_page(map); 313 unmap_low_page(pmd);
316 } 314 }
317 __flush_tlb(); 315 __flush_tlb();
318} 316}
@@ -364,7 +362,6 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
364 end = (unsigned long)__va(end); 362 end = (unsigned long)__va(end);
365 363
366 for (; start < end; start = next) { 364 for (; start < end; start = next) {
367 int map;
368 unsigned long pud_phys; 365 unsigned long pud_phys;
369 pgd_t *pgd = pgd_offset_k(start); 366 pgd_t *pgd = pgd_offset_k(start);
370 pud_t *pud; 367 pud_t *pud;
@@ -372,7 +369,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
372 if (after_bootmem) 369 if (after_bootmem)
373 pud = pud_offset(pgd, start & PGDIR_MASK); 370 pud = pud_offset(pgd, start & PGDIR_MASK);
374 else 371 else
375 pud = alloc_low_page(&map, &pud_phys); 372 pud = alloc_low_page(&pud_phys);
376 373
377 next = start + PGDIR_SIZE; 374 next = start + PGDIR_SIZE;
378 if (next > end) 375 if (next > end)
@@ -380,7 +377,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
380 phys_pud_init(pud, __pa(start), __pa(next)); 377 phys_pud_init(pud, __pa(start), __pa(next));
381 if (!after_bootmem) 378 if (!after_bootmem)
382 set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); 379 set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
383 unmap_low_page(map); 380 unmap_low_page(pud);
384 } 381 }
385 382
386 if (!after_bootmem) 383 if (!after_bootmem)
@@ -388,21 +385,6 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
388 __flush_tlb_all(); 385 __flush_tlb_all();
389} 386}
390 387
391void __cpuinit zap_low_mappings(int cpu)
392{
393 if (cpu == 0) {
394 pgd_t *pgd = pgd_offset_k(0UL);
395 pgd_clear(pgd);
396 } else {
397 /*
398 * For AP's, zap the low identity mappings by changing the cr3
399 * to init_level4_pgt and doing local flush tlb all
400 */
401 asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
402 }
403 __flush_tlb_all();
404}
405
406#ifndef CONFIG_NUMA 388#ifndef CONFIG_NUMA
407void __init paging_init(void) 389void __init paging_init(void)
408{ 390{
@@ -579,15 +561,6 @@ void __init mem_init(void)
579 reservedpages << (PAGE_SHIFT-10), 561 reservedpages << (PAGE_SHIFT-10),
580 datasize >> 10, 562 datasize >> 10,
581 initsize >> 10); 563 initsize >> 10);
582
583#ifdef CONFIG_SMP
584 /*
585 * Sync boot_level4_pgt mappings with the init_level4_pgt
586 * except for the low identity mappings which are already zapped
587 * in init_level4_pgt. This sync-up is essential for AP's bringup
588 */
589 memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
590#endif
591} 564}
592 565
593void free_init_pages(char *what, unsigned long begin, unsigned long end) 566void free_init_pages(char *what, unsigned long begin, unsigned long end)
@@ -597,21 +570,23 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
597 if (begin >= end) 570 if (begin >= end)
598 return; 571 return;
599 572
600 printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); 573 printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
601 for (addr = begin; addr < end; addr += PAGE_SIZE) { 574 for (addr = begin; addr < end; addr += PAGE_SIZE) {
602 ClearPageReserved(virt_to_page(addr)); 575 ClearPageReserved(virt_to_page(addr));
603 init_page_count(virt_to_page(addr)); 576 init_page_count(virt_to_page(addr));
604 memset((void *)(addr & ~(PAGE_SIZE-1)), 577 memset((void *)(addr & ~(PAGE_SIZE-1)),
605 POISON_FREE_INITMEM, PAGE_SIZE); 578 POISON_FREE_INITMEM, PAGE_SIZE);
579 if (addr >= __START_KERNEL_map)
580 change_page_attr_addr(addr, 1, __pgprot(0));
606 free_page(addr); 581 free_page(addr);
607 totalram_pages++; 582 totalram_pages++;
608 } 583 }
584 if (addr > __START_KERNEL_map)
585 global_flush_tlb();
609} 586}
610 587
611void free_initmem(void) 588void free_initmem(void)
612{ 589{
613 memset(__initdata_begin, POISON_FREE_INITDATA,
614 __initdata_end - __initdata_begin);
615 free_init_pages("unused kernel memory", 590 free_init_pages("unused kernel memory",
616 (unsigned long)(&__init_begin), 591 (unsigned long)(&__init_begin),
617 (unsigned long)(&__init_end)); 592 (unsigned long)(&__init_end));
@@ -621,13 +596,23 @@ void free_initmem(void)
621 596
622void mark_rodata_ro(void) 597void mark_rodata_ro(void)
623{ 598{
624 unsigned long addr = (unsigned long)__start_rodata; 599 unsigned long start = (unsigned long)_stext, end;
600
601#ifdef CONFIG_HOTPLUG_CPU
602 /* It must still be possible to apply SMP alternatives. */
603 if (num_possible_cpus() > 1)
604 start = (unsigned long)_etext;
605#endif
606 end = (unsigned long)__end_rodata;
607 start = (start + PAGE_SIZE - 1) & PAGE_MASK;
608 end &= PAGE_MASK;
609 if (end <= start)
610 return;
625 611
626 for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE) 612 change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);
627 change_page_attr_addr(addr, 1, PAGE_KERNEL_RO);
628 613
629 printk ("Write protecting the kernel read-only data: %luk\n", 614 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
630 (__end_rodata - __start_rodata) >> 10); 615 (end - start) >> 10);
631 616
632 /* 617 /*
633 * change_page_attr_addr() requires a global_flush_tlb() call after it. 618 * change_page_attr_addr() requires a global_flush_tlb() call after it.
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c
index c6e5e8d401a4..6cac90aa5032 100644
--- a/arch/x86_64/mm/ioremap.c
+++ b/arch/x86_64/mm/ioremap.c
@@ -13,12 +13,21 @@
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/io.h> 15#include <linux/io.h>
16
16#include <asm/pgalloc.h> 17#include <asm/pgalloc.h>
17#include <asm/fixmap.h> 18#include <asm/fixmap.h>
18#include <asm/tlbflush.h> 19#include <asm/tlbflush.h>
19#include <asm/cacheflush.h> 20#include <asm/cacheflush.h>
20#include <asm/proto.h> 21#include <asm/proto.h>
21 22
23unsigned long __phys_addr(unsigned long x)
24{
25 if (x >= __START_KERNEL_map)
26 return x - __START_KERNEL_map + phys_base;
27 return x - PAGE_OFFSET;
28}
29EXPORT_SYMBOL(__phys_addr);
30
22#define ISA_START_ADDRESS 0xa0000 31#define ISA_START_ADDRESS 0xa0000
23#define ISA_END_ADDRESS 0x100000 32#define ISA_END_ADDRESS 0x100000
24 33
diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c
index b5b8dba28b4e..f983c75825d0 100644
--- a/arch/x86_64/mm/k8topology.c
+++ b/arch/x86_64/mm/k8topology.c
@@ -49,11 +49,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
49 int found = 0; 49 int found = 0;
50 u32 reg; 50 u32 reg;
51 unsigned numnodes; 51 unsigned numnodes;
52 nodemask_t nodes_parsed;
53 unsigned dualcore = 0; 52 unsigned dualcore = 0;
54 53
55 nodes_clear(nodes_parsed);
56
57 if (!early_pci_allowed()) 54 if (!early_pci_allowed())
58 return -1; 55 return -1;
59 56
@@ -65,6 +62,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
65 62
66 reg = read_pci_config(0, nb, 0, 0x60); 63 reg = read_pci_config(0, nb, 0, 0x60);
67 numnodes = ((reg >> 4) & 0xF) + 1; 64 numnodes = ((reg >> 4) & 0xF) + 1;
65 if (numnodes <= 1)
66 return -1;
68 67
69 printk(KERN_INFO "Number of nodes %d\n", numnodes); 68 printk(KERN_INFO "Number of nodes %d\n", numnodes);
70 69
@@ -102,7 +101,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
102 nodeid, (base>>8)&3, (limit>>8) & 3); 101 nodeid, (base>>8)&3, (limit>>8) & 3);
103 return -1; 102 return -1;
104 } 103 }
105 if (node_isset(nodeid, nodes_parsed)) { 104 if (node_isset(nodeid, node_possible_map)) {
106 printk(KERN_INFO "Node %d already present. Skipping\n", 105 printk(KERN_INFO "Node %d already present. Skipping\n",
107 nodeid); 106 nodeid);
108 continue; 107 continue;
@@ -155,7 +154,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
155 154
156 prevbase = base; 155 prevbase = base;
157 156
158 node_set(nodeid, nodes_parsed); 157 node_set(nodeid, node_possible_map);
159 } 158 }
160 159
161 if (!found) 160 if (!found)
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 41b8fb069924..51548947ad3b 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -273,125 +273,213 @@ void __init numa_init_array(void)
273 273
274#ifdef CONFIG_NUMA_EMU 274#ifdef CONFIG_NUMA_EMU
275/* Numa emulation */ 275/* Numa emulation */
276int numa_fake __initdata = 0; 276#define E820_ADDR_HOLE_SIZE(start, end) \
277 (e820_hole_size((start) >> PAGE_SHIFT, (end) >> PAGE_SHIFT) << \
278 PAGE_SHIFT)
279char *cmdline __initdata;
277 280
278/* 281/*
279 * This function is used to find out if the start and end correspond to 282 * Setups up nid to range from addr to addr + size. If the end boundary is
280 * different zones. 283 * greater than max_addr, then max_addr is used instead. The return value is 0
284 * if there is additional memory left for allocation past addr and -1 otherwise.
285 * addr is adjusted to be at the end of the node.
281 */ 286 */
282int zone_cross_over(unsigned long start, unsigned long end) 287static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr,
288 u64 size, u64 max_addr)
283{ 289{
284 if ((start < (MAX_DMA32_PFN << PAGE_SHIFT)) && 290 int ret = 0;
285 (end >= (MAX_DMA32_PFN << PAGE_SHIFT))) 291 nodes[nid].start = *addr;
286 return 1; 292 *addr += size;
287 return 0; 293 if (*addr >= max_addr) {
294 *addr = max_addr;
295 ret = -1;
296 }
297 nodes[nid].end = *addr;
298 node_set(nid, node_possible_map);
299 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
300 nodes[nid].start, nodes[nid].end,
301 (nodes[nid].end - nodes[nid].start) >> 20);
302 return ret;
288} 303}
289 304
290static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) 305/*
306 * Splits num_nodes nodes up equally starting at node_start. The return value
307 * is the number of nodes split up and addr is adjusted to be at the end of the
308 * last node allocated.
309 */
310static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr,
311 u64 max_addr, int node_start,
312 int num_nodes)
291{ 313{
292 int i, big; 314 unsigned int big;
293 struct bootnode nodes[MAX_NUMNODES]; 315 u64 size;
294 unsigned long sz, old_sz; 316 int i;
295 unsigned long hole_size;
296 unsigned long start, end;
297 unsigned long max_addr = (end_pfn << PAGE_SHIFT);
298
299 start = (start_pfn << PAGE_SHIFT);
300 hole_size = e820_hole_size(start, max_addr);
301 sz = (max_addr - start - hole_size) / numa_fake;
302
303 /* Kludge needed for the hash function */
304
305 old_sz = sz;
306 /*
307 * Round down to the nearest FAKE_NODE_MIN_SIZE.
308 */
309 sz &= FAKE_NODE_MIN_HASH_MASK;
310 317
318 if (num_nodes <= 0)
319 return -1;
320 if (num_nodes > MAX_NUMNODES)
321 num_nodes = MAX_NUMNODES;
322 size = (max_addr - *addr - E820_ADDR_HOLE_SIZE(*addr, max_addr)) /
323 num_nodes;
311 /* 324 /*
312 * We ensure that each node is at least 64MB big. Smaller than this 325 * Calculate the number of big nodes that can be allocated as a result
313 * size can cause VM hiccups. 326 * of consolidating the leftovers.
314 */ 327 */
315 if (sz == 0) { 328 big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) /
316 printk(KERN_INFO "Not enough memory for %d nodes. Reducing " 329 FAKE_NODE_MIN_SIZE;
317 "the number of nodes\n", numa_fake); 330
318 numa_fake = (max_addr - start - hole_size) / FAKE_NODE_MIN_SIZE; 331 /* Round down to nearest FAKE_NODE_MIN_SIZE. */
319 printk(KERN_INFO "Number of fake nodes will be = %d\n", 332 size &= FAKE_NODE_MIN_HASH_MASK;
320 numa_fake); 333 if (!size) {
321 sz = FAKE_NODE_MIN_SIZE; 334 printk(KERN_ERR "Not enough memory for each node. "
335 "NUMA emulation disabled.\n");
336 return -1;
322 } 337 }
323 /* 338
324 * Find out how many nodes can get an extra NODE_MIN_SIZE granule. 339 for (i = node_start; i < num_nodes + node_start; i++) {
325 * This logic ensures the extra memory gets distributed among as many 340 u64 end = *addr + size;
326 * nodes as possible (as compared to one single node getting all that
327 * extra memory.
328 */
329 big = ((old_sz - sz) * numa_fake) / FAKE_NODE_MIN_SIZE;
330 printk(KERN_INFO "Fake node Size: %luMB hole_size: %luMB big nodes: "
331 "%d\n",
332 (sz >> 20), (hole_size >> 20), big);
333 memset(&nodes,0,sizeof(nodes));
334 end = start;
335 for (i = 0; i < numa_fake; i++) {
336 /*
337 * In case we are not able to allocate enough memory for all
338 * the nodes, we reduce the number of fake nodes.
339 */
340 if (end >= max_addr) {
341 numa_fake = i - 1;
342 break;
343 }
344 start = nodes[i].start = end;
345 /*
346 * Final node can have all the remaining memory.
347 */
348 if (i == numa_fake-1)
349 sz = max_addr - start;
350 end = nodes[i].start + sz;
351 /*
352 * Fir "big" number of nodes get extra granule.
353 */
354 if (i < big) 341 if (i < big)
355 end += FAKE_NODE_MIN_SIZE; 342 end += FAKE_NODE_MIN_SIZE;
356 /* 343 /*
357 * Iterate over the range to ensure that this node gets at 344 * The final node can have the remaining system RAM. Other
358 * least sz amount of RAM (excluding holes) 345 * nodes receive roughly the same amount of available pages.
359 */ 346 */
360 while ((end - start - e820_hole_size(start, end)) < sz) { 347 if (i == num_nodes + node_start - 1)
361 end += FAKE_NODE_MIN_SIZE; 348 end = max_addr;
362 if (end >= max_addr) 349 else
363 break; 350 while (end - *addr - E820_ADDR_HOLE_SIZE(*addr, end) <
351 size) {
352 end += FAKE_NODE_MIN_SIZE;
353 if (end > max_addr) {
354 end = max_addr;
355 break;
356 }
357 }
358 if (setup_node_range(i, nodes, addr, end - *addr, max_addr) < 0)
359 break;
360 }
361 return i - node_start + 1;
362}
363
364/*
365 * Splits the remaining system RAM into chunks of size. The remaining memory is
366 * always assigned to a final node and can be asymmetric. Returns the number of
367 * nodes split.
368 */
369static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr,
370 u64 max_addr, int node_start, u64 size)
371{
372 int i = node_start;
373 size = (size << 20) & FAKE_NODE_MIN_HASH_MASK;
374 while (!setup_node_range(i++, nodes, addr, size, max_addr))
375 ;
376 return i - node_start;
377}
378
379/*
380 * Sets up the system RAM area from start_pfn to end_pfn according to the
381 * numa=fake command-line option.
382 */
383static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
384{
385 struct bootnode nodes[MAX_NUMNODES];
386 u64 addr = start_pfn << PAGE_SHIFT;
387 u64 max_addr = end_pfn << PAGE_SHIFT;
388 int num_nodes = 0;
389 int coeff_flag;
390 int coeff = -1;
391 int num = 0;
392 u64 size;
393 int i;
394
395 memset(&nodes, 0, sizeof(nodes));
396 /*
397 * If the numa=fake command-line is just a single number N, split the
398 * system RAM into N fake nodes.
399 */
400 if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) {
401 num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0,
402 simple_strtol(cmdline, NULL, 0));
403 if (num_nodes < 0)
404 return num_nodes;
405 goto out;
406 }
407
408 /* Parse the command line. */
409 for (coeff_flag = 0; ; cmdline++) {
410 if (*cmdline && isdigit(*cmdline)) {
411 num = num * 10 + *cmdline - '0';
412 continue;
364 } 413 }
365 /* 414 if (*cmdline == '*') {
366 * Look at the next node to make sure there is some real memory 415 if (num > 0)
367 * to map. Bad things happen when the only memory present 416 coeff = num;
368 * in a zone on a fake node is IO hole. 417 coeff_flag = 1;
369 */ 418 }
370 while (e820_hole_size(end, end + FAKE_NODE_MIN_SIZE) > 0) { 419 if (!*cmdline || *cmdline == ',') {
371 if (zone_cross_over(start, end + sz)) { 420 if (!coeff_flag)
372 end = (MAX_DMA32_PFN << PAGE_SHIFT); 421 coeff = 1;
422 /*
423 * Round down to the nearest FAKE_NODE_MIN_SIZE.
424 * Command-line coefficients are in megabytes.
425 */
426 size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK;
427 if (size)
428 for (i = 0; i < coeff; i++, num_nodes++)
429 if (setup_node_range(num_nodes, nodes,
430 &addr, size, max_addr) < 0)
431 goto done;
432 if (!*cmdline)
373 break; 433 break;
374 } 434 coeff_flag = 0;
375 if (end >= max_addr) 435 coeff = -1;
436 }
437 num = 0;
438 }
439done:
440 if (!num_nodes)
441 return -1;
442 /* Fill remainder of system RAM, if appropriate. */
443 if (addr < max_addr) {
444 if (coeff_flag && coeff < 0) {
445 /* Split remaining nodes into num-sized chunks */
446 num_nodes += split_nodes_by_size(nodes, &addr, max_addr,
447 num_nodes, num);
448 goto out;
449 }
450 switch (*(cmdline - 1)) {
451 case '*':
452 /* Split remaining nodes into coeff chunks */
453 if (coeff <= 0)
376 break; 454 break;
377 end += FAKE_NODE_MIN_SIZE; 455 num_nodes += split_nodes_equally(nodes, &addr, max_addr,
456 num_nodes, coeff);
457 break;
458 case ',':
459 /* Do not allocate remaining system RAM */
460 break;
461 default:
462 /* Give one final node */
463 setup_node_range(num_nodes, nodes, &addr,
464 max_addr - addr, max_addr);
465 num_nodes++;
378 } 466 }
379 if (end > max_addr) 467 }
380 end = max_addr; 468out:
381 nodes[i].end = end; 469 memnode_shift = compute_hash_shift(nodes, num_nodes);
382 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", 470 if (memnode_shift < 0) {
383 i, 471 memnode_shift = 0;
384 nodes[i].start, nodes[i].end, 472 printk(KERN_ERR "No NUMA hash function found. NUMA emulation "
385 (nodes[i].end - nodes[i].start) >> 20); 473 "disabled.\n");
386 node_set_online(i); 474 return -1;
387 } 475 }
388 memnode_shift = compute_hash_shift(nodes, numa_fake); 476
389 if (memnode_shift < 0) { 477 /*
390 memnode_shift = 0; 478 * We need to vacate all active ranges that may have been registered by
391 printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n"); 479 * SRAT.
392 return -1; 480 */
393 } 481 remove_all_active_ranges();
394 for_each_online_node(i) { 482 for_each_node_mask(i, node_possible_map) {
395 e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, 483 e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
396 nodes[i].end >> PAGE_SHIFT); 484 nodes[i].end >> PAGE_SHIFT);
397 setup_node_bootmem(i, nodes[i].start, nodes[i].end); 485 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
@@ -399,26 +487,32 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
399 numa_init_array(); 487 numa_init_array();
400 return 0; 488 return 0;
401} 489}
402#endif 490#undef E820_ADDR_HOLE_SIZE
491#endif /* CONFIG_NUMA_EMU */
403 492
404void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) 493void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
405{ 494{
406 int i; 495 int i;
407 496
497 nodes_clear(node_possible_map);
498
408#ifdef CONFIG_NUMA_EMU 499#ifdef CONFIG_NUMA_EMU
409 if (numa_fake && !numa_emulation(start_pfn, end_pfn)) 500 if (cmdline && !numa_emulation(start_pfn, end_pfn))
410 return; 501 return;
502 nodes_clear(node_possible_map);
411#endif 503#endif
412 504
413#ifdef CONFIG_ACPI_NUMA 505#ifdef CONFIG_ACPI_NUMA
414 if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, 506 if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
415 end_pfn << PAGE_SHIFT)) 507 end_pfn << PAGE_SHIFT))
416 return; 508 return;
509 nodes_clear(node_possible_map);
417#endif 510#endif
418 511
419#ifdef CONFIG_K8_NUMA 512#ifdef CONFIG_K8_NUMA
420 if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT)) 513 if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT))
421 return; 514 return;
515 nodes_clear(node_possible_map);
422#endif 516#endif
423 printk(KERN_INFO "%s\n", 517 printk(KERN_INFO "%s\n",
424 numa_off ? "NUMA turned off" : "No NUMA configuration found"); 518 numa_off ? "NUMA turned off" : "No NUMA configuration found");
@@ -432,6 +526,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
432 memnodemap[0] = 0; 526 memnodemap[0] = 0;
433 nodes_clear(node_online_map); 527 nodes_clear(node_online_map);
434 node_set_online(0); 528 node_set_online(0);
529 node_set(0, node_possible_map);
435 for (i = 0; i < NR_CPUS; i++) 530 for (i = 0; i < NR_CPUS; i++)
436 numa_set_node(i, 0); 531 numa_set_node(i, 0);
437 node_to_cpumask[0] = cpumask_of_cpu(0); 532 node_to_cpumask[0] = cpumask_of_cpu(0);
@@ -486,11 +581,8 @@ static __init int numa_setup(char *opt)
486 if (!strncmp(opt,"off",3)) 581 if (!strncmp(opt,"off",3))
487 numa_off = 1; 582 numa_off = 1;
488#ifdef CONFIG_NUMA_EMU 583#ifdef CONFIG_NUMA_EMU
489 if(!strncmp(opt, "fake=", 5)) { 584 if (!strncmp(opt, "fake=", 5))
490 numa_fake = simple_strtoul(opt+5,NULL,0); ; 585 cmdline = opt + 5;
491 if (numa_fake >= MAX_NUMNODES)
492 numa_fake = MAX_NUMNODES;
493 }
494#endif 586#endif
495#ifdef CONFIG_ACPI_NUMA 587#ifdef CONFIG_ACPI_NUMA
496 if (!strncmp(opt,"noacpi",6)) 588 if (!strncmp(opt,"noacpi",6))
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 081409aa3452..d653d0bf3df6 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -180,16 +180,24 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
180 */ 180 */
181int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot) 181int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
182{ 182{
183 int err = 0; 183 int err = 0, kernel_map = 0;
184 int i; 184 int i;
185 185
186 if (address >= __START_KERNEL_map
187 && address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
188 address = (unsigned long)__va(__pa(address));
189 kernel_map = 1;
190 }
191
186 down_write(&init_mm.mmap_sem); 192 down_write(&init_mm.mmap_sem);
187 for (i = 0; i < numpages; i++, address += PAGE_SIZE) { 193 for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
188 unsigned long pfn = __pa(address) >> PAGE_SHIFT; 194 unsigned long pfn = __pa(address) >> PAGE_SHIFT;
189 195
190 err = __change_page_attr(address, pfn, prot, PAGE_KERNEL); 196 if (!kernel_map || pte_present(pfn_pte(0, prot))) {
191 if (err) 197 err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
192 break; 198 if (err)
199 break;
200 }
193 /* Handle kernel mapping too which aliases part of the 201 /* Handle kernel mapping too which aliases part of the
194 * lowmem */ 202 * lowmem */
195 if (__pa(address) < KERNEL_TEXT_SIZE) { 203 if (__pa(address) < KERNEL_TEXT_SIZE) {
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index 2efe215fc76a..1e76bb0a7277 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -419,19 +419,21 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
419 return -1; 419 return -1;
420 } 420 }
421 421
422 node_possible_map = nodes_parsed;
423
422 /* Finally register nodes */ 424 /* Finally register nodes */
423 for_each_node_mask(i, nodes_parsed) 425 for_each_node_mask(i, node_possible_map)
424 setup_node_bootmem(i, nodes[i].start, nodes[i].end); 426 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
425 /* Try again in case setup_node_bootmem missed one due 427 /* Try again in case setup_node_bootmem missed one due
426 to missing bootmem */ 428 to missing bootmem */
427 for_each_node_mask(i, nodes_parsed) 429 for_each_node_mask(i, node_possible_map)
428 if (!node_online(i)) 430 if (!node_online(i))
429 setup_node_bootmem(i, nodes[i].start, nodes[i].end); 431 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
430 432
431 for (i = 0; i < NR_CPUS; i++) { 433 for (i = 0; i < NR_CPUS; i++) {
432 if (cpu_to_node[i] == NUMA_NO_NODE) 434 if (cpu_to_node[i] == NUMA_NO_NODE)
433 continue; 435 continue;
434 if (!node_isset(cpu_to_node[i], nodes_parsed)) 436 if (!node_isset(cpu_to_node[i], node_possible_map))
435 numa_set_node(i, NUMA_NO_NODE); 437 numa_set_node(i, NUMA_NO_NODE);
436 } 438 }
437 numa_init_array(); 439 numa_init_array();