aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/Kconfig135
-rw-r--r--arch/x86/Kconfig.debug9
-rw-r--r--arch/x86/Makefile19
-rw-r--r--arch/x86/boot/compressed/misc.c4
-rw-r--r--arch/x86/boot/memory.c3
-rw-r--r--arch/x86/kernel/Makefile4
-rw-r--r--arch/x86/kernel/acpi/boot.c407
-rw-r--r--arch/x86/kernel/aperture_64.c2
-rw-r--r--arch/x86/kernel/apic_32.c12
-rw-r--r--arch/x86/kernel/apic_64.c6
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c38
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c901
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.h3
-rw-r--r--arch/x86/kernel/e820.c (renamed from arch/x86/kernel/e820_64.c)1223
-rw-r--r--arch/x86/kernel/e820_32.c775
-rw-r--r--arch/x86/kernel/efi.c59
-rw-r--r--arch/x86/kernel/efi_64.c8
-rw-r--r--arch/x86/kernel/genapic_64.c2
-rw-r--r--arch/x86/kernel/head.c73
-rw-r--r--arch/x86/kernel/head32.c27
-rw-r--r--arch/x86/kernel/head64.c68
-rw-r--r--arch/x86/kernel/head_32.S6
-rw-r--r--arch/x86/kernel/io_apic_32.c127
-rw-r--r--arch/x86/kernel/io_apic_64.c82
-rw-r--r--arch/x86/kernel/mpparse.c827
-rw-r--r--arch/x86/kernel/numaq_32.c25
-rw-r--r--arch/x86/kernel/setup.c26
-rw-r--r--arch/x86/kernel/setup_32.c556
-rw-r--r--arch/x86/kernel/setup_64.c52
-rw-r--r--arch/x86/kernel/smpboot.c17
-rw-r--r--arch/x86/kernel/srat_32.c191
-rw-r--r--arch/x86/kernel/summit_32.c2
-rw-r--r--arch/x86/kernel/trampoline.c2
-rw-r--r--arch/x86/lguest/boot.c7
-rw-r--r--arch/x86/mach-default/setup.c42
-rw-r--r--arch/x86/mach-es7000/Makefile1
-rw-r--r--arch/x86/mach-es7000/es7000plat.c49
-rw-r--r--arch/x86/mach-generic/Makefile10
-rw-r--r--arch/x86/mach-generic/bigsmp.c4
-rw-r--r--arch/x86/mach-generic/numaq.c41
-rw-r--r--arch/x86/mach-generic/probe.c15
-rw-r--r--arch/x86/mach-visws/mpparse.c7
-rw-r--r--arch/x86/mach-visws/setup.c6
-rw-r--r--arch/x86/mach-voyager/setup.c37
-rw-r--r--arch/x86/mach-voyager/voyager_smp.c14
-rw-r--r--arch/x86/mm/discontig_32.c186
-rw-r--r--arch/x86/mm/init_32.c86
-rw-r--r--arch/x86/mm/init_64.c30
-rw-r--r--arch/x86/mm/k8topology_64.c4
-rw-r--r--arch/x86/mm/numa_64.c2
-rw-r--r--arch/x86/pci/Makefile_325
-rw-r--r--arch/x86/pci/amd_bus.c4
-rw-r--r--arch/x86/pci/numa.c29
-rw-r--r--arch/x86/xen/enlighten.c3
-rw-r--r--arch/x86/xen/setup.c4
55 files changed, 3588 insertions, 2689 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7dc46ba26fb..640dc62a7fa 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -230,6 +230,27 @@ config SMP
230 230
231 If you don't know what to do here, say N. 231 If you don't know what to do here, say N.
232 232
233config X86_FIND_SMP_CONFIG
234 def_bool y
235 depends on X86_MPPARSE || X86_VOYAGER || X86_VISWS
236 depends on X86_32
237
238if ACPI
239config X86_MPPARSE
240 def_bool y
241 bool "Enable MPS table"
242 depends on X86_LOCAL_APIC && !X86_VISWS
243 help
244 For old SMP systems that do not have proper ACPI support. On newer systems
245 (especially those with 64-bit CPUs) with ACPI support, the MADT and DSDT will override it.
246endif
247
248if !ACPI
249config X86_MPPARSE
250 def_bool y
251 depends on X86_LOCAL_APIC && !X86_VISWS
252endif
253
233choice 254choice
234 prompt "Subarchitecture Type" 255 prompt "Subarchitecture Type"
235 default X86_PC 256 default X86_PC
@@ -261,36 +282,6 @@ config X86_VOYAGER
261 If you do not specifically know you have a Voyager based machine, 282 If you do not specifically know you have a Voyager based machine,
262 say N here, otherwise the kernel you build will not be bootable. 283 say N here, otherwise the kernel you build will not be bootable.
263 284
264config X86_NUMAQ
265 bool "NUMAQ (IBM/Sequent)"
266 depends on SMP && X86_32 && PCI
267 select NUMA
268 help
269 This option is used for getting Linux to run on a (IBM/Sequent) NUMA
270 multiquad box. This changes the way that processors are bootstrapped,
271 and uses Clustered Logical APIC addressing mode instead of Flat Logical.
272 You will need a new lynxer.elf file to flash your firmware with - send
273 email to <Martin.Bligh@us.ibm.com>.
274
275config X86_SUMMIT
276 bool "Summit/EXA (IBM x440)"
277 depends on X86_32 && SMP
278 help
279 This option is needed for IBM systems that use the Summit/EXA chipset.
280 In particular, it is needed for the x440.
281
282 If you don't have one of these computers, you should say N here.
283 If you want to build a NUMA kernel, you must select ACPI.
284
285config X86_BIGSMP
286 bool "Support for other sub-arch SMP systems with more than 8 CPUs"
287 depends on X86_32 && SMP
288 help
289 This option is needed for the systems that have more than 8 CPUs
290 and if the system is not of any sub-arch type above.
291
292 If you don't have such a system, you should say N here.
293
294config X86_VISWS 285config X86_VISWS
295 bool "SGI 320/540 (Visual Workstation)" 286 bool "SGI 320/540 (Visual Workstation)"
296 depends on X86_32 && !PCI 287 depends on X86_32 && !PCI
@@ -304,12 +295,33 @@ config X86_VISWS
304 and vice versa. See <file:Documentation/sgi-visws.txt> for details. 295 and vice versa. See <file:Documentation/sgi-visws.txt> for details.
305 296
306config X86_GENERICARCH 297config X86_GENERICARCH
307 bool "Generic architecture (Summit, bigsmp, ES7000, default)" 298 bool "Generic architecture"
308 depends on X86_32 299 depends on X86_32
309 help 300 help
310 This option compiles in the Summit, bigsmp, ES7000, default subarchitectures. 301 This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default
311 It is intended for a generic binary kernel. 302 subarchitectures. It is intended for a generic binary kernel.
312 If you want a NUMA kernel, select ACPI. We need SRAT for NUMA. 303 If you select them all, the kernel will probe them one by one and will
304 fall back to the default.
305
306if X86_GENERICARCH
307
308config X86_NUMAQ
309 bool "NUMAQ (IBM/Sequent)"
310 depends on SMP && X86_32 && PCI && X86_MPPARSE
311 select NUMA
312 help
313 This option is used for getting Linux to run on a NUMAQ (IBM/Sequent)
314 NUMA multiquad box. This changes the way that processors are
315 bootstrapped, and uses Clustered Logical APIC addressing mode instead
316 of Flat Logical. You will need a new lynxer.elf file to flash your
317 firmware with - send email to <Martin.Bligh@us.ibm.com>.
318
319config X86_SUMMIT
320 bool "Summit/EXA (IBM x440)"
321 depends on X86_32 && SMP
322 help
323 This option is needed for IBM systems that use the Summit/EXA chipset.
324 In particular, it is needed for the x440.
313 325
314config X86_ES7000 326config X86_ES7000
315 bool "Support for Unisys ES7000 IA32 series" 327 bool "Support for Unisys ES7000 IA32 series"
@@ -317,8 +329,15 @@ config X86_ES7000
317 help 329 help
318 Support for Unisys ES7000 systems. Say 'Y' here if this kernel is 330 Support for Unisys ES7000 systems. Say 'Y' here if this kernel is
319 supposed to run on an IA32-based Unisys ES7000 system. 331 supposed to run on an IA32-based Unisys ES7000 system.
320 Only choose this option if you have such a system, otherwise you 332
321 should say N here. 333config X86_BIGSMP
334 bool "Support for big SMP systems with more than 8 CPUs"
335 depends on X86_32 && SMP
336 help
337 This option is needed for the systems that have more than 8 CPUs
338 and if the system is not of any sub-arch type above.
339
340endif
322 341
323config X86_RDC321X 342config X86_RDC321X
324 bool "RDC R-321x SoC" 343 bool "RDC R-321x SoC"
@@ -432,7 +451,7 @@ config MEMTEST
432 451
433config ACPI_SRAT 452config ACPI_SRAT
434 def_bool y 453 def_bool y
435 depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH) 454 depends on X86_32 && ACPI && NUMA && X86_GENERICARCH
436 select ACPI_NUMA 455 select ACPI_NUMA
437 456
438config HAVE_ARCH_PARSE_SRAT 457config HAVE_ARCH_PARSE_SRAT
@@ -441,11 +460,11 @@ config HAVE_ARCH_PARSE_SRAT
441 460
442config X86_SUMMIT_NUMA 461config X86_SUMMIT_NUMA
443 def_bool y 462 def_bool y
444 depends on X86_32 && NUMA && (X86_SUMMIT || X86_GENERICARCH) 463 depends on X86_32 && NUMA && X86_GENERICARCH
445 464
446config X86_CYCLONE_TIMER 465config X86_CYCLONE_TIMER
447 def_bool y 466 def_bool y
448 depends on X86_32 && X86_SUMMIT || X86_GENERICARCH 467 depends on X86_GENERICARCH
449 468
450config ES7000_CLUSTERED_APIC 469config ES7000_CLUSTERED_APIC
451 def_bool y 470 def_bool y
@@ -910,9 +929,9 @@ config X86_PAE
910config NUMA 929config NUMA
911 bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" 930 bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
912 depends on SMP 931 depends on SMP
913 depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) && EXPERIMENTAL) 932 depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL)
914 default n if X86_PC 933 default n if X86_PC
915 default y if (X86_NUMAQ || X86_SUMMIT) 934 default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
916 help 935 help
917 Enable NUMA (Non Uniform Memory Access) support. 936 Enable NUMA (Non Uniform Memory Access) support.
918 The kernel will try to allocate memory used by a CPU on the 937 The kernel will try to allocate memory used by a CPU on the
@@ -1089,6 +1108,40 @@ config MTRR
1089 1108
1090 See <file:Documentation/mtrr.txt> for more information. 1109 See <file:Documentation/mtrr.txt> for more information.
1091 1110
1111config MTRR_SANITIZER
1112 def_bool y
1113 prompt "MTRR cleanup support"
1114 depends on MTRR
1115 help
1116 Convert the MTRR layout from continuous to discrete, so that X drivers
1117 can add WB entries.
1118
1119 Say N here if you see bootup problems (boot crash, boot hang,
1120 spontaneous reboots).
1121
1122 Can be disabled with disable_mtrr_cleanup. Also, mtrr_chunk_size
1123 can be used to set the largest MTRR entry size for a continuous block
1124 that holds holes (i.e. UC entries).
1125
1126 If unsure, say Y.
1127
1128config MTRR_SANITIZER_ENABLE_DEFAULT
1129 int "MTRR cleanup enable value (0-1)"
1130 range 0 1
1131 default "0"
1132 depends on MTRR_SANITIZER
1133 help
1134 Default value for enabling MTRR cleanup.
1135
1136config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
1137 int "MTRR cleanup spare reg num (0-7)"
1138 range 0 7
1139 default "1"
1140 depends on MTRR_SANITIZER
1141 help
1142 Default number of spare MTRR entries for cleanup; it can be changed via
1143 mtrr_spare_reg_nr=
1144
1092config X86_PAT 1145config X86_PAT
1093 bool 1146 bool
1094 prompt "x86 PAT support" 1147 prompt "x86 PAT support"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 38a15333f72..f0684bb74fa 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -137,15 +137,6 @@ config 4KSTACKS
137 on the VM subsystem for higher order allocations. This option 137 on the VM subsystem for higher order allocations. This option
138 will also use IRQ stacks to compensate for the reduced stackspace. 138 will also use IRQ stacks to compensate for the reduced stackspace.
139 139
140config X86_FIND_SMP_CONFIG
141 def_bool y
142 depends on X86_LOCAL_APIC || X86_VOYAGER
143 depends on X86_32
144
145config X86_MPPARSE
146 def_bool y
147 depends on (X86_32 && (X86_LOCAL_APIC && !X86_VISWS)) || X86_64
148
149config DOUBLEFAULT 140config DOUBLEFAULT
150 default y 141 default y
151 bool "Enable doublefault exception handler" if EMBEDDED 142 bool "Enable doublefault exception handler" if EMBEDDED
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5df0d1e330b..b03d24b44bf 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -117,29 +117,11 @@ mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/
117mflags-$(CONFIG_X86_VISWS) := -Iinclude/asm-x86/mach-visws 117mflags-$(CONFIG_X86_VISWS) := -Iinclude/asm-x86/mach-visws
118mcore-$(CONFIG_X86_VISWS) := arch/x86/mach-visws/ 118mcore-$(CONFIG_X86_VISWS) := arch/x86/mach-visws/
119 119
120# NUMAQ subarch support
121mflags-$(CONFIG_X86_NUMAQ) := -Iinclude/asm-x86/mach-numaq
122mcore-$(CONFIG_X86_NUMAQ) := arch/x86/mach-default/
123
124# BIGSMP subarch support
125mflags-$(CONFIG_X86_BIGSMP) := -Iinclude/asm-x86/mach-bigsmp
126mcore-$(CONFIG_X86_BIGSMP) := arch/x86/mach-default/
127
128#Summit subarch support
129mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-x86/mach-summit
130mcore-$(CONFIG_X86_SUMMIT) := arch/x86/mach-default/
131
132# generic subarchitecture 120# generic subarchitecture
133mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic 121mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic
134fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ 122fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/
135mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/ 123mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/
136 124
137
138# ES7000 subarch support
139mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-x86/mach-es7000
140fcore-$(CONFIG_X86_ES7000) := arch/x86/mach-es7000/
141mcore-$(CONFIG_X86_ES7000) := arch/x86/mach-default/
142
143# RDC R-321x subarch support 125# RDC R-321x subarch support
144mflags-$(CONFIG_X86_RDC321X) := -Iinclude/asm-x86/mach-rdc321x 126mflags-$(CONFIG_X86_RDC321X) := -Iinclude/asm-x86/mach-rdc321x
145mcore-$(CONFIG_X86_RDC321X) := arch/x86/mach-default/ 127mcore-$(CONFIG_X86_RDC321X) := arch/x86/mach-default/
@@ -160,6 +142,7 @@ KBUILD_AFLAGS += $(mflags-y)
160 142
161head-y := arch/x86/kernel/head_$(BITS).o 143head-y := arch/x86/kernel/head_$(BITS).o
162head-y += arch/x86/kernel/head$(BITS).o 144head-y += arch/x86/kernel/head$(BITS).o
145head-y += arch/x86/kernel/head.o
163head-y += arch/x86/kernel/init_task.o 146head-y += arch/x86/kernel/init_task.o
164 147
165libs-y += arch/x86/lib/ 148libs-y += arch/x86/lib/
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 11629e903aa..bc5553b496f 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -218,10 +218,6 @@ static char *vidmem;
218static int vidport; 218static int vidport;
219static int lines, cols; 219static int lines, cols;
220 220
221#ifdef CONFIG_X86_NUMAQ
222void *xquad_portio;
223#endif
224
225#include "../../../../lib/inflate.c" 221#include "../../../../lib/inflate.c"
226 222
227static void *malloc(int size) 223static void *malloc(int size)
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index acad32eb429..53165c97336 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -13,6 +13,7 @@
13 */ 13 */
14 14
15#include "boot.h" 15#include "boot.h"
16#include <linux/kernel.h>
16 17
17#define SMAP 0x534d4150 /* ASCII "SMAP" */ 18#define SMAP 0x534d4150 /* ASCII "SMAP" */
18 19
@@ -53,7 +54,7 @@ static int detect_memory_e820(void)
53 54
54 count++; 55 count++;
55 desc++; 56 desc++;
56 } while (next && count < E820MAX); 57 } while (next && count < ARRAY_SIZE(boot_params.e820_map));
57 58
58 return boot_params.e820_entries = count; 59 return boot_params.e820_entries = count;
59} 60}
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 53557cbe4bf..d1d4ee89527 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -2,7 +2,7 @@
2# Makefile for the linux kernel. 2# Makefile for the linux kernel.
3# 3#
4 4
5extra-y := head_$(BITS).o head$(BITS).o init_task.o vmlinux.lds 5extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinux.lds
6 6
7CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) 7CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
8 8
@@ -22,7 +22,7 @@ obj-y += setup_$(BITS).o i8259.o irqinit_$(BITS).o setup.o
22obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 22obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
23obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o 23obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
24obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o 24obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o
25obj-y += bootflag.o e820_$(BITS).o 25obj-y += bootflag.o e820.o
26obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o 26obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
27obj-y += alternative.o i8253.o pci-nommu.o 27obj-y += alternative.o i8253.o pci-nommu.o
28obj-y += tsc_$(BITS).o io_delay.o rtc.o 28obj-y += tsc_$(BITS).o io_delay.o rtc.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ff1a7b49a46..6516359922b 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -83,6 +83,8 @@ int acpi_lapic;
83int acpi_ioapic; 83int acpi_ioapic;
84int acpi_strict; 84int acpi_strict;
85 85
86static int disable_irq0_through_ioapic __initdata;
87
86u8 acpi_sci_flags __initdata; 88u8 acpi_sci_flags __initdata;
87int acpi_sci_override_gsi __initdata; 89int acpi_sci_override_gsi __initdata;
88int acpi_skip_timer_override __initdata; 90int acpi_skip_timer_override __initdata;
@@ -338,8 +340,6 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
338 340
339#ifdef CONFIG_X86_IO_APIC 341#ifdef CONFIG_X86_IO_APIC
340 342
341struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
342
343static int __init 343static int __init
344acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) 344acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
345{ 345{
@@ -858,6 +858,372 @@ static int __init acpi_parse_madt_lapic_entries(void)
858#endif /* CONFIG_X86_LOCAL_APIC */ 858#endif /* CONFIG_X86_LOCAL_APIC */
859 859
860#ifdef CONFIG_X86_IO_APIC 860#ifdef CONFIG_X86_IO_APIC
861#define MP_ISA_BUS 0
862
863#ifdef CONFIG_X86_ES7000
864extern int es7000_plat;
865#endif
866
867static struct {
868 int apic_id;
869 int gsi_base;
870 int gsi_end;
871 DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
872} mp_ioapic_routing[MAX_IO_APICS];
873
874static int mp_find_ioapic(int gsi)
875{
876 int i = 0;
877
878 /* Find the IOAPIC that manages this GSI. */
879 for (i = 0; i < nr_ioapics; i++) {
880 if ((gsi >= mp_ioapic_routing[i].gsi_base)
881 && (gsi <= mp_ioapic_routing[i].gsi_end))
882 return i;
883 }
884
885 printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
886 return -1;
887}
888
889static u8 __init uniq_ioapic_id(u8 id)
890{
891#ifdef CONFIG_X86_32
892 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
893 !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
894 return io_apic_get_unique_id(nr_ioapics, id);
895 else
896 return id;
897#else
898 int i;
899 DECLARE_BITMAP(used, 256);
900 bitmap_zero(used, 256);
901 for (i = 0; i < nr_ioapics; i++) {
902 struct mp_config_ioapic *ia = &mp_ioapics[i];
903 __set_bit(ia->mp_apicid, used);
904 }
905 if (!test_bit(id, used))
906 return id;
907 return find_first_zero_bit(used, 256);
908#endif
909}
910
911static int bad_ioapic(unsigned long address)
912{
913 if (nr_ioapics >= MAX_IO_APICS) {
914 printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
915 "(found %d)\n", MAX_IO_APICS, nr_ioapics);
916 panic("Recompile kernel with bigger MAX_IO_APICS!\n");
917 }
918 if (!address) {
919 printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
920 " found in table, skipping!\n");
921 return 1;
922 }
923 return 0;
924}
925
926void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
927{
928 int idx = 0;
929
930 if (bad_ioapic(address))
931 return;
932
933 idx = nr_ioapics;
934
935 mp_ioapics[idx].mp_type = MP_IOAPIC;
936 mp_ioapics[idx].mp_flags = MPC_APIC_USABLE;
937 mp_ioapics[idx].mp_apicaddr = address;
938
939 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
940 mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id);
941#ifdef CONFIG_X86_32
942 mp_ioapics[idx].mp_apicver = io_apic_get_version(idx);
943#else
944 mp_ioapics[idx].mp_apicver = 0;
945#endif
946 /*
947 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
948 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
949 */
950 mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid;
951 mp_ioapic_routing[idx].gsi_base = gsi_base;
952 mp_ioapic_routing[idx].gsi_end = gsi_base +
953 io_apic_get_redir_entries(idx);
954
955 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
956 "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid,
957 mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr,
958 mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
959
960 nr_ioapics++;
961}
962
963static void assign_to_mp_irq(struct mp_config_intsrc *m,
964 struct mp_config_intsrc *mp_irq)
965{
966 memcpy(mp_irq, m, sizeof(struct mp_config_intsrc));
967}
968
969static int mp_irq_cmp(struct mp_config_intsrc *mp_irq,
970 struct mp_config_intsrc *m)
971{
972 return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc));
973}
974
975static void save_mp_irq(struct mp_config_intsrc *m)
976{
977 int i;
978
979 for (i = 0; i < mp_irq_entries; i++) {
980 if (!mp_irq_cmp(&mp_irqs[i], m))
981 return;
982 }
983
984 assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
985 if (++mp_irq_entries == MAX_IRQ_SOURCES)
986 panic("Max # of irq sources exceeded!!\n");
987}
988
989void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
990{
991 int ioapic;
992 int pin;
993 struct mp_config_intsrc mp_irq;
994
995 /* Skip the 8254 timer interrupt (IRQ 0) if requested. */
996 if (bus_irq == 0 && disable_irq0_through_ioapic)
997 return;
998
999 /*
1000 * Convert 'gsi' to 'ioapic.pin'.
1001 */
1002 ioapic = mp_find_ioapic(gsi);
1003 if (ioapic < 0)
1004 return;
1005 pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
1006
1007 /*
1008 * TBD: This check is for faulty timer entries, where the override
1009 * erroneously sets the trigger to level, resulting in a HUGE
1010 * increase of timer interrupts!
1011 */
1012 if ((bus_irq == 0) && (trigger == 3))
1013 trigger = 1;
1014
1015 mp_irq.mp_type = MP_INTSRC;
1016 mp_irq.mp_irqtype = mp_INT;
1017 mp_irq.mp_irqflag = (trigger << 2) | polarity;
1018 mp_irq.mp_srcbus = MP_ISA_BUS;
1019 mp_irq.mp_srcbusirq = bus_irq; /* IRQ */
1020 mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */
1021 mp_irq.mp_dstirq = pin; /* INTIN# */
1022
1023 save_mp_irq(&mp_irq);
1024}
1025
1026void __init mp_config_acpi_legacy_irqs(void)
1027{
1028 int i;
1029 int ioapic;
1030 unsigned int dstapic;
1031 struct mp_config_intsrc mp_irq;
1032
1033#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
1034 /*
1035 * Fabricate the legacy ISA bus (bus #0).
1036 */
1037 mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
1038#endif
1039 set_bit(MP_ISA_BUS, mp_bus_not_pci);
1040 Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
1041
1042#ifdef CONFIG_X86_ES7000
1043 /*
1044 * Older generations of ES7000 have no legacy identity mappings
1045 */
1046 if (es7000_plat == 1)
1047 return;
1048#endif
1049
1050 /*
1051 * Locate the IOAPIC that manages the ISA IRQs (0-15).
1052 */
1053 ioapic = mp_find_ioapic(0);
1054 if (ioapic < 0)
1055 return;
1056 dstapic = mp_ioapics[ioapic].mp_apicid;
1057
1058 /*
1059 * Use the default configuration for the IRQs 0-15. Unless
1060 * overridden by (MADT) interrupt source override entries.
1061 */
1062 for (i = 0; i < 16; i++) {
1063 int idx;
1064
1065 /* Skip the 8254 timer interrupt (IRQ 0) if requested. */
1066 if (i == 0 && disable_irq0_through_ioapic)
1067 continue;
1068
1069 for (idx = 0; idx < mp_irq_entries; idx++) {
1070 struct mp_config_intsrc *irq = mp_irqs + idx;
1071
1072 /* Do we already have a mapping for this ISA IRQ? */
1073 if (irq->mp_srcbus == MP_ISA_BUS
1074 && irq->mp_srcbusirq == i)
1075 break;
1076
1077 /* Do we already have a mapping for this IOAPIC pin */
1078 if (irq->mp_dstapic == dstapic &&
1079 irq->mp_dstirq == i)
1080 break;
1081 }
1082
1083 if (idx != mp_irq_entries) {
1084 printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
1085 continue; /* IRQ already used */
1086 }
1087
1088 mp_irq.mp_type = MP_INTSRC;
1089 mp_irq.mp_irqflag = 0; /* Conforming */
1090 mp_irq.mp_srcbus = MP_ISA_BUS;
1091 mp_irq.mp_dstapic = dstapic;
1092 mp_irq.mp_irqtype = mp_INT;
1093 mp_irq.mp_srcbusirq = i; /* Identity mapped */
1094 mp_irq.mp_dstirq = i;
1095
1096 save_mp_irq(&mp_irq);
1097 }
1098}
1099
1100int mp_register_gsi(u32 gsi, int triggering, int polarity)
1101{
1102 int ioapic;
1103 int ioapic_pin;
1104#ifdef CONFIG_X86_32
1105#define MAX_GSI_NUM 4096
1106#define IRQ_COMPRESSION_START 64
1107
1108 static int pci_irq = IRQ_COMPRESSION_START;
1109 /*
1110 * Mapping between Global System Interrupts, which
1111 * represent all possible interrupts, and IRQs
1112 * assigned to actual devices.
1113 */
1114 static int gsi_to_irq[MAX_GSI_NUM];
1115#else
1116
1117 if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
1118 return gsi;
1119#endif
1120
1121 /* Don't set up the ACPI SCI because it's already set up */
1122 if (acpi_gbl_FADT.sci_interrupt == gsi)
1123 return gsi;
1124
1125 ioapic = mp_find_ioapic(gsi);
1126 if (ioapic < 0) {
1127 printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
1128 return gsi;
1129 }
1130
1131 ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
1132
1133#ifdef CONFIG_X86_32
1134 if (ioapic_renumber_irq)
1135 gsi = ioapic_renumber_irq(ioapic, gsi);
1136#endif
1137
1138 /*
1139 * Avoid pin reprogramming. PRTs typically include entries
1140 * with redundant pin->gsi mappings (but unique PCI devices);
1141 * we only program the IOAPIC on the first.
1142 */
1143 if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
1144 printk(KERN_ERR "Invalid reference to IOAPIC pin "
1145 "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
1146 ioapic_pin);
1147 return gsi;
1148 }
1149 if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
1150 Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
1151 mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
1152#ifdef CONFIG_X86_32
1153 return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
1154#else
1155 return gsi;
1156#endif
1157 }
1158
1159 set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
1160#ifdef CONFIG_X86_32
1161 /*
1162 * For GSI >= 64, use IRQ compression
1163 */
1164 if ((gsi >= IRQ_COMPRESSION_START)
1165 && (triggering == ACPI_LEVEL_SENSITIVE)) {
1166 /*
1167 * For PCI devices assign IRQs in order, avoiding gaps
1168 * due to unused I/O APIC pins.
1169 */
1170 int irq = gsi;
1171 if (gsi < MAX_GSI_NUM) {
1172 /*
1173 * Retain the VIA chipset work-around (gsi > 15), but
1174 * avoid a problem where the 8254 timer (IRQ0) is setup
1175 * via an override (so it's not on pin 0 of the ioapic),
1176 * and at the same time, the pin 0 interrupt is a PCI
1177 * type. The gsi > 15 test could cause these two pins
1178 * to be shared as IRQ0, and they are not shareable.
1179 * So test for this condition, and if necessary, avoid
1180 * the pin collision.
1181 */
1182 gsi = pci_irq++;
1183 /*
1184 * Don't assign IRQ used by ACPI SCI
1185 */
1186 if (gsi == acpi_gbl_FADT.sci_interrupt)
1187 gsi = pci_irq++;
1188 gsi_to_irq[irq] = gsi;
1189 } else {
1190 printk(KERN_ERR "GSI %u is too high\n", gsi);
1191 return gsi;
1192 }
1193 }
1194#endif
1195 io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
1196 triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
1197 polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
1198 return gsi;
1199}
1200
1201int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
1202 u32 gsi, int triggering, int polarity)
1203{
1204#ifdef CONFIG_X86_MPPARSE
1205 struct mp_config_intsrc mp_irq;
1206 int ioapic;
1207
1208 if (!acpi_ioapic)
1209 return 0;
1210
1211 /* fill in the entry the same way it would appear in the mptable */
1212 mp_irq.mp_type = MP_INTSRC;
1213 mp_irq.mp_irqtype = mp_INT;
1214 mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
1215 (polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
1216 mp_irq.mp_srcbus = number;
1217 mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
1218 ioapic = mp_find_ioapic(gsi);
1219 mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id;
1220 mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base;
1221
1222 save_mp_irq(&mp_irq);
1223#endif
1224 return 0;
1225}
1226
861/* 1227/*
862 * Parse IOAPIC related entries in MADT 1228 * Parse IOAPIC related entries in MADT
863 * returns 0 on success, < 0 on error 1229 * returns 0 on success, < 0 on error
@@ -1059,6 +1425,17 @@ static int __init force_acpi_ht(const struct dmi_system_id *d)
1059} 1425}
1060 1426
1061/* 1427/*
1428 * Don't register any I/O APIC entries for the 8254 timer IRQ.
1429 */
1430static int __init
1431dmi_disable_irq0_through_ioapic(const struct dmi_system_id *d)
1432{
1433 pr_notice("%s detected: disabling IRQ 0 through I/O APIC\n", d->ident);
1434 disable_irq0_through_ioapic = 1;
1435 return 0;
1436}
1437
1438/*
1062 * If your system is blacklisted here, but you find that acpi=force 1439 * If your system is blacklisted here, but you find that acpi=force
1063 * works for you, please contact acpi-devel@sourceforge.net 1440 * works for you, please contact acpi-devel@sourceforge.net
1064 */ 1441 */
@@ -1225,6 +1602,32 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
1225 DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), 1602 DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
1226 }, 1603 },
1227 }, 1604 },
1605 /*
1606 * HP laptops which use a DSDT reporting as HP/SB400/10000,
1607 * which includes some code which overrides all temperature
1608 * trip points to 16C if the INTIN2 input of the I/O APIC
1609 * is enabled. This input is incorrectly designated the
1610 * ISA IRQ 0 via an interrupt source override even though
1611 * it is wired to the output of the master 8259A and INTIN0
1612 * is not connected at all. Abandon any attempts to route
1613 * IRQ 0 through the I/O APIC therefore.
1614 */
1615 {
1616 .callback = dmi_disable_irq0_through_ioapic,
1617 .ident = "HP NX6125 laptop",
1618 .matches = {
1619 DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
1620 DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6125"),
1621 },
1622 },
1623 {
1624 .callback = dmi_disable_irq0_through_ioapic,
1625 .ident = "HP NX6325 laptop",
1626 .matches = {
1627 DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
1628 DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"),
1629 },
1630 },
1228 {} 1631 {}
1229}; 1632};
1230 1633
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index e819362c706..600470d464f 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -328,7 +328,7 @@ void __init early_gart_iommu_check(void)
328 E820_RAM)) { 328 E820_RAM)) {
329 /* reserve it, so we can reuse it in second kernel */ 329 /* reserve it, so we can reuse it in second kernel */
330 printk(KERN_INFO "update e820 for GART\n"); 330 printk(KERN_INFO "update e820 for GART\n");
331 add_memory_region(aper_base, aper_size, E820_RESERVED); 331 e820_add_region(aper_base, aper_size, E820_RESERVED);
332 update_e820(); 332 update_e820();
333 } 333 }
334 } 334 }
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index ce4538ebb7f..570c362eca8 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -79,6 +79,11 @@ char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
79 */ 79 */
80int apic_verbosity; 80int apic_verbosity;
81 81
82int pic_mode;
83
84/* Have we found an MP table */
85int smp_found_config;
86
82static unsigned int calibration_result; 87static unsigned int calibration_result;
83 88
84static int lapic_next_event(unsigned long delta, 89static int lapic_next_event(unsigned long delta,
@@ -1202,7 +1207,7 @@ void __init init_apic_mappings(void)
1202 1207
1203 for (i = 0; i < nr_ioapics; i++) { 1208 for (i = 0; i < nr_ioapics; i++) {
1204 if (smp_found_config) { 1209 if (smp_found_config) {
1205 ioapic_phys = mp_ioapics[i].mpc_apicaddr; 1210 ioapic_phys = mp_ioapics[i].mp_apicaddr;
1206 if (!ioapic_phys) { 1211 if (!ioapic_phys) {
1207 printk(KERN_ERR 1212 printk(KERN_ERR
1208 "WARNING: bogus zero IO-APIC " 1213 "WARNING: bogus zero IO-APIC "
@@ -1517,6 +1522,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
1517 */ 1522 */
1518 cpu = 0; 1523 cpu = 0;
1519 1524
1525 if (apicid > max_physical_apicid)
1526 max_physical_apicid = apicid;
1527
1520 /* 1528 /*
1521 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y 1529 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
1522 * but we need to work other dependencies like SMP_SUSPEND etc 1530 * but we need to work other dependencies like SMP_SUSPEND etc
@@ -1524,7 +1532,7 @@ void __cpuinit generic_processor_info(int apicid, int version)
1524 * if (CPU_HOTPLUG_ENABLED || num_processors > 8) 1532 * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
1525 * - Ashok Raj <ashok.raj@intel.com> 1533 * - Ashok Raj <ashok.raj@intel.com>
1526 */ 1534 */
1527 if (num_processors > 8) { 1535 if (max_physical_apicid >= 8) {
1528 switch (boot_cpu_data.x86_vendor) { 1536 switch (boot_cpu_data.x86_vendor) {
1529 case X86_VENDOR_INTEL: 1537 case X86_VENDOR_INTEL:
1530 if (!APIC_XAPIC(version)) { 1538 if (!APIC_XAPIC(version)) {
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 3ef7752aa8e..d7406aa1c98 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -56,6 +56,9 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
56 */ 56 */
57int apic_verbosity; 57int apic_verbosity;
58 58
59/* Have we found an MP table */
60int smp_found_config;
61
59static struct resource lapic_resource = { 62static struct resource lapic_resource = {
60 .name = "Local APIC", 63 .name = "Local APIC",
61 .flags = IORESOURCE_MEM | IORESOURCE_BUSY, 64 .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
@@ -1068,6 +1071,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
1068 */ 1071 */
1069 cpu = 0; 1072 cpu = 0;
1070 } 1073 }
1074 if (apicid > max_physical_apicid)
1075 max_physical_apicid = apicid;
1076
1071 /* are we being called early in kernel startup? */ 1077 /* are we being called early in kernel startup? */
1072 if (x86_cpu_to_apicid_early_ptr) { 1078 if (x86_cpu_to_apicid_early_ptr) {
1073 u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; 1079 u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 5d241ce94a4..509bd3d9eac 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -37,7 +37,7 @@ static struct fixed_range_block fixed_range_blocks[] = {
37static unsigned long smp_changes_mask; 37static unsigned long smp_changes_mask;
38static struct mtrr_state mtrr_state = {}; 38static struct mtrr_state mtrr_state = {};
39static int mtrr_state_set; 39static int mtrr_state_set;
40static u64 tom2; 40u64 mtrr_tom2;
41 41
42#undef MODULE_PARAM_PREFIX 42#undef MODULE_PARAM_PREFIX
43#define MODULE_PARAM_PREFIX "mtrr." 43#define MODULE_PARAM_PREFIX "mtrr."
@@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
139 } 139 }
140 } 140 }
141 141
142 if (tom2) { 142 if (mtrr_tom2) {
143 if (start >= (1ULL<<32) && (end < tom2)) 143 if (start >= (1ULL<<32) && (end < mtrr_tom2))
144 return MTRR_TYPE_WRBACK; 144 return MTRR_TYPE_WRBACK;
145 } 145 }
146 146
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
158 rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); 158 rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
159} 159}
160 160
161/* fill the MSR pair relating to a var range */
162void fill_mtrr_var_range(unsigned int index,
163 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
164{
165 struct mtrr_var_range *vr;
166
167 vr = mtrr_state.var_ranges;
168
169 vr[index].base_lo = base_lo;
170 vr[index].base_hi = base_hi;
171 vr[index].mask_lo = mask_lo;
172 vr[index].mask_hi = mask_hi;
173}
174
161static void 175static void
162get_fixed_ranges(mtrr_type * frs) 176get_fixed_ranges(mtrr_type * frs)
163{ 177{
@@ -213,13 +227,13 @@ void __init get_mtrr_state(void)
213 mtrr_state.enabled = (lo & 0xc00) >> 10; 227 mtrr_state.enabled = (lo & 0xc00) >> 10;
214 228
215 if (amd_special_default_mtrr()) { 229 if (amd_special_default_mtrr()) {
216 unsigned lo, hi; 230 unsigned low, high;
217 /* TOP_MEM2 */ 231 /* TOP_MEM2 */
218 rdmsr(MSR_K8_TOP_MEM2, lo, hi); 232 rdmsr(MSR_K8_TOP_MEM2, low, high);
219 tom2 = hi; 233 mtrr_tom2 = high;
220 tom2 <<= 32; 234 mtrr_tom2 <<= 32;
221 tom2 |= lo; 235 mtrr_tom2 |= low;
222 tom2 &= 0xffffff8000000ULL; 236 mtrr_tom2 &= 0xffffff800000ULL;
223 } 237 }
224 if (mtrr_show) { 238 if (mtrr_show) {
225 int high_width; 239 int high_width;
@@ -251,9 +265,9 @@ void __init get_mtrr_state(void)
251 else 265 else
252 printk(KERN_INFO "MTRR %u disabled\n", i); 266 printk(KERN_INFO "MTRR %u disabled\n", i);
253 } 267 }
254 if (tom2) { 268 if (mtrr_tom2) {
255 printk(KERN_INFO "TOM2: %016llx aka %lldM\n", 269 printk(KERN_INFO "TOM2: %016llx aka %lldM\n",
256 tom2, tom2>>20); 270 mtrr_tom2, mtrr_tom2>>20);
257 } 271 }
258 } 272 }
259 mtrr_state_set = 1; 273 mtrr_state_set = 1;
@@ -328,7 +342,7 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
328 342
329 if (lo != msrwords[0] || hi != msrwords[1]) { 343 if (lo != msrwords[0] || hi != msrwords[1]) {
330 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && 344 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
331 boot_cpu_data.x86 == 15 && 345 (boot_cpu_data.x86 >= 0x0f && boot_cpu_data.x86 <= 0x11) &&
332 ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) 346 ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK))
333 k8_enable_fixed_iorrs(); 347 k8_enable_fixed_iorrs();
334 mtrr_wrmsr(msr, msrwords[0], msrwords[1]); 348 mtrr_wrmsr(msr, msrwords[0], msrwords[1]);
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 6a1e278d932..105afe12beb 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
37#include <linux/smp.h> 37#include <linux/smp.h>
38#include <linux/cpu.h> 38#include <linux/cpu.h>
39#include <linux/mutex.h> 39#include <linux/mutex.h>
40#include <linux/sort.h>
40 41
41#include <asm/e820.h> 42#include <asm/e820.h>
42#include <asm/mtrr.h> 43#include <asm/mtrr.h>
@@ -609,6 +610,787 @@ static struct sysdev_driver mtrr_sysdev_driver = {
609 .resume = mtrr_restore, 610 .resume = mtrr_restore,
610}; 611};
611 612
/* Capacity of every res_range table; should be related to MTRR_VAR_RANGES nums */
#define RANGE_NUM 256

/*
 * One inclusive interval [start, end].  The helpers operating on tables of
 * these treat an entry whose end is 0 as an unused slot.
 */
struct res_range {
	unsigned long start;	/* first unit covered */
	unsigned long end;	/* last unit covered (inclusive) */
};
620
621static int __init
622add_range(struct res_range *range, int nr_range, unsigned long start,
623 unsigned long end)
624{
625 /* out of slots */
626 if (nr_range >= RANGE_NUM)
627 return nr_range;
628
629 range[nr_range].start = start;
630 range[nr_range].end = end;
631
632 nr_range++;
633
634 return nr_range;
635}
636
637static int __init
638add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
639 unsigned long end)
640{
641 int i;
642
643 /* try to merge it with old one */
644 for (i = 0; i < nr_range; i++) {
645 unsigned long final_start, final_end;
646 unsigned long common_start, common_end;
647
648 if (!range[i].end)
649 continue;
650
651 common_start = max(range[i].start, start);
652 common_end = min(range[i].end, end);
653 if (common_start > common_end + 1)
654 continue;
655
656 final_start = min(range[i].start, start);
657 final_end = max(range[i].end, end);
658
659 range[i].start = final_start;
660 range[i].end = final_end;
661 return nr_range;
662 }
663
664 /* need to add that */
665 return add_range(range, nr_range, start, end);
666}
667
668static void __init
669subtract_range(struct res_range *range, unsigned long start, unsigned long end)
670{
671 int i, j;
672
673 for (j = 0; j < RANGE_NUM; j++) {
674 if (!range[j].end)
675 continue;
676
677 if (start <= range[j].start && end >= range[j].end) {
678 range[j].start = 0;
679 range[j].end = 0;
680 continue;
681 }
682
683 if (start <= range[j].start && end < range[j].end &&
684 range[j].start < end + 1) {
685 range[j].start = end + 1;
686 continue;
687 }
688
689
690 if (start > range[j].start && end >= range[j].end &&
691 range[j].end > start - 1) {
692 range[j].end = start - 1;
693 continue;
694 }
695
696 if (start > range[j].start && end < range[j].end) {
697 /* find the new spare */
698 for (i = 0; i < RANGE_NUM; i++) {
699 if (range[i].end == 0)
700 break;
701 }
702 if (i < RANGE_NUM) {
703 range[i].end = range[j].end;
704 range[i].start = end + 1;
705 } else {
706 printk(KERN_ERR "run of slot in ranges\n");
707 }
708 range[j].end = start - 1;
709 continue;
710 }
711 }
712}
713
714static int __init cmp_range(const void *x1, const void *x2)
715{
716 const struct res_range *r1 = x1;
717 const struct res_range *r2 = x2;
718 long start1, start2;
719
720 start1 = r1->start;
721 start2 = r2->start;
722
723 return start1 - start2;
724}
725
726struct var_mtrr_range_state {
727 unsigned long base_pfn;
728 unsigned long size_pfn;
729 mtrr_type type;
730};
731
732struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
733static int __initdata debug_print;
734
735static int __init
736x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
737 unsigned long extra_remove_base,
738 unsigned long extra_remove_size)
739{
740 unsigned long i, base, size;
741 mtrr_type type;
742
743 for (i = 0; i < num_var_ranges; i++) {
744 type = range_state[i].type;
745 if (type != MTRR_TYPE_WRBACK)
746 continue;
747 base = range_state[i].base_pfn;
748 size = range_state[i].size_pfn;
749 nr_range = add_range_with_merge(range, nr_range, base,
750 base + size - 1);
751 }
752 if (debug_print) {
753 printk(KERN_DEBUG "After WB checking\n");
754 for (i = 0; i < nr_range; i++)
755 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
756 range[i].start, range[i].end + 1);
757 }
758
759 /* take out UC ranges */
760 for (i = 0; i < num_var_ranges; i++) {
761 type = range_state[i].type;
762 if (type != MTRR_TYPE_UNCACHABLE)
763 continue;
764 size = range_state[i].size_pfn;
765 if (!size)
766 continue;
767 base = range_state[i].base_pfn;
768 subtract_range(range, base, base + size - 1);
769 }
770 if (extra_remove_size)
771 subtract_range(range, extra_remove_base,
772 extra_remove_base + extra_remove_size - 1);
773
774 /* get new range num */
775 nr_range = 0;
776 for (i = 0; i < RANGE_NUM; i++) {
777 if (!range[i].end)
778 continue;
779 nr_range++;
780 }
781 if (debug_print) {
782 printk(KERN_DEBUG "After UC checking\n");
783 for (i = 0; i < nr_range; i++)
784 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
785 range[i].start, range[i].end + 1);
786 }
787
788 /* sort the ranges */
789 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
790 if (debug_print) {
791 printk(KERN_DEBUG "After sorting\n");
792 for (i = 0; i < nr_range; i++)
793 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
794 range[i].start, range[i].end + 1);
795 }
796
797 /* clear those is not used */
798 for (i = nr_range; i < RANGE_NUM; i++)
799 memset(&range[i], 0, sizeof(range[i]));
800
801 return nr_range;
802}
803
804static struct res_range __initdata range[RANGE_NUM];
805
806#ifdef CONFIG_MTRR_SANITIZER
807
808static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
809{
810 unsigned long sum;
811 int i;
812
813 sum = 0;
814 for (i = 0; i < nr_range; i++)
815 sum += range[i].end + 1 - range[i].start;
816
817 return sum;
818}
819
820static int enable_mtrr_cleanup __initdata =
821 CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
822
823static int __init disable_mtrr_cleanup_setup(char *str)
824{
825 if (enable_mtrr_cleanup != -1)
826 enable_mtrr_cleanup = 0;
827 return 0;
828}
829early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
830
831static int __init enable_mtrr_cleanup_setup(char *str)
832{
833 if (enable_mtrr_cleanup != -1)
834 enable_mtrr_cleanup = 1;
835 return 0;
836}
837early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
838
/*
 * Progress state used while converting RAM ranges into variable MTRRs.
 * The *k fields are in KiB (callers derive them with ">> 10" from bytes).
 */
struct var_mtrr_state {
	unsigned long range_startk;	/* start of the range being collected */
	unsigned long range_sizek;	/* size of the range being collected */
	unsigned long chunk_sizek;	/* chunk size used when padding holes */
	unsigned long gran_sizek;	/* alignment granularity */
	unsigned int reg;		/* next variable MTRR register to use */
};
846
847static void __init
848set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
849 unsigned char type, unsigned int address_bits)
850{
851 u32 base_lo, base_hi, mask_lo, mask_hi;
852 u64 base, mask;
853
854 if (!sizek) {
855 fill_mtrr_var_range(reg, 0, 0, 0, 0);
856 return;
857 }
858
859 mask = (1ULL << address_bits) - 1;
860 mask &= ~((((u64)sizek) << 10) - 1);
861
862 base = ((u64)basek) << 10;
863
864 base |= type;
865 mask |= 0x800;
866
867 base_lo = base & ((1ULL<<32) - 1);
868 base_hi = base >> 32;
869
870 mask_lo = mask & ((1ULL<<32) - 1);
871 mask_hi = mask >> 32;
872
873 fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
874}
875
876static void __init
877save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
878 unsigned char type)
879{
880 range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
881 range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
882 range_state[reg].type = type;
883}
884
885static void __init
886set_var_mtrr_all(unsigned int address_bits)
887{
888 unsigned long basek, sizek;
889 unsigned char type;
890 unsigned int reg;
891
892 for (reg = 0; reg < num_var_ranges; reg++) {
893 basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
894 sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
895 type = range_state[reg].type;
896
897 set_var_mtrr(reg, basek, sizek, type, address_bits);
898 }
899}
900
901static unsigned int __init
902range_to_mtrr(unsigned int reg, unsigned long range_startk,
903 unsigned long range_sizek, unsigned char type)
904{
905 if (!range_sizek || (reg >= num_var_ranges))
906 return reg;
907
908 while (range_sizek) {
909 unsigned long max_align, align;
910 unsigned long sizek;
911
912 /* Compute the maximum size I can make a range */
913 if (range_startk)
914 max_align = ffs(range_startk) - 1;
915 else
916 max_align = 32;
917 align = fls(range_sizek) - 1;
918 if (align > max_align)
919 align = max_align;
920
921 sizek = 1 << align;
922 if (debug_print)
923 printk(KERN_DEBUG "Setting variable MTRR %d, "
924 "base: %ldMB, range: %ldMB, type %s\n",
925 reg, range_startk >> 10, sizek >> 10,
926 (type == MTRR_TYPE_UNCACHABLE)?"UC":
927 ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
928 );
929 save_var_mtrr(reg++, range_startk, sizek, type);
930 range_startk += sizek;
931 range_sizek -= sizek;
932 if (reg >= num_var_ranges)
933 break;
934 }
935 return reg;
936}
937
938static unsigned __init
939range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
940 unsigned long sizek)
941{
942 unsigned long hole_basek, hole_sizek;
943 unsigned long second_basek, second_sizek;
944 unsigned long range0_basek, range0_sizek;
945 unsigned long range_basek, range_sizek;
946 unsigned long chunk_sizek;
947 unsigned long gran_sizek;
948
949 hole_basek = 0;
950 hole_sizek = 0;
951 second_basek = 0;
952 second_sizek = 0;
953 chunk_sizek = state->chunk_sizek;
954 gran_sizek = state->gran_sizek;
955
956 /* align with gran size, prevent small block used up MTRRs */
957 range_basek = ALIGN(state->range_startk, gran_sizek);
958 if ((range_basek > basek) && basek)
959 return second_sizek;
960 state->range_sizek -= (range_basek - state->range_startk);
961 range_sizek = ALIGN(state->range_sizek, gran_sizek);
962
963 while (range_sizek > state->range_sizek) {
964 range_sizek -= gran_sizek;
965 if (!range_sizek)
966 return 0;
967 }
968 state->range_sizek = range_sizek;
969
970 /* try to append some small hole */
971 range0_basek = state->range_startk;
972 range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
973 if (range0_sizek == state->range_sizek) {
974 if (debug_print)
975 printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
976 range0_basek<<10,
977 (range0_basek + state->range_sizek)<<10);
978 state->reg = range_to_mtrr(state->reg, range0_basek,
979 state->range_sizek, MTRR_TYPE_WRBACK);
980 return 0;
981 }
982
983 range0_sizek -= chunk_sizek;
984 if (range0_sizek && sizek) {
985 while (range0_basek + range0_sizek > (basek + sizek)) {
986 range0_sizek -= chunk_sizek;
987 if (!range0_sizek)
988 break;
989 }
990 }
991
992 if (range0_sizek) {
993 if (debug_print)
994 printk(KERN_DEBUG "range0: %016lx - %016lx\n",
995 range0_basek<<10,
996 (range0_basek + range0_sizek)<<10);
997 state->reg = range_to_mtrr(state->reg, range0_basek,
998 range0_sizek, MTRR_TYPE_WRBACK);
999
1000 }
1001
1002 range_basek = range0_basek + range0_sizek;
1003 range_sizek = chunk_sizek;
1004
1005 if (range_basek + range_sizek > basek &&
1006 range_basek + range_sizek <= (basek + sizek)) {
1007 /* one hole */
1008 second_basek = basek;
1009 second_sizek = range_basek + range_sizek - basek;
1010 }
1011
1012 /* if last piece, only could one hole near end */
1013 if ((second_basek || !basek) &&
1014 range_sizek - (state->range_sizek - range0_sizek) - second_sizek <
1015 (chunk_sizek >> 1)) {
1016 /*
1017 * one hole in middle (second_sizek is 0) or at end
1018 * (second_sizek is 0 )
1019 */
1020 hole_sizek = range_sizek - (state->range_sizek - range0_sizek)
1021 - second_sizek;
1022 hole_basek = range_basek + range_sizek - hole_sizek
1023 - second_sizek;
1024 } else {
1025 /* fallback for big hole, or several holes */
1026 range_sizek = state->range_sizek - range0_sizek;
1027 second_basek = 0;
1028 second_sizek = 0;
1029 }
1030
1031 if (debug_print)
1032 printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10,
1033 (range_basek + range_sizek)<<10);
1034 state->reg = range_to_mtrr(state->reg, range_basek, range_sizek,
1035 MTRR_TYPE_WRBACK);
1036 if (hole_sizek) {
1037 if (debug_print)
1038 printk(KERN_DEBUG "hole: %016lx - %016lx\n",
1039 hole_basek<<10, (hole_basek + hole_sizek)<<10);
1040 state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek,
1041 MTRR_TYPE_UNCACHABLE);
1042
1043 }
1044
1045 return second_sizek;
1046}
1047
1048static void __init
1049set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
1050 unsigned long size_pfn)
1051{
1052 unsigned long basek, sizek;
1053 unsigned long second_sizek = 0;
1054
1055 if (state->reg >= num_var_ranges)
1056 return;
1057
1058 basek = base_pfn << (PAGE_SHIFT - 10);
1059 sizek = size_pfn << (PAGE_SHIFT - 10);
1060
1061 /* See if I can merge with the last range */
1062 if ((basek <= 1024) ||
1063 (state->range_startk + state->range_sizek == basek)) {
1064 unsigned long endk = basek + sizek;
1065 state->range_sizek = endk - state->range_startk;
1066 return;
1067 }
1068 /* Write the range mtrrs */
1069 if (state->range_sizek != 0)
1070 second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
1071
1072 /* Allocate an msr */
1073 state->range_startk = basek + second_sizek;
1074 state->range_sizek = sizek - second_sizek;
1075}
1076
1077/* mininum size of mtrr block that can take hole */
1078static u64 mtrr_chunk_size __initdata = (256ULL<<20);
1079
1080static int __init parse_mtrr_chunk_size_opt(char *p)
1081{
1082 if (!p)
1083 return -EINVAL;
1084 mtrr_chunk_size = memparse(p, &p);
1085 return 0;
1086}
1087early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
1088
1089/* granity of mtrr of block */
1090static u64 mtrr_gran_size __initdata;
1091
1092static int __init parse_mtrr_gran_size_opt(char *p)
1093{
1094 if (!p)
1095 return -EINVAL;
1096 mtrr_gran_size = memparse(p, &p);
1097 return 0;
1098}
1099early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
1100
1101static int nr_mtrr_spare_reg __initdata =
1102 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
1103
1104static int __init parse_mtrr_spare_reg(char *arg)
1105{
1106 if (arg)
1107 nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
1108 return 0;
1109}
1110
1111early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
1112
1113static int __init
1114x86_setup_var_mtrrs(struct res_range *range, int nr_range,
1115 u64 chunk_size, u64 gran_size)
1116{
1117 struct var_mtrr_state var_state;
1118 int i;
1119 int num_reg;
1120
1121 var_state.range_startk = 0;
1122 var_state.range_sizek = 0;
1123 var_state.reg = 0;
1124 var_state.chunk_sizek = chunk_size >> 10;
1125 var_state.gran_sizek = gran_size >> 10;
1126
1127 memset(range_state, 0, sizeof(range_state));
1128
1129 /* Write the range etc */
1130 for (i = 0; i < nr_range; i++)
1131 set_var_mtrr_range(&var_state, range[i].start,
1132 range[i].end - range[i].start + 1);
1133
1134 /* Write the last range */
1135 if (var_state.range_sizek != 0)
1136 range_to_mtrr_with_hole(&var_state, 0, 0);
1137
1138 num_reg = var_state.reg;
1139 /* Clear out the extra MTRR's */
1140 while (var_state.reg < num_var_ranges) {
1141 save_var_mtrr(var_state.reg, 0, 0, 0);
1142 var_state.reg++;
1143 }
1144
1145 return num_reg;
1146}
1147
1148struct mtrr_cleanup_result {
1149 unsigned long gran_sizek;
1150 unsigned long chunk_sizek;
1151 unsigned long lose_cover_sizek;
1152 unsigned int num_reg;
1153 int bad;
1154};
1155
1156/*
1157 * gran_size: 1M, 2M, ..., 2G
1158 * chunk size: gran_size, ..., 4G
1159 * so we need (2+13)*6
1160 */
1161#define NUM_RESULT 90
1162#define PSHIFT (PAGE_SHIFT - 10)
1163
1164static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
1165static struct res_range __initdata range_new[RANGE_NUM];
1166static unsigned long __initdata min_loss_pfn[RANGE_NUM];
1167
1168static int __init mtrr_cleanup(unsigned address_bits)
1169{
1170 unsigned long extra_remove_base, extra_remove_size;
1171 unsigned long i, base, size, def, dummy;
1172 mtrr_type type;
1173 int nr_range, nr_range_new;
1174 u64 chunk_size, gran_size;
1175 unsigned long range_sums, range_sums_new;
1176 int index_good;
1177 int num_reg_good;
1178
1179 /* extra one for all 0 */
1180 int num[MTRR_NUM_TYPES + 1];
1181
1182 if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
1183 return 0;
1184 rdmsr(MTRRdefType_MSR, def, dummy);
1185 def &= 0xff;
1186 if (def != MTRR_TYPE_UNCACHABLE)
1187 return 0;
1188
1189 /* get it and store it aside */
1190 memset(range_state, 0, sizeof(range_state));
1191 for (i = 0; i < num_var_ranges; i++) {
1192 mtrr_if->get(i, &base, &size, &type);
1193 range_state[i].base_pfn = base;
1194 range_state[i].size_pfn = size;
1195 range_state[i].type = type;
1196 }
1197
1198 /* check entries number */
1199 memset(num, 0, sizeof(num));
1200 for (i = 0; i < num_var_ranges; i++) {
1201 type = range_state[i].type;
1202 size = range_state[i].size_pfn;
1203 if (type >= MTRR_NUM_TYPES)
1204 continue;
1205 if (!size)
1206 type = MTRR_NUM_TYPES;
1207 num[type]++;
1208 }
1209
1210 /* check if we got UC entries */
1211 if (!num[MTRR_TYPE_UNCACHABLE])
1212 return 0;
1213
1214 /* check if we only had WB and UC */
1215 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
1216 num_var_ranges - num[MTRR_NUM_TYPES])
1217 return 0;
1218
1219 memset(range, 0, sizeof(range));
1220 extra_remove_size = 0;
1221 if (mtrr_tom2) {
1222 extra_remove_base = 1 << (32 - PAGE_SHIFT);
1223 extra_remove_size =
1224 (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
1225 }
1226 nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
1227 extra_remove_size);
1228 range_sums = sum_ranges(range, nr_range);
1229 printk(KERN_INFO "total RAM coverred: %ldM\n",
1230 range_sums >> (20 - PAGE_SHIFT));
1231
1232 if (mtrr_chunk_size && mtrr_gran_size) {
1233 int num_reg;
1234
1235 debug_print = 1;
1236 /* convert ranges to var ranges state */
1237 num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
1238 mtrr_gran_size);
1239
1240 /* we got new setting in range_state, check it */
1241 memset(range_new, 0, sizeof(range_new));
1242 nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
1243 extra_remove_base,
1244 extra_remove_size);
1245 range_sums_new = sum_ranges(range_new, nr_range_new);
1246
1247 i = 0;
1248 result[i].chunk_sizek = mtrr_chunk_size >> 10;
1249 result[i].gran_sizek = mtrr_gran_size >> 10;
1250 result[i].num_reg = num_reg;
1251 if (range_sums < range_sums_new) {
1252 result[i].lose_cover_sizek =
1253 (range_sums_new - range_sums) << PSHIFT;
1254 result[i].bad = 1;
1255 } else
1256 result[i].lose_cover_sizek =
1257 (range_sums - range_sums_new) << PSHIFT;
1258
1259 printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
1260 result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10,
1261 result[i].chunk_sizek >> 10);
1262 printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n",
1263 result[i].num_reg, result[i].bad?"-":"",
1264 result[i].lose_cover_sizek >> 10);
1265 if (!result[i].bad) {
1266 set_var_mtrr_all(address_bits);
1267 return 1;
1268 }
1269 printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
1270 "will find optimal one\n");
1271 debug_print = 0;
1272 memset(result, 0, sizeof(result[0]));
1273 }
1274
1275 i = 0;
1276 memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
1277 memset(result, 0, sizeof(result));
1278 for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) {
1279 for (chunk_size = gran_size; chunk_size < (1ULL<<33);
1280 chunk_size <<= 1) {
1281 int num_reg;
1282
1283 if (debug_print)
1284 printk(KERN_INFO
1285 "\ngran_size: %lldM chunk_size_size: %lldM\n",
1286 gran_size >> 20, chunk_size >> 20);
1287 if (i >= NUM_RESULT)
1288 continue;
1289
1290 /* convert ranges to var ranges state */
1291 num_reg = x86_setup_var_mtrrs(range, nr_range,
1292 chunk_size, gran_size);
1293
1294 /* we got new setting in range_state, check it */
1295 memset(range_new, 0, sizeof(range_new));
1296 nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
1297 extra_remove_base, extra_remove_size);
1298 range_sums_new = sum_ranges(range_new, nr_range_new);
1299
1300 result[i].chunk_sizek = chunk_size >> 10;
1301 result[i].gran_sizek = gran_size >> 10;
1302 result[i].num_reg = num_reg;
1303 if (range_sums < range_sums_new) {
1304 result[i].lose_cover_sizek =
1305 (range_sums_new - range_sums) << PSHIFT;
1306 result[i].bad = 1;
1307 } else
1308 result[i].lose_cover_sizek =
1309 (range_sums - range_sums_new) << PSHIFT;
1310
1311 /* double check it */
1312 if (!result[i].bad && !result[i].lose_cover_sizek) {
1313 if (nr_range_new != nr_range ||
1314 memcmp(range, range_new, sizeof(range)))
1315 result[i].bad = 1;
1316 }
1317
1318 if (!result[i].bad && (range_sums - range_sums_new <
1319 min_loss_pfn[num_reg])) {
1320 min_loss_pfn[num_reg] =
1321 range_sums - range_sums_new;
1322 }
1323 i++;
1324 }
1325 }
1326
1327 /* print out all */
1328 for (i = 0; i < NUM_RESULT; i++) {
1329 printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
1330 result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10,
1331 result[i].chunk_sizek >> 10);
1332 printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n",
1333 result[i].num_reg, result[i].bad?"-":"",
1334 result[i].lose_cover_sizek >> 10);
1335 }
1336
1337 /* try to find the optimal index */
1338 if (nr_mtrr_spare_reg >= num_var_ranges)
1339 nr_mtrr_spare_reg = num_var_ranges - 1;
1340 num_reg_good = -1;
1341 for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
1342 if (!min_loss_pfn[i]) {
1343 num_reg_good = i;
1344 break;
1345 }
1346 }
1347
1348 index_good = -1;
1349 if (num_reg_good != -1) {
1350 for (i = 0; i < NUM_RESULT; i++) {
1351 if (!result[i].bad &&
1352 result[i].num_reg == num_reg_good &&
1353 !result[i].lose_cover_sizek) {
1354 index_good = i;
1355 break;
1356 }
1357 }
1358 }
1359
1360 if (index_good != -1) {
1361 printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
1362 i = index_good;
1363 printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t",
1364 result[i].gran_sizek >> 10,
1365 result[i].chunk_sizek >> 10);
1366 printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n",
1367 result[i].num_reg,
1368 result[i].lose_cover_sizek >> 10);
1369 /* convert ranges to var ranges state */
1370 chunk_size = result[i].chunk_sizek;
1371 chunk_size <<= 10;
1372 gran_size = result[i].gran_sizek;
1373 gran_size <<= 10;
1374 debug_print = 1;
1375 x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
1376 set_var_mtrr_all(address_bits);
1377 return 1;
1378 }
1379
1380 printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
1381 printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
1382
1383 return 0;
1384}
1385#else
1386static int __init mtrr_cleanup(unsigned address_bits)
1387{
1388 return 0;
1389}
1390#endif
1391
1392static int __initdata changed_by_mtrr_cleanup;
1393
612static int disable_mtrr_trim; 1394static int disable_mtrr_trim;
613 1395
614static int __init disable_mtrr_trim_setup(char *str) 1396static int __init disable_mtrr_trim_setup(char *str)
@@ -648,6 +1430,19 @@ int __init amd_special_default_mtrr(void)
648 return 0; 1430 return 0;
649} 1431}
650 1432
1433static u64 __init real_trim_memory(unsigned long start_pfn,
1434 unsigned long limit_pfn)
1435{
1436 u64 trim_start, trim_size;
1437 trim_start = start_pfn;
1438 trim_start <<= PAGE_SHIFT;
1439 trim_size = limit_pfn;
1440 trim_size <<= PAGE_SHIFT;
1441 trim_size -= trim_start;
1442
1443 return e820_update_range(trim_start, trim_size, E820_RAM,
1444 E820_RESERVED);
1445}
651/** 1446/**
652 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs 1447 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
653 * @end_pfn: ending page frame number 1448 * @end_pfn: ending page frame number
@@ -663,8 +1458,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
663{ 1458{
664 unsigned long i, base, size, highest_pfn = 0, def, dummy; 1459 unsigned long i, base, size, highest_pfn = 0, def, dummy;
665 mtrr_type type; 1460 mtrr_type type;
666 u64 trim_start, trim_size; 1461 int nr_range;
1462 u64 total_trim_size;
667 1463
1464 /* extra one for all 0 */
1465 int num[MTRR_NUM_TYPES + 1];
668 /* 1466 /*
669 * Make sure we only trim uncachable memory on machines that 1467 * Make sure we only trim uncachable memory on machines that
670 * support the Intel MTRR architecture: 1468 * support the Intel MTRR architecture:
@@ -676,14 +1474,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
676 if (def != MTRR_TYPE_UNCACHABLE) 1474 if (def != MTRR_TYPE_UNCACHABLE)
677 return 0; 1475 return 0;
678 1476
679 if (amd_special_default_mtrr()) 1477 /* get it and store it aside */
680 return 0; 1478 memset(range_state, 0, sizeof(range_state));
1479 for (i = 0; i < num_var_ranges; i++) {
1480 mtrr_if->get(i, &base, &size, &type);
1481 range_state[i].base_pfn = base;
1482 range_state[i].size_pfn = size;
1483 range_state[i].type = type;
1484 }
681 1485
682 /* Find highest cached pfn */ 1486 /* Find highest cached pfn */
683 for (i = 0; i < num_var_ranges; i++) { 1487 for (i = 0; i < num_var_ranges; i++) {
684 mtrr_if->get(i, &base, &size, &type); 1488 type = range_state[i].type;
685 if (type != MTRR_TYPE_WRBACK) 1489 if (type != MTRR_TYPE_WRBACK)
686 continue; 1490 continue;
1491 base = range_state[i].base_pfn;
1492 size = range_state[i].size_pfn;
687 if (highest_pfn < base + size) 1493 if (highest_pfn < base + size)
688 highest_pfn = base + size; 1494 highest_pfn = base + size;
689 } 1495 }
@@ -698,22 +1504,65 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
698 return 0; 1504 return 0;
699 } 1505 }
700 1506
701 if (highest_pfn < end_pfn) { 1507 /* check entries number */
1508 memset(num, 0, sizeof(num));
1509 for (i = 0; i < num_var_ranges; i++) {
1510 type = range_state[i].type;
1511 if (type >= MTRR_NUM_TYPES)
1512 continue;
1513 size = range_state[i].size_pfn;
1514 if (!size)
1515 type = MTRR_NUM_TYPES;
1516 num[type]++;
1517 }
1518
1519 /* no entry for WB? */
1520 if (!num[MTRR_TYPE_WRBACK])
1521 return 0;
1522
1523 /* check if we only had WB and UC */
1524 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
1525 num_var_ranges - num[MTRR_NUM_TYPES])
1526 return 0;
1527
1528 memset(range, 0, sizeof(range));
1529 nr_range = 0;
1530 if (mtrr_tom2) {
1531 range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
1532 range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
1533 if (highest_pfn < range[nr_range].end + 1)
1534 highest_pfn = range[nr_range].end + 1;
1535 nr_range++;
1536 }
1537 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
1538
1539 total_trim_size = 0;
1540 /* check the head */
1541 if (range[0].start)
1542 total_trim_size += real_trim_memory(0, range[0].start);
1543 /* check the holes */
1544 for (i = 0; i < nr_range - 1; i++) {
1545 if (range[i].end + 1 < range[i+1].start)
1546 total_trim_size += real_trim_memory(range[i].end + 1,
1547 range[i+1].start);
1548 }
1549 /* check the top */
1550 i = nr_range - 1;
1551 if (range[i].end + 1 < end_pfn)
1552 total_trim_size += real_trim_memory(range[i].end + 1,
1553 end_pfn);
1554
1555 if (total_trim_size) {
702 printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" 1556 printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
703 " all of memory, losing %luMB of RAM.\n", 1557 " all of memory, losing %lluMB of RAM.\n",
704 (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT)); 1558 total_trim_size >> 20);
705 1559
706 WARN_ON(1); 1560 if (!changed_by_mtrr_cleanup)
1561 WARN_ON(1);
707 1562
708 printk(KERN_INFO "update e820 for mtrr\n"); 1563 printk(KERN_INFO "update e820 for mtrr\n");
709 trim_start = highest_pfn;
710 trim_start <<= PAGE_SHIFT;
711 trim_size = end_pfn;
712 trim_size <<= PAGE_SHIFT;
713 trim_size -= trim_start;
714 update_memory_range(trim_start, trim_size, E820_RAM,
715 E820_RESERVED);
716 update_e820(); 1564 update_e820();
1565
717 return 1; 1566 return 1;
718 } 1567 }
719 1568
@@ -729,18 +1578,21 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
729 */ 1578 */
730void __init mtrr_bp_init(void) 1579void __init mtrr_bp_init(void)
731{ 1580{
1581 u32 phys_addr;
732 init_ifs(); 1582 init_ifs();
733 1583
1584 phys_addr = 32;
1585
734 if (cpu_has_mtrr) { 1586 if (cpu_has_mtrr) {
735 mtrr_if = &generic_mtrr_ops; 1587 mtrr_if = &generic_mtrr_ops;
736 size_or_mask = 0xff000000; /* 36 bits */ 1588 size_or_mask = 0xff000000; /* 36 bits */
737 size_and_mask = 0x00f00000; 1589 size_and_mask = 0x00f00000;
1590 phys_addr = 36;
738 1591
739 /* This is an AMD specific MSR, but we assume(hope?) that 1592 /* This is an AMD specific MSR, but we assume(hope?) that
740 Intel will implement it too when they extend the address 1593 Intel will implement it too when they extend the address
741 bus of the Xeon. */ 1594 bus of the Xeon. */
742 if (cpuid_eax(0x80000000) >= 0x80000008) { 1595 if (cpuid_eax(0x80000000) >= 0x80000008) {
743 u32 phys_addr;
744 phys_addr = cpuid_eax(0x80000008) & 0xff; 1596 phys_addr = cpuid_eax(0x80000008) & 0xff;
745 /* CPUID workaround for Intel 0F33/0F34 CPU */ 1597 /* CPUID workaround for Intel 0F33/0F34 CPU */
746 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 1598 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -758,6 +1610,7 @@ void __init mtrr_bp_init(void)
758 don't support PAE */ 1610 don't support PAE */
759 size_or_mask = 0xfff00000; /* 32 bits */ 1611 size_or_mask = 0xfff00000; /* 32 bits */
760 size_and_mask = 0; 1612 size_and_mask = 0;
1613 phys_addr = 32;
761 } 1614 }
762 } else { 1615 } else {
763 switch (boot_cpu_data.x86_vendor) { 1616 switch (boot_cpu_data.x86_vendor) {
@@ -791,8 +1644,15 @@ void __init mtrr_bp_init(void)
791 if (mtrr_if) { 1644 if (mtrr_if) {
792 set_num_var_ranges(); 1645 set_num_var_ranges();
793 init_table(); 1646 init_table();
794 if (use_intel()) 1647 if (use_intel()) {
795 get_mtrr_state(); 1648 get_mtrr_state();
1649
1650 if (mtrr_cleanup(phys_addr)) {
1651 changed_by_mtrr_cleanup = 1;
1652 mtrr_if->set_all();
1653 }
1654
1655 }
796 } 1656 }
797} 1657}
798 1658
@@ -829,9 +1689,10 @@ static int __init mtrr_init_finialize(void)
829{ 1689{
830 if (!mtrr_if) 1690 if (!mtrr_if)
831 return 0; 1691 return 0;
832 if (use_intel()) 1692 if (use_intel()) {
833 mtrr_state_warn(); 1693 if (!changed_by_mtrr_cleanup)
834 else { 1694 mtrr_state_warn();
1695 } else {
835 /* The CPUs haven't MTRR and seem to not support SMP. They have 1696 /* The CPUs haven't MTRR and seem to not support SMP. They have
836 * specific drivers, we use a tricky method to support 1697 * specific drivers, we use a tricky method to support
837 * suspend/resume for them. 1698 * suspend/resume for them.
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 2cc77eb6fea..2dc4ec656b2 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_context *ctxt);
81void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); 81void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
82void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); 82void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
83 83
84void fill_mtrr_var_range(unsigned int index,
85 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
84void get_mtrr_state(void); 86void get_mtrr_state(void);
85 87
86extern void set_mtrr_ops(struct mtrr_ops * ops); 88extern void set_mtrr_ops(struct mtrr_ops * ops);
@@ -92,6 +94,7 @@ extern struct mtrr_ops * mtrr_if;
92#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) 94#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1)
93 95
94extern unsigned int num_var_ranges; 96extern unsigned int num_var_ranges;
97extern u64 mtrr_tom2;
95 98
96void mtrr_state_warn(void); 99void mtrr_state_warn(void);
97const char *mtrr_attrib_to_str(int x); 100const char *mtrr_attrib_to_str(int x);
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820.c
index 124480c0008..7b613d2efb0 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820.c
@@ -17,171 +17,30 @@
17#include <linux/kexec.h> 17#include <linux/kexec.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/mm.h> 19#include <linux/mm.h>
20#include <linux/suspend.h>
21#include <linux/pfn.h> 20#include <linux/pfn.h>
21#include <linux/suspend.h>
22 22
23#include <asm/pgtable.h> 23#include <asm/pgtable.h>
24#include <asm/page.h> 24#include <asm/page.h>
25#include <asm/e820.h> 25#include <asm/e820.h>
26#include <asm/proto.h> 26#include <asm/proto.h>
27#include <asm/setup.h> 27#include <asm/setup.h>
28#include <asm/sections.h>
29#include <asm/kdebug.h>
30#include <asm/trampoline.h> 28#include <asm/trampoline.h>
31 29
32struct e820map e820; 30struct e820map e820;
33 31
34/* 32/* For PCI or other memory-mapped resources */
35 * PFN of last memory page. 33unsigned long pci_mem_start = 0xaeedbabe;
36 */ 34#ifdef CONFIG_PCI
37unsigned long end_pfn; 35EXPORT_SYMBOL(pci_mem_start);
38
39/*
40 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
41 * The direct mapping extends to max_pfn_mapped, so that we can directly access
42 * apertures, ACPI and other tables without having to play with fixmaps.
43 */
44unsigned long max_pfn_mapped;
45
46/*
47 * Last pfn which the user wants to use.
48 */
49static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
50
51/*
52 * Early reserved memory areas.
53 */
54#define MAX_EARLY_RES 20
55
56struct early_res {
57 unsigned long start, end;
58 char name[16];
59};
60static struct early_res early_res[MAX_EARLY_RES] __initdata = {
61 { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
62#ifdef CONFIG_X86_TRAMPOLINE
63 { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
64#endif 36#endif
65 {}
66};
67
68void __init reserve_early(unsigned long start, unsigned long end, char *name)
69{
70 int i;
71 struct early_res *r;
72 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
73 r = &early_res[i];
74 if (end > r->start && start < r->end)
75 panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n",
76 start, end - 1, name?name:"", r->start, r->end - 1, r->name);
77 }
78 if (i >= MAX_EARLY_RES)
79 panic("Too many early reservations");
80 r = &early_res[i];
81 r->start = start;
82 r->end = end;
83 if (name)
84 strncpy(r->name, name, sizeof(r->name) - 1);
85}
86
87void __init free_early(unsigned long start, unsigned long end)
88{
89 struct early_res *r;
90 int i, j;
91
92 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
93 r = &early_res[i];
94 if (start == r->start && end == r->end)
95 break;
96 }
97 if (i >= MAX_EARLY_RES || !early_res[i].end)
98 panic("free_early on not reserved area: %lx-%lx!", start, end);
99 37
100 for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
101 ;
102
103 memmove(&early_res[i], &early_res[i + 1],
104 (j - 1 - i) * sizeof(struct early_res));
105
106 early_res[j - 1].end = 0;
107}
108
109void __init early_res_to_bootmem(unsigned long start, unsigned long end)
110{
111 int i;
112 unsigned long final_start, final_end;
113 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
114 struct early_res *r = &early_res[i];
115 final_start = max(start, r->start);
116 final_end = min(end, r->end);
117 if (final_start >= final_end)
118 continue;
119 printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i,
120 final_start, final_end - 1, r->name);
121 reserve_bootmem_generic(final_start, final_end - final_start);
122 }
123}
124
125/* Check for already reserved areas */
126static inline int __init
127bad_addr(unsigned long *addrp, unsigned long size, unsigned long align)
128{
129 int i;
130 unsigned long addr = *addrp, last;
131 int changed = 0;
132again:
133 last = addr + size;
134 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
135 struct early_res *r = &early_res[i];
136 if (last >= r->start && addr < r->end) {
137 *addrp = addr = round_up(r->end, align);
138 changed = 1;
139 goto again;
140 }
141 }
142 return changed;
143}
144
145/* Check for already reserved areas */
146static inline int __init
147bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
148{
149 int i;
150 unsigned long addr = *addrp, last;
151 unsigned long size = *sizep;
152 int changed = 0;
153again:
154 last = addr + size;
155 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
156 struct early_res *r = &early_res[i];
157 if (last > r->start && addr < r->start) {
158 size = r->start - addr;
159 changed = 1;
160 goto again;
161 }
162 if (last > r->end && addr < r->end) {
163 addr = round_up(r->end, align);
164 size = last - addr;
165 changed = 1;
166 goto again;
167 }
168 if (last <= r->end && addr >= r->start) {
169 (*sizep)++;
170 return 0;
171 }
172 }
173 if (changed) {
174 *addrp = addr;
175 *sizep = size;
176 }
177 return changed;
178}
179/* 38/*
180 * This function checks if any part of the range <start,end> is mapped 39 * This function checks if any part of the range <start,end> is mapped
181 * with type. 40 * with type.
182 */ 41 */
183int 42int
184e820_any_mapped(unsigned long start, unsigned long end, unsigned type) 43e820_any_mapped(u64 start, u64 end, unsigned type)
185{ 44{
186 int i; 45 int i;
187 46
@@ -204,8 +63,7 @@ EXPORT_SYMBOL_GPL(e820_any_mapped);
204 * Note: this function only works correctly if the e820 table is sorted and 63 * Note: this function only works correctly if the e820 table is sorted and
205 * not-overlapping, which is the case 64 * not-overlapping, which is the case
206 */ 65 */
207int __init e820_all_mapped(unsigned long start, unsigned long end, 66int __init e820_all_mapped(u64 start, u64 end, unsigned type)
208 unsigned type)
209{ 67{
210 int i; 68 int i;
211 69
@@ -234,214 +92,13 @@ int __init e820_all_mapped(unsigned long start, unsigned long end,
234} 92}
235 93
236/* 94/*
237 * Find a free area with specified alignment in a specific range.
238 */
239unsigned long __init find_e820_area(unsigned long start, unsigned long end,
240 unsigned long size, unsigned long align)
241{
242 int i;
243
244 for (i = 0; i < e820.nr_map; i++) {
245 struct e820entry *ei = &e820.map[i];
246 unsigned long addr, last;
247 unsigned long ei_last;
248
249 if (ei->type != E820_RAM)
250 continue;
251 addr = round_up(ei->addr, align);
252 ei_last = ei->addr + ei->size;
253 if (addr < start)
254 addr = round_up(start, align);
255 if (addr >= ei_last)
256 continue;
257 while (bad_addr(&addr, size, align) && addr+size <= ei_last)
258 ;
259 last = addr + size;
260 if (last > ei_last)
261 continue;
262 if (last > end)
263 continue;
264 return addr;
265 }
266 return -1UL;
267}
268
269/*
270 * Find next free range after *start
271 */
272unsigned long __init find_e820_area_size(unsigned long start,
273 unsigned long *sizep,
274 unsigned long align)
275{
276 int i;
277
278 for (i = 0; i < e820.nr_map; i++) {
279 struct e820entry *ei = &e820.map[i];
280 unsigned long addr, last;
281 unsigned long ei_last;
282
283 if (ei->type != E820_RAM)
284 continue;
285 addr = round_up(ei->addr, align);
286 ei_last = ei->addr + ei->size;
287 if (addr < start)
288 addr = round_up(start, align);
289 if (addr >= ei_last)
290 continue;
291 *sizep = ei_last - addr;
292 while (bad_addr_size(&addr, sizep, align) &&
293 addr + *sizep <= ei_last)
294 ;
295 last = addr + *sizep;
296 if (last > ei_last)
297 continue;
298 return addr;
299 }
300 return -1UL;
301
302}
303/*
304 * Find the highest page frame number we have available
305 */
306unsigned long __init e820_end_of_ram(void)
307{
308 unsigned long end_pfn;
309
310 end_pfn = find_max_pfn_with_active_regions();
311
312 if (end_pfn > max_pfn_mapped)
313 max_pfn_mapped = end_pfn;
314 if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT)
315 max_pfn_mapped = MAXMEM>>PAGE_SHIFT;
316 if (end_pfn > end_user_pfn)
317 end_pfn = end_user_pfn;
318 if (end_pfn > max_pfn_mapped)
319 end_pfn = max_pfn_mapped;
320
321 printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped);
322 return end_pfn;
323}
324
325/*
326 * Mark e820 reserved areas as busy for the resource manager.
327 */
328void __init e820_reserve_resources(void)
329{
330 int i;
331 struct resource *res;
332
333 res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
334 for (i = 0; i < e820.nr_map; i++) {
335 switch (e820.map[i].type) {
336 case E820_RAM: res->name = "System RAM"; break;
337 case E820_ACPI: res->name = "ACPI Tables"; break;
338 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
339 default: res->name = "reserved";
340 }
341 res->start = e820.map[i].addr;
342 res->end = res->start + e820.map[i].size - 1;
343 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
344 insert_resource(&iomem_resource, res);
345 res++;
346 }
347}
348
349/*
350 * Find the ranges of physical addresses that do not correspond to
351 * e820 RAM areas and mark the corresponding pages as nosave for software
352 * suspend and suspend to RAM.
353 *
354 * This function requires the e820 map to be sorted and without any
355 * overlapping entries and assumes the first e820 area to be RAM.
356 */
357void __init e820_mark_nosave_regions(void)
358{
359 int i;
360 unsigned long paddr;
361
362 paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
363 for (i = 1; i < e820.nr_map; i++) {
364 struct e820entry *ei = &e820.map[i];
365
366 if (paddr < ei->addr)
367 register_nosave_region(PFN_DOWN(paddr),
368 PFN_UP(ei->addr));
369
370 paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
371 if (ei->type != E820_RAM)
372 register_nosave_region(PFN_UP(ei->addr),
373 PFN_DOWN(paddr));
374
375 if (paddr >= (end_pfn << PAGE_SHIFT))
376 break;
377 }
378}
379
380/*
381 * Finds an active region in the address range from start_pfn to end_pfn and
382 * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
383 */
384static int __init e820_find_active_region(const struct e820entry *ei,
385 unsigned long start_pfn,
386 unsigned long end_pfn,
387 unsigned long *ei_startpfn,
388 unsigned long *ei_endpfn)
389{
390 *ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
391 *ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT;
392
393 /* Skip map entries smaller than a page */
394 if (*ei_startpfn >= *ei_endpfn)
395 return 0;
396
397 /* Check if max_pfn_mapped should be updated */
398 if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped)
399 max_pfn_mapped = *ei_endpfn;
400
401 /* Skip if map is outside the node */
402 if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
403 *ei_startpfn >= end_pfn)
404 return 0;
405
406 /* Check for overlaps */
407 if (*ei_startpfn < start_pfn)
408 *ei_startpfn = start_pfn;
409 if (*ei_endpfn > end_pfn)
410 *ei_endpfn = end_pfn;
411
412 /* Obey end_user_pfn to save on memmap */
413 if (*ei_startpfn >= end_user_pfn)
414 return 0;
415 if (*ei_endpfn > end_user_pfn)
416 *ei_endpfn = end_user_pfn;
417
418 return 1;
419}
420
421/* Walk the e820 map and register active regions within a node */
422void __init
423e820_register_active_regions(int nid, unsigned long start_pfn,
424 unsigned long end_pfn)
425{
426 unsigned long ei_startpfn;
427 unsigned long ei_endpfn;
428 int i;
429
430 for (i = 0; i < e820.nr_map; i++)
431 if (e820_find_active_region(&e820.map[i],
432 start_pfn, end_pfn,
433 &ei_startpfn, &ei_endpfn))
434 add_active_range(nid, ei_startpfn, ei_endpfn);
435}
436
437/*
438 * Add a memory region to the kernel e820 map. 95 * Add a memory region to the kernel e820 map.
439 */ 96 */
440void __init add_memory_region(unsigned long start, unsigned long size, int type) 97void __init e820_add_region(u64 start, u64 size, int type)
441{ 98{
442 int x = e820.nr_map; 99 int x = e820.nr_map;
443 100
444 if (x == E820MAX) { 101 if (x == ARRAY_SIZE(e820.map)) {
445 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); 102 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
446 return; 103 return;
447 } 104 }
@@ -452,28 +109,7 @@ void __init add_memory_region(unsigned long start, unsigned long size, int type)
452 e820.nr_map++; 109 e820.nr_map++;
453} 110}
454 111
455/* 112void __init e820_print_map(char *who)
456 * Find the hole size (in bytes) in the memory range.
457 * @start: starting address of the memory range to scan
458 * @end: ending address of the memory range to scan
459 */
460unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
461{
462 unsigned long start_pfn = start >> PAGE_SHIFT;
463 unsigned long end_pfn = end >> PAGE_SHIFT;
464 unsigned long ei_startpfn, ei_endpfn, ram = 0;
465 int i;
466
467 for (i = 0; i < e820.nr_map; i++) {
468 if (e820_find_active_region(&e820.map[i],
469 start_pfn, end_pfn,
470 &ei_startpfn, &ei_endpfn))
471 ram += ei_endpfn - ei_startpfn;
472 }
473 return end - start - (ram << PAGE_SHIFT);
474}
475
476static void __init e820_print_map(char *who)
477{ 113{
478 int i; 114 int i;
479 115
@@ -506,19 +142,75 @@ static void __init e820_print_map(char *who)
506 * Sanitize the BIOS e820 map. 142 * Sanitize the BIOS e820 map.
507 * 143 *
508 * Some e820 responses include overlapping entries. The following 144 * Some e820 responses include overlapping entries. The following
509 * replaces the original e820 map with a new one, removing overlaps. 145 * replaces the original e820 map with a new one, removing overlaps,
146 * and resolving conflicting memory types in favor of highest
147 * numbered type.
510 * 148 *
149 * The input parameter biosmap points to an array of 'struct
150 * e820entry' which on entry has elements in the range [0, *pnr_map)
151 * valid, and which has space for up to max_nr_map entries.
152 * On return, the resulting sanitized e820 map entries will be
153 * overwritten in the same location, starting at biosmap.
154 *
155 * The integer pointed to by pnr_map must be valid on entry (the
156 * current number of valid entries located at biosmap) and will
157 * be updated on return, with the new number of valid entries
158 * (something no more than max_nr_map.)
159 *
160 * The return value from sanitize_e820_map() is zero if it
161 * successfully 'sanitized' the map entries passed in, and is -1
162 * if it did nothing, which can happen if either of (1) it was
163 * only passed one map entry, or (2) any of the input map entries
164 * were invalid (start + size < start, meaning that the size was
165 * so big the described memory range wrapped around through zero.)
166 *
167 * Visually we're performing the following
168 * (1,2,3,4 = memory types)...
169 *
170 * Sample memory map (w/overlaps):
171 * ____22__________________
172 * ______________________4_
173 * ____1111________________
174 * _44_____________________
175 * 11111111________________
176 * ____________________33__
177 * ___________44___________
178 * __________33333_________
179 * ______________22________
180 * ___________________2222_
181 * _________111111111______
182 * _____________________11_
183 * _________________4______
184 *
185 * Sanitized equivalent (no overlap):
186 * 1_______________________
187 * _44_____________________
188 * ___1____________________
189 * ____22__________________
190 * ______11________________
191 * _________1______________
192 * __________3_____________
193 * ___________44___________
194 * _____________33_________
195 * _______________2________
196 * ________________1_______
197 * _________________4______
198 * ___________________2____
199 * ____________________33__
200 * ______________________4_
511 */ 201 */
512static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) 202
203int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
204 int *pnr_map)
513{ 205{
514 struct change_member { 206 struct change_member {
515 struct e820entry *pbios; /* pointer to original bios entry */ 207 struct e820entry *pbios; /* pointer to original bios entry */
516 unsigned long long addr; /* address for this change point */ 208 unsigned long long addr; /* address for this change point */
517 }; 209 };
518 static struct change_member change_point_list[2*E820MAX] __initdata; 210static struct change_member change_point_list[2*E820_X_MAX] __initdata;
519 static struct change_member *change_point[2*E820MAX] __initdata; 211static struct change_member *change_point[2*E820_X_MAX] __initdata;
520 static struct e820entry *overlap_list[E820MAX] __initdata; 212static struct e820entry *overlap_list[E820_X_MAX] __initdata;
521 static struct e820entry new_bios[E820MAX] __initdata; 213static struct e820entry new_bios[E820_X_MAX] __initdata;
522 struct change_member *change_tmp; 214 struct change_member *change_tmp;
523 unsigned long current_type, last_type; 215 unsigned long current_type, last_type;
524 unsigned long long last_addr; 216 unsigned long long last_addr;
@@ -528,48 +220,12 @@ static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map)
528 int old_nr, new_nr, chg_nr; 220 int old_nr, new_nr, chg_nr;
529 int i; 221 int i;
530 222
531 /*
532 Visually we're performing the following
533 (1,2,3,4 = memory types)...
534
535 Sample memory map (w/overlaps):
536 ____22__________________
537 ______________________4_
538 ____1111________________
539 _44_____________________
540 11111111________________
541 ____________________33__
542 ___________44___________
543 __________33333_________
544 ______________22________
545 ___________________2222_
546 _________111111111______
547 _____________________11_
548 _________________4______
549
550 Sanitized equivalent (no overlap):
551 1_______________________
552 _44_____________________
553 ___1____________________
554 ____22__________________
555 ______11________________
556 _________1______________
557 __________3_____________
558 ___________44___________
559 _____________33_________
560 _______________2________
561 ________________1_______
562 _________________4______
563 ___________________2____
564 ____________________33__
565 ______________________4_
566 */
567
568 /* if there's only one memory region, don't bother */ 223 /* if there's only one memory region, don't bother */
569 if (*pnr_map < 2) 224 if (*pnr_map < 2)
570 return -1; 225 return -1;
571 226
572 old_nr = *pnr_map; 227 old_nr = *pnr_map;
228 BUG_ON(old_nr > max_nr_map);
573 229
574 /* bail out if we find any unreasonable addresses in bios map */ 230 /* bail out if we find any unreasonable addresses in bios map */
575 for (i = 0; i < old_nr; i++) 231 for (i = 0; i < old_nr; i++)
@@ -681,7 +337,7 @@ static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map)
681 * no more space left for new 337 * no more space left for new
682 * bios entries ? 338 * bios entries ?
683 */ 339 */
684 if (++new_bios_entry >= E820MAX) 340 if (++new_bios_entry >= max_nr_map)
685 break; 341 break;
686 } 342 }
687 if (current_type != 0) { 343 if (current_type != 0) {
@@ -703,22 +359,9 @@ static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map)
703 return 0; 359 return 0;
704} 360}
705 361
706/* 362static int __init __copy_e820_map(struct e820entry *biosmap, int nr_map)
707 * Copy the BIOS e820 map into a safe place.
708 *
709 * Sanity-check it while we're at it..
710 *
711 * If we're lucky and live on a modern system, the setup code
712 * will have given us a memory map that we can use to properly
713 * set up memory. If we aren't, we'll fake a memory map.
714 */
715static int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
716{ 363{
717 /* Only one memory region (or negative)? Ignore it */ 364 while (nr_map) {
718 if (nr_map < 2)
719 return -1;
720
721 do {
722 u64 start = biosmap->addr; 365 u64 start = biosmap->addr;
723 u64 size = biosmap->size; 366 u64 size = biosmap->size;
724 u64 end = start + size; 367 u64 end = start + size;
@@ -728,111 +371,37 @@ static int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
728 if (start > end) 371 if (start > end)
729 return -1; 372 return -1;
730 373
731 add_memory_region(start, size, type); 374 e820_add_region(start, size, type);
732 } while (biosmap++, --nr_map);
733 return 0;
734}
735
736static void early_panic(char *msg)
737{
738 early_printk(msg);
739 panic(msg);
740}
741
742/* We're not void only for x86 32-bit compat */
743char * __init machine_specific_memory_setup(void)
744{
745 char *who = "BIOS-e820";
746 /*
747 * Try to copy the BIOS-supplied E820-map.
748 *
749 * Otherwise fake a memory map; one section from 0k->640k,
750 * the next section from 1mb->appropriate_mem_k
751 */
752 sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
753 if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0)
754 early_panic("Cannot find a valid memory map");
755 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
756 e820_print_map(who);
757
758 /* In case someone cares... */
759 return who;
760}
761
762static int __init parse_memopt(char *p)
763{
764 if (!p)
765 return -EINVAL;
766 end_user_pfn = memparse(p, &p);
767 end_user_pfn >>= PAGE_SHIFT;
768 return 0;
769}
770early_param("mem", parse_memopt);
771
772static int userdef __initdata;
773
774static int __init parse_memmap_opt(char *p)
775{
776 char *oldp;
777 unsigned long long start_at, mem_size;
778
779 if (!strcmp(p, "exactmap")) {
780#ifdef CONFIG_CRASH_DUMP
781 /*
782 * If we are doing a crash dump, we still need to know
783 * the real mem size before original memory map is
784 * reset.
785 */
786 e820_register_active_regions(0, 0, -1UL);
787 saved_max_pfn = e820_end_of_ram();
788 remove_all_active_ranges();
789#endif
790 max_pfn_mapped = 0;
791 e820.nr_map = 0;
792 userdef = 1;
793 return 0;
794 }
795
796 oldp = p;
797 mem_size = memparse(p, &p);
798 if (p == oldp)
799 return -EINVAL;
800 375
801 userdef = 1; 376 biosmap++;
802 if (*p == '@') { 377 nr_map--;
803 start_at = memparse(p+1, &p);
804 add_memory_region(start_at, mem_size, E820_RAM);
805 } else if (*p == '#') {
806 start_at = memparse(p+1, &p);
807 add_memory_region(start_at, mem_size, E820_ACPI);
808 } else if (*p == '$') {
809 start_at = memparse(p+1, &p);
810 add_memory_region(start_at, mem_size, E820_RESERVED);
811 } else {
812 end_user_pfn = (mem_size >> PAGE_SHIFT);
813 } 378 }
814 return *p == '\0' ? 0 : -EINVAL; 379 return 0;
815} 380}
816early_param("memmap", parse_memmap_opt);
817 381
818void __init finish_e820_parsing(void) 382/*
383 * Copy the BIOS e820 map into a safe place.
384 *
385 * Sanity-check it while we're at it..
386 *
387 * If we're lucky and live on a modern system, the setup code
388 * will have given us a memory map that we can use to properly
389 * set up memory. If we aren't, we'll fake a memory map.
390 */
391int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
819{ 392{
820 if (userdef) { 393 /* Only one memory region (or negative)? Ignore it */
821 char nr = e820.nr_map; 394 if (nr_map < 2)
822 395 return -1;
823 if (sanitize_e820_map(e820.map, &nr) < 0)
824 early_panic("Invalid user supplied memory map");
825 e820.nr_map = nr;
826 396
827 printk(KERN_INFO "user-defined physical RAM map:\n"); 397 return __copy_e820_map(biosmap, nr_map);
828 e820_print_map("user");
829 }
830} 398}
831 399
832void __init update_memory_range(u64 start, u64 size, unsigned old_type, 400u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
833 unsigned new_type) 401 unsigned new_type)
834{ 402{
835 int i; 403 int i;
404 u64 real_updated_size = 0;
836 405
837 BUG_ON(old_type == new_type); 406 BUG_ON(old_type == new_type);
838 407
@@ -842,8 +411,10 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type,
842 if (ei->type != old_type) 411 if (ei->type != old_type)
843 continue; 412 continue;
844 /* totally covered? */ 413 /* totally covered? */
845 if (ei->addr >= start && ei->size <= size) { 414 if (ei->addr >= start &&
415 (ei->addr + ei->size) <= (start + size)) {
846 ei->type = new_type; 416 ei->type = new_type;
417 real_updated_size += ei->size;
847 continue; 418 continue;
848 } 419 }
849 /* partially covered */ 420 /* partially covered */
@@ -851,26 +422,25 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type,
851 final_end = min(start + size, ei->addr + ei->size); 422 final_end = min(start + size, ei->addr + ei->size);
852 if (final_start >= final_end) 423 if (final_start >= final_end)
853 continue; 424 continue;
854 add_memory_region(final_start, final_end - final_start, 425 e820_add_region(final_start, final_end - final_start,
855 new_type); 426 new_type);
427 real_updated_size += final_end - final_start;
856 } 428 }
429 return real_updated_size;
857} 430}
858 431
859void __init update_e820(void) 432void __init update_e820(void)
860{ 433{
861 u8 nr_map; 434 int nr_map;
862 435
863 nr_map = e820.nr_map; 436 nr_map = e820.nr_map;
864 if (sanitize_e820_map(e820.map, &nr_map)) 437 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
865 return; 438 return;
866 e820.nr_map = nr_map; 439 e820.nr_map = nr_map;
867 printk(KERN_INFO "modified physical RAM map:\n"); 440 printk(KERN_INFO "modified physical RAM map:\n");
868 e820_print_map("modified"); 441 e820_print_map("modified");
869} 442}
870 443
871unsigned long pci_mem_start = 0xaeedbabe;
872EXPORT_SYMBOL(pci_mem_start);
873
874/* 444/*
875 * Search for the biggest gap in the low 32 bits of the e820 445 * Search for the biggest gap in the low 32 bits of the e820
876 * memory space. We pass this space to PCI to assign MMIO resources 446 * memory space. We pass this space to PCI to assign MMIO resources
@@ -880,7 +450,7 @@ EXPORT_SYMBOL(pci_mem_start);
880__init void e820_setup_gap(void) 450__init void e820_setup_gap(void)
881{ 451{
882 unsigned long gapstart, gapsize, round; 452 unsigned long gapstart, gapsize, round;
883 unsigned long last; 453 unsigned long long last;
884 int i; 454 int i;
885 int found = 0; 455 int found = 0;
886 456
@@ -909,6 +479,7 @@ __init void e820_setup_gap(void)
909 last = start; 479 last = start;
910 } 480 }
911 481
482#ifdef CONFIG_X86_64
912 if (!found) { 483 if (!found) {
913 gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; 484 gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
914 printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " 485 printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
@@ -916,6 +487,7 @@ __init void e820_setup_gap(void)
916 KERN_ERR "PCI: Unassigned devices with 32bit resource " 487 KERN_ERR "PCI: Unassigned devices with 32bit resource "
917 "registers may break!\n"); 488 "registers may break!\n");
918 } 489 }
490#endif
919 491
920 /* 492 /*
921 * See how much we want to round up: start off with 493 * See how much we want to round up: start off with
@@ -932,6 +504,586 @@ __init void e820_setup_gap(void)
932 pci_mem_start, gapstart, gapsize); 504 pci_mem_start, gapstart, gapsize);
933} 505}
934 506
507/**
508 * Because of the size limitation of struct boot_params, only first
509 * 128 E820 memory entries are passed to kernel via
510 * boot_params.e820_map, others are passed via SETUP_E820_EXT node of
511 * linked list of struct setup_data, which is parsed here.
512 */
513void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data)
514{
515 u32 map_len;
516 int entries;
517 struct e820entry *extmap;
518
519 entries = sdata->len / sizeof(struct e820entry);
520 map_len = sdata->len + sizeof(struct setup_data);
521 if (map_len > PAGE_SIZE)
522 sdata = early_ioremap(pa_data, map_len);
523 extmap = (struct e820entry *)(sdata->data);
524 __copy_e820_map(extmap, entries);
525 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
526 if (map_len > PAGE_SIZE)
527 early_iounmap(sdata, map_len);
528 printk(KERN_INFO "extended physical RAM map:\n");
529 e820_print_map("extended");
530}
531
#if defined(CONFIG_X86_64) || \
	(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
/*
 * Register as "nosave" every page frame range that is not e820 RAM:
 * both the holes between consecutive entries and the entries whose
 * type is not E820_RAM.  Used for hibernation (32 bit) or software
 * suspend and suspend to RAM (64 bit).
 *
 * The e820 map must be sorted and free of overlapping entries, and the
 * first entry is assumed to be RAM.  The walk stops once an entry ends
 * at or beyond @limit_pfn.
 */
void __init e820_mark_nosave_regions(unsigned long limit_pfn)
{
	unsigned long prev_end_pfn;
	int idx;

	prev_end_pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
	for (idx = 1; idx < e820.nr_map; idx++) {
		struct e820entry *entry = &e820.map[idx];
		unsigned long first_pfn = PFN_UP(entry->addr);

		/* hole between the previous entry and this one */
		if (prev_end_pfn < first_pfn)
			register_nosave_region(prev_end_pfn, first_pfn);

		prev_end_pfn = PFN_DOWN(entry->addr + entry->size);

		/* the entry itself, unless it is RAM */
		if (entry->type != E820_RAM)
			register_nosave_region(first_pfn, prev_end_pfn);

		if (prev_end_pfn >= limit_pfn)
			break;
	}
}
#endif
563
564/*
565 * Early reserved memory areas.
566 */
567#define MAX_EARLY_RES 20
568
569struct early_res {
570 u64 start, end;
571 char name[16];
572};
573static struct early_res early_res[MAX_EARLY_RES] __initdata = {
574 { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
575#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE)
576 { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
577#endif
578#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
579 /*
580 * But first pinch a few for the stack/trampoline stuff
581 * FIXME: Don't need the extra page at 4K, but need to fix
582 * trampoline before removing it. (see the GDT stuff)
583 */
584 { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" },
585 /*
586 * Has to be in very low memory so we can execute
587 * real-mode AP code.
588 */
589 { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" },
590#endif
591 {}
592};
593
594static int __init find_overlapped_early(u64 start, u64 end)
595{
596 int i;
597 struct early_res *r;
598
599 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
600 r = &early_res[i];
601 if (end > r->start && start < r->end)
602 break;
603 }
604
605 return i;
606}
607
608void __init reserve_early(u64 start, u64 end, char *name)
609{
610 int i;
611 struct early_res *r;
612
613 i = find_overlapped_early(start, end);
614 if (i >= MAX_EARLY_RES)
615 panic("Too many early reservations");
616 r = &early_res[i];
617 if (r->end)
618 panic("Overlapping early reservations "
619 "%llx-%llx %s to %llx-%llx %s\n",
620 start, end - 1, name?name:"", r->start,
621 r->end - 1, r->name);
622 r->start = start;
623 r->end = end;
624 if (name)
625 strncpy(r->name, name, sizeof(r->name) - 1);
626}
627
628void __init free_early(u64 start, u64 end)
629{
630 struct early_res *r;
631 int i, j;
632
633 i = find_overlapped_early(start, end);
634 r = &early_res[i];
635 if (i >= MAX_EARLY_RES || r->end != end || r->start != start)
636 panic("free_early on not reserved area: %llx-%llx!",
637 start, end - 1);
638
639 for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
640 ;
641
642 memmove(&early_res[i], &early_res[i + 1],
643 (j - 1 - i) * sizeof(struct early_res));
644
645 early_res[j - 1].end = 0;
646}
647
648void __init early_res_to_bootmem(u64 start, u64 end)
649{
650 int i;
651 u64 final_start, final_end;
652 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
653 struct early_res *r = &early_res[i];
654 final_start = max(start, r->start);
655 final_end = min(end, r->end);
656 if (final_start >= final_end)
657 continue;
658 printk(KERN_INFO " early res: %d [%llx-%llx] %s\n", i,
659 final_start, final_end - 1, r->name);
660 reserve_bootmem_generic(final_start, final_end - final_start,
661 BOOTMEM_DEFAULT);
662 }
663}
664
665/* Check for already reserved areas */
666static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
667{
668 int i;
669 u64 addr = *addrp;
670 int changed = 0;
671 struct early_res *r;
672again:
673 i = find_overlapped_early(addr, addr + size);
674 r = &early_res[i];
675 if (i < MAX_EARLY_RES && r->end) {
676 *addrp = addr = round_up(r->end, align);
677 changed = 1;
678 goto again;
679 }
680 return changed;
681}
682
683/* Check for already reserved areas */
684static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
685{
686 int i;
687 u64 addr = *addrp, last;
688 u64 size = *sizep;
689 int changed = 0;
690again:
691 last = addr + size;
692 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
693 struct early_res *r = &early_res[i];
694 if (last > r->start && addr < r->start) {
695 size = r->start - addr;
696 changed = 1;
697 goto again;
698 }
699 if (last > r->end && addr < r->end) {
700 addr = round_up(r->end, align);
701 size = last - addr;
702 changed = 1;
703 goto again;
704 }
705 if (last <= r->end && addr >= r->start) {
706 (*sizep)++;
707 return 0;
708 }
709 }
710 if (changed) {
711 *addrp = addr;
712 *sizep = size;
713 }
714 return changed;
715}
716
717/*
718 * Find a free area with specified alignment in a specific range.
719 */
720u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
721{
722 int i;
723
724 for (i = 0; i < e820.nr_map; i++) {
725 struct e820entry *ei = &e820.map[i];
726 u64 addr, last;
727 u64 ei_last;
728
729 if (ei->type != E820_RAM)
730 continue;
731 addr = round_up(ei->addr, align);
732 ei_last = ei->addr + ei->size;
733 if (addr < start)
734 addr = round_up(start, align);
735 if (addr >= ei_last)
736 continue;
737 while (bad_addr(&addr, size, align) && addr+size <= ei_last)
738 ;
739 last = addr + size;
740 if (last > ei_last)
741 continue;
742 if (last > end)
743 continue;
744 return addr;
745 }
746 return -1ULL;
747}
748
749/*
750 * Find next free range after *start
751 */
752u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
753{
754 int i;
755
756 for (i = 0; i < e820.nr_map; i++) {
757 struct e820entry *ei = &e820.map[i];
758 u64 addr, last;
759 u64 ei_last;
760
761 if (ei->type != E820_RAM)
762 continue;
763 addr = round_up(ei->addr, align);
764 ei_last = ei->addr + ei->size;
765 if (addr < start)
766 addr = round_up(start, align);
767 if (addr >= ei_last)
768 continue;
769 *sizep = ei_last - addr;
770 while (bad_addr_size(&addr, sizep, align) &&
771 addr + *sizep <= ei_last)
772 ;
773 last = addr + *sizep;
774 if (last > ei_last)
775 continue;
776 return addr;
777 }
778 return -1UL;
779
780}
781
782/*
783 * pre allocated 4k and reserved it in e820
784 */
785u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
786{
787 u64 size = 0;
788 u64 addr;
789 u64 start;
790
791 start = startt;
792 while (size < sizet)
793 start = find_e820_area_size(start, &size, align);
794
795 if (size < sizet)
796 return 0;
797
798 addr = round_down(start + size - sizet, align);
799 e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
800 printk(KERN_INFO "update e820 for early_reserve_e820\n");
801 update_e820();
802
803 return addr;
804}
805
806#ifdef CONFIG_X86_32
807# ifdef CONFIG_X86_PAE
808# define MAX_ARCH_PFN (1ULL<<(36-PAGE_SHIFT))
809# else
810# define MAX_ARCH_PFN (1ULL<<(32-PAGE_SHIFT))
811# endif
812#else /* CONFIG_X86_32 */
813# define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT
814#endif
815
816/*
817 * Last pfn which the user wants to use.
818 */
819unsigned long __initdata end_user_pfn = MAX_ARCH_PFN;
820
821/*
822 * Find the highest page frame number we have available
823 */
824unsigned long __init e820_end_of_ram(void)
825{
826 unsigned long last_pfn;
827 unsigned long max_arch_pfn = MAX_ARCH_PFN;
828
829 last_pfn = find_max_pfn_with_active_regions();
830
831 if (last_pfn > max_arch_pfn)
832 last_pfn = max_arch_pfn;
833 if (last_pfn > end_user_pfn)
834 last_pfn = end_user_pfn;
835
836 printk(KERN_INFO "last_pfn = %lu max_arch_pfn = %lu\n",
837 last_pfn, max_arch_pfn);
838 return last_pfn;
839}
840
841/*
842 * Finds an active region in the address range from start_pfn to last_pfn and
843 * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
844 */
845int __init e820_find_active_region(const struct e820entry *ei,
846 unsigned long start_pfn,
847 unsigned long last_pfn,
848 unsigned long *ei_startpfn,
849 unsigned long *ei_endpfn)
850{
851 u64 align = PAGE_SIZE;
852
853 *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT;
854 *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT;
855
856 /* Skip map entries smaller than a page */
857 if (*ei_startpfn >= *ei_endpfn)
858 return 0;
859
860 /* Skip if map is outside the node */
861 if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
862 *ei_startpfn >= last_pfn)
863 return 0;
864
865 /* Check for overlaps */
866 if (*ei_startpfn < start_pfn)
867 *ei_startpfn = start_pfn;
868 if (*ei_endpfn > last_pfn)
869 *ei_endpfn = last_pfn;
870
871 /* Obey end_user_pfn to save on memmap */
872 if (*ei_startpfn >= end_user_pfn)
873 return 0;
874 if (*ei_endpfn > end_user_pfn)
875 *ei_endpfn = end_user_pfn;
876
877 return 1;
878}
879
880/* Walk the e820 map and register active regions within a node */
881void __init e820_register_active_regions(int nid, unsigned long start_pfn,
882 unsigned long last_pfn)
883{
884 unsigned long ei_startpfn;
885 unsigned long ei_endpfn;
886 int i;
887
888 for (i = 0; i < e820.nr_map; i++)
889 if (e820_find_active_region(&e820.map[i],
890 start_pfn, last_pfn,
891 &ei_startpfn, &ei_endpfn))
892 add_active_range(nid, ei_startpfn, ei_endpfn);
893}
894
895/*
896 * Find the hole size (in bytes) in the memory range.
897 * @start: starting address of the memory range to scan
898 * @end: ending address of the memory range to scan
899 */
900u64 __init e820_hole_size(u64 start, u64 end)
901{
902 unsigned long start_pfn = start >> PAGE_SHIFT;
903 unsigned long last_pfn = end >> PAGE_SHIFT;
904 unsigned long ei_startpfn, ei_endpfn, ram = 0;
905 int i;
906
907 for (i = 0; i < e820.nr_map; i++) {
908 if (e820_find_active_region(&e820.map[i],
909 start_pfn, last_pfn,
910 &ei_startpfn, &ei_endpfn))
911 ram += ei_endpfn - ei_startpfn;
912 }
913 return end - start - ((u64)ram << PAGE_SHIFT);
914}
915
/*
 * Print @msg through early_printk() and then panic with the same text.
 *
 * @msg is passed as an argument to a fixed "%s" format so that any '%'
 * it contains is printed literally instead of being interpreted as a
 * conversion.
 */
static void early_panic(char *msg)
{
	early_printk("%s", msg);
	panic("%s", msg);
}
921
922/* "mem=nopentium" disables the 4MB page tables. */
923static int __init parse_memopt(char *p)
924{
925 u64 mem_size;
926
927 if (!p)
928 return -EINVAL;
929
930#ifdef CONFIG_X86_32
931 if (!strcmp(p, "nopentium")) {
932 setup_clear_cpu_cap(X86_FEATURE_PSE);
933 return 0;
934 }
935#endif
936
937 mem_size = memparse(p, &p);
938 end_user_pfn = mem_size>>PAGE_SHIFT;
939 return 0;
940}
941early_param("mem", parse_memopt);
942
943static int userdef __initdata;
944
945static int __init parse_memmap_opt(char *p)
946{
947 char *oldp;
948 u64 start_at, mem_size;
949
950 if (!strcmp(p, "exactmap")) {
951#ifdef CONFIG_CRASH_DUMP
952 /*
953 * If we are doing a crash dump, we still need to know
954 * the real mem size before original memory map is
955 * reset.
956 */
957 e820_register_active_regions(0, 0, -1UL);
958 saved_max_pfn = e820_end_of_ram();
959 remove_all_active_ranges();
960#endif
961 e820.nr_map = 0;
962 userdef = 1;
963 return 0;
964 }
965
966 oldp = p;
967 mem_size = memparse(p, &p);
968 if (p == oldp)
969 return -EINVAL;
970
971 userdef = 1;
972 if (*p == '@') {
973 start_at = memparse(p+1, &p);
974 e820_add_region(start_at, mem_size, E820_RAM);
975 } else if (*p == '#') {
976 start_at = memparse(p+1, &p);
977 e820_add_region(start_at, mem_size, E820_ACPI);
978 } else if (*p == '$') {
979 start_at = memparse(p+1, &p);
980 e820_add_region(start_at, mem_size, E820_RESERVED);
981 } else {
982 end_user_pfn = (mem_size >> PAGE_SHIFT);
983 }
984 return *p == '\0' ? 0 : -EINVAL;
985}
986early_param("memmap", parse_memmap_opt);
987
988void __init finish_e820_parsing(void)
989{
990 if (userdef) {
991 int nr = e820.nr_map;
992
993 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
994 early_panic("Invalid user supplied memory map");
995 e820.nr_map = nr;
996
997 printk(KERN_INFO "user-defined physical RAM map:\n");
998 e820_print_map("user");
999 }
1000}
1001
1002/*
1003 * Mark e820 reserved areas as busy for the resource manager.
1004 */
1005void __init e820_reserve_resources(void)
1006{
1007 int i;
1008 struct resource *res;
1009
1010 res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
1011 for (i = 0; i < e820.nr_map; i++) {
1012 switch (e820.map[i].type) {
1013 case E820_RAM: res->name = "System RAM"; break;
1014 case E820_ACPI: res->name = "ACPI Tables"; break;
1015 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1016 default: res->name = "reserved";
1017 }
1018 res->start = e820.map[i].addr;
1019 res->end = res->start + e820.map[i].size - 1;
1020#ifndef CONFIG_RESOURCES_64BIT
1021 if (res->end > 0x100000000ULL) {
1022 res++;
1023 continue;
1024 }
1025#endif
1026 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1027 insert_resource(&iomem_resource, res);
1028 res++;
1029 }
1030}
1031
1032char *__init default_machine_specific_memory_setup(void)
1033{
1034 char *who = "BIOS-e820";
1035 int new_nr;
1036 /*
1037 * Try to copy the BIOS-supplied E820-map.
1038 *
1039 * Otherwise fake a memory map; one section from 0k->640k,
1040 * the next section from 1mb->appropriate_mem_k
1041 */
1042 new_nr = boot_params.e820_entries;
1043 sanitize_e820_map(boot_params.e820_map,
1044 ARRAY_SIZE(boot_params.e820_map),
1045 &new_nr);
1046 boot_params.e820_entries = new_nr;
1047 if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) {
1048 u64 mem_size;
1049
1050 /* compare results from other methods and take the greater */
1051 if (boot_params.alt_mem_k
1052 < boot_params.screen_info.ext_mem_k) {
1053 mem_size = boot_params.screen_info.ext_mem_k;
1054 who = "BIOS-88";
1055 } else {
1056 mem_size = boot_params.alt_mem_k;
1057 who = "BIOS-e801";
1058 }
1059
1060 e820.nr_map = 0;
1061 e820_add_region(0, LOWMEMSIZE(), E820_RAM);
1062 e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
1063 }
1064
1065 /* In case someone cares... */
1066 return who;
1067}
1068
1069char *__init __attribute__((weak)) machine_specific_memory_setup(void)
1070{
1071 return default_machine_specific_memory_setup();
1072}
1073
1074/* Overridden in paravirt.c if CONFIG_PARAVIRT */
1075char * __init __attribute__((weak)) memory_setup(void)
1076{
1077 return machine_specific_memory_setup();
1078}
1079
1080void __init setup_memory_map(void)
1081{
1082 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1083 e820_print_map(memory_setup());
1084}
1085
1086#ifdef CONFIG_X86_64
935int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) 1087int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
936{ 1088{
937 int i; 1089 int i;
@@ -950,3 +1102,4 @@ int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
950 max_pfn << PAGE_SHIFT) - *addr; 1102 max_pfn << PAGE_SHIFT) - *addr;
951 return i + 1; 1103 return i + 1;
952} 1104}
1105#endif
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
deleted file mode 100644
index ed733e7cf4e..00000000000
--- a/arch/x86/kernel/e820_32.c
+++ /dev/null
@@ -1,775 +0,0 @@
1#include <linux/kernel.h>
2#include <linux/types.h>
3#include <linux/init.h>
4#include <linux/bootmem.h>
5#include <linux/ioport.h>
6#include <linux/string.h>
7#include <linux/kexec.h>
8#include <linux/module.h>
9#include <linux/mm.h>
10#include <linux/pfn.h>
11#include <linux/uaccess.h>
12#include <linux/suspend.h>
13
14#include <asm/pgtable.h>
15#include <asm/page.h>
16#include <asm/e820.h>
17#include <asm/setup.h>
18
19struct e820map e820;
20struct change_member {
21 struct e820entry *pbios; /* pointer to original bios entry */
22 unsigned long long addr; /* address for this change point */
23};
24static struct change_member change_point_list[2*E820MAX] __initdata;
25static struct change_member *change_point[2*E820MAX] __initdata;
26static struct e820entry *overlap_list[E820MAX] __initdata;
27static struct e820entry new_bios[E820MAX] __initdata;
28/* For PCI or other memory-mapped resources */
29unsigned long pci_mem_start = 0x10000000;
30#ifdef CONFIG_PCI
31EXPORT_SYMBOL(pci_mem_start);
32#endif
33extern int user_defined_memmap;
34
35static struct resource system_rom_resource = {
36 .name = "System ROM",
37 .start = 0xf0000,
38 .end = 0xfffff,
39 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
40};
41
42static struct resource extension_rom_resource = {
43 .name = "Extension ROM",
44 .start = 0xe0000,
45 .end = 0xeffff,
46 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
47};
48
49static struct resource adapter_rom_resources[] = { {
50 .name = "Adapter ROM",
51 .start = 0xc8000,
52 .end = 0,
53 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
54}, {
55 .name = "Adapter ROM",
56 .start = 0,
57 .end = 0,
58 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
59}, {
60 .name = "Adapter ROM",
61 .start = 0,
62 .end = 0,
63 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
64}, {
65 .name = "Adapter ROM",
66 .start = 0,
67 .end = 0,
68 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
69}, {
70 .name = "Adapter ROM",
71 .start = 0,
72 .end = 0,
73 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
74}, {
75 .name = "Adapter ROM",
76 .start = 0,
77 .end = 0,
78 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
79} };
80
81static struct resource video_rom_resource = {
82 .name = "Video ROM",
83 .start = 0xc0000,
84 .end = 0xc7fff,
85 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
86};
87
88#define ROMSIGNATURE 0xaa55
89
90static int __init romsignature(const unsigned char *rom)
91{
92 const unsigned short * const ptr = (const unsigned short *)rom;
93 unsigned short sig;
94
95 return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
96}
97
98static int __init romchecksum(const unsigned char *rom, unsigned long length)
99{
100 unsigned char sum, c;
101
102 for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
103 sum += c;
104 return !length && !sum;
105}
106
107static void __init probe_roms(void)
108{
109 const unsigned char *rom;
110 unsigned long start, length, upper;
111 unsigned char c;
112 int i;
113
114 /* video rom */
115 upper = adapter_rom_resources[0].start;
116 for (start = video_rom_resource.start; start < upper; start += 2048) {
117 rom = isa_bus_to_virt(start);
118 if (!romsignature(rom))
119 continue;
120
121 video_rom_resource.start = start;
122
123 if (probe_kernel_address(rom + 2, c) != 0)
124 continue;
125
126 /* 0 < length <= 0x7f * 512, historically */
127 length = c * 512;
128
129 /* if checksum okay, trust length byte */
130 if (length && romchecksum(rom, length))
131 video_rom_resource.end = start + length - 1;
132
133 request_resource(&iomem_resource, &video_rom_resource);
134 break;
135 }
136
137 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
138 if (start < upper)
139 start = upper;
140
141 /* system rom */
142 request_resource(&iomem_resource, &system_rom_resource);
143 upper = system_rom_resource.start;
144
145 /* check for extension rom (ignore length byte!) */
146 rom = isa_bus_to_virt(extension_rom_resource.start);
147 if (romsignature(rom)) {
148 length = extension_rom_resource.end - extension_rom_resource.start + 1;
149 if (romchecksum(rom, length)) {
150 request_resource(&iomem_resource, &extension_rom_resource);
151 upper = extension_rom_resource.start;
152 }
153 }
154
155 /* check for adapter roms on 2k boundaries */
156 for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
157 rom = isa_bus_to_virt(start);
158 if (!romsignature(rom))
159 continue;
160
161 if (probe_kernel_address(rom + 2, c) != 0)
162 continue;
163
164 /* 0 < length <= 0x7f * 512, historically */
165 length = c * 512;
166
167 /* but accept any length that fits if checksum okay */
168 if (!length || start + length > upper || !romchecksum(rom, length))
169 continue;
170
171 adapter_rom_resources[i].start = start;
172 adapter_rom_resources[i].end = start + length - 1;
173 request_resource(&iomem_resource, &adapter_rom_resources[i]);
174
175 start = adapter_rom_resources[i++].end & ~2047UL;
176 }
177}
178
179/*
180 * Request address space for all standard RAM and ROM resources
181 * and also for regions reported as reserved by the e820.
182 */
183void __init init_iomem_resources(struct resource *code_resource,
184 struct resource *data_resource,
185 struct resource *bss_resource)
186{
187 int i;
188
189 probe_roms();
190 for (i = 0; i < e820.nr_map; i++) {
191 struct resource *res;
192#ifndef CONFIG_RESOURCES_64BIT
193 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
194 continue;
195#endif
196 res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
197 switch (e820.map[i].type) {
198 case E820_RAM: res->name = "System RAM"; break;
199 case E820_ACPI: res->name = "ACPI Tables"; break;
200 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
201 default: res->name = "reserved";
202 }
203 res->start = e820.map[i].addr;
204 res->end = res->start + e820.map[i].size - 1;
205 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
206 if (request_resource(&iomem_resource, res)) {
207 kfree(res);
208 continue;
209 }
210 if (e820.map[i].type == E820_RAM) {
211 /*
212 * We don't know which RAM region contains kernel data,
213 * so we try it repeatedly and let the resource manager
214 * test it.
215 */
216 request_resource(res, code_resource);
217 request_resource(res, data_resource);
218 request_resource(res, bss_resource);
219#ifdef CONFIG_KEXEC
220 if (crashk_res.start != crashk_res.end)
221 request_resource(res, &crashk_res);
222#endif
223 }
224 }
225}
226
#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
/*
 * e820_mark_nosave_regions - register as nosave, for hibernation, every
 * page frame range that is not e820 RAM: the holes between consecutive
 * entries as well as the non-RAM entries themselves.
 *
 * This function requires the e820 map to be sorted and without any
 * overlapping entries and assumes the first e820 area to be RAM.  The
 * walk stops once an entry ends at or beyond max_low_pfn.
 */
void __init e820_mark_nosave_regions(void)
{
	unsigned long prev_end_pfn;
	int idx;

	prev_end_pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
	for (idx = 1; idx < e820.nr_map; idx++) {
		struct e820entry *entry = &e820.map[idx];
		unsigned long first_pfn = PFN_UP(entry->addr);

		/* hole between the previous entry and this one */
		if (prev_end_pfn < first_pfn)
			register_nosave_region(prev_end_pfn, first_pfn);

		prev_end_pfn = PFN_DOWN(entry->addr + entry->size);

		/* the entry itself, unless it is RAM */
		if (entry->type != E820_RAM)
			register_nosave_region(first_pfn, prev_end_pfn);

		if (prev_end_pfn >= max_low_pfn)
			break;
	}
}
#endif
257
258void __init add_memory_region(unsigned long long start,
259 unsigned long long size, int type)
260{
261 int x;
262
263 x = e820.nr_map;
264
265 if (x == E820MAX) {
266 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
267 return;
268 }
269
270 e820.map[x].addr = start;
271 e820.map[x].size = size;
272 e820.map[x].type = type;
273 e820.nr_map++;
274} /* add_memory_region */
275
276/*
277 * Sanitize the BIOS e820 map.
278 *
279 * Some e820 responses include overlapping entries. The following
280 * replaces the original e820 map with a new one, removing overlaps.
281 *
282 */
283int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
284{
285 struct change_member *change_tmp;
286 unsigned long current_type, last_type;
287 unsigned long long last_addr;
288 int chgidx, still_changing;
289 int overlap_entries;
290 int new_bios_entry;
291 int old_nr, new_nr, chg_nr;
292 int i;
293
294 /*
295 Visually we're performing the following (1,2,3,4 = memory types)...
296
297 Sample memory map (w/overlaps):
298 ____22__________________
299 ______________________4_
300 ____1111________________
301 _44_____________________
302 11111111________________
303 ____________________33__
304 ___________44___________
305 __________33333_________
306 ______________22________
307 ___________________2222_
308 _________111111111______
309 _____________________11_
310 _________________4______
311
312 Sanitized equivalent (no overlap):
313 1_______________________
314 _44_____________________
315 ___1____________________
316 ____22__________________
317 ______11________________
318 _________1______________
319 __________3_____________
320 ___________44___________
321 _____________33_________
322 _______________2________
323 ________________1_______
324 _________________4______
325 ___________________2____
326 ____________________33__
327 ______________________4_
328 */
329 /* if there's only one memory region, don't bother */
330 if (*pnr_map < 2) {
331 return -1;
332 }
333
334 old_nr = *pnr_map;
335
336 /* bail out if we find any unreasonable addresses in bios map */
337 for (i=0; i<old_nr; i++)
338 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
339 return -1;
340 }
341
342 /* create pointers for initial change-point information (for sorting) */
343 for (i=0; i < 2*old_nr; i++)
344 change_point[i] = &change_point_list[i];
345
346 /* record all known change-points (starting and ending addresses),
347 omitting those that are for empty memory regions */
348 chgidx = 0;
349 for (i=0; i < old_nr; i++) {
350 if (biosmap[i].size != 0) {
351 change_point[chgidx]->addr = biosmap[i].addr;
352 change_point[chgidx++]->pbios = &biosmap[i];
353 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
354 change_point[chgidx++]->pbios = &biosmap[i];
355 }
356 }
357 chg_nr = chgidx; /* true number of change-points */
358
359 /* sort change-point list by memory addresses (low -> high) */
360 still_changing = 1;
361 while (still_changing) {
362 still_changing = 0;
363 for (i=1; i < chg_nr; i++) {
364 /* if <current_addr> > <last_addr>, swap */
365 /* or, if current=<start_addr> & last=<end_addr>, swap */
366 if ((change_point[i]->addr < change_point[i-1]->addr) ||
367 ((change_point[i]->addr == change_point[i-1]->addr) &&
368 (change_point[i]->addr == change_point[i]->pbios->addr) &&
369 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
370 )
371 {
372 change_tmp = change_point[i];
373 change_point[i] = change_point[i-1];
374 change_point[i-1] = change_tmp;
375 still_changing=1;
376 }
377 }
378 }
379
380 /* create a new bios memory map, removing overlaps */
381 overlap_entries=0; /* number of entries in the overlap table */
382 new_bios_entry=0; /* index for creating new bios map entries */
383 last_type = 0; /* start with undefined memory type */
384 last_addr = 0; /* start with 0 as last starting address */
385 /* loop through change-points, determining affect on the new bios map */
386 for (chgidx=0; chgidx < chg_nr; chgidx++)
387 {
388 /* keep track of all overlapping bios entries */
389 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
390 {
391 /* add map entry to overlap list (> 1 entry implies an overlap) */
392 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
393 }
394 else
395 {
396 /* remove entry from list (order independent, so swap with last) */
397 for (i=0; i<overlap_entries; i++)
398 {
399 if (overlap_list[i] == change_point[chgidx]->pbios)
400 overlap_list[i] = overlap_list[overlap_entries-1];
401 }
402 overlap_entries--;
403 }
404 /* if there are overlapping entries, decide which "type" to use */
405 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
406 current_type = 0;
407 for (i=0; i<overlap_entries; i++)
408 if (overlap_list[i]->type > current_type)
409 current_type = overlap_list[i]->type;
410 /* continue building up new bios map based on this information */
411 if (current_type != last_type) {
412 if (last_type != 0) {
413 new_bios[new_bios_entry].size =
414 change_point[chgidx]->addr - last_addr;
415 /* move forward only if the new size was non-zero */
416 if (new_bios[new_bios_entry].size != 0)
417 if (++new_bios_entry >= E820MAX)
418 break; /* no more space left for new bios entries */
419 }
420 if (current_type != 0) {
421 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
422 new_bios[new_bios_entry].type = current_type;
423 last_addr=change_point[chgidx]->addr;
424 }
425 last_type = current_type;
426 }
427 }
428 new_nr = new_bios_entry; /* retain count for new bios entries */
429
430 /* copy new bios mapping into original location */
431 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
432 *pnr_map = new_nr;
433
434 return 0;
435}
436
437/*
438 * Copy the BIOS e820 map into a safe place.
439 *
440 * Sanity-check it while we're at it..
441 *
442 * If we're lucky and live on a modern system, the setup code
443 * will have given us a memory map that we can use to properly
444 * set up memory. If we aren't, we'll fake a memory map.
445 *
446 * We check to see that the memory map contains at least 2 elements
447 * before we'll use it, because the detection code in setup.S may
448 * not be perfect and most every PC known to man has two memory
449 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
450 * thinkpad 560x, for example, does not cooperate with the memory
451 * detection code.)
452 */
453int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
454{
455 /* Only one memory region (or negative)? Ignore it */
456 if (nr_map < 2)
457 return -1;
458
459 do {
460 u64 start = biosmap->addr;
461 u64 size = biosmap->size;
462 u64 end = start + size;
463 u32 type = biosmap->type;
464
465 /* Overflow in 64 bits? Ignore the memory map. */
466 if (start > end)
467 return -1;
468
469 add_memory_region(start, size, type);
470 } while (biosmap++, --nr_map);
471
472 return 0;
473}
474
475/*
476 * Find the highest page frame number we have available
477 */
478void __init propagate_e820_map(void)
479{
480 int i;
481
482 max_pfn = 0;
483
484 for (i = 0; i < e820.nr_map; i++) {
485 unsigned long start, end;
486 /* RAM? */
487 if (e820.map[i].type != E820_RAM)
488 continue;
489 start = PFN_UP(e820.map[i].addr);
490 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
491 if (start >= end)
492 continue;
493 if (end > max_pfn)
494 max_pfn = end;
495 memory_present(0, start, end);
496 }
497}
498
499/*
500 * Register fully available low RAM pages with the bootmem allocator.
501 */
502void __init register_bootmem_low_pages(unsigned long max_low_pfn)
503{
504 int i;
505
506 for (i = 0; i < e820.nr_map; i++) {
507 unsigned long curr_pfn, last_pfn, size;
508 /*
509 * Reserve usable low memory
510 */
511 if (e820.map[i].type != E820_RAM)
512 continue;
513 /*
514 * We are rounding up the start address of usable memory:
515 */
516 curr_pfn = PFN_UP(e820.map[i].addr);
517 if (curr_pfn >= max_low_pfn)
518 continue;
519 /*
520 * ... and at the end of the usable range downwards:
521 */
522 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
523
524 if (last_pfn > max_low_pfn)
525 last_pfn = max_low_pfn;
526
527 /*
528 * .. finally, did all the rounding and playing
529 * around just make the area go away?
530 */
531 if (last_pfn <= curr_pfn)
532 continue;
533
534 size = last_pfn - curr_pfn;
535 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
536 }
537}
538
539void __init e820_register_memory(void)
540{
541 unsigned long gapstart, gapsize, round;
542 unsigned long long last;
543 int i;
544
545 /*
546 * Search for the biggest gap in the low 32 bits of the e820
547 * memory space.
548 */
549 last = 0x100000000ull;
550 gapstart = 0x10000000;
551 gapsize = 0x400000;
552 i = e820.nr_map;
553 while (--i >= 0) {
554 unsigned long long start = e820.map[i].addr;
555 unsigned long long end = start + e820.map[i].size;
556
557 /*
558 * Since "last" is at most 4GB, we know we'll
559 * fit in 32 bits if this condition is true
560 */
561 if (last > end) {
562 unsigned long gap = last - end;
563
564 if (gap > gapsize) {
565 gapsize = gap;
566 gapstart = end;
567 }
568 }
569 if (start < last)
570 last = start;
571 }
572
573 /*
574 * See how much we want to round up: start off with
575 * rounding to the next 1MB area.
576 */
577 round = 0x100000;
578 while ((gapsize >> 4) > round)
579 round += round;
580 /* Fun with two's complement */
581 pci_mem_start = (gapstart + round) & -round;
582
583 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
584 pci_mem_start, gapstart, gapsize);
585}
586
587void __init print_memory_map(char *who)
588{
589 int i;
590
591 for (i = 0; i < e820.nr_map; i++) {
592 printk(" %s: %016Lx - %016Lx ", who,
593 e820.map[i].addr,
594 e820.map[i].addr + e820.map[i].size);
595 switch (e820.map[i].type) {
596 case E820_RAM: printk("(usable)\n");
597 break;
598 case E820_RESERVED:
599 printk("(reserved)\n");
600 break;
601 case E820_ACPI:
602 printk("(ACPI data)\n");
603 break;
604 case E820_NVS:
605 printk("(ACPI NVS)\n");
606 break;
607 default: printk("type %u\n", e820.map[i].type);
608 break;
609 }
610 }
611}
612
613void __init limit_regions(unsigned long long size)
614{
615 unsigned long long current_addr;
616 int i;
617
618 print_memory_map("limit_regions start");
619 for (i = 0; i < e820.nr_map; i++) {
620 current_addr = e820.map[i].addr + e820.map[i].size;
621 if (current_addr < size)
622 continue;
623
624 if (e820.map[i].type != E820_RAM)
625 continue;
626
627 if (e820.map[i].addr >= size) {
628 /*
629 * This region starts past the end of the
630 * requested size, skip it completely.
631 */
632 e820.nr_map = i;
633 } else {
634 e820.nr_map = i + 1;
635 e820.map[i].size -= current_addr - size;
636 }
637 print_memory_map("limit_regions endfor");
638 return;
639 }
640 print_memory_map("limit_regions endfunc");
641}
642
643/*
644 * This function checks if any part of the range <start,end> is mapped
645 * with type.
646 */
647int
648e820_any_mapped(u64 start, u64 end, unsigned type)
649{
650 int i;
651 for (i = 0; i < e820.nr_map; i++) {
652 const struct e820entry *ei = &e820.map[i];
653 if (type && ei->type != type)
654 continue;
655 if (ei->addr >= end || ei->addr + ei->size <= start)
656 continue;
657 return 1;
658 }
659 return 0;
660}
661EXPORT_SYMBOL_GPL(e820_any_mapped);
662
663 /*
664 * This function checks if the entire range <start,end> is mapped with type.
665 *
666 * Note: this function only works correct if the e820 table is sorted and
667 * not-overlapping, which is the case
668 */
669int __init
670e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
671{
672 u64 start = s;
673 u64 end = e;
674 int i;
675 for (i = 0; i < e820.nr_map; i++) {
676 struct e820entry *ei = &e820.map[i];
677 if (type && ei->type != type)
678 continue;
679 /* is the region (part) in overlap with the current region ?*/
680 if (ei->addr >= end || ei->addr + ei->size <= start)
681 continue;
682 /* if the region is at the beginning of <start,end> we move
683 * start to the end of the region since it's ok until there
684 */
685 if (ei->addr <= start)
686 start = ei->addr + ei->size;
687 /* if start is now at or beyond end, we're done, full
688 * coverage */
689 if (start >= end)
690 return 1; /* we're done */
691 }
692 return 0;
693}
694
695static int __init parse_memmap(char *arg)
696{
697 if (!arg)
698 return -EINVAL;
699
700 if (strcmp(arg, "exactmap") == 0) {
701#ifdef CONFIG_CRASH_DUMP
702 /* If we are doing a crash dump, we
703 * still need to know the real mem
704 * size before original memory map is
705 * reset.
706 */
707 propagate_e820_map();
708 saved_max_pfn = max_pfn;
709#endif
710 e820.nr_map = 0;
711 user_defined_memmap = 1;
712 } else {
713 /* If the user specifies memory size, we
714 * limit the BIOS-provided memory map to
715 * that size. exactmap can be used to specify
716 * the exact map. mem=number can be used to
717 * trim the existing memory map.
718 */
719 unsigned long long start_at, mem_size;
720
721 mem_size = memparse(arg, &arg);
722 if (*arg == '@') {
723 start_at = memparse(arg+1, &arg);
724 add_memory_region(start_at, mem_size, E820_RAM);
725 } else if (*arg == '#') {
726 start_at = memparse(arg+1, &arg);
727 add_memory_region(start_at, mem_size, E820_ACPI);
728 } else if (*arg == '$') {
729 start_at = memparse(arg+1, &arg);
730 add_memory_region(start_at, mem_size, E820_RESERVED);
731 } else {
732 limit_regions(mem_size);
733 user_defined_memmap = 1;
734 }
735 }
736 return 0;
737}
738early_param("memmap", parse_memmap);
739void __init update_memory_range(u64 start, u64 size, unsigned old_type,
740 unsigned new_type)
741{
742 int i;
743
744 BUG_ON(old_type == new_type);
745
746 for (i = 0; i < e820.nr_map; i++) {
747 struct e820entry *ei = &e820.map[i];
748 u64 final_start, final_end;
749 if (ei->type != old_type)
750 continue;
751 /* totally covered? */
752 if (ei->addr >= start && ei->size <= size) {
753 ei->type = new_type;
754 continue;
755 }
756 /* partially covered */
757 final_start = max(start, ei->addr);
758 final_end = min(start + size, ei->addr + ei->size);
759 if (final_start >= final_end)
760 continue;
761 add_memory_region(final_start, final_end - final_start,
762 new_type);
763 }
764}
765void __init update_e820(void)
766{
767 u8 nr_map;
768
769 nr_map = e820.nr_map;
770 if (sanitize_e820_map(e820.map, &nr_map))
771 return;
772 e820.nr_map = nr_map;
773 printk(KERN_INFO "modified physical RAM map:\n");
774 print_memory_map("modified");
775}
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 77d424cf68b..473c89fe507 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -213,6 +213,48 @@ unsigned long efi_get_time(void)
213 eft.minute, eft.second); 213 eft.minute, eft.second);
214} 214}
215 215
216/*
217 * Tell the kernel about the EFI memory map. This might include
218 * more than the max 128 entries that can fit in the e820 legacy
219 * (zeropage) memory map.
220 */
221
222static void __init add_efi_memmap(void)
223{
224 void *p;
225
226 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
227 efi_memory_desc_t *md = p;
228 unsigned long long start = md->phys_addr;
229 unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
230 int e820_type;
231
232 if (md->attribute & EFI_MEMORY_WB)
233 e820_type = E820_RAM;
234 else
235 e820_type = E820_RESERVED;
236 e820_add_region(start, size, e820_type);
237 }
238 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
239}
240
241void __init efi_reserve_early(void)
242{
243 unsigned long pmap;
244
245 pmap = boot_params.efi_info.efi_memmap;
246#ifdef CONFIG_X86_64
247 pmap += (__u64)boot_params.efi_info.efi_memmap_hi << 32;
248#endif
249 memmap.phys_map = (void *)pmap;
250 memmap.nr_map = boot_params.efi_info.efi_memmap_size /
251 boot_params.efi_info.efi_memdesc_size;
252 memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
253 memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
254 reserve_early(pmap, pmap + memmap.nr_map * memmap.desc_size,
255 "EFI memmap");
256}
257
216#if EFI_DEBUG 258#if EFI_DEBUG
217static void __init print_efi_memmap(void) 259static void __init print_efi_memmap(void)
218{ 260{
@@ -242,21 +284,11 @@ void __init efi_init(void)
242 int i = 0; 284 int i = 0;
243 void *tmp; 285 void *tmp;
244 286
245#ifdef CONFIG_X86_32
246 efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; 287 efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
247 memmap.phys_map = (void *)boot_params.efi_info.efi_memmap; 288#ifdef CONFIG_X86_64
248#else 289 efi_phys.systab = (void *)efi_phys.systab +
249 efi_phys.systab = (efi_system_table_t *) 290 ((__u64)boot_params.efi_info.efi_systab_hi<<32);
250 (boot_params.efi_info.efi_systab |
251 ((__u64)boot_params.efi_info.efi_systab_hi<<32));
252 memmap.phys_map = (void *)
253 (boot_params.efi_info.efi_memmap |
254 ((__u64)boot_params.efi_info.efi_memmap_hi<<32));
255#endif 291#endif
256 memmap.nr_map = boot_params.efi_info.efi_memmap_size /
257 boot_params.efi_info.efi_memdesc_size;
258 memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
259 memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
260 292
261 efi.systab = early_ioremap((unsigned long)efi_phys.systab, 293 efi.systab = early_ioremap((unsigned long)efi_phys.systab,
262 sizeof(efi_system_table_t)); 294 sizeof(efi_system_table_t));
@@ -370,6 +402,7 @@ void __init efi_init(void)
370 if (memmap.desc_size != sizeof(efi_memory_desc_t)) 402 if (memmap.desc_size != sizeof(efi_memory_desc_t))
371 printk(KERN_WARNING "Kernel-defined memdesc" 403 printk(KERN_WARNING "Kernel-defined memdesc"
372 "doesn't match the one from EFI!\n"); 404 "doesn't match the one from EFI!\n");
405 add_efi_memmap();
373 406
374 /* Setup for EFI runtime service */ 407 /* Setup for EFI runtime service */
375 reboot_type = BOOT_EFI; 408 reboot_type = BOOT_EFI;
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index d0060fdccca..652c5287215 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -97,13 +97,7 @@ void __init efi_call_phys_epilog(void)
97 early_runtime_code_mapping_set_exec(0); 97 early_runtime_code_mapping_set_exec(0);
98} 98}
99 99
100void __init efi_reserve_bootmem(void) 100void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size)
101{
102 reserve_bootmem_generic((unsigned long)memmap.phys_map,
103 memmap.nr_map * memmap.desc_size);
104}
105
106void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size)
107{ 101{
108 static unsigned pages_mapped __initdata; 102 static unsigned pages_mapped __initdata;
109 unsigned i, pages; 103 unsigned i, pages;
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index cbaaf69bedb..1fa8be5bd21 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -51,7 +51,7 @@ void __init setup_apic_routing(void)
51 else 51 else
52#endif 52#endif
53 53
54 if (num_possible_cpus() <= 8) 54 if (max_physical_apicid < 8)
55 genapic = &apic_flat; 55 genapic = &apic_flat;
56 else 56 else
57 genapic = &apic_physflat; 57 genapic = &apic_physflat;
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
new file mode 100644
index 00000000000..a727c0b9819
--- /dev/null
+++ b/arch/x86/kernel/head.c
@@ -0,0 +1,73 @@
1#include <linux/kernel.h>
2#include <linux/init.h>
3
4#include <asm/setup.h>
5#include <asm/bios_ebda.h>
6
7#define BIOS_LOWMEM_KILOBYTES 0x413
8
9/*
10 * The BIOS places the EBDA/XBDA at the top of conventional
11 * memory, and usually decreases the reported amount of
12 * conventional memory (int 0x12) too. This also contains a
13 * workaround for Dell systems that neglect to reserve EBDA.
14 * The same workaround also avoids a problem with the AMD768MPX
15 * chipset: reserve a page before VGA to prevent PCI prefetch
16 * into it (errata #56). Usually the page is reserved anyways,
17 * unless you have no PS/2 mouse plugged in.
18 */
19void __init reserve_ebda_region(void)
20{
21 unsigned int lowmem, ebda_addr;
22
23 /* To determine the position of the EBDA and the */
24 /* end of conventional memory, we need to look at */
25 /* the BIOS data area. In a paravirtual environment */
26 /* that area is absent. We'll just have to assume */
27 /* that the paravirt case can handle memory setup */
28 /* correctly, without our help. */
29 if (paravirt_enabled())
30 return;
31
32 /* end of low (conventional) memory */
33 lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
34 lowmem <<= 10;
35
36 /* start of EBDA area */
37 ebda_addr = get_bios_ebda();
38
39 /* Fixup: bios puts an EBDA in the top 64K segment */
40 /* of conventional memory, but does not adjust lowmem. */
41 if ((lowmem - ebda_addr) <= 0x10000)
42 lowmem = ebda_addr;
43
44 /* Fixup: bios does not report an EBDA at all. */
45 /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
46 if ((ebda_addr == 0) && (lowmem >= 0x9f000))
47 lowmem = 0x9f000;
48
49 /* Paranoia: should never happen, but... */
50 if ((lowmem == 0) || (lowmem >= 0x100000))
51 lowmem = 0x9f000;
52
53 /* reserve all memory between lowmem and the 1MB mark */
54 reserve_early(lowmem, 0x100000, "BIOS reserved");
55}
56
57void __init reserve_setup_data(void)
58{
59 struct setup_data *data;
60 u64 pa_data;
61 char buf[32];
62
63 if (boot_params.hdr.version < 0x0209)
64 return;
65 pa_data = boot_params.hdr.setup_data;
66 while (pa_data) {
67 data = early_ioremap(pa_data, sizeof(*data));
68 sprintf(buf, "setup data %x", data->type);
69 reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
70 pa_data = data->next;
71 early_iounmap(data, sizeof(*data));
72 }
73}
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 3db05905892..fa1d25dd83e 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -8,7 +8,34 @@
8#include <linux/init.h> 8#include <linux/init.h>
9#include <linux/start_kernel.h> 9#include <linux/start_kernel.h>
10 10
11#include <asm/setup.h>
12#include <asm/sections.h>
13#include <asm/e820.h>
14#include <asm/bios_ebda.h>
15
11void __init i386_start_kernel(void) 16void __init i386_start_kernel(void)
12{ 17{
18 reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
19
20#ifdef CONFIG_BLK_DEV_INITRD
21 /* Reserve INITRD */
22 if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
23 u64 ramdisk_image = boot_params.hdr.ramdisk_image;
24 u64 ramdisk_size = boot_params.hdr.ramdisk_size;
25 u64 ramdisk_end = ramdisk_image + ramdisk_size;
26 reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
27 }
28#endif
29 reserve_early(init_pg_tables_start, init_pg_tables_end,
30 "INIT_PG_TABLE");
31
32 reserve_ebda_region();
33
34 /*
35 * At this point everything still needed from the boot loader
36 * or BIOS or kernel text should be early reserved or marked not
37 * RAM in e820. All other memory is free game.
38 */
39
13 start_kernel(); 40 start_kernel();
14} 41}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index e25c57b8aa8..5fbed459ff3 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -51,74 +51,6 @@ static void __init copy_bootdata(char *real_mode_data)
51 } 51 }
52} 52}
53 53
54#define BIOS_LOWMEM_KILOBYTES 0x413
55
56/*
57 * The BIOS places the EBDA/XBDA at the top of conventional
58 * memory, and usually decreases the reported amount of
59 * conventional memory (int 0x12) too. This also contains a
60 * workaround for Dell systems that neglect to reserve EBDA.
61 * The same workaround also avoids a problem with the AMD768MPX
62 * chipset: reserve a page before VGA to prevent PCI prefetch
63 * into it (errata #56). Usually the page is reserved anyways,
64 * unless you have no PS/2 mouse plugged in.
65 */
66static void __init reserve_ebda_region(void)
67{
68 unsigned int lowmem, ebda_addr;
69
70 /* To determine the position of the EBDA and the */
71 /* end of conventional memory, we need to look at */
72 /* the BIOS data area. In a paravirtual environment */
73 /* that area is absent. We'll just have to assume */
74 /* that the paravirt case can handle memory setup */
75 /* correctly, without our help. */
76 if (paravirt_enabled())
77 return;
78
79 /* end of low (conventional) memory */
80 lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
81 lowmem <<= 10;
82
83 /* start of EBDA area */
84 ebda_addr = get_bios_ebda();
85
86 /* Fixup: bios puts an EBDA in the top 64K segment */
87 /* of conventional memory, but does not adjust lowmem. */
88 if ((lowmem - ebda_addr) <= 0x10000)
89 lowmem = ebda_addr;
90
91 /* Fixup: bios does not report an EBDA at all. */
92 /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
93 if ((ebda_addr == 0) && (lowmem >= 0x9f000))
94 lowmem = 0x9f000;
95
96 /* Paranoia: should never happen, but... */
97 if ((lowmem == 0) || (lowmem >= 0x100000))
98 lowmem = 0x9f000;
99
100 /* reserve all memory between lowmem and the 1MB mark */
101 reserve_early(lowmem, 0x100000, "BIOS reserved");
102}
103
104static void __init reserve_setup_data(void)
105{
106 struct setup_data *data;
107 unsigned long pa_data;
108 char buf[32];
109
110 if (boot_params.hdr.version < 0x0209)
111 return;
112 pa_data = boot_params.hdr.setup_data;
113 while (pa_data) {
114 data = early_ioremap(pa_data, sizeof(*data));
115 sprintf(buf, "setup data %x", data->type);
116 reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
117 pa_data = data->next;
118 early_iounmap(data, sizeof(*data));
119 }
120}
121
122void __init x86_64_start_kernel(char * real_mode_data) 54void __init x86_64_start_kernel(char * real_mode_data)
123{ 55{
124 int i; 56 int i;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f7357cc0162..b98b338aae1 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -194,6 +194,7 @@ default_entry:
194 xorl %ebx,%ebx /* %ebx is kept at zero */ 194 xorl %ebx,%ebx /* %ebx is kept at zero */
195 195
196 movl $pa(pg0), %edi 196 movl $pa(pg0), %edi
197 movl %edi, pa(init_pg_tables_start)
197 movl $pa(swapper_pg_pmd), %edx 198 movl $pa(swapper_pg_pmd), %edx
198 movl $PTE_ATTR, %eax 199 movl $PTE_ATTR, %eax
19910: 20010:
@@ -219,6 +220,8 @@ default_entry:
219 jb 10b 220 jb 10b
2201: 2211:
221 movl %edi,pa(init_pg_tables_end) 222 movl %edi,pa(init_pg_tables_end)
223 shrl $12, %eax
224 movl %eax, pa(max_pfn_mapped)
222 225
223 /* Do early initialization of the fixmap area */ 226 /* Do early initialization of the fixmap area */
224 movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax 227 movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
@@ -228,6 +231,7 @@ default_entry:
228page_pde_offset = (__PAGE_OFFSET >> 20); 231page_pde_offset = (__PAGE_OFFSET >> 20);
229 232
230 movl $pa(pg0), %edi 233 movl $pa(pg0), %edi
234 movl %edi, pa(init_pg_tables_start)
231 movl $pa(swapper_pg_dir), %edx 235 movl $pa(swapper_pg_dir), %edx
232 movl $PTE_ATTR, %eax 236 movl $PTE_ATTR, %eax
23310: 23710:
@@ -249,6 +253,8 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
249 cmpl %ebp,%eax 253 cmpl %ebp,%eax
250 jb 10b 254 jb 10b
251 movl %edi,pa(init_pg_tables_end) 255 movl %edi,pa(init_pg_tables_end)
256 shrl $12, %eax
257 movl %eax, pa(max_pfn_mapped)
252 258
253 /* Do early initialization of the fixmap area */ 259 /* Do early initialization of the fixmap area */
254 movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax 260 movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index dac47d61d2b..fedb3b113ac 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -72,15 +72,21 @@ int sis_apic_bug = -1;
72int nr_ioapic_registers[MAX_IO_APICS]; 72int nr_ioapic_registers[MAX_IO_APICS];
73 73
74/* I/O APIC entries */ 74/* I/O APIC entries */
75struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; 75struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
76int nr_ioapics; 76int nr_ioapics;
77 77
78/* MP IRQ source entries */ 78/* MP IRQ source entries */
79struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; 79struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
80 80
81/* # of MP IRQ source entries */ 81/* # of MP IRQ source entries */
82int mp_irq_entries; 82int mp_irq_entries;
83 83
84#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
85int mp_bus_id_to_type[MAX_MP_BUSSES];
86#endif
87
88DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
89
84static int disable_timer_pin_1 __initdata; 90static int disable_timer_pin_1 __initdata;
85 91
86/* 92/*
@@ -110,7 +116,7 @@ struct io_apic {
110static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) 116static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
111{ 117{
112 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) 118 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
113 + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); 119 + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
114} 120}
115 121
116static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) 122static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@@ -802,10 +808,10 @@ static int find_irq_entry(int apic, int pin, int type)
802 int i; 808 int i;
803 809
804 for (i = 0; i < mp_irq_entries; i++) 810 for (i = 0; i < mp_irq_entries; i++)
805 if (mp_irqs[i].mpc_irqtype == type && 811 if (mp_irqs[i].mp_irqtype == type &&
806 (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || 812 (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
807 mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && 813 mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
808 mp_irqs[i].mpc_dstirq == pin) 814 mp_irqs[i].mp_dstirq == pin)
809 return i; 815 return i;
810 816
811 return -1; 817 return -1;
@@ -819,13 +825,13 @@ static int __init find_isa_irq_pin(int irq, int type)
819 int i; 825 int i;
820 826
821 for (i = 0; i < mp_irq_entries; i++) { 827 for (i = 0; i < mp_irq_entries; i++) {
822 int lbus = mp_irqs[i].mpc_srcbus; 828 int lbus = mp_irqs[i].mp_srcbus;
823 829
824 if (test_bit(lbus, mp_bus_not_pci) && 830 if (test_bit(lbus, mp_bus_not_pci) &&
825 (mp_irqs[i].mpc_irqtype == type) && 831 (mp_irqs[i].mp_irqtype == type) &&
826 (mp_irqs[i].mpc_srcbusirq == irq)) 832 (mp_irqs[i].mp_srcbusirq == irq))
827 833
828 return mp_irqs[i].mpc_dstirq; 834 return mp_irqs[i].mp_dstirq;
829 } 835 }
830 return -1; 836 return -1;
831} 837}
@@ -835,17 +841,17 @@ static int __init find_isa_irq_apic(int irq, int type)
835 int i; 841 int i;
836 842
837 for (i = 0; i < mp_irq_entries; i++) { 843 for (i = 0; i < mp_irq_entries; i++) {
838 int lbus = mp_irqs[i].mpc_srcbus; 844 int lbus = mp_irqs[i].mp_srcbus;
839 845
840 if (test_bit(lbus, mp_bus_not_pci) && 846 if (test_bit(lbus, mp_bus_not_pci) &&
841 (mp_irqs[i].mpc_irqtype == type) && 847 (mp_irqs[i].mp_irqtype == type) &&
842 (mp_irqs[i].mpc_srcbusirq == irq)) 848 (mp_irqs[i].mp_srcbusirq == irq))
843 break; 849 break;
844 } 850 }
845 if (i < mp_irq_entries) { 851 if (i < mp_irq_entries) {
846 int apic; 852 int apic;
847 for (apic = 0; apic < nr_ioapics; apic++) { 853 for (apic = 0; apic < nr_ioapics; apic++) {
848 if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) 854 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
849 return apic; 855 return apic;
850 } 856 }
851 } 857 }
@@ -865,28 +871,28 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
865 871
866 apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " 872 apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
867 "slot:%d, pin:%d.\n", bus, slot, pin); 873 "slot:%d, pin:%d.\n", bus, slot, pin);
868 if (mp_bus_id_to_pci_bus[bus] == -1) { 874 if (test_bit(bus, mp_bus_not_pci)) {
869 printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); 875 printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
870 return -1; 876 return -1;
871 } 877 }
872 for (i = 0; i < mp_irq_entries; i++) { 878 for (i = 0; i < mp_irq_entries; i++) {
873 int lbus = mp_irqs[i].mpc_srcbus; 879 int lbus = mp_irqs[i].mp_srcbus;
874 880
875 for (apic = 0; apic < nr_ioapics; apic++) 881 for (apic = 0; apic < nr_ioapics; apic++)
876 if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || 882 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
877 mp_irqs[i].mpc_dstapic == MP_APIC_ALL) 883 mp_irqs[i].mp_dstapic == MP_APIC_ALL)
878 break; 884 break;
879 885
880 if (!test_bit(lbus, mp_bus_not_pci) && 886 if (!test_bit(lbus, mp_bus_not_pci) &&
881 !mp_irqs[i].mpc_irqtype && 887 !mp_irqs[i].mp_irqtype &&
882 (bus == lbus) && 888 (bus == lbus) &&
883 (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { 889 (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
884 int irq = pin_2_irq(i, apic, mp_irqs[i].mpc_dstirq); 890 int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq);
885 891
886 if (!(apic || IO_APIC_IRQ(irq))) 892 if (!(apic || IO_APIC_IRQ(irq)))
887 continue; 893 continue;
888 894
889 if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) 895 if (pin == (mp_irqs[i].mp_srcbusirq & 3))
890 return irq; 896 return irq;
891 /* 897 /*
892 * Use the first all-but-pin matching entry as a 898 * Use the first all-but-pin matching entry as a
@@ -953,7 +959,7 @@ static int EISA_ELCR(unsigned int irq)
953 * EISA conforming in the MP table, that means its trigger type must 959 * EISA conforming in the MP table, that means its trigger type must
954 * be read in from the ELCR */ 960 * be read in from the ELCR */
955 961
956#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) 962#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
957#define default_EISA_polarity(idx) default_ISA_polarity(idx) 963#define default_EISA_polarity(idx) default_ISA_polarity(idx)
958 964
959/* PCI interrupts are always polarity one level triggered, 965/* PCI interrupts are always polarity one level triggered,
@@ -970,13 +976,13 @@ static int EISA_ELCR(unsigned int irq)
970 976
971static int MPBIOS_polarity(int idx) 977static int MPBIOS_polarity(int idx)
972{ 978{
973 int bus = mp_irqs[idx].mpc_srcbus; 979 int bus = mp_irqs[idx].mp_srcbus;
974 int polarity; 980 int polarity;
975 981
976 /* 982 /*
977 * Determine IRQ line polarity (high active or low active): 983 * Determine IRQ line polarity (high active or low active):
978 */ 984 */
979 switch (mp_irqs[idx].mpc_irqflag & 3) { 985 switch (mp_irqs[idx].mp_irqflag & 3) {
980 case 0: /* conforms, ie. bus-type dependent polarity */ 986 case 0: /* conforms, ie. bus-type dependent polarity */
981 { 987 {
982 polarity = test_bit(bus, mp_bus_not_pci)? 988 polarity = test_bit(bus, mp_bus_not_pci)?
@@ -1012,13 +1018,13 @@ static int MPBIOS_polarity(int idx)
1012 1018
1013static int MPBIOS_trigger(int idx) 1019static int MPBIOS_trigger(int idx)
1014{ 1020{
1015 int bus = mp_irqs[idx].mpc_srcbus; 1021 int bus = mp_irqs[idx].mp_srcbus;
1016 int trigger; 1022 int trigger;
1017 1023
1018 /* 1024 /*
1019 * Determine IRQ trigger mode (edge or level sensitive): 1025 * Determine IRQ trigger mode (edge or level sensitive):
1020 */ 1026 */
1021 switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) { 1027 switch ((mp_irqs[idx].mp_irqflag>>2) & 3) {
1022 case 0: /* conforms, ie. bus-type dependent */ 1028 case 0: /* conforms, ie. bus-type dependent */
1023 { 1029 {
1024 trigger = test_bit(bus, mp_bus_not_pci)? 1030 trigger = test_bit(bus, mp_bus_not_pci)?
@@ -1095,16 +1101,16 @@ static inline int irq_trigger(int idx)
1095static int pin_2_irq(int idx, int apic, int pin) 1101static int pin_2_irq(int idx, int apic, int pin)
1096{ 1102{
1097 int irq, i; 1103 int irq, i;
1098 int bus = mp_irqs[idx].mpc_srcbus; 1104 int bus = mp_irqs[idx].mp_srcbus;
1099 1105
1100 /* 1106 /*
1101 * Debugging check, we are in big trouble if this message pops up! 1107 * Debugging check, we are in big trouble if this message pops up!
1102 */ 1108 */
1103 if (mp_irqs[idx].mpc_dstirq != pin) 1109 if (mp_irqs[idx].mp_dstirq != pin)
1104 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); 1110 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
1105 1111
1106 if (test_bit(bus, mp_bus_not_pci)) 1112 if (test_bit(bus, mp_bus_not_pci))
1107 irq = mp_irqs[idx].mpc_srcbusirq; 1113 irq = mp_irqs[idx].mp_srcbusirq;
1108 else { 1114 else {
1109 /* 1115 /*
1110 * PCI IRQs are mapped in order 1116 * PCI IRQs are mapped in order
@@ -1248,12 +1254,12 @@ static void __init setup_IO_APIC_irqs(void)
1248 if (first_notcon) { 1254 if (first_notcon) {
1249 apic_printk(APIC_VERBOSE, KERN_DEBUG 1255 apic_printk(APIC_VERBOSE, KERN_DEBUG
1250 " IO-APIC (apicid-pin) %d-%d", 1256 " IO-APIC (apicid-pin) %d-%d",
1251 mp_ioapics[apic].mpc_apicid, 1257 mp_ioapics[apic].mp_apicid,
1252 pin); 1258 pin);
1253 first_notcon = 0; 1259 first_notcon = 0;
1254 } else 1260 } else
1255 apic_printk(APIC_VERBOSE, ", %d-%d", 1261 apic_printk(APIC_VERBOSE, ", %d-%d",
1256 mp_ioapics[apic].mpc_apicid, pin); 1262 mp_ioapics[apic].mp_apicid, pin);
1257 continue; 1263 continue;
1258 } 1264 }
1259 1265
@@ -1348,7 +1354,7 @@ void __init print_IO_APIC(void)
1348 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); 1354 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
1349 for (i = 0; i < nr_ioapics; i++) 1355 for (i = 0; i < nr_ioapics; i++)
1350 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", 1356 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
1351 mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); 1357 mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
1352 1358
1353 /* 1359 /*
1354 * We are a bit conservative about what we expect. We have to 1360 * We are a bit conservative about what we expect. We have to
@@ -1367,7 +1373,7 @@ void __init print_IO_APIC(void)
1367 reg_03.raw = io_apic_read(apic, 3); 1373 reg_03.raw = io_apic_read(apic, 3);
1368 spin_unlock_irqrestore(&ioapic_lock, flags); 1374 spin_unlock_irqrestore(&ioapic_lock, flags);
1369 1375
1370 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); 1376 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
1371 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); 1377 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1372 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 1378 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
1373 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); 1379 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
@@ -1708,7 +1714,6 @@ void disable_IO_APIC(void)
1708 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 1714 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
1709 */ 1715 */
1710 1716
1711#ifndef CONFIG_X86_NUMAQ
1712static void __init setup_ioapic_ids_from_mpc(void) 1717static void __init setup_ioapic_ids_from_mpc(void)
1713{ 1718{
1714 union IO_APIC_reg_00 reg_00; 1719 union IO_APIC_reg_00 reg_00;
@@ -1718,6 +1723,11 @@ static void __init setup_ioapic_ids_from_mpc(void)
1718 unsigned char old_id; 1723 unsigned char old_id;
1719 unsigned long flags; 1724 unsigned long flags;
1720 1725
1726#ifdef CONFIG_X86_NUMAQ
1727 if (found_numaq)
1728 return;
1729#endif
1730
1721 /* 1731 /*
1722 * Don't check I/O APIC IDs for xAPIC systems. They have 1732 * Don't check I/O APIC IDs for xAPIC systems. They have
1723 * no meaning without the serial APIC bus. 1733 * no meaning without the serial APIC bus.
@@ -1741,14 +1751,14 @@ static void __init setup_ioapic_ids_from_mpc(void)
1741 reg_00.raw = io_apic_read(apic, 0); 1751 reg_00.raw = io_apic_read(apic, 0);
1742 spin_unlock_irqrestore(&ioapic_lock, flags); 1752 spin_unlock_irqrestore(&ioapic_lock, flags);
1743 1753
1744 old_id = mp_ioapics[apic].mpc_apicid; 1754 old_id = mp_ioapics[apic].mp_apicid;
1745 1755
1746 if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) { 1756 if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
1747 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", 1757 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
1748 apic, mp_ioapics[apic].mpc_apicid); 1758 apic, mp_ioapics[apic].mp_apicid);
1749 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 1759 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1750 reg_00.bits.ID); 1760 reg_00.bits.ID);
1751 mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; 1761 mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
1752 } 1762 }
1753 1763
1754 /* 1764 /*
@@ -1757,9 +1767,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
1757 * 'stuck on smp_invalidate_needed IPI wait' messages. 1767 * 'stuck on smp_invalidate_needed IPI wait' messages.
1758 */ 1768 */
1759 if (check_apicid_used(phys_id_present_map, 1769 if (check_apicid_used(phys_id_present_map,
1760 mp_ioapics[apic].mpc_apicid)) { 1770 mp_ioapics[apic].mp_apicid)) {
1761 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", 1771 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
1762 apic, mp_ioapics[apic].mpc_apicid); 1772 apic, mp_ioapics[apic].mp_apicid);
1763 for (i = 0; i < get_physical_broadcast(); i++) 1773 for (i = 0; i < get_physical_broadcast(); i++)
1764 if (!physid_isset(i, phys_id_present_map)) 1774 if (!physid_isset(i, phys_id_present_map))
1765 break; 1775 break;
@@ -1768,13 +1778,13 @@ static void __init setup_ioapic_ids_from_mpc(void)
1768 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 1778 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1769 i); 1779 i);
1770 physid_set(i, phys_id_present_map); 1780 physid_set(i, phys_id_present_map);
1771 mp_ioapics[apic].mpc_apicid = i; 1781 mp_ioapics[apic].mp_apicid = i;
1772 } else { 1782 } else {
1773 physid_mask_t tmp; 1783 physid_mask_t tmp;
1774 tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid); 1784 tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
1775 apic_printk(APIC_VERBOSE, "Setting %d in the " 1785 apic_printk(APIC_VERBOSE, "Setting %d in the "
1776 "phys_id_present_map\n", 1786 "phys_id_present_map\n",
1777 mp_ioapics[apic].mpc_apicid); 1787 mp_ioapics[apic].mp_apicid);
1778 physids_or(phys_id_present_map, phys_id_present_map, tmp); 1788 physids_or(phys_id_present_map, phys_id_present_map, tmp);
1779 } 1789 }
1780 1790
@@ -1783,11 +1793,11 @@ static void __init setup_ioapic_ids_from_mpc(void)
1783 * We need to adjust the IRQ routing table 1793 * We need to adjust the IRQ routing table
1784 * if the ID changed. 1794 * if the ID changed.
1785 */ 1795 */
1786 if (old_id != mp_ioapics[apic].mpc_apicid) 1796 if (old_id != mp_ioapics[apic].mp_apicid)
1787 for (i = 0; i < mp_irq_entries; i++) 1797 for (i = 0; i < mp_irq_entries; i++)
1788 if (mp_irqs[i].mpc_dstapic == old_id) 1798 if (mp_irqs[i].mp_dstapic == old_id)
1789 mp_irqs[i].mpc_dstapic 1799 mp_irqs[i].mp_dstapic
1790 = mp_ioapics[apic].mpc_apicid; 1800 = mp_ioapics[apic].mp_apicid;
1791 1801
1792 /* 1802 /*
1793 * Read the right value from the MPC table and 1803 * Read the right value from the MPC table and
@@ -1795,9 +1805,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
1795 */ 1805 */
1796 apic_printk(APIC_VERBOSE, KERN_INFO 1806 apic_printk(APIC_VERBOSE, KERN_INFO
1797 "...changing IO-APIC physical APIC ID to %d ...", 1807 "...changing IO-APIC physical APIC ID to %d ...",
1798 mp_ioapics[apic].mpc_apicid); 1808 mp_ioapics[apic].mp_apicid);
1799 1809
1800 reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; 1810 reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
1801 spin_lock_irqsave(&ioapic_lock, flags); 1811 spin_lock_irqsave(&ioapic_lock, flags);
1802 io_apic_write(apic, 0, reg_00.raw); 1812 io_apic_write(apic, 0, reg_00.raw);
1803 spin_unlock_irqrestore(&ioapic_lock, flags); 1813 spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -1808,15 +1818,12 @@ static void __init setup_ioapic_ids_from_mpc(void)
1808 spin_lock_irqsave(&ioapic_lock, flags); 1818 spin_lock_irqsave(&ioapic_lock, flags);
1809 reg_00.raw = io_apic_read(apic, 0); 1819 reg_00.raw = io_apic_read(apic, 0);
1810 spin_unlock_irqrestore(&ioapic_lock, flags); 1820 spin_unlock_irqrestore(&ioapic_lock, flags);
1811 if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) 1821 if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
1812 printk("could not set ID!\n"); 1822 printk("could not set ID!\n");
1813 else 1823 else
1814 apic_printk(APIC_VERBOSE, " ok.\n"); 1824 apic_printk(APIC_VERBOSE, " ok.\n");
1815 } 1825 }
1816} 1826}
1817#else
1818static void __init setup_ioapic_ids_from_mpc(void) { }
1819#endif
1820 1827
1821int no_timer_check __initdata; 1828int no_timer_check __initdata;
1822 1829
@@ -2352,8 +2359,8 @@ static int ioapic_resume(struct sys_device *dev)
2352 2359
2353 spin_lock_irqsave(&ioapic_lock, flags); 2360 spin_lock_irqsave(&ioapic_lock, flags);
2354 reg_00.raw = io_apic_read(dev->id, 0); 2361 reg_00.raw = io_apic_read(dev->id, 0);
2355 if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { 2362 if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
2356 reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; 2363 reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
2357 io_apic_write(dev->id, 0, reg_00.raw); 2364 io_apic_write(dev->id, 0, reg_00.raw);
2358 } 2365 }
2359 spin_unlock_irqrestore(&ioapic_lock, flags); 2366 spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -2785,7 +2792,7 @@ int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int ac
2785 2792
2786 apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " 2793 apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
2787 "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, 2794 "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
2788 mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, 2795 mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq,
2789 edge_level, active_high_low); 2796 edge_level, active_high_low);
2790 2797
2791 ioapic_register_intr(irq, entry.vector, edge_level); 2798 ioapic_register_intr(irq, entry.vector, edge_level);
@@ -2806,8 +2813,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
2806 return -1; 2813 return -1;
2807 2814
2808 for (i = 0; i < mp_irq_entries; i++) 2815 for (i = 0; i < mp_irq_entries; i++)
2809 if (mp_irqs[i].mpc_irqtype == mp_INT && 2816 if (mp_irqs[i].mp_irqtype == mp_INT &&
2810 mp_irqs[i].mpc_srcbusirq == bus_irq) 2817 mp_irqs[i].mp_srcbusirq == bus_irq)
2811 break; 2818 break;
2812 if (i >= mp_irq_entries) 2819 if (i >= mp_irq_entries)
2813 return -1; 2820 return -1;
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 78a3866ab36..2eba4f4c14b 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -108,15 +108,17 @@ DEFINE_SPINLOCK(vector_lock);
108int nr_ioapic_registers[MAX_IO_APICS]; 108int nr_ioapic_registers[MAX_IO_APICS];
109 109
110/* I/O APIC entries */ 110/* I/O APIC entries */
111struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; 111struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
112int nr_ioapics; 112int nr_ioapics;
113 113
114/* MP IRQ source entries */ 114/* MP IRQ source entries */
115struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; 115struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
116 116
117/* # of MP IRQ source entries */ 117/* # of MP IRQ source entries */
118int mp_irq_entries; 118int mp_irq_entries;
119 119
120DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
121
120/* 122/*
121 * Rough estimation of how many shared IRQs there are, can 123 * Rough estimation of how many shared IRQs there are, can
122 * be changed anytime. 124 * be changed anytime.
@@ -144,7 +146,7 @@ struct io_apic {
144static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) 146static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
145{ 147{
146 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) 148 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
147 + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); 149 + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
148} 150}
149 151
150static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) 152static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@@ -464,10 +466,10 @@ static int find_irq_entry(int apic, int pin, int type)
464 int i; 466 int i;
465 467
466 for (i = 0; i < mp_irq_entries; i++) 468 for (i = 0; i < mp_irq_entries; i++)
467 if (mp_irqs[i].mpc_irqtype == type && 469 if (mp_irqs[i].mp_irqtype == type &&
468 (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || 470 (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
469 mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && 471 mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
470 mp_irqs[i].mpc_dstirq == pin) 472 mp_irqs[i].mp_dstirq == pin)
471 return i; 473 return i;
472 474
473 return -1; 475 return -1;
@@ -481,13 +483,13 @@ static int __init find_isa_irq_pin(int irq, int type)
481 int i; 483 int i;
482 484
483 for (i = 0; i < mp_irq_entries; i++) { 485 for (i = 0; i < mp_irq_entries; i++) {
484 int lbus = mp_irqs[i].mpc_srcbus; 486 int lbus = mp_irqs[i].mp_srcbus;
485 487
486 if (test_bit(lbus, mp_bus_not_pci) && 488 if (test_bit(lbus, mp_bus_not_pci) &&
487 (mp_irqs[i].mpc_irqtype == type) && 489 (mp_irqs[i].mp_irqtype == type) &&
488 (mp_irqs[i].mpc_srcbusirq == irq)) 490 (mp_irqs[i].mp_srcbusirq == irq))
489 491
490 return mp_irqs[i].mpc_dstirq; 492 return mp_irqs[i].mp_dstirq;
491 } 493 }
492 return -1; 494 return -1;
493} 495}
@@ -497,17 +499,17 @@ static int __init find_isa_irq_apic(int irq, int type)
497 int i; 499 int i;
498 500
499 for (i = 0; i < mp_irq_entries; i++) { 501 for (i = 0; i < mp_irq_entries; i++) {
500 int lbus = mp_irqs[i].mpc_srcbus; 502 int lbus = mp_irqs[i].mp_srcbus;
501 503
502 if (test_bit(lbus, mp_bus_not_pci) && 504 if (test_bit(lbus, mp_bus_not_pci) &&
503 (mp_irqs[i].mpc_irqtype == type) && 505 (mp_irqs[i].mp_irqtype == type) &&
504 (mp_irqs[i].mpc_srcbusirq == irq)) 506 (mp_irqs[i].mp_srcbusirq == irq))
505 break; 507 break;
506 } 508 }
507 if (i < mp_irq_entries) { 509 if (i < mp_irq_entries) {
508 int apic; 510 int apic;
509 for(apic = 0; apic < nr_ioapics; apic++) { 511 for(apic = 0; apic < nr_ioapics; apic++) {
510 if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) 512 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
511 return apic; 513 return apic;
512 } 514 }
513 } 515 }
@@ -527,28 +529,28 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
527 529
528 apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", 530 apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
529 bus, slot, pin); 531 bus, slot, pin);
530 if (mp_bus_id_to_pci_bus[bus] == -1) { 532 if (test_bit(bus, mp_bus_not_pci)) {
531 apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); 533 apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
532 return -1; 534 return -1;
533 } 535 }
534 for (i = 0; i < mp_irq_entries; i++) { 536 for (i = 0; i < mp_irq_entries; i++) {
535 int lbus = mp_irqs[i].mpc_srcbus; 537 int lbus = mp_irqs[i].mp_srcbus;
536 538
537 for (apic = 0; apic < nr_ioapics; apic++) 539 for (apic = 0; apic < nr_ioapics; apic++)
538 if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || 540 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
539 mp_irqs[i].mpc_dstapic == MP_APIC_ALL) 541 mp_irqs[i].mp_dstapic == MP_APIC_ALL)
540 break; 542 break;
541 543
542 if (!test_bit(lbus, mp_bus_not_pci) && 544 if (!test_bit(lbus, mp_bus_not_pci) &&
543 !mp_irqs[i].mpc_irqtype && 545 !mp_irqs[i].mp_irqtype &&
544 (bus == lbus) && 546 (bus == lbus) &&
545 (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { 547 (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
546 int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); 548 int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
547 549
548 if (!(apic || IO_APIC_IRQ(irq))) 550 if (!(apic || IO_APIC_IRQ(irq)))
549 continue; 551 continue;
550 552
551 if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) 553 if (pin == (mp_irqs[i].mp_srcbusirq & 3))
552 return irq; 554 return irq;
553 /* 555 /*
554 * Use the first all-but-pin matching entry as a 556 * Use the first all-but-pin matching entry as a
@@ -576,13 +578,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
576 578
577static int MPBIOS_polarity(int idx) 579static int MPBIOS_polarity(int idx)
578{ 580{
579 int bus = mp_irqs[idx].mpc_srcbus; 581 int bus = mp_irqs[idx].mp_srcbus;
580 int polarity; 582 int polarity;
581 583
582 /* 584 /*
583 * Determine IRQ line polarity (high active or low active): 585 * Determine IRQ line polarity (high active or low active):
584 */ 586 */
585 switch (mp_irqs[idx].mpc_irqflag & 3) 587 switch (mp_irqs[idx].mp_irqflag & 3)
586 { 588 {
587 case 0: /* conforms, ie. bus-type dependent polarity */ 589 case 0: /* conforms, ie. bus-type dependent polarity */
588 if (test_bit(bus, mp_bus_not_pci)) 590 if (test_bit(bus, mp_bus_not_pci))
@@ -618,13 +620,13 @@ static int MPBIOS_polarity(int idx)
618 620
619static int MPBIOS_trigger(int idx) 621static int MPBIOS_trigger(int idx)
620{ 622{
621 int bus = mp_irqs[idx].mpc_srcbus; 623 int bus = mp_irqs[idx].mp_srcbus;
622 int trigger; 624 int trigger;
623 625
624 /* 626 /*
625 * Determine IRQ trigger mode (edge or level sensitive): 627 * Determine IRQ trigger mode (edge or level sensitive):
626 */ 628 */
627 switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) 629 switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
628 { 630 {
629 case 0: /* conforms, ie. bus-type dependent */ 631 case 0: /* conforms, ie. bus-type dependent */
630 if (test_bit(bus, mp_bus_not_pci)) 632 if (test_bit(bus, mp_bus_not_pci))
@@ -671,16 +673,16 @@ static inline int irq_trigger(int idx)
671static int pin_2_irq(int idx, int apic, int pin) 673static int pin_2_irq(int idx, int apic, int pin)
672{ 674{
673 int irq, i; 675 int irq, i;
674 int bus = mp_irqs[idx].mpc_srcbus; 676 int bus = mp_irqs[idx].mp_srcbus;
675 677
676 /* 678 /*
677 * Debugging check, we are in big trouble if this message pops up! 679 * Debugging check, we are in big trouble if this message pops up!
678 */ 680 */
679 if (mp_irqs[idx].mpc_dstirq != pin) 681 if (mp_irqs[idx].mp_dstirq != pin)
680 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); 682 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
681 683
682 if (test_bit(bus, mp_bus_not_pci)) { 684 if (test_bit(bus, mp_bus_not_pci)) {
683 irq = mp_irqs[idx].mpc_srcbusirq; 685 irq = mp_irqs[idx].mp_srcbusirq;
684 } else { 686 } else {
685 /* 687 /*
686 * PCI IRQs are mapped in order 688 * PCI IRQs are mapped in order
@@ -857,7 +859,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
857 apic_printk(APIC_VERBOSE,KERN_DEBUG 859 apic_printk(APIC_VERBOSE,KERN_DEBUG
858 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " 860 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
859 "IRQ %d Mode:%i Active:%i)\n", 861 "IRQ %d Mode:%i Active:%i)\n",
860 apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector, 862 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
861 irq, trigger, polarity); 863 irq, trigger, polarity);
862 864
863 /* 865 /*
@@ -898,10 +900,10 @@ static void __init setup_IO_APIC_irqs(void)
898 idx = find_irq_entry(apic,pin,mp_INT); 900 idx = find_irq_entry(apic,pin,mp_INT);
899 if (idx == -1) { 901 if (idx == -1) {
900 if (first_notcon) { 902 if (first_notcon) {
901 apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin); 903 apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
902 first_notcon = 0; 904 first_notcon = 0;
903 } else 905 } else
904 apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin); 906 apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
905 continue; 907 continue;
906 } 908 }
907 if (!first_notcon) { 909 if (!first_notcon) {
@@ -969,7 +971,7 @@ void __apicdebuginit print_IO_APIC(void)
969 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); 971 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
970 for (i = 0; i < nr_ioapics; i++) 972 for (i = 0; i < nr_ioapics; i++)
971 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", 973 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
972 mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); 974 mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
973 975
974 /* 976 /*
975 * We are a bit conservative about what we expect. We have to 977 * We are a bit conservative about what we expect. We have to
@@ -987,7 +989,7 @@ void __apicdebuginit print_IO_APIC(void)
987 spin_unlock_irqrestore(&ioapic_lock, flags); 989 spin_unlock_irqrestore(&ioapic_lock, flags);
988 990
989 printk("\n"); 991 printk("\n");
990 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); 992 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
991 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); 993 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
992 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 994 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
993 995
@@ -1873,8 +1875,8 @@ static int ioapic_resume(struct sys_device *dev)
1873 1875
1874 spin_lock_irqsave(&ioapic_lock, flags); 1876 spin_lock_irqsave(&ioapic_lock, flags);
1875 reg_00.raw = io_apic_read(dev->id, 0); 1877 reg_00.raw = io_apic_read(dev->id, 0);
1876 if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { 1878 if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
1877 reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; 1879 reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
1878 io_apic_write(dev->id, 0, reg_00.raw); 1880 io_apic_write(dev->id, 0, reg_00.raw);
1879 } 1881 }
1880 spin_unlock_irqrestore(&ioapic_lock, flags); 1882 spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -2274,8 +2276,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
2274 return -1; 2276 return -1;
2275 2277
2276 for (i = 0; i < mp_irq_entries; i++) 2278 for (i = 0; i < mp_irq_entries; i++)
2277 if (mp_irqs[i].mpc_irqtype == mp_INT && 2279 if (mp_irqs[i].mp_irqtype == mp_INT &&
2278 mp_irqs[i].mpc_srcbusirq == bus_irq) 2280 mp_irqs[i].mp_srcbusirq == bus_irq)
2279 break; 2281 break;
2280 if (i >= mp_irq_entries) 2282 if (i >= mp_irq_entries)
2281 return -1; 2283 return -1;
@@ -2368,7 +2370,7 @@ void __init ioapic_init_mappings(void)
2368 ioapic_res = ioapic_setup_resources(); 2370 ioapic_res = ioapic_setup_resources();
2369 for (i = 0; i < nr_ioapics; i++) { 2371 for (i = 0; i < nr_ioapics; i++) {
2370 if (smp_found_config) { 2372 if (smp_found_config) {
2371 ioapic_phys = mp_ioapics[i].mpc_apicaddr; 2373 ioapic_phys = mp_ioapics[i].mp_apicaddr;
2372 } else { 2374 } else {
2373 ioapic_phys = (unsigned long) 2375 ioapic_phys = (unsigned long)
2374 alloc_bootmem_pages(PAGE_SIZE); 2376 alloc_bootmem_pages(PAGE_SIZE);
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 404683b94e7..8b6b1e05c30 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -25,6 +25,8 @@
25#include <asm/proto.h> 25#include <asm/proto.h>
26#include <asm/acpi.h> 26#include <asm/acpi.h>
27#include <asm/bios_ebda.h> 27#include <asm/bios_ebda.h>
28#include <asm/e820.h>
29#include <asm/trampoline.h>
28 30
29#include <mach_apic.h> 31#include <mach_apic.h>
30#ifdef CONFIG_X86_32 32#ifdef CONFIG_X86_32
@@ -32,28 +34,6 @@
32#include <mach_mpparse.h> 34#include <mach_mpparse.h>
33#endif 35#endif
34 36
35/* Have we found an MP table */
36int smp_found_config;
37
38/*
39 * Various Linux-internal data structures created from the
40 * MP-table.
41 */
42#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
43int mp_bus_id_to_type[MAX_MP_BUSSES];
44#endif
45
46DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
47int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 };
48
49static int mp_current_pci_id;
50
51int pic_mode;
52
53/*
54 * Intel MP BIOS table parsing routines:
55 */
56
57/* 37/*
58 * Checksum an MP configuration block. 38 * Checksum an MP configuration block.
59 */ 39 */
@@ -69,15 +49,73 @@ static int __init mpf_checksum(unsigned char *mp, int len)
69} 49}
70 50
71#ifdef CONFIG_X86_NUMAQ 51#ifdef CONFIG_X86_NUMAQ
52int found_numaq;
72/* 53/*
73 * Have to match translation table entries to main table entries by counter 54 * Have to match translation table entries to main table entries by counter
74 * hence the mpc_record variable .... can't see a less disgusting way of 55 * hence the mpc_record variable .... can't see a less disgusting way of
75 * doing this .... 56 * doing this ....
76 */ 57 */
58struct mpc_config_translation {
59 unsigned char mpc_type;
60 unsigned char trans_len;
61 unsigned char trans_type;
62 unsigned char trans_quad;
63 unsigned char trans_global;
64 unsigned char trans_local;
65 unsigned short trans_reserved;
66};
67
77 68
78static int mpc_record; 69static int mpc_record;
79static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] 70static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
80 __cpuinitdata; 71 __cpuinitdata;
72
73static inline int generate_logical_apicid(int quad, int phys_apicid)
74{
75 return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
76}
77
78
79static inline int mpc_apic_id(struct mpc_config_processor *m,
80 struct mpc_config_translation *translation_record)
81{
82 int quad = translation_record->trans_quad;
83 int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
84
85 printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
86 m->mpc_apicid,
87 (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
88 (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
89 m->mpc_apicver, quad, logical_apicid);
90 return logical_apicid;
91}
92
93int mp_bus_id_to_node[MAX_MP_BUSSES];
94
95int mp_bus_id_to_local[MAX_MP_BUSSES];
96
97static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
98 struct mpc_config_translation *translation)
99{
100 int quad = translation->trans_quad;
101 int local = translation->trans_local;
102
103 mp_bus_id_to_node[m->mpc_busid] = quad;
104 mp_bus_id_to_local[m->mpc_busid] = local;
105 printk(KERN_INFO "Bus #%d is %s (node %d)\n",
106 m->mpc_busid, name, quad);
107}
108
109int quad_local_to_mp_bus_id [NR_CPUS/4][4];
110static void mpc_oem_pci_bus(struct mpc_config_bus *m,
111 struct mpc_config_translation *translation)
112{
113 int quad = translation->trans_quad;
114 int local = translation->trans_local;
115
116 quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
117}
118
81#endif 119#endif
82 120
83static void __cpuinit MP_processor_info(struct mpc_config_processor *m) 121static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
@@ -90,7 +128,10 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
90 return; 128 return;
91 } 129 }
92#ifdef CONFIG_X86_NUMAQ 130#ifdef CONFIG_X86_NUMAQ
93 apicid = mpc_apic_id(m, translation_table[mpc_record]); 131 if (found_numaq)
132 apicid = mpc_apic_id(m, translation_table[mpc_record]);
133 else
134 apicid = m->mpc_apicid;
94#else 135#else
95 apicid = m->mpc_apicid; 136 apicid = m->mpc_apicid;
96#endif 137#endif
@@ -103,17 +144,18 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
103 generic_processor_info(apicid, m->mpc_apicver); 144 generic_processor_info(apicid, m->mpc_apicver);
104} 145}
105 146
147#ifdef CONFIG_X86_IO_APIC
106static void __init MP_bus_info(struct mpc_config_bus *m) 148static void __init MP_bus_info(struct mpc_config_bus *m)
107{ 149{
108 char str[7]; 150 char str[7];
109
110 memcpy(str, m->mpc_bustype, 6); 151 memcpy(str, m->mpc_bustype, 6);
111 str[6] = 0; 152 str[6] = 0;
112 153
113#ifdef CONFIG_X86_NUMAQ 154#ifdef CONFIG_X86_NUMAQ
114 mpc_oem_bus_info(m, str, translation_table[mpc_record]); 155 if (found_numaq)
156 mpc_oem_bus_info(m, str, translation_table[mpc_record]);
115#else 157#else
116 Dprintk("Bus #%d is %s\n", m->mpc_busid, str); 158 printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
117#endif 159#endif
118 160
119#if MAX_MP_BUSSES < 256 161#if MAX_MP_BUSSES < 256
@@ -132,11 +174,10 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
132#endif 174#endif
133 } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { 175 } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
134#ifdef CONFIG_X86_NUMAQ 176#ifdef CONFIG_X86_NUMAQ
135 mpc_oem_pci_bus(m, translation_table[mpc_record]); 177 if (found_numaq)
178 mpc_oem_pci_bus(m, translation_table[mpc_record]);
136#endif 179#endif
137 clear_bit(m->mpc_busid, mp_bus_not_pci); 180 clear_bit(m->mpc_busid, mp_bus_not_pci);
138 mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
139 mp_current_pci_id++;
140#if defined(CONFIG_EISA) || defined (CONFIG_MCA) 181#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
141 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; 182 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
142 } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { 183 } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
@@ -147,6 +188,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
147 } else 188 } else
148 printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); 189 printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
149} 190}
191#endif
150 192
151#ifdef CONFIG_X86_IO_APIC 193#ifdef CONFIG_X86_IO_APIC
152 194
@@ -176,18 +218,89 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
176 if (bad_ioapic(m->mpc_apicaddr)) 218 if (bad_ioapic(m->mpc_apicaddr))
177 return; 219 return;
178 220
179 mp_ioapics[nr_ioapics] = *m; 221 mp_ioapics[nr_ioapics].mp_apicaddr = m->mpc_apicaddr;
222 mp_ioapics[nr_ioapics].mp_apicid = m->mpc_apicid;
223 mp_ioapics[nr_ioapics].mp_type = m->mpc_type;
224 mp_ioapics[nr_ioapics].mp_apicver = m->mpc_apicver;
225 mp_ioapics[nr_ioapics].mp_flags = m->mpc_flags;
180 nr_ioapics++; 226 nr_ioapics++;
181} 227}
182 228
183static void __init MP_intsrc_info(struct mpc_config_intsrc *m) 229static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
184{ 230{
185 mp_irqs[mp_irq_entries] = *m; 231 printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
186 Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
187 " IRQ %02x, APIC ID %x, APIC INT %02x\n", 232 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
188 m->mpc_irqtype, m->mpc_irqflag & 3, 233 m->mpc_irqtype, m->mpc_irqflag & 3,
189 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, 234 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
190 m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); 235 m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
236}
237
238static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
239{
240 printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
241 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
242 mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3,
243 (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus,
244 mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq);
245}
246
247static void __init assign_to_mp_irq(struct mpc_config_intsrc *m,
248 struct mp_config_intsrc *mp_irq)
249{
250 mp_irq->mp_dstapic = m->mpc_dstapic;
251 mp_irq->mp_type = m->mpc_type;
252 mp_irq->mp_irqtype = m->mpc_irqtype;
253 mp_irq->mp_irqflag = m->mpc_irqflag;
254 mp_irq->mp_srcbus = m->mpc_srcbus;
255 mp_irq->mp_srcbusirq = m->mpc_srcbusirq;
256 mp_irq->mp_dstirq = m->mpc_dstirq;
257}
258
259static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq,
260 struct mpc_config_intsrc *m)
261{
262 m->mpc_dstapic = mp_irq->mp_dstapic;
263 m->mpc_type = mp_irq->mp_type;
264 m->mpc_irqtype = mp_irq->mp_irqtype;
265 m->mpc_irqflag = mp_irq->mp_irqflag;
266 m->mpc_srcbus = mp_irq->mp_srcbus;
267 m->mpc_srcbusirq = mp_irq->mp_srcbusirq;
268 m->mpc_dstirq = mp_irq->mp_dstirq;
269}
270
271static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq,
272 struct mpc_config_intsrc *m)
273{
274 if (mp_irq->mp_dstapic != m->mpc_dstapic)
275 return 1;
276 if (mp_irq->mp_type != m->mpc_type)
277 return 2;
278 if (mp_irq->mp_irqtype != m->mpc_irqtype)
279 return 3;
280 if (mp_irq->mp_irqflag != m->mpc_irqflag)
281 return 4;
282 if (mp_irq->mp_srcbus != m->mpc_srcbus)
283 return 5;
284 if (mp_irq->mp_srcbusirq != m->mpc_srcbusirq)
285 return 6;
286 if (mp_irq->mp_dstirq != m->mpc_dstirq)
287 return 7;
288
289 return 0;
290}
291
292static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
293{
294 int i;
295
296 print_MP_intsrc_info(m);
297
298 for (i = 0; i < mp_irq_entries; i++) {
299 if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m))
300 return;
301 }
302
303 assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
191 if (++mp_irq_entries == MAX_IRQ_SOURCES) 304 if (++mp_irq_entries == MAX_IRQ_SOURCES)
192 panic("Max # of irq sources exceeded!!\n"); 305 panic("Max # of irq sources exceeded!!\n");
193} 306}
@@ -196,7 +309,7 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
196 309
197static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) 310static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
198{ 311{
199 Dprintk("Lint: type %d, pol %d, trig %d, bus %d," 312 printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x,"
200 " IRQ %02x, APIC ID %x, APIC LINT %02x\n", 313 " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
201 m->mpc_irqtype, m->mpc_irqflag & 3, 314 m->mpc_irqtype, m->mpc_irqflag & 3,
202 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, 315 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
@@ -266,11 +379,14 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
266 } 379 }
267} 380}
268 381
269static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, 382void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
270 char *productid) 383 char *productid)
271{ 384{
272 if (strncmp(oem, "IBM NUMA", 8)) 385 if (strncmp(oem, "IBM NUMA", 8))
273 printk("Warning! May not be a NUMA-Q system!\n"); 386 printk("Warning! Not a NUMA-Q system!\n");
387 else
388 found_numaq = 1;
389
274 if (mpc->mpc_oemptr) 390 if (mpc->mpc_oemptr)
275 smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr, 391 smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr,
276 mpc->mpc_oemsize); 392 mpc->mpc_oemsize);
@@ -281,12 +397,9 @@ static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
281 * Read/parse the MPC 397 * Read/parse the MPC
282 */ 398 */
283 399
284static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) 400static int __init smp_check_mpc(struct mp_config_table *mpc, char *oem,
401 char *str)
285{ 402{
286 char str[16];
287 char oem[10];
288 int count = sizeof(*mpc);
289 unsigned char *mpt = ((unsigned char *)mpc) + count;
290 403
291 if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) { 404 if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) {
292 printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", 405 printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n",
@@ -309,19 +422,42 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
309 } 422 }
310 memcpy(oem, mpc->mpc_oem, 8); 423 memcpy(oem, mpc->mpc_oem, 8);
311 oem[8] = 0; 424 oem[8] = 0;
312 printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem); 425 printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem);
313 426
314 memcpy(str, mpc->mpc_productid, 12); 427 memcpy(str, mpc->mpc_productid, 12);
315 str[12] = 0; 428 str[12] = 0;
316 printk("Product ID: %s ", str);
317 429
318#ifdef CONFIG_X86_32 430 printk(KERN_INFO "MPTABLE: Product ID: %s\n", str);
319 mps_oem_check(mpc, oem, str);
320#endif
321 printk(KERN_INFO "MPTABLE: Product ID: %s ", str);
322 431
323 printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic); 432 printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic);
324 433
434 return 1;
435}
436
437static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
438{
439 char str[16];
440 char oem[10];
441
442 int count = sizeof(*mpc);
443 unsigned char *mpt = ((unsigned char *)mpc) + count;
444
445 if (!smp_check_mpc(mpc, oem, str))
446 return 0;
447
448#ifdef CONFIG_X86_32
449 /*
450 * need to make sure summit and es7000's mps_oem_check is safe to be
451 * called early via genericarch 's mps_oem_check
452 */
453 if (early) {
454#ifdef CONFIG_X86_NUMAQ
455 numaq_mps_oem_check(mpc, oem, str);
456#endif
457 } else
458 mps_oem_check(mpc, oem, str);
459#endif
460
325 /* save the local APIC address, it might be non-default */ 461 /* save the local APIC address, it might be non-default */
326 if (!acpi_lapic) 462 if (!acpi_lapic)
327 mp_lapic_addr = mpc->mpc_lapic; 463 mp_lapic_addr = mpc->mpc_lapic;
@@ -352,7 +488,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
352 { 488 {
353 struct mpc_config_bus *m = 489 struct mpc_config_bus *m =
354 (struct mpc_config_bus *)mpt; 490 (struct mpc_config_bus *)mpt;
491#ifdef CONFIG_X86_IO_APIC
355 MP_bus_info(m); 492 MP_bus_info(m);
493#endif
356 mpt += sizeof(*m); 494 mpt += sizeof(*m);
357 count += sizeof(*m); 495 count += sizeof(*m);
358 break; 496 break;
@@ -402,6 +540,11 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
402 ++mpc_record; 540 ++mpc_record;
403#endif 541#endif
404 } 542 }
543
544#ifdef CONFIG_X86_GENERICARCH
545 generic_bigsmp_probe();
546#endif
547
405 setup_apic_routing(); 548 setup_apic_routing();
406 if (!num_processors) 549 if (!num_processors)
407 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 550 printk(KERN_ERR "MPTABLE: no processors registered!\n");
@@ -427,7 +570,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
427 intsrc.mpc_type = MP_INTSRC; 570 intsrc.mpc_type = MP_INTSRC;
428 intsrc.mpc_irqflag = 0; /* conforming */ 571 intsrc.mpc_irqflag = 0; /* conforming */
429 intsrc.mpc_srcbus = 0; 572 intsrc.mpc_srcbus = 0;
430 intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; 573 intsrc.mpc_dstapic = mp_ioapics[0].mp_apicid;
431 574
432 intsrc.mpc_irqtype = mp_INT; 575 intsrc.mpc_irqtype = mp_INT;
433 576
@@ -488,40 +631,11 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
488 MP_intsrc_info(&intsrc); 631 MP_intsrc_info(&intsrc);
489} 632}
490 633
491#endif
492 634
493static inline void __init construct_default_ISA_mptable(int mpc_default_type) 635static void construct_ioapic_table(int mpc_default_type)
494{ 636{
495 struct mpc_config_processor processor;
496 struct mpc_config_bus bus;
497#ifdef CONFIG_X86_IO_APIC
498 struct mpc_config_ioapic ioapic; 637 struct mpc_config_ioapic ioapic;
499#endif 638 struct mpc_config_bus bus;
500 struct mpc_config_lintsrc lintsrc;
501 int linttypes[2] = { mp_ExtINT, mp_NMI };
502 int i;
503
504 /*
505 * local APIC has default address
506 */
507 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
508
509 /*
510 * 2 CPUs, numbered 0 & 1.
511 */
512 processor.mpc_type = MP_PROCESSOR;
513 /* Either an integrated APIC or a discrete 82489DX. */
514 processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
515 processor.mpc_cpuflag = CPU_ENABLED;
516 processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
517 (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
518 processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
519 processor.mpc_reserved[0] = 0;
520 processor.mpc_reserved[1] = 0;
521 for (i = 0; i < 2; i++) {
522 processor.mpc_apicid = i;
523 MP_processor_info(&processor);
524 }
525 639
526 bus.mpc_type = MP_BUS; 640 bus.mpc_type = MP_BUS;
527 bus.mpc_busid = 0; 641 bus.mpc_busid = 0;
@@ -550,7 +664,6 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
550 MP_bus_info(&bus); 664 MP_bus_info(&bus);
551 } 665 }
552 666
553#ifdef CONFIG_X86_IO_APIC
554 ioapic.mpc_type = MP_IOAPIC; 667 ioapic.mpc_type = MP_IOAPIC;
555 ioapic.mpc_apicid = 2; 668 ioapic.mpc_apicid = 2;
556 ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; 669 ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
@@ -562,7 +675,42 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
562 * We set up most of the low 16 IO-APIC pins according to MPS rules. 675 * We set up most of the low 16 IO-APIC pins according to MPS rules.
563 */ 676 */
564 construct_default_ioirq_mptable(mpc_default_type); 677 construct_default_ioirq_mptable(mpc_default_type);
678}
679#else
680static inline void construct_ioapic_table(int mpc_default_type) { }
565#endif 681#endif
682
683static inline void __init construct_default_ISA_mptable(int mpc_default_type)
684{
685 struct mpc_config_processor processor;
686 struct mpc_config_lintsrc lintsrc;
687 int linttypes[2] = { mp_ExtINT, mp_NMI };
688 int i;
689
690 /*
691 * local APIC has default address
692 */
693 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
694
695 /*
696 * 2 CPUs, numbered 0 & 1.
697 */
698 processor.mpc_type = MP_PROCESSOR;
699 /* Either an integrated APIC or a discrete 82489DX. */
700 processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
701 processor.mpc_cpuflag = CPU_ENABLED;
702 processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
703 (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
704 processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
705 processor.mpc_reserved[0] = 0;
706 processor.mpc_reserved[1] = 0;
707 for (i = 0; i < 2; i++) {
708 processor.mpc_apicid = i;
709 MP_processor_info(&processor);
710 }
711
712 construct_ioapic_table(mpc_default_type);
713
566 lintsrc.mpc_type = MP_LINTSRC; 714 lintsrc.mpc_type = MP_LINTSRC;
567 lintsrc.mpc_irqflag = 0; /* conforming */ 715 lintsrc.mpc_irqflag = 0; /* conforming */
568 lintsrc.mpc_srcbusid = 0; 716 lintsrc.mpc_srcbusid = 0;
@@ -600,7 +748,7 @@ static void __init __get_smp_config(unsigned early)
600 748
601 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", 749 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
602 mpf->mpf_specification); 750 mpf->mpf_specification);
603#ifdef CONFIG_X86_32 751#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
604 if (mpf->mpf_feature2 & (1 << 7)) { 752 if (mpf->mpf_feature2 & (1 << 7)) {
605 printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); 753 printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
606 pic_mode = 1; 754 pic_mode = 1;
@@ -632,7 +780,9 @@ static void __init __get_smp_config(unsigned early)
632 * override the defaults. 780 * override the defaults.
633 */ 781 */
634 if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { 782 if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) {
783#ifdef CONFIG_X86_LOCAL_APIC
635 smp_found_config = 0; 784 smp_found_config = 0;
785#endif
636 printk(KERN_ERR 786 printk(KERN_ERR
637 "BIOS bug, MP table errors detected!...\n"); 787 "BIOS bug, MP table errors detected!...\n");
638 printk(KERN_ERR "... disabling SMP support. " 788 printk(KERN_ERR "... disabling SMP support. "
@@ -689,7 +839,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
689 unsigned int *bp = phys_to_virt(base); 839 unsigned int *bp = phys_to_virt(base);
690 struct intel_mp_floating *mpf; 840 struct intel_mp_floating *mpf;
691 841
692 Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length); 842 printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length);
693 BUILD_BUG_ON(sizeof(*mpf) != 16); 843 BUILD_BUG_ON(sizeof(*mpf) != 16);
694 844
695 while (length > 0) { 845 while (length > 0) {
@@ -699,15 +849,21 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
699 !mpf_checksum((unsigned char *)bp, 16) && 849 !mpf_checksum((unsigned char *)bp, 16) &&
700 ((mpf->mpf_specification == 1) 850 ((mpf->mpf_specification == 1)
701 || (mpf->mpf_specification == 4))) { 851 || (mpf->mpf_specification == 4))) {
702 852#ifdef CONFIG_X86_LOCAL_APIC
703 smp_found_config = 1; 853 smp_found_config = 1;
854#endif
704 mpf_found = mpf; 855 mpf_found = mpf;
705#ifdef CONFIG_X86_32 856
706 printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", 857 printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
707 mpf, virt_to_phys(mpf)); 858 mpf, virt_to_phys(mpf));
708 reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE, 859
860 if (!reserve)
861 return 1;
862 reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
709 BOOTMEM_DEFAULT); 863 BOOTMEM_DEFAULT);
710 if (mpf->mpf_physptr) { 864 if (mpf->mpf_physptr) {
865 unsigned long size = PAGE_SIZE;
866#ifdef CONFIG_X86_32
711 /* 867 /*
712 * We cannot access to MPC table to compute 868 * We cannot access to MPC table to compute
713 * table size yet, as only few megabytes from 869 * table size yet, as only few megabytes from
@@ -717,24 +873,15 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
717 * PAGE_SIZE from mpg->mpf_physptr yields BUG() 873 * PAGE_SIZE from mpg->mpf_physptr yields BUG()
718 * in reserve_bootmem. 874 * in reserve_bootmem.
719 */ 875 */
720 unsigned long size = PAGE_SIZE;
721 unsigned long end = max_low_pfn * PAGE_SIZE; 876 unsigned long end = max_low_pfn * PAGE_SIZE;
722 if (mpf->mpf_physptr + size > end) 877 if (mpf->mpf_physptr + size > end)
723 size = end - mpf->mpf_physptr; 878 size = end - mpf->mpf_physptr;
724 reserve_bootmem(mpf->mpf_physptr, size, 879#endif
880 reserve_bootmem_generic(mpf->mpf_physptr, size,
725 BOOTMEM_DEFAULT); 881 BOOTMEM_DEFAULT);
726 } 882 }
727 883
728#else 884 return 1;
729 if (!reserve)
730 return 1;
731
732 reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE);
733 if (mpf->mpf_physptr)
734 reserve_bootmem_generic(mpf->mpf_physptr,
735 PAGE_SIZE);
736#endif
737 return 1;
738 } 885 }
739 bp += 4; 886 bp += 4;
740 length -= 16; 887 length -= 16;
@@ -790,298 +937,294 @@ void __init find_smp_config(void)
790 __find_smp_config(1); 937 __find_smp_config(1);
791} 938}
792 939
793/* -------------------------------------------------------------------------- 940#ifdef CONFIG_X86_IO_APIC
794 ACPI-based MP Configuration 941static u8 __initdata irq_used[MAX_IRQ_SOURCES];
795 -------------------------------------------------------------------------- */
796 942
797/* 943static int __init get_MP_intsrc_index(struct mpc_config_intsrc *m)
798 * Keep this outside and initialized to 0, for !CONFIG_ACPI builds: 944{
799 */ 945 int i;
800int es7000_plat;
801 946
802#ifdef CONFIG_ACPI 947 if (m->mpc_irqtype != mp_INT)
948 return 0;
803 949
804#ifdef CONFIG_X86_IO_APIC 950 if (m->mpc_irqflag != 0x0f)
951 return 0;
805 952
806#define MP_ISA_BUS 0 953 /* not legacy */
807 954
808extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; 955 for (i = 0; i < mp_irq_entries; i++) {
956 if (mp_irqs[i].mp_irqtype != mp_INT)
957 continue;
809 958
810static int mp_find_ioapic(int gsi) 959 if (mp_irqs[i].mp_irqflag != 0x0f)
811{ 960 continue;
812 int i = 0;
813 961
814 /* Find the IOAPIC that manages this GSI. */ 962 if (mp_irqs[i].mp_srcbus != m->mpc_srcbus)
815 for (i = 0; i < nr_ioapics; i++) { 963 continue;
816 if ((gsi >= mp_ioapic_routing[i].gsi_base) 964 if (mp_irqs[i].mp_srcbusirq != m->mpc_srcbusirq)
817 && (gsi <= mp_ioapic_routing[i].gsi_end)) 965 continue;
818 return i; 966 if (irq_used[i]) {
967 /* already claimed */
968 return -2;
969 }
970 irq_used[i] = 1;
971 return i;
819 } 972 }
820 973
821 printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); 974 /* not found */
822 return -1; 975 return -1;
823} 976}
824 977
825static u8 __init uniq_ioapic_id(u8 id) 978#define SPARE_SLOT_NUM 20
826{ 979
827#ifdef CONFIG_X86_32 980static struct mpc_config_intsrc __initdata *m_spare[SPARE_SLOT_NUM];
828 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
829 !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
830 return io_apic_get_unique_id(nr_ioapics, id);
831 else
832 return id;
833#else
834 int i;
835 DECLARE_BITMAP(used, 256);
836 bitmap_zero(used, 256);
837 for (i = 0; i < nr_ioapics; i++) {
838 struct mpc_config_ioapic *ia = &mp_ioapics[i];
839 __set_bit(ia->mpc_apicid, used);
840 }
841 if (!test_bit(id, used))
842 return id;
843 return find_first_zero_bit(used, 256);
844#endif 981#endif
845}
846 982
847void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) 983static int __init replace_intsrc_all(struct mp_config_table *mpc,
984 unsigned long mpc_new_phys,
985 unsigned long mpc_new_length)
848{ 986{
849 int idx = 0; 987#ifdef CONFIG_X86_IO_APIC
850 988 int i;
851 if (bad_ioapic(address)) 989 int nr_m_spare = 0;
852 return; 990#endif
853 991
854 idx = nr_ioapics; 992 int count = sizeof(*mpc);
993 unsigned char *mpt = ((unsigned char *)mpc) + count;
855 994
856 mp_ioapics[idx].mpc_type = MP_IOAPIC; 995 printk(KERN_INFO "mpc_length %x\n", mpc->mpc_length);
857 mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; 996 while (count < mpc->mpc_length) {
858 mp_ioapics[idx].mpc_apicaddr = address; 997 switch (*mpt) {
998 case MP_PROCESSOR:
999 {
1000 struct mpc_config_processor *m =
1001 (struct mpc_config_processor *)mpt;
1002 mpt += sizeof(*m);
1003 count += sizeof(*m);
1004 break;
1005 }
1006 case MP_BUS:
1007 {
1008 struct mpc_config_bus *m =
1009 (struct mpc_config_bus *)mpt;
1010 mpt += sizeof(*m);
1011 count += sizeof(*m);
1012 break;
1013 }
1014 case MP_IOAPIC:
1015 {
1016 mpt += sizeof(struct mpc_config_ioapic);
1017 count += sizeof(struct mpc_config_ioapic);
1018 break;
1019 }
1020 case MP_INTSRC:
1021 {
1022#ifdef CONFIG_X86_IO_APIC
1023 struct mpc_config_intsrc *m =
1024 (struct mpc_config_intsrc *)mpt;
859 1025
860 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); 1026 printk(KERN_INFO "OLD ");
861 mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id); 1027 print_MP_intsrc_info(m);
862#ifdef CONFIG_X86_32 1028 i = get_MP_intsrc_index(m);
863 mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); 1029 if (i > 0) {
864#else 1030 assign_to_mpc_intsrc(&mp_irqs[i], m);
865 mp_ioapics[idx].mpc_apicver = 0; 1031 printk(KERN_INFO "NEW ");
1032 print_mp_irq_info(&mp_irqs[i]);
1033 } else if (!i) {
1034 /* legacy, do nothing */
1035 } else if (nr_m_spare < SPARE_SLOT_NUM) {
1036 /*
1037 * not found (-1), or duplicated (-2)
1038 * are invalid entries,
1039 * we need to use the slot later
1040 */
1041 m_spare[nr_m_spare] = m;
1042 nr_m_spare++;
1043 }
866#endif 1044#endif
867 /* 1045 mpt += sizeof(struct mpc_config_intsrc);
868 * Build basic GSI lookup table to facilitate gsi->io_apic lookups 1046 count += sizeof(struct mpc_config_intsrc);
869 * and to prevent reprogramming of IOAPIC pins (PCI GSIs). 1047 break;
870 */ 1048 }
871 mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; 1049 case MP_LINTSRC:
872 mp_ioapic_routing[idx].gsi_base = gsi_base; 1050 {
873 mp_ioapic_routing[idx].gsi_end = gsi_base + 1051 struct mpc_config_lintsrc *m =
874 io_apic_get_redir_entries(idx); 1052 (struct mpc_config_lintsrc *)mpt;
875 1053 mpt += sizeof(*m);
876 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " 1054 count += sizeof(*m);
877 "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, 1055 break;
878 mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, 1056 }
879 mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); 1057 default:
880 1058 /* wrong mptable */
881 nr_ioapics++; 1059 printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
882} 1060 printk(KERN_ERR "type %x\n", *mpt);
1061 print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
1062 1, mpc, mpc->mpc_length, 1);
1063 goto out;
1064 }
1065 }
883 1066
884void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) 1067#ifdef CONFIG_X86_IO_APIC
885{ 1068 for (i = 0; i < mp_irq_entries; i++) {
886 struct mpc_config_intsrc intsrc; 1069 if (irq_used[i])
887 int ioapic = -1; 1070 continue;
888 int pin = -1;
889 1071
890 /* 1072 if (mp_irqs[i].mp_irqtype != mp_INT)
891 * Convert 'gsi' to 'ioapic.pin'. 1073 continue;
892 */
893 ioapic = mp_find_ioapic(gsi);
894 if (ioapic < 0)
895 return;
896 pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
897 1074
898 /* 1075 if (mp_irqs[i].mp_irqflag != 0x0f)
899 * TBD: This check is for faulty timer entries, where the override 1076 continue;
900 * erroneously sets the trigger to level, resulting in a HUGE
901 * increase of timer interrupts!
902 */
903 if ((bus_irq == 0) && (trigger == 3))
904 trigger = 1;
905 1077
906 intsrc.mpc_type = MP_INTSRC; 1078 if (nr_m_spare > 0) {
907 intsrc.mpc_irqtype = mp_INT; 1079 printk(KERN_INFO "*NEW* found ");
908 intsrc.mpc_irqflag = (trigger << 2) | polarity; 1080 nr_m_spare--;
909 intsrc.mpc_srcbus = MP_ISA_BUS; 1081 assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]);
910 intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ 1082 m_spare[nr_m_spare] = NULL;
911 intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ 1083 } else {
912 intsrc.mpc_dstirq = pin; /* INTIN# */ 1084 struct mpc_config_intsrc *m =
1085 (struct mpc_config_intsrc *)mpt;
1086 count += sizeof(struct mpc_config_intsrc);
1087 if (!mpc_new_phys) {
1088 printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count);
1089 } else {
1090 if (count <= mpc_new_length)
1091 printk(KERN_INFO "No spare slots, try to append..., new mpc_length %x\n", count);
1092 else {
1093 printk(KERN_ERR "mpc_new_length %lx is too small\n", mpc_new_length);
1094 goto out;
1095 }
1096 }
1097 assign_to_mpc_intsrc(&mp_irqs[i], m);
1098 mpc->mpc_length = count;
1099 mpt += sizeof(struct mpc_config_intsrc);
1100 }
1101 print_mp_irq_info(&mp_irqs[i]);
1102 }
1103#endif
1104out:
1105 /* update checksum */
1106 mpc->mpc_checksum = 0;
1107 mpc->mpc_checksum -= mpf_checksum((unsigned char *)mpc,
1108 mpc->mpc_length);
913 1109
914 MP_intsrc_info(&intsrc); 1110 return 0;
915} 1111}
916 1112
917void __init mp_config_acpi_legacy_irqs(void) 1113static int __initdata enable_update_mptable;
918{
919 struct mpc_config_intsrc intsrc;
920 int i = 0;
921 int ioapic = -1;
922 1114
923#if defined (CONFIG_MCA) || defined (CONFIG_EISA) 1115static int __init update_mptable_setup(char *str)
924 /* 1116{
925 * Fabricate the legacy ISA bus (bus #31). 1117 enable_update_mptable = 1;
926 */ 1118 return 0;
927 mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; 1119}
928#endif 1120early_param("update_mptable", update_mptable_setup);
929 set_bit(MP_ISA_BUS, mp_bus_not_pci);
930 Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
931 1121
932 /* 1122static unsigned long __initdata mpc_new_phys;
933 * Older generations of ES7000 have no legacy identity mappings 1123static unsigned long mpc_new_length __initdata = 4096;
934 */
935 if (es7000_plat == 1)
936 return;
937 1124
938 /* 1125/* alloc_mptable or alloc_mptable=4k */
939 * Locate the IOAPIC that manages the ISA IRQs (0-15). 1126static int __initdata alloc_mptable;
940 */ 1127static int __init parse_alloc_mptable_opt(char *p)
941 ioapic = mp_find_ioapic(0); 1128{
942 if (ioapic < 0) 1129 enable_update_mptable = 1;
943 return; 1130 alloc_mptable = 1;
1131 if (!p)
1132 return 0;
1133 mpc_new_length = memparse(p, &p);
1134 return 0;
1135}
1136early_param("alloc_mptable", parse_alloc_mptable_opt);
944 1137
945 intsrc.mpc_type = MP_INTSRC; 1138void __init early_reserve_e820_mpc_new(void)
946 intsrc.mpc_irqflag = 0; /* Conforming */ 1139{
947 intsrc.mpc_srcbus = MP_ISA_BUS; 1140 if (enable_update_mptable && alloc_mptable) {
948#ifdef CONFIG_X86_IO_APIC 1141 u64 startt = 0;
949 intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; 1142#ifdef CONFIG_X86_TRAMPOLINE
1143 startt = TRAMPOLINE_BASE;
950#endif 1144#endif
951 /* 1145 mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
952 * Use the default configuration for the IRQs 0-15. Unless
953 * overridden by (MADT) interrupt source override entries.
954 */
955 for (i = 0; i < 16; i++) {
956 int idx;
957
958 for (idx = 0; idx < mp_irq_entries; idx++) {
959 struct mpc_config_intsrc *irq = mp_irqs + idx;
960
961 /* Do we already have a mapping for this ISA IRQ? */
962 if (irq->mpc_srcbus == MP_ISA_BUS
963 && irq->mpc_srcbusirq == i)
964 break;
965
966 /* Do we already have a mapping for this IOAPIC pin */
967 if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
968 (irq->mpc_dstirq == i))
969 break;
970 }
971
972 if (idx != mp_irq_entries) {
973 printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
974 continue; /* IRQ already used */
975 }
976
977 intsrc.mpc_irqtype = mp_INT;
978 intsrc.mpc_srcbusirq = i; /* Identity mapped */
979 intsrc.mpc_dstirq = i;
980
981 MP_intsrc_info(&intsrc);
982 } 1146 }
983} 1147}
984 1148
985int mp_register_gsi(u32 gsi, int triggering, int polarity) 1149static int __init update_mp_table(void)
986{ 1150{
987 int ioapic; 1151 char str[16];
988 int ioapic_pin; 1152 char oem[10];
989#ifdef CONFIG_X86_32 1153 struct intel_mp_floating *mpf;
990#define MAX_GSI_NUM 4096 1154 struct mp_config_table *mpc;
991#define IRQ_COMPRESSION_START 64 1155 struct mp_config_table *mpc_new;
1156
1157 if (!enable_update_mptable)
1158 return 0;
1159
1160 mpf = mpf_found;
1161 if (!mpf)
1162 return 0;
992 1163
993 static int pci_irq = IRQ_COMPRESSION_START;
994 /* 1164 /*
995 * Mapping between Global System Interrupts, which 1165 * Now see if we need to go further.
996 * represent all possible interrupts, and IRQs
997 * assigned to actual devices.
998 */ 1166 */
999 static int gsi_to_irq[MAX_GSI_NUM]; 1167 if (mpf->mpf_feature1 != 0)
1000#else 1168 return 0;
1001
1002 if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
1003 return gsi;
1004#endif
1005 1169
1006 /* Don't set up the ACPI SCI because it's already set up */ 1170 if (!mpf->mpf_physptr)
1007 if (acpi_gbl_FADT.sci_interrupt == gsi) 1171 return 0;
1008 return gsi;
1009 1172
1010 ioapic = mp_find_ioapic(gsi); 1173 mpc = phys_to_virt(mpf->mpf_physptr);
1011 if (ioapic < 0) {
1012 printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
1013 return gsi;
1014 }
1015 1174
1016 ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; 1175 if (!smp_check_mpc(mpc, oem, str))
1176 return 0;
1017 1177
1018#ifdef CONFIG_X86_32 1178 printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf));
1019 if (ioapic_renumber_irq) 1179 printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr);
1020 gsi = ioapic_renumber_irq(ioapic, gsi);
1021#endif
1022 1180
1023 /* 1181 if (mpc_new_phys && mpc->mpc_length > mpc_new_length) {
1024 * Avoid pin reprogramming. PRTs typically include entries 1182 mpc_new_phys = 0;
1025 * with redundant pin->gsi mappings (but unique PCI devices); 1183 printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n",
1026 * we only program the IOAPIC on the first. 1184 mpc_new_length);
1027 */
1028 if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
1029 printk(KERN_ERR "Invalid reference to IOAPIC pin "
1030 "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
1031 ioapic_pin);
1032 return gsi;
1033 } 1185 }
1034 if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { 1186
1035 Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", 1187 if (!mpc_new_phys) {
1036 mp_ioapic_routing[ioapic].apic_id, ioapic_pin); 1188 unsigned char old, new;
1037#ifdef CONFIG_X86_32 1189 /* check if we can change the postion */
1038 return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); 1190 mpc->mpc_checksum = 0;
1039#else 1191 old = mpf_checksum((unsigned char *)mpc, mpc->mpc_length);
1040 return gsi; 1192 mpc->mpc_checksum = 0xff;
1041#endif 1193 new = mpf_checksum((unsigned char *)mpc, mpc->mpc_length);
1194 if (old == new) {
1195 printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n");
1196 return 0;
1197 }
1198 printk(KERN_INFO "use in-positon replacing\n");
1199 } else {
1200 mpf->mpf_physptr = mpc_new_phys;
1201 mpc_new = phys_to_virt(mpc_new_phys);
1202 memcpy(mpc_new, mpc, mpc->mpc_length);
1203 mpc = mpc_new;
1204 /* check if we can modify that */
1205 if (mpc_new_phys - mpf->mpf_physptr) {
1206 struct intel_mp_floating *mpf_new;
1207 /* steal 16 bytes from [0, 1k) */
1208 printk(KERN_INFO "mpf new: %x\n", 0x400 - 16);
1209 mpf_new = phys_to_virt(0x400 - 16);
1210 memcpy(mpf_new, mpf, 16);
1211 mpf = mpf_new;
1212 mpf->mpf_physptr = mpc_new_phys;
1213 }
1214 mpf->mpf_checksum = 0;
1215 mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16);
1216 printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr);
1042 } 1217 }
1043 1218
1044 set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
1045#ifdef CONFIG_X86_32
1046 /* 1219 /*
1047 * For GSI >= 64, use IRQ compression 1220 * only replace the one with mp_INT and
1221 * MP_IRQ_TRIGGER_LEVEL|MP_IRQ_POLARITY_LOW,
1222 * already in mp_irqs , stored by ... and mp_config_acpi_gsi,
1223 * may need pci=routeirq for all coverage
1048 */ 1224 */
1049 if ((gsi >= IRQ_COMPRESSION_START) 1225 replace_intsrc_all(mpc, mpc_new_phys, mpc_new_length);
1050 && (triggering == ACPI_LEVEL_SENSITIVE)) { 1226
1051 /* 1227 return 0;
1052 * For PCI devices assign IRQs in order, avoiding gaps
1053 * due to unused I/O APIC pins.
1054 */
1055 int irq = gsi;
1056 if (gsi < MAX_GSI_NUM) {
1057 /*
1058 * Retain the VIA chipset work-around (gsi > 15), but
1059 * avoid a problem where the 8254 timer (IRQ0) is setup
1060 * via an override (so it's not on pin 0 of the ioapic),
1061 * and at the same time, the pin 0 interrupt is a PCI
1062 * type. The gsi > 15 test could cause these two pins
1063 * to be shared as IRQ0, and they are not shareable.
1064 * So test for this condition, and if necessary, avoid
1065 * the pin collision.
1066 */
1067 gsi = pci_irq++;
1068 /*
1069 * Don't assign IRQ used by ACPI SCI
1070 */
1071 if (gsi == acpi_gbl_FADT.sci_interrupt)
1072 gsi = pci_irq++;
1073 gsi_to_irq[irq] = gsi;
1074 } else {
1075 printk(KERN_ERR "GSI %u is too high\n", gsi);
1076 return gsi;
1077 }
1078 }
1079#endif
1080 io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
1081 triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
1082 polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
1083 return gsi;
1084} 1228}
1085 1229
1086#endif /* CONFIG_X86_IO_APIC */ 1230late_initcall(update_mp_table);
1087#endif /* CONFIG_ACPI */
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index e65281b1634..f0f1de1c4a1 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -31,6 +31,8 @@
31#include <asm/numaq.h> 31#include <asm/numaq.h>
32#include <asm/topology.h> 32#include <asm/topology.h>
33#include <asm/processor.h> 33#include <asm/processor.h>
34#include <asm/mpspec.h>
35#include <asm/e820.h>
34 36
35#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) 37#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
36 38
@@ -58,6 +60,8 @@ static void __init smp_dump_qct(void)
58 node_end_pfn[node] = MB_TO_PAGES( 60 node_end_pfn[node] = MB_TO_PAGES(
59 eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); 61 eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
60 62
63 e820_register_active_regions(node, node_start_pfn[node],
64 node_end_pfn[node]);
61 memory_present(node, 65 memory_present(node,
62 node_start_pfn[node], node_end_pfn[node]); 66 node_start_pfn[node], node_end_pfn[node]);
63 node_remap_size[node] = node_memmap_size_bytes(node, 67 node_remap_size[node] = node_memmap_size_bytes(node,
@@ -67,13 +71,24 @@ static void __init smp_dump_qct(void)
67 } 71 }
68} 72}
69 73
70/* 74static __init void early_check_numaq(void)
71 * Unlike Summit, we don't really care to let the NUMA-Q 75{
72 * fall back to flat mode. Don't compile for NUMA-Q 76 /*
73 * unless you really need it! 77 * Find possible boot-time SMP configuration:
74 */ 78 */
79 early_find_smp_config();
80 /*
81 * get boot-time SMP configuration:
82 */
83 if (smp_found_config)
84 early_get_smp_config();
85}
86
75int __init get_memcfg_numaq(void) 87int __init get_memcfg_numaq(void)
76{ 88{
89 early_check_numaq();
90 if (!found_numaq)
91 return 0;
77 smp_dump_qct(); 92 smp_dump_qct();
78 return 1; 93 return 1;
79} 94}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 6f80b852a19..5b0de38cde4 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -17,6 +17,7 @@ unsigned int num_processors;
17unsigned disabled_cpus __cpuinitdata; 17unsigned disabled_cpus __cpuinitdata;
18/* Processor that is doing the boot up */ 18/* Processor that is doing the boot up */
19unsigned int boot_cpu_physical_apicid = -1U; 19unsigned int boot_cpu_physical_apicid = -1U;
20unsigned int max_physical_apicid;
20EXPORT_SYMBOL(boot_cpu_physical_apicid); 21EXPORT_SYMBOL(boot_cpu_physical_apicid);
21 22
22DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; 23DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
@@ -137,3 +138,28 @@ void __init setup_per_cpu_areas(void)
137} 138}
138 139
139#endif 140#endif
141
142void __init parse_setup_data(void)
143{
144 struct setup_data *data;
145 u64 pa_data;
146
147 if (boot_params.hdr.version < 0x0209)
148 return;
149 pa_data = boot_params.hdr.setup_data;
150 while (pa_data) {
151 data = early_ioremap(pa_data, PAGE_SIZE);
152 switch (data->type) {
153 case SETUP_E820_EXT:
154 parse_e820_ext(data, pa_data);
155 break;
156 default:
157 break;
158 }
159#ifndef CONFIG_DEBUG_BOOT_PARAMS
160 free_early(pa_data, pa_data+sizeof(*data)+data->len);
161#endif
162 pa_data = data->next;
163 early_iounmap(data, PAGE_SIZE);
164 }
165}
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 5a2f8e06388..7e06ecd8317 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -59,6 +59,7 @@
59#include <asm/setup.h> 59#include <asm/setup.h>
60#include <asm/arch_hooks.h> 60#include <asm/arch_hooks.h>
61#include <asm/sections.h> 61#include <asm/sections.h>
62#include <asm/dmi.h>
62#include <asm/io_apic.h> 63#include <asm/io_apic.h>
63#include <asm/ist.h> 64#include <asm/ist.h>
64#include <asm/io.h> 65#include <asm/io.h>
@@ -67,10 +68,13 @@
67#include <asm/bios_ebda.h> 68#include <asm/bios_ebda.h>
68#include <asm/cacheflush.h> 69#include <asm/cacheflush.h>
69#include <asm/processor.h> 70#include <asm/processor.h>
71#include <asm/efi.h>
72#include <asm/bugs.h>
70 73
71/* This value is set up by the early boot code to point to the value 74/* This value is set up by the early boot code to point to the value
72 immediately after the boot time page tables. It contains a *physical* 75 immediately after the boot time page tables. It contains a *physical*
73 address, and must not be in the .bss segment! */ 76 address, and must not be in the .bss segment! */
77unsigned long init_pg_tables_start __initdata = ~0UL;
74unsigned long init_pg_tables_end __initdata = ~0UL; 78unsigned long init_pg_tables_end __initdata = ~0UL;
75 79
76/* 80/*
@@ -182,6 +186,12 @@ int bootloader_type;
182static unsigned int highmem_pages = -1; 186static unsigned int highmem_pages = -1;
183 187
184/* 188/*
189 * Early DMI memory
190 */
191int dmi_alloc_index;
192char dmi_alloc_data[DMI_MAX_DATA];
193
194/*
185 * Setup options 195 * Setup options
186 */ 196 */
187struct screen_info screen_info; 197struct screen_info screen_info;
@@ -237,42 +247,6 @@ static inline void copy_edd(void)
237} 247}
238#endif 248#endif
239 249
240int __initdata user_defined_memmap;
241
242/*
243 * "mem=nopentium" disables the 4MB page tables.
244 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
245 * to <mem>, overriding the bios size.
246 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
247 * <start> to <start>+<mem>, overriding the bios size.
248 *
249 * HPA tells me bootloaders need to parse mem=, so no new
250 * option should be mem= [also see Documentation/i386/boot.txt]
251 */
252static int __init parse_mem(char *arg)
253{
254 if (!arg)
255 return -EINVAL;
256
257 if (strcmp(arg, "nopentium") == 0) {
258 setup_clear_cpu_cap(X86_FEATURE_PSE);
259 } else {
260 /* If the user specifies memory size, we
261 * limit the BIOS-provided memory map to
262 * that size. exactmap can be used to specify
263 * the exact map. mem=number can be used to
264 * trim the existing memory map.
265 */
266 unsigned long long mem_size;
267
268 mem_size = memparse(arg, &arg);
269 limit_regions(mem_size);
270 user_defined_memmap = 1;
271 }
272 return 0;
273}
274early_param("mem", parse_mem);
275
276#ifdef CONFIG_PROC_VMCORE 250#ifdef CONFIG_PROC_VMCORE
277/* elfcorehdr= specifies the location of elf core header 251/* elfcorehdr= specifies the location of elf core header
278 * stored by the crashed kernel. 252 * stored by the crashed kernel.
@@ -395,56 +369,6 @@ unsigned long __init find_max_low_pfn(void)
395 return max_low_pfn; 369 return max_low_pfn;
396} 370}
397 371
398#define BIOS_LOWMEM_KILOBYTES 0x413
399
400/*
401 * The BIOS places the EBDA/XBDA at the top of conventional
402 * memory, and usually decreases the reported amount of
403 * conventional memory (int 0x12) too. This also contains a
404 * workaround for Dell systems that neglect to reserve EBDA.
405 * The same workaround also avoids a problem with the AMD768MPX
406 * chipset: reserve a page before VGA to prevent PCI prefetch
407 * into it (errata #56). Usually the page is reserved anyways,
408 * unless you have no PS/2 mouse plugged in.
409 */
410static void __init reserve_ebda_region(void)
411{
412 unsigned int lowmem, ebda_addr;
413
414 /* To determine the position of the EBDA and the */
415 /* end of conventional memory, we need to look at */
416 /* the BIOS data area. In a paravirtual environment */
417 /* that area is absent. We'll just have to assume */
418 /* that the paravirt case can handle memory setup */
419 /* correctly, without our help. */
420 if (paravirt_enabled())
421 return;
422
423 /* end of low (conventional) memory */
424 lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
425 lowmem <<= 10;
426
427 /* start of EBDA area */
428 ebda_addr = get_bios_ebda();
429
430 /* Fixup: bios puts an EBDA in the top 64K segment */
431 /* of conventional memory, but does not adjust lowmem. */
432 if ((lowmem - ebda_addr) <= 0x10000)
433 lowmem = ebda_addr;
434
435 /* Fixup: bios does not report an EBDA at all. */
436 /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
437 if ((ebda_addr == 0) && (lowmem >= 0x9f000))
438 lowmem = 0x9f000;
439
440 /* Paranoia: should never happen, but... */
441 if ((lowmem == 0) || (lowmem >= 0x100000))
442 lowmem = 0x9f000;
443
444 /* reserve all memory between lowmem and the 1MB mark */
445 reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT);
446}
447
448#ifndef CONFIG_NEED_MULTIPLE_NODES 372#ifndef CONFIG_NEED_MULTIPLE_NODES
449static void __init setup_bootmem_allocator(void); 373static void __init setup_bootmem_allocator(void);
450static unsigned long __init setup_memory(void) 374static unsigned long __init setup_memory(void)
@@ -462,11 +386,13 @@ static unsigned long __init setup_memory(void)
462 if (max_pfn > max_low_pfn) { 386 if (max_pfn > max_low_pfn) {
463 highstart_pfn = max_low_pfn; 387 highstart_pfn = max_low_pfn;
464 } 388 }
389 memory_present(0, 0, highend_pfn);
465 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", 390 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
466 pages_to_mb(highend_pfn - highstart_pfn)); 391 pages_to_mb(highend_pfn - highstart_pfn));
467 num_physpages = highend_pfn; 392 num_physpages = highend_pfn;
468 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; 393 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
469#else 394#else
395 memory_present(0, 0, max_low_pfn);
470 num_physpages = max_low_pfn; 396 num_physpages = max_low_pfn;
471 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; 397 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
472#endif 398#endif
@@ -488,11 +414,12 @@ static void __init zone_sizes_init(void)
488 max_zone_pfns[ZONE_DMA] = 414 max_zone_pfns[ZONE_DMA] =
489 virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; 415 virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
490 max_zone_pfns[ZONE_NORMAL] = max_low_pfn; 416 max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
417 remove_all_active_ranges();
491#ifdef CONFIG_HIGHMEM 418#ifdef CONFIG_HIGHMEM
492 max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; 419 max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
493 add_active_range(0, 0, highend_pfn); 420 e820_register_active_regions(0, 0, highend_pfn);
494#else 421#else
495 add_active_range(0, 0, max_low_pfn); 422 e820_register_active_regions(0, 0, max_low_pfn);
496#endif 423#endif
497 424
498 free_area_init_nodes(max_zone_pfns); 425 free_area_init_nodes(max_zone_pfns);
@@ -526,25 +453,28 @@ static void __init reserve_crashkernel(void)
526 ret = parse_crashkernel(boot_command_line, total_mem, 453 ret = parse_crashkernel(boot_command_line, total_mem,
527 &crash_size, &crash_base); 454 &crash_size, &crash_base);
528 if (ret == 0 && crash_size > 0) { 455 if (ret == 0 && crash_size > 0) {
529 if (crash_base > 0) { 456 if (crash_base <= 0) {
530 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
531 "for crashkernel (System RAM: %ldMB)\n",
532 (unsigned long)(crash_size >> 20),
533 (unsigned long)(crash_base >> 20),
534 (unsigned long)(total_mem >> 20));
535
536 if (reserve_bootmem(crash_base, crash_size,
537 BOOTMEM_EXCLUSIVE) < 0) {
538 printk(KERN_INFO "crashkernel reservation "
539 "failed - memory is in use\n");
540 return;
541 }
542
543 crashk_res.start = crash_base;
544 crashk_res.end = crash_base + crash_size - 1;
545 } else
546 printk(KERN_INFO "crashkernel reservation failed - " 457 printk(KERN_INFO "crashkernel reservation failed - "
547 "you have to specify a base address\n"); 458 "you have to specify a base address\n");
459 return;
460 }
461
462 if (reserve_bootmem_generic(crash_base, crash_size,
463 BOOTMEM_EXCLUSIVE) < 0) {
464 printk(KERN_INFO "crashkernel reservation failed - "
465 "memory is in use\n");
466 return;
467 }
468
469 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
470 "for crashkernel (System RAM: %ldMB)\n",
471 (unsigned long)(crash_size >> 20),
472 (unsigned long)(crash_base >> 20),
473 (unsigned long)(total_mem >> 20));
474
475 crashk_res.start = crash_base;
476 crashk_res.end = crash_base + crash_size - 1;
477 insert_resource(&iomem_resource, &crashk_res);
548 } 478 }
549} 479}
550#else 480#else
@@ -558,44 +488,57 @@ static bool do_relocate_initrd = false;
558 488
559static void __init reserve_initrd(void) 489static void __init reserve_initrd(void)
560{ 490{
561 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; 491 u64 ramdisk_image = boot_params.hdr.ramdisk_image;
562 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; 492 u64 ramdisk_size = boot_params.hdr.ramdisk_size;
563 unsigned long ramdisk_end = ramdisk_image + ramdisk_size; 493 u64 ramdisk_end = ramdisk_image + ramdisk_size;
564 unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; 494 u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
565 unsigned long ramdisk_here; 495 u64 ramdisk_here;
566
567 initrd_start = 0;
568 496
569 if (!boot_params.hdr.type_of_loader || 497 if (!boot_params.hdr.type_of_loader ||
570 !ramdisk_image || !ramdisk_size) 498 !ramdisk_image || !ramdisk_size)
571 return; /* No initrd provided by bootloader */ 499 return; /* No initrd provided by bootloader */
572 500
573 if (ramdisk_end < ramdisk_image) { 501 initrd_start = 0;
574 printk(KERN_ERR "initrd wraps around end of memory, " 502
575 "disabling initrd\n");
576 return;
577 }
578 if (ramdisk_size >= end_of_lowmem/2) { 503 if (ramdisk_size >= end_of_lowmem/2) {
504 free_early(ramdisk_image, ramdisk_end);
579 printk(KERN_ERR "initrd too large to handle, " 505 printk(KERN_ERR "initrd too large to handle, "
580 "disabling initrd\n"); 506 "disabling initrd\n");
581 return; 507 return;
582 } 508 }
509
510 printk(KERN_INFO "old RAMDISK: %08llx - %08llx\n", ramdisk_image,
511 ramdisk_end);
512
513
583 if (ramdisk_end <= end_of_lowmem) { 514 if (ramdisk_end <= end_of_lowmem) {
584 /* All in lowmem, easy case */ 515 /* All in lowmem, easy case */
585 reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT); 516 /*
517 * don't need to reserve again, already reserved early
518 * in i386_start_kernel
519 */
586 initrd_start = ramdisk_image + PAGE_OFFSET; 520 initrd_start = ramdisk_image + PAGE_OFFSET;
587 initrd_end = initrd_start+ramdisk_size; 521 initrd_end = initrd_start+ramdisk_size;
588 return; 522 return;
589 } 523 }
590 524
591 /* We need to move the initrd down into lowmem */ 525 /* We need to move the initrd down into lowmem */
592 ramdisk_here = (end_of_lowmem - ramdisk_size) & PAGE_MASK; 526 ramdisk_here = find_e820_area(min_low_pfn<<PAGE_SHIFT,
527 end_of_lowmem, ramdisk_size,
528 PAGE_SIZE);
529
530 if (ramdisk_here == -1ULL)
531 panic("Cannot find place for new RAMDISK of size %lld\n",
532 ramdisk_size);
593 533
594 /* Note: this includes all the lowmem currently occupied by 534 /* Note: this includes all the lowmem currently occupied by
595 the initrd, we rely on that fact to keep the data intact. */ 535 the initrd, we rely on that fact to keep the data intact. */
596 reserve_bootmem(ramdisk_here, ramdisk_size, BOOTMEM_DEFAULT); 536 reserve_early(ramdisk_here, ramdisk_here + ramdisk_size,
537 "NEW RAMDISK");
597 initrd_start = ramdisk_here + PAGE_OFFSET; 538 initrd_start = ramdisk_here + PAGE_OFFSET;
598 initrd_end = initrd_start + ramdisk_size; 539 initrd_end = initrd_start + ramdisk_size;
540 printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
541 ramdisk_here, ramdisk_here + ramdisk_size);
599 542
600 do_relocate_initrd = true; 543 do_relocate_initrd = true;
601} 544}
@@ -604,10 +547,10 @@ static void __init reserve_initrd(void)
604 547
605static void __init relocate_initrd(void) 548static void __init relocate_initrd(void)
606{ 549{
607 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; 550 u64 ramdisk_image = boot_params.hdr.ramdisk_image;
608 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; 551 u64 ramdisk_size = boot_params.hdr.ramdisk_size;
609 unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; 552 u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
610 unsigned long ramdisk_here; 553 u64 ramdisk_here;
611 unsigned long slop, clen, mapaddr; 554 unsigned long slop, clen, mapaddr;
612 char *p, *q; 555 char *p, *q;
613 556
@@ -624,6 +567,10 @@ static void __init relocate_initrd(void)
624 p = (char *)__va(ramdisk_image); 567 p = (char *)__va(ramdisk_image);
625 memcpy(q, p, clen); 568 memcpy(q, p, clen);
626 q += clen; 569 q += clen;
570 /* need to free these low pages...*/
571 printk(KERN_INFO "Freeing old partial RAMDISK %08llx-%08llx\n",
572 ramdisk_image, ramdisk_image + clen - 1);
573 free_bootmem(ramdisk_image, clen);
627 ramdisk_image += clen; 574 ramdisk_image += clen;
628 ramdisk_size -= clen; 575 ramdisk_size -= clen;
629 } 576 }
@@ -642,66 +589,47 @@ static void __init relocate_initrd(void)
642 ramdisk_image += clen; 589 ramdisk_image += clen;
643 ramdisk_size -= clen; 590 ramdisk_size -= clen;
644 } 591 }
592 /* high pages is not converted by early_res_to_bootmem */
593 ramdisk_image = boot_params.hdr.ramdisk_image;
594 ramdisk_size = boot_params.hdr.ramdisk_size;
595 printk(KERN_INFO "Copied RAMDISK from %016llx - %016llx to %08llx - %08llx\n",
596 ramdisk_image, ramdisk_image + ramdisk_size - 1,
597 ramdisk_here, ramdisk_here + ramdisk_size - 1);
598
599 /* need to free that, otherwise init highmem will reserve it again */
600 free_early(ramdisk_image, ramdisk_image+ramdisk_size);
645} 601}
646 602
647#endif /* CONFIG_BLK_DEV_INITRD */ 603#endif /* CONFIG_BLK_DEV_INITRD */
648 604
649void __init setup_bootmem_allocator(void) 605void __init setup_bootmem_allocator(void)
650{ 606{
651 unsigned long bootmap_size; 607 int i;
608 unsigned long bootmap_size, bootmap;
652 /* 609 /*
653 * Initialize the boot-time allocator (with low memory only): 610 * Initialize the boot-time allocator (with low memory only):
654 */ 611 */
655 bootmap_size = init_bootmem(min_low_pfn, max_low_pfn); 612 bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT;
656 613 bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT,
657 register_bootmem_low_pages(max_low_pfn); 614 max_pfn_mapped<<PAGE_SHIFT, bootmap_size,
658 615 PAGE_SIZE);
659 /* 616 if (bootmap == -1L)
660 * Reserve the bootmem bitmap itself as well. We do this in two 617 panic("Cannot find bootmem map of size %ld\n", bootmap_size);
661 * steps (first step was init_bootmem()) because this catches 618 reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
662 * the (very unlikely) case of us accidentally initializing the
663 * bootmem allocator with an invalid RAM area.
664 */
665 reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) +
666 bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text),
667 BOOTMEM_DEFAULT);
668
669 /*
670 * reserve physical page 0 - it's a special BIOS page on many boxes,
671 * enabling clean reboots, SMP operation, laptop functions.
672 */
673 reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT);
674
675 /* reserve EBDA region */
676 reserve_ebda_region();
677
678#ifdef CONFIG_SMP
679 /*
680 * But first pinch a few for the stack/trampoline stuff
681 * FIXME: Don't need the extra page at 4K, but need to fix
682 * trampoline before removing it. (see the GDT stuff)
683 */
684 reserve_bootmem(PAGE_SIZE, PAGE_SIZE, BOOTMEM_DEFAULT);
685#endif
686#ifdef CONFIG_ACPI_SLEEP
687 /*
688 * Reserve low memory region for sleep support.
689 */
690 acpi_reserve_bootmem();
691#endif
692#ifdef CONFIG_X86_FIND_SMP_CONFIG
693 /*
694 * Find and reserve possible boot-time SMP configuration:
695 */
696 find_smp_config();
697#endif
698#ifdef CONFIG_BLK_DEV_INITRD 619#ifdef CONFIG_BLK_DEV_INITRD
699 reserve_initrd(); 620 reserve_initrd();
700#endif 621#endif
701 numa_kva_reserve(); 622 bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn);
702 reserve_crashkernel(); 623 printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
624 max_pfn_mapped<<PAGE_SHIFT);
625 printk(KERN_INFO " low ram: %08lx - %08lx\n",
626 min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
627 printk(KERN_INFO " bootmap %08lx - %08lx\n",
628 bootmap, bootmap + bootmap_size);
629 for_each_online_node(i)
630 free_bootmem_with_active_regions(i, max_low_pfn);
631 early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
703 632
704 reserve_ibft_region();
705} 633}
706 634
707/* 635/*
@@ -731,12 +659,6 @@ static void set_mca_bus(int x)
731static void set_mca_bus(int x) { } 659static void set_mca_bus(int x) { }
732#endif 660#endif
733 661
734/* Overridden in paravirt.c if CONFIG_PARAVIRT */
735char * __init __attribute__((weak)) memory_setup(void)
736{
737 return machine_specific_memory_setup();
738}
739
740#ifdef CONFIG_NUMA 662#ifdef CONFIG_NUMA
741/* 663/*
742 * In the golden day, when everything among i386 and x86_64 will be 664 * In the golden day, when everything among i386 and x86_64 will be
@@ -749,6 +671,8 @@ int x86_cpu_to_node_map_init[NR_CPUS] = {
749DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE; 671DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
750#endif 672#endif
751 673
674static void probe_roms(void);
675
752/* 676/*
753 * Determine if we were loaded by an EFI loader. If so, then we have also been 677 * Determine if we were loaded by an EFI loader. If so, then we have also been
754 * passed the efi memmap, systab, etc., so we should use these data structures 678 * passed the efi memmap, systab, etc., so we should use these data structures
@@ -758,17 +682,21 @@ DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
758 */ 682 */
759void __init setup_arch(char **cmdline_p) 683void __init setup_arch(char **cmdline_p)
760{ 684{
685 int i;
761 unsigned long max_low_pfn; 686 unsigned long max_low_pfn;
762 687
763 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); 688 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
764 pre_setup_arch_hook(); 689 pre_setup_arch_hook();
765 early_cpu_init(); 690 early_cpu_init();
766 early_ioremap_init(); 691 early_ioremap_init();
692 reserve_setup_data();
767 693
768#ifdef CONFIG_EFI 694#ifdef CONFIG_EFI
769 if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, 695 if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
770 "EL32", 4)) 696 "EL32", 4)) {
771 efi_enabled = 1; 697 efi_enabled = 1;
698 efi_reserve_early();
699 }
772#endif 700#endif
773 701
774 ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); 702 ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
@@ -792,8 +720,7 @@ void __init setup_arch(char **cmdline_p)
792#endif 720#endif
793 ARCH_SETUP 721 ARCH_SETUP
794 722
795 printk(KERN_INFO "BIOS-provided physical RAM map:\n"); 723 setup_memory_map();
796 print_memory_map(memory_setup());
797 724
798 copy_edd(); 725 copy_edd();
799 726
@@ -811,12 +738,18 @@ void __init setup_arch(char **cmdline_p)
811 bss_resource.start = virt_to_phys(&__bss_start); 738 bss_resource.start = virt_to_phys(&__bss_start);
812 bss_resource.end = virt_to_phys(&__bss_stop)-1; 739 bss_resource.end = virt_to_phys(&__bss_stop)-1;
813 740
741 parse_setup_data();
742
814 parse_early_param(); 743 parse_early_param();
815 744
816 if (user_defined_memmap) { 745 finish_e820_parsing();
817 printk(KERN_INFO "user-defined physical RAM map:\n"); 746
818 print_memory_map("user"); 747 probe_roms();
819 } 748
749 /* after parse_early_param, so could debug it */
750 insert_resource(&iomem_resource, &code_resource);
751 insert_resource(&iomem_resource, &data_resource);
752 insert_resource(&iomem_resource, &bss_resource);
820 753
821 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); 754 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
822 *cmdline_p = command_line; 755 *cmdline_p = command_line;
@@ -824,14 +757,67 @@ void __init setup_arch(char **cmdline_p)
824 if (efi_enabled) 757 if (efi_enabled)
825 efi_init(); 758 efi_init();
826 759
760 if (ppro_with_ram_bug()) {
761 e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
762 E820_RESERVED);
763 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
764 printk(KERN_INFO "fixed physical RAM map:\n");
765 e820_print_map("bad_ppro");
766 }
767
768 e820_register_active_regions(0, 0, -1UL);
769 /*
770 * partially used pages are not usable - thus
771 * we are rounding upwards:
772 */
773 max_pfn = e820_end_of_ram();
774
775 /* preallocate 4k for mptable mpc */
776 early_reserve_e820_mpc_new();
827 /* update e820 for memory not covered by WB MTRRs */ 777 /* update e820 for memory not covered by WB MTRRs */
828 propagate_e820_map();
829 mtrr_bp_init(); 778 mtrr_bp_init();
830 if (mtrr_trim_uncached_memory(max_pfn)) 779 if (mtrr_trim_uncached_memory(max_pfn)) {
831 propagate_e820_map(); 780 remove_all_active_ranges();
781 e820_register_active_regions(0, 0, -1UL);
782 max_pfn = e820_end_of_ram();
783 }
784
785 dmi_scan_machine();
786
787 io_delay_init();
788
789#ifdef CONFIG_ACPI
790 /*
791 * Parse the ACPI tables for possible boot-time SMP configuration.
792 */
793 acpi_boot_table_init();
794#endif
795
796#ifdef CONFIG_ACPI_NUMA
797 /*
798 * Parse SRAT to discover nodes.
799 */
800 acpi_numa_init();
801#endif
832 802
833 max_low_pfn = setup_memory(); 803 max_low_pfn = setup_memory();
834 804
805#ifdef CONFIG_ACPI_SLEEP
806 /*
807 * Reserve low memory region for sleep support.
808 */
809 acpi_reserve_bootmem();
810#endif
811#ifdef CONFIG_X86_FIND_SMP_CONFIG
812 /*
813 * Find and reserve possible boot-time SMP configuration:
814 */
815 find_smp_config();
816#endif
817 reserve_crashkernel();
818
819 reserve_ibft_region();
820
835#ifdef CONFIG_KVM_CLOCK 821#ifdef CONFIG_KVM_CLOCK
836 kvmclock_init(); 822 kvmclock_init();
837#endif 823#endif
@@ -855,9 +841,6 @@ void __init setup_arch(char **cmdline_p)
855 * not to exceed the 8Mb limit. 841 * not to exceed the 8Mb limit.
856 */ 842 */
857 843
858#ifdef CONFIG_SMP
859 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
860#endif
861 paging_init(); 844 paging_init();
862 845
863 /* 846 /*
@@ -869,10 +852,6 @@ void __init setup_arch(char **cmdline_p)
869 init_ohci1394_dma_on_all_controllers(); 852 init_ohci1394_dma_on_all_controllers();
870#endif 853#endif
871 854
872 remapped_pgdat_init();
873 sparse_init();
874 zone_sizes_init();
875
876 /* 855 /*
877 * NOTE: at this point the bootmem allocator is fully available. 856 * NOTE: at this point the bootmem allocator is fully available.
878 */ 857 */
@@ -881,11 +860,11 @@ void __init setup_arch(char **cmdline_p)
881 relocate_initrd(); 860 relocate_initrd();
882#endif 861#endif
883 862
884 paravirt_post_allocator_init(); 863 remapped_pgdat_init();
885 864 sparse_init();
886 dmi_scan_machine(); 865 zone_sizes_init();
887 866
888 io_delay_init(); 867 paravirt_post_allocator_init();
889 868
890#ifdef CONFIG_X86_SMP 869#ifdef CONFIG_X86_SMP
891 /* 870 /*
@@ -903,32 +882,31 @@ void __init setup_arch(char **cmdline_p)
903 generic_apic_probe(); 882 generic_apic_probe();
904#endif 883#endif
905 884
906#ifdef CONFIG_ACPI
907 /*
908 * Parse the ACPI tables for possible boot-time SMP configuration.
909 */
910 acpi_boot_table_init();
911#endif
912
913 early_quirks(); 885 early_quirks();
914 886
915#ifdef CONFIG_ACPI 887#ifdef CONFIG_ACPI
916 acpi_boot_init(); 888 acpi_boot_init();
917 889#endif
890#if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
891 if (smp_found_config)
892 get_smp_config();
893#endif
918#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) 894#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
919 if (def_to_bigsmp) 895 if (def_to_bigsmp)
920 printk(KERN_WARNING "More than 8 CPUs detected and " 896 printk(KERN_WARNING "More than 8 CPUs detected and "
921 "CONFIG_X86_PC cannot handle it.\nUse " 897 "CONFIG_X86_PC cannot handle it.\nUse "
922 "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n"); 898 "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
923#endif 899#endif
924#endif
925#ifdef CONFIG_X86_LOCAL_APIC
926 if (smp_found_config)
927 get_smp_config();
928#endif
929 900
930 e820_register_memory(); 901 e820_reserve_resources();
931 e820_mark_nosave_regions(); 902 e820_mark_nosave_regions(max_low_pfn);
903
904 request_resource(&iomem_resource, &video_ram_resource);
905 /* request I/O space for devices used on all i[345]86 PCs */
906 for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
907 request_resource(&ioport_resource, &standard_io_resources[i]);
908
909 e820_setup_gap();
932 910
933#ifdef CONFIG_VT 911#ifdef CONFIG_VT
934#if defined(CONFIG_VGA_CONSOLE) 912#if defined(CONFIG_VGA_CONSOLE)
@@ -940,25 +918,147 @@ void __init setup_arch(char **cmdline_p)
940#endif 918#endif
941} 919}
942 920
943/* 921static struct resource system_rom_resource = {
944 * Request address space for all standard resources 922 .name = "System ROM",
945 * 923 .start = 0xf0000,
946 * This is called just before pcibios_init(), which is also a 924 .end = 0xfffff,
947 * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). 925 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
948 */ 926};
949static int __init request_standard_resources(void) 927
928static struct resource extension_rom_resource = {
929 .name = "Extension ROM",
930 .start = 0xe0000,
931 .end = 0xeffff,
932 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
933};
934
935static struct resource adapter_rom_resources[] = { {
936 .name = "Adapter ROM",
937 .start = 0xc8000,
938 .end = 0,
939 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
940}, {
941 .name = "Adapter ROM",
942 .start = 0,
943 .end = 0,
944 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
945}, {
946 .name = "Adapter ROM",
947 .start = 0,
948 .end = 0,
949 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
950}, {
951 .name = "Adapter ROM",
952 .start = 0,
953 .end = 0,
954 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
955}, {
956 .name = "Adapter ROM",
957 .start = 0,
958 .end = 0,
959 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
960}, {
961 .name = "Adapter ROM",
962 .start = 0,
963 .end = 0,
964 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
965} };
966
967static struct resource video_rom_resource = {
968 .name = "Video ROM",
969 .start = 0xc0000,
970 .end = 0xc7fff,
971 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
972};
973
974#define ROMSIGNATURE 0xaa55
975
976static int __init romsignature(const unsigned char *rom)
950{ 977{
978 const unsigned short * const ptr = (const unsigned short *)rom;
979 unsigned short sig;
980
981 return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
982}
983
984static int __init romchecksum(const unsigned char *rom, unsigned long length)
985{
986 unsigned char sum, c;
987
988 for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
989 sum += c;
990 return !length && !sum;
991}
992
993static void __init probe_roms(void)
994{
995 const unsigned char *rom;
996 unsigned long start, length, upper;
997 unsigned char c;
951 int i; 998 int i;
952 999
953 printk(KERN_INFO "Setting up standard PCI resources\n"); 1000 /* video rom */
954 init_iomem_resources(&code_resource, &data_resource, &bss_resource); 1001 upper = adapter_rom_resources[0].start;
1002 for (start = video_rom_resource.start; start < upper; start += 2048) {
1003 rom = isa_bus_to_virt(start);
1004 if (!romsignature(rom))
1005 continue;
955 1006
956 request_resource(&iomem_resource, &video_ram_resource); 1007 video_rom_resource.start = start;
957 1008
958 /* request I/O space for devices used on all i[345]86 PCs */ 1009 if (probe_kernel_address(rom + 2, c) != 0)
959 for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) 1010 continue;
960 request_resource(&ioport_resource, &standard_io_resources[i]); 1011
961 return 0; 1012 /* 0 < length <= 0x7f * 512, historically */
1013 length = c * 512;
1014
1015 /* if checksum okay, trust length byte */
1016 if (length && romchecksum(rom, length))
1017 video_rom_resource.end = start + length - 1;
1018
1019 request_resource(&iomem_resource, &video_rom_resource);
1020 break;
1021 }
1022
1023 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
1024 if (start < upper)
1025 start = upper;
1026
1027 /* system rom */
1028 request_resource(&iomem_resource, &system_rom_resource);
1029 upper = system_rom_resource.start;
1030
1031 /* check for extension rom (ignore length byte!) */
1032 rom = isa_bus_to_virt(extension_rom_resource.start);
1033 if (romsignature(rom)) {
1034 length = extension_rom_resource.end - extension_rom_resource.start + 1;
1035 if (romchecksum(rom, length)) {
1036 request_resource(&iomem_resource, &extension_rom_resource);
1037 upper = extension_rom_resource.start;
1038 }
1039 }
1040
1041 /* check for adapter roms on 2k boundaries */
1042 for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
1043 rom = isa_bus_to_virt(start);
1044 if (!romsignature(rom))
1045 continue;
1046
1047 if (probe_kernel_address(rom + 2, c) != 0)
1048 continue;
1049
1050 /* 0 < length <= 0x7f * 512, historically */
1051 length = c * 512;
1052
1053 /* but accept any length that fits if checksum okay */
1054 if (!length || start + length > upper || !romchecksum(rom, length))
1055 continue;
1056
1057 adapter_rom_resources[i].start = start;
1058 adapter_rom_resources[i].end = start + length - 1;
1059 request_resource(&iomem_resource, &adapter_rom_resources[i]);
1060
1061 start = adapter_rom_resources[i++].end & ~2047UL;
1062 }
962} 1063}
963 1064
964subsys_initcall(request_standard_resources);
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 545440e471b..9a87113ba99 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -56,6 +56,7 @@
56#include <asm/desc.h> 56#include <asm/desc.h>
57#include <video/edid.h> 57#include <video/edid.h>
58#include <asm/e820.h> 58#include <asm/e820.h>
59#include <asm/mpspec.h>
59#include <asm/dma.h> 60#include <asm/dma.h>
60#include <asm/gart.h> 61#include <asm/gart.h>
61#include <asm/mpspec.h> 62#include <asm/mpspec.h>
@@ -245,7 +246,7 @@ static void __init reserve_crashkernel(void)
245 return; 246 return;
246 } 247 }
247 248
248 if (reserve_bootmem(crash_base, crash_size, 249 if (reserve_bootmem_generic(crash_base, crash_size,
249 BOOTMEM_EXCLUSIVE) < 0) { 250 BOOTMEM_EXCLUSIVE) < 0) {
250 printk(KERN_INFO "crashkernel reservation failed - " 251 printk(KERN_INFO "crashkernel reservation failed - "
251 "memory is in use\n"); 252 "memory is in use\n");
@@ -267,34 +268,6 @@ static inline void __init reserve_crashkernel(void)
267{} 268{}
268#endif 269#endif
269 270
270/* Overridden in paravirt.c if CONFIG_PARAVIRT */
271void __attribute__((weak)) __init memory_setup(void)
272{
273 machine_specific_memory_setup();
274}
275
276static void __init parse_setup_data(void)
277{
278 struct setup_data *data;
279 unsigned long pa_data;
280
281 if (boot_params.hdr.version < 0x0209)
282 return;
283 pa_data = boot_params.hdr.setup_data;
284 while (pa_data) {
285 data = early_ioremap(pa_data, PAGE_SIZE);
286 switch (data->type) {
287 default:
288 break;
289 }
290#ifndef CONFIG_DEBUG_BOOT_PARAMS
291 free_early(pa_data, pa_data+sizeof(*data)+data->len);
292#endif
293 pa_data = data->next;
294 early_iounmap(data, PAGE_SIZE);
295 }
296}
297
298/* 271/*
299 * setup_arch - architecture-specific boot-time initializations 272 * setup_arch - architecture-specific boot-time initializations
300 * 273 *
@@ -319,13 +292,15 @@ void __init setup_arch(char **cmdline_p)
319#endif 292#endif
320#ifdef CONFIG_EFI 293#ifdef CONFIG_EFI
321 if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, 294 if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
322 "EL64", 4)) 295 "EL64", 4)) {
323 efi_enabled = 1; 296 efi_enabled = 1;
297 efi_reserve_early();
298 }
324#endif 299#endif
325 300
326 ARCH_SETUP 301 ARCH_SETUP
327 302
328 memory_setup(); 303 setup_memory_map();
329 copy_edd(); 304 copy_edd();
330 305
331 if (!boot_params.hdr.root_flags) 306 if (!boot_params.hdr.root_flags)
@@ -372,9 +347,13 @@ void __init setup_arch(char **cmdline_p)
372 * we are rounding upwards: 347 * we are rounding upwards:
373 */ 348 */
374 end_pfn = e820_end_of_ram(); 349 end_pfn = e820_end_of_ram();
350
351 /* pre allocte 4k for mptable mpc */
352 early_reserve_e820_mpc_new();
375 /* update e820 for memory not covered by WB MTRRs */ 353 /* update e820 for memory not covered by WB MTRRs */
376 mtrr_bp_init(); 354 mtrr_bp_init();
377 if (mtrr_trim_uncached_memory(end_pfn)) { 355 if (mtrr_trim_uncached_memory(end_pfn)) {
356 remove_all_active_ranges();
378 e820_register_active_regions(0, 0, -1UL); 357 e820_register_active_regions(0, 0, -1UL);
379 end_pfn = e820_end_of_ram(); 358 end_pfn = e820_end_of_ram();
380 } 359 }
@@ -383,7 +362,7 @@ void __init setup_arch(char **cmdline_p)
383 362
384 check_efer(); 363 check_efer();
385 364
386 max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT)); 365 max_pfn_mapped = init_memory_mapping(0, (end_pfn << PAGE_SHIFT));
387 if (efi_enabled) 366 if (efi_enabled)
388 efi_init(); 367 efi_init();
389 368
@@ -444,13 +423,12 @@ void __init setup_arch(char **cmdline_p)
444 acpi_reserve_bootmem(); 423 acpi_reserve_bootmem();
445#endif 424#endif
446 425
447 if (efi_enabled) 426#ifdef CONFIG_X86_MPPARSE
448 efi_reserve_bootmem();
449
450 /* 427 /*
451 * Find and reserve possible boot-time SMP configuration: 428 * Find and reserve possible boot-time SMP configuration:
452 */ 429 */
453 find_smp_config(); 430 find_smp_config();
431#endif
454#ifdef CONFIG_BLK_DEV_INITRD 432#ifdef CONFIG_BLK_DEV_INITRD
455 if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { 433 if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
456 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; 434 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
@@ -493,11 +471,13 @@ void __init setup_arch(char **cmdline_p)
493 471
494 init_cpu_to_node(); 472 init_cpu_to_node();
495 473
474#ifdef CONFIG_X86_MPPARSE
496 /* 475 /*
497 * get boot-time SMP configuration: 476 * get boot-time SMP configuration:
498 */ 477 */
499 if (smp_found_config) 478 if (smp_found_config)
500 get_smp_config(); 479 get_smp_config();
480#endif
501 init_apic_mappings(); 481 init_apic_mappings();
502 ioapic_init_mappings(); 482 ioapic_init_mappings();
503 483
@@ -507,7 +487,7 @@ void __init setup_arch(char **cmdline_p)
507 * We trust e820 completely. No explicit ROM probing in memory. 487 * We trust e820 completely. No explicit ROM probing in memory.
508 */ 488 */
509 e820_reserve_resources(); 489 e820_reserve_resources();
510 e820_mark_nosave_regions(); 490 e820_mark_nosave_regions(end_pfn);
511 491
512 /* request I/O space for devices used on all i[345]86 PCs */ 492 /* request I/O space for devices used on all i[345]86 PCs */
513 for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) 493 for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f2b66675629..6be701f3027 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -554,23 +554,6 @@ cpumask_t cpu_coregroup_map(int cpu)
554 return c->llc_shared_map; 554 return c->llc_shared_map;
555} 555}
556 556
557#ifdef CONFIG_X86_32
558/*
559 * We are called very early to get the low memory for the
560 * SMP bootup trampoline page.
561 */
562void __init smp_alloc_memory(void)
563{
564 trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
565 /*
566 * Has to be in very low memory so we can execute
567 * real-mode AP code.
568 */
569 if (__pa(trampoline_base) >= 0x9F000)
570 BUG();
571}
572#endif
573
574static void impress_friends(void) 557static void impress_friends(void)
575{ 558{
576 int cpu; 559 int cpu;
diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/kernel/srat_32.c
index 70e4a374b4e..5978023b799 100644
--- a/arch/x86/kernel/srat_32.c
+++ b/arch/x86/kernel/srat_32.c
@@ -31,6 +31,7 @@
31#include <asm/srat.h> 31#include <asm/srat.h>
32#include <asm/topology.h> 32#include <asm/topology.h>
33#include <asm/smp.h> 33#include <asm/smp.h>
34#include <asm/e820.h>
34 35
35/* 36/*
36 * proximity macros and definitions 37 * proximity macros and definitions
@@ -41,7 +42,7 @@
41#define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit))) 42#define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit)))
42/* bitmap length; _PXM is at most 255 */ 43/* bitmap length; _PXM is at most 255 */
43#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) 44#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8)
44static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ 45static u8 __initdata pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */
45 46
46#define MAX_CHUNKS_PER_NODE 3 47#define MAX_CHUNKS_PER_NODE 3
47#define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES) 48#define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES)
@@ -52,16 +53,37 @@ struct node_memory_chunk_s {
52 u8 nid; // which cnode contains this chunk? 53 u8 nid; // which cnode contains this chunk?
53 u8 bank; // which mem bank on this node 54 u8 bank; // which mem bank on this node
54}; 55};
55static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS]; 56static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
56 57
57static int num_memory_chunks; /* total number of memory chunks */ 58static int __initdata num_memory_chunks; /* total number of memory chunks */
58static u8 __initdata apicid_to_pxm[MAX_APICID]; 59static u8 __initdata apicid_to_pxm[MAX_APICID];
59 60
61int numa_off __initdata;
62int acpi_numa __initdata;
63
64static __init void bad_srat(void)
65{
66 printk(KERN_ERR "SRAT: SRAT not used.\n");
67 acpi_numa = -1;
68 num_memory_chunks = 0;
69}
70
71static __init inline int srat_disabled(void)
72{
73 return numa_off || acpi_numa < 0;
74}
75
60/* Identify CPU proximity domains */ 76/* Identify CPU proximity domains */
61static void __init parse_cpu_affinity_structure(char *p) 77void __init
78acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity)
62{ 79{
63 struct acpi_srat_cpu_affinity *cpu_affinity = 80 if (srat_disabled())
64 (struct acpi_srat_cpu_affinity *) p; 81 return;
82 if (cpu_affinity->header.length !=
83 sizeof(struct acpi_srat_cpu_affinity)) {
84 bad_srat();
85 return;
86 }
65 87
66 if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0) 88 if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0)
67 return; /* empty entry */ 89 return; /* empty entry */
@@ -79,14 +101,21 @@ static void __init parse_cpu_affinity_structure(char *p)
79 * Identify memory proximity domains and hot-remove capabilities. 101 * Identify memory proximity domains and hot-remove capabilities.
80 * Fill node memory chunk list structure. 102 * Fill node memory chunk list structure.
81 */ 103 */
82static void __init parse_memory_affinity_structure (char *sratp) 104void __init
105acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity)
83{ 106{
84 unsigned long long paddr, size; 107 unsigned long long paddr, size;
85 unsigned long start_pfn, end_pfn; 108 unsigned long start_pfn, end_pfn;
86 u8 pxm; 109 u8 pxm;
87 struct node_memory_chunk_s *p, *q, *pend; 110 struct node_memory_chunk_s *p, *q, *pend;
88 struct acpi_srat_mem_affinity *memory_affinity = 111
89 (struct acpi_srat_mem_affinity *) sratp; 112 if (srat_disabled())
113 return;
114 if (memory_affinity->header.length !=
115 sizeof(struct acpi_srat_mem_affinity)) {
116 bad_srat();
117 return;
118 }
90 119
91 if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0) 120 if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0)
92 return; /* empty entry */ 121 return; /* empty entry */
@@ -134,6 +163,14 @@ static void __init parse_memory_affinity_structure (char *sratp)
134 "enabled and removable" : "enabled" ) ); 163 "enabled and removable" : "enabled" ) );
135} 164}
136 165
166/* Callback for SLIT parsing */
167void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
168{
169}
170
171void acpi_numa_arch_fixup(void)
172{
173}
137/* 174/*
138 * The SRAT table always lists ascending addresses, so can always 175 * The SRAT table always lists ascending addresses, so can always
139 * assume that the first "start" address that you see is the real 176 * assume that the first "start" address that you see is the real
@@ -166,39 +203,13 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c
166 node_end_pfn[nid] = memory_chunk->end_pfn; 203 node_end_pfn[nid] = memory_chunk->end_pfn;
167} 204}
168 205
169/* Parse the ACPI Static Resource Affinity Table */ 206int __init get_memcfg_from_srat(void)
170static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
171{ 207{
172 u8 *start, *end, *p;
173 int i, j, nid; 208 int i, j, nid;
174 209
175 start = (u8 *)(&(sratp->reserved) + 1); /* skip header */
176 p = start;
177 end = (u8 *)sratp + sratp->header.length;
178
179 memset(pxm_bitmap, 0, sizeof(pxm_bitmap)); /* init proximity domain bitmap */
180 memset(node_memory_chunk, 0, sizeof(node_memory_chunk));
181 210
182 num_memory_chunks = 0; 211 if (srat_disabled())
183 while (p < end) { 212 goto out_fail;
184 switch (*p) {
185 case ACPI_SRAT_TYPE_CPU_AFFINITY:
186 parse_cpu_affinity_structure(p);
187 break;
188 case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
189 parse_memory_affinity_structure(p);
190 break;
191 default:
192 printk("ACPI 2.0 SRAT: unknown entry skipped: type=0x%02X, len=%d\n", p[0], p[1]);
193 break;
194 }
195 p += p[1];
196 if (p[1] == 0) {
197 printk("acpi20_parse_srat: Entry length value is zero;"
198 " can't parse any further!\n");
199 break;
200 }
201 }
202 213
203 if (num_memory_chunks == 0) { 214 if (num_memory_chunks == 0) {
204 printk("could not finy any ACPI SRAT memory areas.\n"); 215 printk("could not finy any ACPI SRAT memory areas.\n");
@@ -244,115 +255,19 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
244 printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", 255 printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
245 j, chunk->nid, chunk->start_pfn, chunk->end_pfn); 256 j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
246 node_read_chunk(chunk->nid, chunk); 257 node_read_chunk(chunk->nid, chunk);
247 add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn); 258 e820_register_active_regions(chunk->nid, chunk->start_pfn,
259 min(chunk->end_pfn, max_pfn));
248 } 260 }
249 261
250 for_each_online_node(nid) { 262 for_each_online_node(nid) {
251 unsigned long start = node_start_pfn[nid]; 263 unsigned long start = node_start_pfn[nid];
252 unsigned long end = node_end_pfn[nid]; 264 unsigned long end = min(node_end_pfn[nid], max_pfn);
253 265
254 memory_present(nid, start, end); 266 memory_present(nid, start, end);
255 node_remap_size[nid] = node_memmap_size_bytes(nid, start, end); 267 node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);
256 } 268 }
257 return 1; 269 return 1;
258out_fail: 270out_fail:
259 return 0;
260}
261
262struct acpi_static_rsdt {
263 struct acpi_table_rsdt table;
264 u32 padding[7]; /* Allow for 7 more table entries */
265};
266
267int __init get_memcfg_from_srat(void)
268{
269 struct acpi_table_header *header = NULL;
270 struct acpi_table_rsdp *rsdp = NULL;
271 struct acpi_table_rsdt *rsdt = NULL;
272 acpi_native_uint rsdp_address = 0;
273 struct acpi_static_rsdt saved_rsdt;
274 int tables = 0;
275 int i = 0;
276
277 rsdp_address = acpi_os_get_root_pointer();
278 if (!rsdp_address) {
279 printk("%s: System description tables not found\n",
280 __func__);
281 goto out_err;
282 }
283
284 printk("%s: assigning address to rsdp\n", __func__);
285 rsdp = (struct acpi_table_rsdp *)(u32)rsdp_address;
286 if (!rsdp) {
287 printk("%s: Didn't find ACPI root!\n", __func__);
288 goto out_err;
289 }
290
291 printk(KERN_INFO "%.8s v%d [%.6s]\n", rsdp->signature, rsdp->revision,
292 rsdp->oem_id);
293
294 if (strncmp(rsdp->signature, ACPI_SIG_RSDP,strlen(ACPI_SIG_RSDP))) {
295 printk(KERN_WARNING "%s: RSDP table signature incorrect\n", __func__);
296 goto out_err;
297 }
298
299 rsdt = (struct acpi_table_rsdt *)
300 early_ioremap(rsdp->rsdt_physical_address, sizeof(struct acpi_table_rsdt));
301
302 if (!rsdt) {
303 printk(KERN_WARNING
304 "%s: ACPI: Invalid root system description tables (RSDT)\n",
305 __func__);
306 goto out_err;
307 }
308
309 header = &rsdt->header;
310
311 if (strncmp(header->signature, ACPI_SIG_RSDT, strlen(ACPI_SIG_RSDT))) {
312 printk(KERN_WARNING "ACPI: RSDT signature incorrect\n");
313 goto out_err;
314 }
315
316 /*
317 * The number of tables is computed by taking the
318 * size of all entries (header size minus total
319 * size of RSDT) divided by the size of each entry
320 * (4-byte table pointers).
321 */
322 tables = (header->length - sizeof(struct acpi_table_header)) / 4;
323
324 if (!tables)
325 goto out_err;
326
327 memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt));
328
329 if (saved_rsdt.table.header.length > sizeof(saved_rsdt)) {
330 printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n",
331 saved_rsdt.table.header.length);
332 goto out_err;
333 }
334
335 printk("Begin SRAT table scan....\n");
336
337 for (i = 0; i < tables; i++) {
338 /* Map in header, then map in full table length. */
339 header = (struct acpi_table_header *)
340 early_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header));
341 if (!header)
342 break;
343 header = (struct acpi_table_header *)
344 early_ioremap(saved_rsdt.table.table_offset_entry[i], header->length);
345 if (!header)
346 break;
347
348 if (strncmp((char *) &header->signature, ACPI_SIG_SRAT, 4))
349 continue;
350
351 /* we've found the srat table. don't need to look at any more tables */
352 return acpi20_parse_srat((struct acpi_table_srat *)header);
353 }
354out_err:
355 remove_all_active_ranges();
356 printk("failed to get NUMA memory information from SRAT table\n"); 271 printk("failed to get NUMA memory information from SRAT table\n");
357 return 0; 272 return 0;
358} 273}
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index ae751094eba..d67ce5f044b 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -36,7 +36,9 @@ static struct rio_table_hdr *rio_table_hdr __initdata;
36static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; 36static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
37static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; 37static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata;
38 38
39#ifndef CONFIG_X86_NUMAQ
39static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata; 40static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata;
41#endif
40 42
41static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) 43static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
42{ 44{
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index abbf199adeb..1106fac6024 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -2,7 +2,7 @@
2 2
3#include <asm/trampoline.h> 3#include <asm/trampoline.h>
4 4
5/* ready for x86_64, no harm for x86, since it will overwrite after alloc */ 5/* ready for x86_64 and x86 */
6unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); 6unsigned char *trampoline_base = __va(TRAMPOLINE_BASE);
7 7
8/* 8/*
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 5c7e2fd5207..e72cf0793fb 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -835,7 +835,7 @@ static __init char *lguest_memory_setup(void)
835 835
836 /* The Linux bootloader header contains an "e820" memory map: the 836 /* The Linux bootloader header contains an "e820" memory map: the
837 * Launcher populated the first entry with our memory limit. */ 837 * Launcher populated the first entry with our memory limit. */
838 add_memory_region(boot_params.e820_map[0].addr, 838 e820_add_region(boot_params.e820_map[0].addr,
839 boot_params.e820_map[0].size, 839 boot_params.e820_map[0].size,
840 boot_params.e820_map[0].type); 840 boot_params.e820_map[0].type);
841 841
@@ -1012,6 +1012,7 @@ __init void lguest_init(void)
1012 * clobbered. The Launcher places our initial pagetables somewhere at 1012 * clobbered. The Launcher places our initial pagetables somewhere at
1013 * the top of our physical memory, so we don't need extra space: set 1013 * the top of our physical memory, so we don't need extra space: set
1014 * init_pg_tables_end to the end of the kernel. */ 1014 * init_pg_tables_end to the end of the kernel. */
1015 init_pg_tables_start = __pa(pg0);
1015 init_pg_tables_end = __pa(pg0); 1016 init_pg_tables_end = __pa(pg0);
1016 1017
1017 /* Load the %fs segment register (the per-cpu segment register) with 1018 /* Load the %fs segment register (the per-cpu segment register) with
@@ -1065,9 +1066,9 @@ __init void lguest_init(void)
1065 pm_power_off = lguest_power_off; 1066 pm_power_off = lguest_power_off;
1066 machine_ops.restart = lguest_restart; 1067 machine_ops.restart = lguest_restart;
1067 1068
1068 /* Now we're set up, call start_kernel() in init/main.c and we proceed 1069 /* Now we're set up, call i386_start_kernel() in head32.c and we proceed
1069 * to boot as normal. It never returns. */ 1070 * to boot as normal. It never returns. */
1070 start_kernel(); 1071 i386_start_kernel();
1071} 1072}
1072/* 1073/*
1073 * This marks the end of stage II of our journey, The Guest. 1074 * This marks the end of stage II of our journey, The Guest.
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 0c28a071824..2f5e277686b 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -142,45 +142,3 @@ static int __init print_ipi_mode(void)
142 142
143late_initcall(print_ipi_mode); 143late_initcall(print_ipi_mode);
144 144
145/**
146 * machine_specific_memory_setup - Hook for machine specific memory setup.
147 *
148 * Description:
149 * This is included late in kernel/setup.c so that it can make
150 * use of all of the static functions.
151 **/
152
153char * __init machine_specific_memory_setup(void)
154{
155 char *who;
156
157
158 who = "BIOS-e820";
159
160 /*
161 * Try to copy the BIOS-supplied E820-map.
162 *
163 * Otherwise fake a memory map; one section from 0k->640k,
164 * the next section from 1mb->appropriate_mem_k
165 */
166 sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
167 if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries)
168 < 0) {
169 unsigned long mem_size;
170
171 /* compare results from other methods and take the greater */
172 if (boot_params.alt_mem_k
173 < boot_params.screen_info.ext_mem_k) {
174 mem_size = boot_params.screen_info.ext_mem_k;
175 who = "BIOS-88";
176 } else {
177 mem_size = boot_params.alt_mem_k;
178 who = "BIOS-e801";
179 }
180
181 e820.nr_map = 0;
182 add_memory_region(0, LOWMEMSIZE(), E820_RAM);
183 add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
184 }
185 return who;
186}
diff --git a/arch/x86/mach-es7000/Makefile b/arch/x86/mach-es7000/Makefile
index 69dd4da218d..3ef8b43b62f 100644
--- a/arch/x86/mach-es7000/Makefile
+++ b/arch/x86/mach-es7000/Makefile
@@ -3,4 +3,3 @@
3# 3#
4 4
5obj-$(CONFIG_X86_ES7000) := es7000plat.o 5obj-$(CONFIG_X86_ES7000) := es7000plat.o
6obj-$(CONFIG_X86_GENERICARCH) := es7000plat.o
diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/mach-es7000/es7000plat.c
index f5d6f7d8b86..4354ce80488 100644
--- a/arch/x86/mach-es7000/es7000plat.c
+++ b/arch/x86/mach-es7000/es7000plat.c
@@ -52,6 +52,8 @@ static struct mip_reg *host_reg;
52static int mip_port; 52static int mip_port;
53static unsigned long mip_addr, host_addr; 53static unsigned long mip_addr, host_addr;
54 54
55int es7000_plat;
56
55/* 57/*
56 * GSI override for ES7000 platforms. 58 * GSI override for ES7000 platforms.
57 */ 59 */
@@ -175,53 +177,6 @@ find_unisys_acpi_oem_table(unsigned long *oem_addr)
175} 177}
176#endif 178#endif
177 179
178/*
179 * This file also gets compiled if CONFIG_X86_GENERICARCH is set. Generic
180 * arch already has got following function definitions (asm-generic/es7000.c)
181 * hence no need to define these for that case.
182 */
183#ifndef CONFIG_X86_GENERICARCH
184void es7000_sw_apic(void);
185void __init enable_apic_mode(void)
186{
187 es7000_sw_apic();
188 return;
189}
190
191__init int mps_oem_check(struct mp_config_table *mpc, char *oem,
192 char *productid)
193{
194 if (mpc->mpc_oemptr) {
195 struct mp_config_oemtable *oem_table =
196 (struct mp_config_oemtable *)mpc->mpc_oemptr;
197 if (!strncmp(oem, "UNISYS", 6))
198 return parse_unisys_oem((char *)oem_table);
199 }
200 return 0;
201}
202#ifdef CONFIG_ACPI
203/* Hook from generic ACPI tables.c */
204int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
205{
206 unsigned long oem_addr;
207 if (!find_unisys_acpi_oem_table(&oem_addr)) {
208 if (es7000_check_dsdt())
209 return parse_unisys_oem((char *)oem_addr);
210 else {
211 setup_unisys();
212 return 1;
213 }
214 }
215 return 0;
216}
217#else
218int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
219{
220 return 0;
221}
222#endif
223#endif /* COFIG_X86_GENERICARCH */
224
225static void 180static void
226es7000_spin(int n) 181es7000_spin(int n)
227{ 182{
diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile
index 19d6d407737..0dbd7803a1d 100644
--- a/arch/x86/mach-generic/Makefile
+++ b/arch/x86/mach-generic/Makefile
@@ -2,7 +2,11 @@
2# Makefile for the generic architecture 2# Makefile for the generic architecture
3# 3#
4 4
5EXTRA_CFLAGS := -Iarch/x86/kernel 5EXTRA_CFLAGS := -Iarch/x86/kernel
6 6
7obj-y := probe.o summit.o bigsmp.o es7000.o default.o 7obj-y := probe.o default.o
8obj-y += ../../x86/mach-es7000/ 8obj-$(CONFIG_X86_NUMAQ) += numaq.o
9obj-$(CONFIG_X86_SUMMIT) += summit.o
10obj-$(CONFIG_X86_BIGSMP) += bigsmp.o
11obj-$(CONFIG_X86_ES7000) += es7000.o
12obj-$(CONFIG_X86_ES7000) += ../../x86/mach-es7000/
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 95fc463056d..59d77171455 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -23,10 +23,8 @@ static int dmi_bigsmp; /* can be set by dmi scanners */
23 23
24static int hp_ht_bigsmp(const struct dmi_system_id *d) 24static int hp_ht_bigsmp(const struct dmi_system_id *d)
25{ 25{
26#ifdef CONFIG_X86_GENERICARCH
27 printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); 26 printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident);
28 dmi_bigsmp = 1; 27 dmi_bigsmp = 1;
29#endif
30 return 0; 28 return 0;
31} 29}
32 30
@@ -48,7 +46,7 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
48static int probe_bigsmp(void) 46static int probe_bigsmp(void)
49{ 47{
50 if (def_to_bigsmp) 48 if (def_to_bigsmp)
51 dmi_bigsmp = 1; 49 dmi_bigsmp = 1;
52 else 50 else
53 dmi_check_system(bigsmp_dmi_table); 51 dmi_check_system(bigsmp_dmi_table);
54 return dmi_bigsmp; 52 return dmi_bigsmp;
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
new file mode 100644
index 00000000000..8091e68764c
--- /dev/null
+++ b/arch/x86/mach-generic/numaq.c
@@ -0,0 +1,41 @@
1/*
2 * APIC driver for the IBM NUMAQ chipset.
3 */
4#define APIC_DEFINITION 1
5#include <linux/threads.h>
6#include <linux/cpumask.h>
7#include <linux/smp.h>
8#include <asm/mpspec.h>
9#include <asm/genapic.h>
10#include <asm/fixmap.h>
11#include <asm/apicdef.h>
12#include <linux/kernel.h>
13#include <linux/string.h>
14#include <linux/init.h>
15#include <asm/mach-numaq/mach_apic.h>
16#include <asm/mach-numaq/mach_apicdef.h>
17#include <asm/mach-numaq/mach_ipi.h>
18#include <asm/mach-numaq/mach_mpparse.h>
19#include <asm/mach-numaq/mach_wakecpu.h>
20#include <asm/numaq.h>
21
22static int mps_oem_check(struct mp_config_table *mpc, char *oem,
23 char *productid)
24{
25 numaq_mps_oem_check(mpc, oem, productid);
26 return found_numaq;
27}
28
29static int probe_numaq(void)
30{
31 /* already know from get_memcfg_numaq() */
32 return found_numaq;
33}
34
35/* Hook from generic ACPI tables.c */
36static int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
37{
38 return 0;
39}
40
41struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);
diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c
index c5ae751b994..5a7e4619e1c 100644
--- a/arch/x86/mach-generic/probe.c
+++ b/arch/x86/mach-generic/probe.c
@@ -16,6 +16,7 @@
16#include <asm/apicdef.h> 16#include <asm/apicdef.h>
17#include <asm/genapic.h> 17#include <asm/genapic.h>
18 18
19extern struct genapic apic_numaq;
19extern struct genapic apic_summit; 20extern struct genapic apic_summit;
20extern struct genapic apic_bigsmp; 21extern struct genapic apic_bigsmp;
21extern struct genapic apic_es7000; 22extern struct genapic apic_es7000;
@@ -24,9 +25,18 @@ extern struct genapic apic_default;
24struct genapic *genapic = &apic_default; 25struct genapic *genapic = &apic_default;
25 26
26static struct genapic *apic_probe[] __initdata = { 27static struct genapic *apic_probe[] __initdata = {
28#ifdef CONFIG_X86_NUMAQ
29 &apic_numaq,
30#endif
31#ifdef CONFIG_X86_SUMMIT
27 &apic_summit, 32 &apic_summit,
33#endif
34#ifdef CONFIG_X86_BIGSMP
28 &apic_bigsmp, 35 &apic_bigsmp,
36#endif
37#ifdef CONFIG_X86_ES7000
29 &apic_es7000, 38 &apic_es7000,
39#endif
30 &apic_default, /* must be last */ 40 &apic_default, /* must be last */
31 NULL, 41 NULL,
32}; 42};
@@ -54,6 +64,7 @@ early_param("apic", parse_apic);
54 64
55void __init generic_bigsmp_probe(void) 65void __init generic_bigsmp_probe(void)
56{ 66{
67#ifdef CONFIG_X86_BIGSMP
57 /* 68 /*
58 * This routine is used to switch to bigsmp mode when 69 * This routine is used to switch to bigsmp mode when
59 * - There is no apic= option specified by the user 70 * - There is no apic= option specified by the user
@@ -67,6 +78,7 @@ void __init generic_bigsmp_probe(void)
67 printk(KERN_INFO "Overriding APIC driver with %s\n", 78 printk(KERN_INFO "Overriding APIC driver with %s\n",
68 genapic->name); 79 genapic->name);
69 } 80 }
81#endif
70} 82}
71 83
72void __init generic_apic_probe(void) 84void __init generic_apic_probe(void)
@@ -88,7 +100,8 @@ void __init generic_apic_probe(void)
88 100
89/* These functions can switch the APIC even after the initial ->probe() */ 101/* These functions can switch the APIC even after the initial ->probe() */
90 102
91int __init mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid) 103int __init mps_oem_check(struct mp_config_table *mpc, char *oem,
104 char *productid)
92{ 105{
93 int i; 106 int i;
94 for (i = 0; apic_probe[i]; ++i) { 107 for (i = 0; apic_probe[i]; ++i) {
diff --git a/arch/x86/mach-visws/mpparse.c b/arch/x86/mach-visws/mpparse.c
index 57484e91ab9..a2fb78c0d15 100644
--- a/arch/x86/mach-visws/mpparse.c
+++ b/arch/x86/mach-visws/mpparse.c
@@ -8,11 +8,6 @@
8#include "cobalt.h" 8#include "cobalt.h"
9#include "mach_apic.h" 9#include "mach_apic.h"
10 10
11/* Have we found an MP table */
12int smp_found_config;
13
14int pic_mode;
15
16extern unsigned int __cpuinitdata maxcpus; 11extern unsigned int __cpuinitdata maxcpus;
17 12
18/* 13/*
@@ -76,7 +71,9 @@ void __init find_smp_config(void)
76 if (ncpus > maxcpus) 71 if (ncpus > maxcpus)
77 ncpus = maxcpus; 72 ncpus = maxcpus;
78 73
74#ifdef CONFIG_X86_LOCAL_APIC
79 smp_found_config = 1; 75 smp_found_config = 1;
76#endif
80 while (ncpus--) 77 while (ncpus--)
81 MP_processor_info(mp++); 78 MP_processor_info(mp++);
82 79
diff --git a/arch/x86/mach-visws/setup.c b/arch/x86/mach-visws/setup.c
index de4c9dbd086..d67868ec9b7 100644
--- a/arch/x86/mach-visws/setup.c
+++ b/arch/x86/mach-visws/setup.c
@@ -175,9 +175,9 @@ char * __init machine_specific_memory_setup(void)
175 sgivwfb_mem_size &= ~((1 << 20) - 1); 175 sgivwfb_mem_size &= ~((1 << 20) - 1);
176 sgivwfb_mem_phys = mem_size - gfx_mem_size; 176 sgivwfb_mem_phys = mem_size - gfx_mem_size;
177 177
178 add_memory_region(0, LOWMEMSIZE(), E820_RAM); 178 e820_add_region(0, LOWMEMSIZE(), E820_RAM);
179 add_memory_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM); 179 e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM);
180 add_memory_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED); 180 e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED);
181 181
182 return "PROM"; 182 return "PROM";
183} 183}
diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c
index 5ae5466b9eb..6bbdd633864 100644
--- a/arch/x86/mach-voyager/setup.c
+++ b/arch/x86/mach-voyager/setup.c
@@ -62,6 +62,7 @@ void __init time_init_hook(void)
62char *__init machine_specific_memory_setup(void) 62char *__init machine_specific_memory_setup(void)
63{ 63{
64 char *who; 64 char *who;
65 int new_nr;
65 66
66 who = "NOT VOYAGER"; 67 who = "NOT VOYAGER";
67 68
@@ -73,7 +74,7 @@ char *__init machine_specific_memory_setup(void)
73 74
74 e820.nr_map = 0; 75 e820.nr_map = 0;
75 for (i = 0; voyager_memory_detect(i, &addr, &length); i++) { 76 for (i = 0; voyager_memory_detect(i, &addr, &length); i++) {
76 add_memory_region(addr, length, E820_RAM); 77 e820_add_region(addr, length, E820_RAM);
77 } 78 }
78 return who; 79 return who;
79 } else if (voyager_level == 4) { 80 } else if (voyager_level == 4) {
@@ -91,43 +92,17 @@ char *__init machine_specific_memory_setup(void)
91 tom = (boot_params.screen_info.ext_mem_k) << 10; 92 tom = (boot_params.screen_info.ext_mem_k) << 10;
92 } 93 }
93 who = "Voyager-TOM"; 94 who = "Voyager-TOM";
94 add_memory_region(0, 0x9f000, E820_RAM); 95 e820_add_region(0, 0x9f000, E820_RAM);
95 /* map from 1M to top of memory */ 96 /* map from 1M to top of memory */
96 add_memory_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024, 97 e820_add_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024,
97 E820_RAM); 98 E820_RAM);
98 /* FIXME: Should check the ASICs to see if I need to 99 /* FIXME: Should check the ASICs to see if I need to
99 * take out the 8M window. Just do it at the moment 100 * take out the 8M window. Just do it at the moment
100 * */ 101 * */
101 add_memory_region(8 * 1024 * 1024, 8 * 1024 * 1024, 102 e820_add_region(8 * 1024 * 1024, 8 * 1024 * 1024,
102 E820_RESERVED); 103 E820_RESERVED);
103 return who; 104 return who;
104 } 105 }
105 106
106 who = "BIOS-e820"; 107 return default_machine_specific_memory_setup();
107
108 /*
109 * Try to copy the BIOS-supplied E820-map.
110 *
111 * Otherwise fake a memory map; one section from 0k->640k,
112 * the next section from 1mb->appropriate_mem_k
113 */
114 sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
115 if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries)
116 < 0) {
117 unsigned long mem_size;
118
119 /* compare results from other methods and take the greater */
120 if (boot_params.alt_mem_k < boot_params.screen_info.ext_mem_k) {
121 mem_size = boot_params.screen_info.ext_mem_k;
122 who = "BIOS-88";
123 } else {
124 mem_size = boot_params.alt_mem_k;
125 who = "BIOS-e801";
126 }
127
128 e820.nr_map = 0;
129 add_memory_region(0, LOWMEMSIZE(), E820_RAM);
130 add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
131 }
132 return who;
133} 108}
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 8acbf0cdf1a..8dedd01e909 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -59,11 +59,6 @@ __u32 voyager_quad_processors = 0;
59 * activity count. Finally exported by i386_ksyms.c */ 59 * activity count. Finally exported by i386_ksyms.c */
60static int voyager_extended_cpus = 1; 60static int voyager_extended_cpus = 1;
61 61
62/* Have we found an SMP box - used by time.c to do the profiling
63 interrupt for timeslicing; do not set to 1 until the per CPU timer
64 interrupt is active */
65int smp_found_config = 0;
66
67/* Used for the invalidate map that's also checked in the spinlock */ 62/* Used for the invalidate map that's also checked in the spinlock */
68static volatile unsigned long smp_invalidate_needed; 63static volatile unsigned long smp_invalidate_needed;
69 64
@@ -1137,15 +1132,6 @@ void flush_tlb_all(void)
1137 on_each_cpu(do_flush_tlb_all, 0, 1, 1); 1132 on_each_cpu(do_flush_tlb_all, 0, 1, 1);
1138} 1133}
1139 1134
1140/* used to set up the trampoline for other CPUs when the memory manager
1141 * is sorted out */
1142void __init smp_alloc_memory(void)
1143{
1144 trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
1145 if (__pa(trampoline_base) >= 0x93000)
1146 BUG();
1147}
1148
1149/* send a reschedule CPI to one CPU by physical CPU number*/ 1135/* send a reschedule CPI to one CPU by physical CPU number*/
1150static void voyager_smp_send_reschedule(int cpu) 1136static void voyager_smp_send_reschedule(int cpu)
1151{ 1137{
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 8b4eac0ca07..a2f73ba42b8 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -38,6 +38,7 @@
38#include <asm/setup.h> 38#include <asm/setup.h>
39#include <asm/mmzone.h> 39#include <asm/mmzone.h>
40#include <asm/bios_ebda.h> 40#include <asm/bios_ebda.h>
41#include <asm/proto.h>
41 42
42struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; 43struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
43EXPORT_SYMBOL(node_data); 44EXPORT_SYMBOL(node_data);
@@ -59,14 +60,14 @@ unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly;
59/* 60/*
60 * 4) physnode_map - the mapping between a pfn and owning node 61 * 4) physnode_map - the mapping between a pfn and owning node
61 * physnode_map keeps track of the physical memory layout of a generic 62 * physnode_map keeps track of the physical memory layout of a generic
62 * numa node on a 256Mb break (each element of the array will 63 * numa node on a 64Mb break (each element of the array will
63 * represent 256Mb of memory and will be marked by the node id. so, 64 * represent 64Mb of memory and will be marked by the node id. so,
64 * if the first gig is on node 0, and the second gig is on node 1 65 * if the first gig is on node 0, and the second gig is on node 1
65 * physnode_map will contain: 66 * physnode_map will contain:
66 * 67 *
67 * physnode_map[0-3] = 0; 68 * physnode_map[0-15] = 0;
68 * physnode_map[4-7] = 1; 69 * physnode_map[16-31] = 1;
69 * physnode_map[8- ] = -1; 70 * physnode_map[32- ] = -1;
70 */ 71 */
71s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; 72s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1};
72EXPORT_SYMBOL(physnode_map); 73EXPORT_SYMBOL(physnode_map);
@@ -81,9 +82,9 @@ void memory_present(int nid, unsigned long start, unsigned long end)
81 printk(KERN_DEBUG " "); 82 printk(KERN_DEBUG " ");
82 for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) { 83 for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) {
83 physnode_map[pfn / PAGES_PER_ELEMENT] = nid; 84 physnode_map[pfn / PAGES_PER_ELEMENT] = nid;
84 printk("%ld ", pfn); 85 printk(KERN_CONT "%ld ", pfn);
85 } 86 }
86 printk("\n"); 87 printk(KERN_CONT "\n");
87} 88}
88 89
89unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, 90unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
@@ -99,7 +100,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
99#endif 100#endif
100 101
101extern unsigned long find_max_low_pfn(void); 102extern unsigned long find_max_low_pfn(void);
102extern void add_one_highpage_init(struct page *, int, int);
103extern unsigned long highend_pfn, highstart_pfn; 103extern unsigned long highend_pfn, highstart_pfn;
104 104
105#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) 105#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
@@ -119,11 +119,11 @@ int __init get_memcfg_numa_flat(void)
119{ 119{
120 printk("NUMA - single node, flat memory mode\n"); 120 printk("NUMA - single node, flat memory mode\n");
121 121
122 /* Run the memory configuration and find the top of memory. */
123 propagate_e820_map();
124 node_start_pfn[0] = 0; 122 node_start_pfn[0] = 0;
125 node_end_pfn[0] = max_pfn; 123 node_end_pfn[0] = max_pfn;
124 e820_register_active_regions(0, 0, max_pfn);
126 memory_present(0, 0, max_pfn); 125 memory_present(0, 0, max_pfn);
126 node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn);
127 127
128 /* Indicate there is one node available. */ 128 /* Indicate there is one node available. */
129 nodes_clear(node_online_map); 129 nodes_clear(node_online_map);
@@ -159,9 +159,17 @@ static void __init allocate_pgdat(int nid)
159 if (nid && node_has_online_mem(nid) && node_remap_start_vaddr[nid]) 159 if (nid && node_has_online_mem(nid) && node_remap_start_vaddr[nid])
160 NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; 160 NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid];
161 else { 161 else {
162 NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn)); 162 unsigned long pgdat_phys;
163 min_low_pfn += PFN_UP(sizeof(pg_data_t)); 163 pgdat_phys = find_e820_area(min_low_pfn<<PAGE_SHIFT,
164 (nid ? max_low_pfn:max_pfn_mapped)<<PAGE_SHIFT,
165 sizeof(pg_data_t),
166 PAGE_SIZE);
167 NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT));
168 reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t),
169 "NODE_DATA");
164 } 170 }
171 printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n",
172 nid, (unsigned long)NODE_DATA(nid));
165} 173}
166 174
167/* 175/*
@@ -199,8 +207,12 @@ void __init remap_numa_kva(void)
199 int node; 207 int node;
200 208
201 for_each_online_node(node) { 209 for_each_online_node(node) {
210 printk(KERN_DEBUG "remap_numa_kva: node %d\n", node);
202 for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { 211 for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
203 vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); 212 vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
213 printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n",
214 (unsigned long)vaddr,
215 node_remap_start_pfn[node] + pfn);
204 set_pmd_pfn((ulong) vaddr, 216 set_pmd_pfn((ulong) vaddr,
205 node_remap_start_pfn[node] + pfn, 217 node_remap_start_pfn[node] + pfn,
206 PAGE_KERNEL_LARGE); 218 PAGE_KERNEL_LARGE);
@@ -212,17 +224,21 @@ static unsigned long calculate_numa_remap_pages(void)
212{ 224{
213 int nid; 225 int nid;
214 unsigned long size, reserve_pages = 0; 226 unsigned long size, reserve_pages = 0;
215 unsigned long pfn;
216 227
217 for_each_online_node(nid) { 228 for_each_online_node(nid) {
218 unsigned old_end_pfn = node_end_pfn[nid]; 229 u64 node_kva_target;
230 u64 node_kva_final;
219 231
220 /* 232 /*
221 * The acpi/srat node info can show hot-add memroy zones 233 * The acpi/srat node info can show hot-add memroy zones
222 * where memory could be added but not currently present. 234 * where memory could be added but not currently present.
223 */ 235 */
236 printk("node %d pfn: [%lx - %lx]\n",
237 nid, node_start_pfn[nid], node_end_pfn[nid]);
224 if (node_start_pfn[nid] > max_pfn) 238 if (node_start_pfn[nid] > max_pfn)
225 continue; 239 continue;
240 if (!node_end_pfn[nid])
241 continue;
226 if (node_end_pfn[nid] > max_pfn) 242 if (node_end_pfn[nid] > max_pfn)
227 node_end_pfn[nid] = max_pfn; 243 node_end_pfn[nid] = max_pfn;
228 244
@@ -234,39 +250,45 @@ static unsigned long calculate_numa_remap_pages(void)
234 /* now the roundup is correct, convert to PAGE_SIZE pages */ 250 /* now the roundup is correct, convert to PAGE_SIZE pages */
235 size = size * PTRS_PER_PTE; 251 size = size * PTRS_PER_PTE;
236 252
237 /* 253 node_kva_target = round_down(node_end_pfn[nid] - size,
238 * Validate the region we are allocating only contains valid 254 PTRS_PER_PTE);
239 * pages. 255 node_kva_target <<= PAGE_SHIFT;
240 */ 256 do {
241 for (pfn = node_end_pfn[nid] - size; 257 node_kva_final = find_e820_area(node_kva_target,
242 pfn < node_end_pfn[nid]; pfn++) 258 ((u64)node_end_pfn[nid])<<PAGE_SHIFT,
243 if (!page_is_ram(pfn)) 259 ((u64)size)<<PAGE_SHIFT,
244 break; 260 LARGE_PAGE_BYTES);
261 node_kva_target -= LARGE_PAGE_BYTES;
262 } while (node_kva_final == -1ULL &&
263 (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid]));
264
265 if (node_kva_final == -1ULL)
266 panic("Can not get kva ram\n");
245 267
246 if (pfn != node_end_pfn[nid])
247 size = 0;
248
249 printk("Reserving %ld pages of KVA for lmem_map of node %d\n",
250 size, nid);
251 node_remap_size[nid] = size; 268 node_remap_size[nid] = size;
252 node_remap_offset[nid] = reserve_pages; 269 node_remap_offset[nid] = reserve_pages;
253 reserve_pages += size; 270 reserve_pages += size;
254 printk("Shrinking node %d from %ld pages to %ld pages\n", 271 printk("Reserving %ld pages of KVA for lmem_map of node %d at %llx\n",
255 nid, node_end_pfn[nid], node_end_pfn[nid] - size); 272 size, nid, node_kva_final>>PAGE_SHIFT);
256 273
257 if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) { 274 /*
258 /* 275 * prevent kva address below max_low_pfn want it on system
259 * Align node_end_pfn[] and node_remap_start_pfn[] to 276 * with less memory later.
260 * pmd boundary. remap_numa_kva will barf otherwise. 277 * layout will be: KVA address , KVA RAM
261 */ 278 *
262 printk("Shrinking node %d further by %ld pages for proper alignment\n", 279 * we are supposed to only record the one less then max_low_pfn
263 nid, node_end_pfn[nid] & (PTRS_PER_PTE-1)); 280 * but we could have some hole in high memory, and it will only
264 size += node_end_pfn[nid] & (PTRS_PER_PTE-1); 281 * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide
265 } 282 * to use it as free.
283 * So reserve_early here, hope we don't run out of that array
284 */
285 reserve_early(node_kva_final,
286 node_kva_final+(((u64)size)<<PAGE_SHIFT),
287 "KVA RAM");
266 288
267 node_end_pfn[nid] -= size; 289 node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT;
268 node_remap_start_pfn[nid] = node_end_pfn[nid]; 290 remove_active_range(nid, node_remap_start_pfn[nid],
269 shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]); 291 node_remap_start_pfn[nid] + size);
270 } 292 }
271 printk("Reserving total of %ld pages for numa KVA remap\n", 293 printk("Reserving total of %ld pages for numa KVA remap\n",
272 reserve_pages); 294 reserve_pages);
@@ -284,8 +306,7 @@ static void init_remap_allocator(int nid)
284 306
285 printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, 307 printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
286 (ulong) node_remap_start_vaddr[nid], 308 (ulong) node_remap_start_vaddr[nid],
287 (ulong) pfn_to_kaddr(highstart_pfn 309 (ulong) node_remap_end_vaddr[nid]);
288 + node_remap_offset[nid] + node_remap_size[nid]));
289} 310}
290 311
291extern void setup_bootmem_allocator(void); 312extern void setup_bootmem_allocator(void);
@@ -293,7 +314,7 @@ unsigned long __init setup_memory(void)
293{ 314{
294 int nid; 315 int nid;
295 unsigned long system_start_pfn, system_max_low_pfn; 316 unsigned long system_start_pfn, system_max_low_pfn;
296 unsigned long wasted_pages; 317 long kva_target_pfn;
297 318
298 /* 319 /*
299 * When mapping a NUMA machine we allocate the node_mem_map arrays 320 * When mapping a NUMA machine we allocate the node_mem_map arrays
@@ -302,34 +323,38 @@ unsigned long __init setup_memory(void)
302 * this space and use it to adjust the boundary between ZONE_NORMAL 323 * this space and use it to adjust the boundary between ZONE_NORMAL
303 * and ZONE_HIGHMEM. 324 * and ZONE_HIGHMEM.
304 */ 325 */
326
327 /* call find_max_low_pfn at first, it could update max_pfn */
328 system_max_low_pfn = max_low_pfn = find_max_low_pfn();
329
330 remove_all_active_ranges();
305 get_memcfg_numa(); 331 get_memcfg_numa();
306 332
307 kva_pages = calculate_numa_remap_pages(); 333 kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE);
308 334
309 /* partially used pages are not usable - thus round upwards */ 335 /* partially used pages are not usable - thus round upwards */
310 system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); 336 system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
311 337
312 kva_start_pfn = find_max_low_pfn() - kva_pages; 338 kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
313 339 do {
314#ifdef CONFIG_BLK_DEV_INITRD 340 kva_start_pfn = find_e820_area(kva_target_pfn<<PAGE_SHIFT,
315 /* Numa kva area is below the initrd */ 341 max_low_pfn<<PAGE_SHIFT,
316 if (initrd_start) 342 kva_pages<<PAGE_SHIFT,
317 kva_start_pfn = PFN_DOWN(initrd_start - PAGE_OFFSET) 343 PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT;
318 - kva_pages; 344 kva_target_pfn -= PTRS_PER_PTE;
319#endif 345 } while (kva_start_pfn == -1UL && kva_target_pfn > min_low_pfn);
320 346
321 /* 347 if (kva_start_pfn == -1UL)
322 * We waste pages past at the end of the KVA for no good reason other 348 panic("Can not get kva space\n");
323 * than how it is located. This is bad.
324 */
325 wasted_pages = kva_start_pfn & (PTRS_PER_PTE-1);
326 kva_start_pfn -= wasted_pages;
327 kva_pages += wasted_pages;
328 349
329 system_max_low_pfn = max_low_pfn = find_max_low_pfn();
330 printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n", 350 printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n",
331 kva_start_pfn, max_low_pfn); 351 kva_start_pfn, max_low_pfn);
332 printk("max_pfn = %ld\n", max_pfn); 352 printk("max_pfn = %ld\n", max_pfn);
353
354 /* avoid clash with initrd */
355 reserve_early(kva_start_pfn<<PAGE_SHIFT,
356 (kva_start_pfn + kva_pages)<<PAGE_SHIFT,
357 "KVA PG");
333#ifdef CONFIG_HIGHMEM 358#ifdef CONFIG_HIGHMEM
334 highstart_pfn = highend_pfn = max_pfn; 359 highstart_pfn = highend_pfn = max_pfn;
335 if (max_pfn > system_max_low_pfn) 360 if (max_pfn > system_max_low_pfn)
@@ -365,16 +390,8 @@ unsigned long __init setup_memory(void)
365 return max_low_pfn; 390 return max_low_pfn;
366} 391}
367 392
368void __init numa_kva_reserve(void)
369{
370 if (kva_pages)
371 reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages),
372 BOOTMEM_DEFAULT);
373}
374
375void __init zone_sizes_init(void) 393void __init zone_sizes_init(void)
376{ 394{
377 int nid;
378 unsigned long max_zone_pfns[MAX_NR_ZONES]; 395 unsigned long max_zone_pfns[MAX_NR_ZONES];
379 memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); 396 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
380 max_zone_pfns[ZONE_DMA] = 397 max_zone_pfns[ZONE_DMA] =
@@ -384,27 +401,18 @@ void __init zone_sizes_init(void)
384 max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; 401 max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
385#endif 402#endif
386 403
387 /* If SRAT has not registered memory, register it now */
388 if (find_max_pfn_with_active_regions() == 0) {
389 for_each_online_node(nid) {
390 if (node_has_online_mem(nid))
391 add_active_range(nid, node_start_pfn[nid],
392 node_end_pfn[nid]);
393 }
394 }
395
396 free_area_init_nodes(max_zone_pfns); 404 free_area_init_nodes(max_zone_pfns);
397 return; 405 return;
398} 406}
399 407
400void __init set_highmem_pages_init(int bad_ppro) 408void __init set_highmem_pages_init(void)
401{ 409{
402#ifdef CONFIG_HIGHMEM 410#ifdef CONFIG_HIGHMEM
403 struct zone *zone; 411 struct zone *zone;
404 struct page *page; 412 int nid;
405 413
406 for_each_zone(zone) { 414 for_each_zone(zone) {
407 unsigned long node_pfn, zone_start_pfn, zone_end_pfn; 415 unsigned long zone_start_pfn, zone_end_pfn;
408 416
409 if (!is_highmem(zone)) 417 if (!is_highmem(zone))
410 continue; 418 continue;
@@ -412,16 +420,12 @@ void __init set_highmem_pages_init(int bad_ppro)
412 zone_start_pfn = zone->zone_start_pfn; 420 zone_start_pfn = zone->zone_start_pfn;
413 zone_end_pfn = zone_start_pfn + zone->spanned_pages; 421 zone_end_pfn = zone_start_pfn + zone->spanned_pages;
414 422
423 nid = zone_to_nid(zone);
415 printk("Initializing %s for node %d (%08lx:%08lx)\n", 424 printk("Initializing %s for node %d (%08lx:%08lx)\n",
416 zone->name, zone_to_nid(zone), 425 zone->name, nid, zone_start_pfn, zone_end_pfn);
417 zone_start_pfn, zone_end_pfn); 426
418 427 add_highpages_with_active_regions(nid, zone_start_pfn,
419 for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) { 428 zone_end_pfn);
420 if (!pfn_valid(node_pfn))
421 continue;
422 page = pfn_to_page(node_pfn);
423 add_one_highpage_init(page, node_pfn, bad_ppro);
424 }
425 } 429 }
426 totalram_pages += totalhigh_pages; 430 totalram_pages += totalhigh_pages;
427#endif 431#endif
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index d71be0eb013..65d55056b6e 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -225,13 +225,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
225 update_page_count(PG_LEVEL_4K, pages_4k); 225 update_page_count(PG_LEVEL_4K, pages_4k);
226} 226}
227 227
228static inline int page_kills_ppro(unsigned long pagenr)
229{
230 if (pagenr >= 0x70000 && pagenr <= 0x7003F)
231 return 1;
232 return 0;
233}
234
235/* 228/*
236 * devmem_is_allowed() checks to see if /dev/mem access to a certain address 229 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
237 * is valid. The argument is a physical page number. 230 * is valid. The argument is a physical page number.
@@ -292,29 +285,60 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
292 pkmap_page_table = pte; 285 pkmap_page_table = pte;
293} 286}
294 287
295void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) 288static void __init add_one_highpage_init(struct page *page, int pfn)
296{ 289{
297 if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) { 290 ClearPageReserved(page);
298 ClearPageReserved(page); 291 init_page_count(page);
299 init_page_count(page); 292 __free_page(page);
300 __free_page(page); 293 totalhigh_pages++;
301 totalhigh_pages++;
302 } else
303 SetPageReserved(page);
304} 294}
305 295
306#ifndef CONFIG_NUMA 296struct add_highpages_data {
307static void __init set_highmem_pages_init(int bad_ppro) 297 unsigned long start_pfn;
298 unsigned long end_pfn;
299};
300
301static void __init add_highpages_work_fn(unsigned long start_pfn,
302 unsigned long end_pfn, void *datax)
308{ 303{
309 int pfn; 304 int node_pfn;
305 struct page *page;
306 unsigned long final_start_pfn, final_end_pfn;
307 struct add_highpages_data *data;
310 308
311 for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) { 309 data = (struct add_highpages_data *)datax;
312 /* 310
313 * Holes under sparsemem might not have no mem_map[]: 311 final_start_pfn = max(start_pfn, data->start_pfn);
314 */ 312 final_end_pfn = min(end_pfn, data->end_pfn);
315 if (pfn_valid(pfn)) 313 if (final_start_pfn >= final_end_pfn)
316 add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); 314 return;
315
316 for (node_pfn = final_start_pfn; node_pfn < final_end_pfn;
317 node_pfn++) {
318 if (!pfn_valid(node_pfn))
319 continue;
320 page = pfn_to_page(node_pfn);
321 add_one_highpage_init(page, node_pfn);
317 } 322 }
323
324}
325
326void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
327 unsigned long end_pfn)
328{
329 struct add_highpages_data data;
330
331 data.start_pfn = start_pfn;
332 data.end_pfn = end_pfn;
333
334 work_with_active_regions(nid, add_highpages_work_fn, &data);
335}
336
337#ifndef CONFIG_NUMA
338static void __init set_highmem_pages_init(void)
339{
340 add_highpages_with_active_regions(0, highstart_pfn, highend_pfn);
341
318 totalram_pages += totalhigh_pages; 342 totalram_pages += totalhigh_pages;
319} 343}
320#endif /* !CONFIG_NUMA */ 344#endif /* !CONFIG_NUMA */
@@ -322,7 +346,7 @@ static void __init set_highmem_pages_init(int bad_ppro)
322#else 346#else
323# define kmap_init() do { } while (0) 347# define kmap_init() do { } while (0)
324# define permanent_kmaps_init(pgd_base) do { } while (0) 348# define permanent_kmaps_init(pgd_base) do { } while (0)
325# define set_highmem_pages_init(bad_ppro) do { } while (0) 349# define set_highmem_pages_init() do { } while (0)
326#endif /* CONFIG_HIGHMEM */ 350#endif /* CONFIG_HIGHMEM */
327 351
328pteval_t __PAGE_KERNEL = _PAGE_KERNEL; 352pteval_t __PAGE_KERNEL = _PAGE_KERNEL;
@@ -569,13 +593,11 @@ static struct kcore_list kcore_mem, kcore_vmalloc;
569void __init mem_init(void) 593void __init mem_init(void)
570{ 594{
571 int codesize, reservedpages, datasize, initsize; 595 int codesize, reservedpages, datasize, initsize;
572 int tmp, bad_ppro; 596 int tmp;
573 597
574#ifdef CONFIG_FLATMEM 598#ifdef CONFIG_FLATMEM
575 BUG_ON(!mem_map); 599 BUG_ON(!mem_map);
576#endif 600#endif
577 bad_ppro = ppro_with_ram_bug();
578
579 /* this will put all low memory onto the freelists */ 601 /* this will put all low memory onto the freelists */
580 totalram_pages += free_all_bootmem(); 602 totalram_pages += free_all_bootmem();
581 603
@@ -587,7 +609,7 @@ void __init mem_init(void)
587 if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) 609 if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
588 reservedpages++; 610 reservedpages++;
589 611
590 set_highmem_pages_init(bad_ppro); 612 set_highmem_pages_init();
591 613
592 codesize = (unsigned long) &_etext - (unsigned long) &_text; 614 codesize = (unsigned long) &_etext - (unsigned long) &_text;
593 datasize = (unsigned long) &_edata - (unsigned long) &_etext; 615 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
@@ -776,3 +798,9 @@ void free_initrd_mem(unsigned long start, unsigned long end)
776 free_init_pages("initrd memory", start, end); 798 free_init_pages("initrd memory", start, end);
777} 799}
778#endif 800#endif
801
802int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
803 int flags)
804{
805 return reserve_bootmem(phys, len, flags);
806}
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 48623ae628f..18c6a006e40 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -48,6 +48,18 @@
48#include <asm/numa.h> 48#include <asm/numa.h>
49#include <asm/cacheflush.h> 49#include <asm/cacheflush.h>
50 50
51/*
52 * PFN of last memory page.
53 */
54unsigned long end_pfn;
55
56/*
57 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
58 * The direct mapping extends to max_pfn_mapped, so that we can directly access
59 * apertures, ACPI and other tables without having to play with fixmaps.
60 */
61unsigned long max_pfn_mapped;
62
51static unsigned long dma_reserve __initdata; 63static unsigned long dma_reserve __initdata;
52 64
53DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 65DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -808,12 +820,14 @@ void free_initrd_mem(unsigned long start, unsigned long end)
808} 820}
809#endif 821#endif
810 822
811void __init reserve_bootmem_generic(unsigned long phys, unsigned len) 823int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
824 int flags)
812{ 825{
813#ifdef CONFIG_NUMA 826#ifdef CONFIG_NUMA
814 int nid, next_nid; 827 int nid, next_nid;
815#endif 828#endif
816 unsigned long pfn = phys >> PAGE_SHIFT; 829 unsigned long pfn = phys >> PAGE_SHIFT;
830 int ret;
817 831
818 if (pfn >= end_pfn) { 832 if (pfn >= end_pfn) {
819 /* 833 /*
@@ -821,11 +835,11 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
821 * firmware tables: 835 * firmware tables:
822 */ 836 */
823 if (pfn < max_pfn_mapped) 837 if (pfn < max_pfn_mapped)
824 return; 838 return -EFAULT;
825 839
826 printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n", 840 printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
827 phys, len); 841 phys, len);
828 return; 842 return -EFAULT;
829 } 843 }
830 844
831 /* Should check here against the e820 map to avoid double free */ 845 /* Should check here against the e820 map to avoid double free */
@@ -833,9 +847,13 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
833 nid = phys_to_nid(phys); 847 nid = phys_to_nid(phys);
834 next_nid = phys_to_nid(phys + len - 1); 848 next_nid = phys_to_nid(phys + len - 1);
835 if (nid == next_nid) 849 if (nid == next_nid)
836 reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT); 850 ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags);
837 else 851 else
838 reserve_bootmem(phys, len, BOOTMEM_DEFAULT); 852 ret = reserve_bootmem(phys, len, flags);
853
854 if (ret != 0)
855 return ret;
856
839#else 857#else
840 reserve_bootmem(phys, len, BOOTMEM_DEFAULT); 858 reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
841#endif 859#endif
@@ -844,6 +862,8 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
844 dma_reserve += len / PAGE_SIZE; 862 dma_reserve += len / PAGE_SIZE;
845 set_dma_reserve(dma_reserve); 863 set_dma_reserve(dma_reserve);
846 } 864 }
865
866 return 0;
847} 867}
848 868
849int kern_addr_valid(unsigned long addr) 869int kern_addr_valid(unsigned long addr)
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 0ea66b532c3..317573ec925 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -57,18 +57,22 @@ static __init void early_get_boot_cpu_id(void)
57 /* 57 /*
58 * Find possible boot-time SMP configuration: 58 * Find possible boot-time SMP configuration:
59 */ 59 */
60#ifdef CONFIG_X86_MPPARSE
60 early_find_smp_config(); 61 early_find_smp_config();
62#endif
61#ifdef CONFIG_ACPI 63#ifdef CONFIG_ACPI
62 /* 64 /*
63 * Read APIC information from ACPI tables. 65 * Read APIC information from ACPI tables.
64 */ 66 */
65 early_acpi_boot_init(); 67 early_acpi_boot_init();
66#endif 68#endif
69#ifdef CONFIG_X86_MPPARSE
67 /* 70 /*
68 * get boot-time SMP configuration: 71 * get boot-time SMP configuration:
69 */ 72 */
70 if (smp_found_config) 73 if (smp_found_config)
71 early_get_smp_config(); 74 early_get_smp_config();
75#endif
72 early_init_lapic_mapping(); 76 early_init_lapic_mapping();
73} 77}
74 78
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index c5066d519e5..afb07ffb931 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -233,7 +233,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
233 else 233 else
234 bootmap_start = round_up(start, PAGE_SIZE); 234 bootmap_start = round_up(start, PAGE_SIZE);
235 /* 235 /*
236 * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like 236 * SMP_CACHE_BYTES could be enough, but init_bootmem_node like
237 * to use that to align to PAGE_SIZE 237 * to use that to align to PAGE_SIZE
238 */ 238 */
239 bootmap = early_node_mem(nodeid, bootmap_start, end, 239 bootmap = early_node_mem(nodeid, bootmap_start, end,
diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32
index f647e7e56da..a34fbf55792 100644
--- a/arch/x86/pci/Makefile_32
+++ b/arch/x86/pci/Makefile_32
@@ -13,10 +13,11 @@ pci-y := fixup.o
13pci-$(CONFIG_ACPI) += acpi.o 13pci-$(CONFIG_ACPI) += acpi.o
14pci-y += legacy.o irq.o 14pci-y += legacy.o irq.o
15 15
16# Careful: VISWS and NUMAQ overrule the pci-y above. The colons are 16# Careful: VISWS overrule the pci-y above. The colons are
17# therefor correct. This needs a proper fix by distangling the code. 17# therefor correct. This needs a proper fix by distangling the code.
18pci-$(CONFIG_X86_VISWS) := visws.o fixup.o 18pci-$(CONFIG_X86_VISWS) := visws.o fixup.o
19pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o 19
20pci-$(CONFIG_X86_NUMAQ) += numa.o
20 21
21# Necessary for NUMAQ as well 22# Necessary for NUMAQ as well
22pci-$(CONFIG_NUMA) += mp_bus_to_node.o 23pci-$(CONFIG_NUMA) += mp_bus_to_node.o
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 15f505d3a78..d02c598451e 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -388,7 +388,7 @@ static int __init early_fill_mp_bus_info(void)
388 /* need to take out [0, TOM) for RAM*/ 388 /* need to take out [0, TOM) for RAM*/
389 address = MSR_K8_TOP_MEM1; 389 address = MSR_K8_TOP_MEM1;
390 rdmsrl(address, val); 390 rdmsrl(address, val);
391 end = (val & 0xffffff8000000ULL); 391 end = (val & 0xffffff800000ULL);
392 printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); 392 printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20);
393 if (end < (1ULL<<32)) 393 if (end < (1ULL<<32))
394 update_range(range, 0, end - 1); 394 update_range(range, 0, end - 1);
@@ -482,7 +482,7 @@ static int __init early_fill_mp_bus_info(void)
482 /* TOP_MEM2 */ 482 /* TOP_MEM2 */
483 address = MSR_K8_TOP_MEM2; 483 address = MSR_K8_TOP_MEM2;
484 rdmsrl(address, val); 484 rdmsrl(address, val);
485 end = (val & 0xffffff8000000ULL); 485 end = (val & 0xffffff800000ULL);
486 printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); 486 printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20);
487 update_range(range, 1ULL<<32, end - 1); 487 update_range(range, 1ULL<<32, end - 1);
488 } 488 }
diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c
index d9afbae5092..99f1ecd485b 100644
--- a/arch/x86/pci/numa.c
+++ b/arch/x86/pci/numa.c
@@ -6,45 +6,21 @@
6#include <linux/init.h> 6#include <linux/init.h>
7#include <linux/nodemask.h> 7#include <linux/nodemask.h>
8#include <mach_apic.h> 8#include <mach_apic.h>
9#include <asm/mpspec.h>
9#include "pci.h" 10#include "pci.h"
10 11
11#define XQUAD_PORTIO_BASE 0xfe400000 12#define XQUAD_PORTIO_BASE 0xfe400000
12#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ 13#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
13 14
14int mp_bus_id_to_node[MAX_MP_BUSSES];
15#define BUS2QUAD(global) (mp_bus_id_to_node[global]) 15#define BUS2QUAD(global) (mp_bus_id_to_node[global])
16 16
17int mp_bus_id_to_local[MAX_MP_BUSSES];
18#define BUS2LOCAL(global) (mp_bus_id_to_local[global]) 17#define BUS2LOCAL(global) (mp_bus_id_to_local[global])
19 18
20void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
21 struct mpc_config_translation *translation)
22{
23 int quad = translation->trans_quad;
24 int local = translation->trans_local;
25
26 mp_bus_id_to_node[m->mpc_busid] = quad;
27 mp_bus_id_to_local[m->mpc_busid] = local;
28 printk(KERN_INFO "Bus #%d is %s (node %d)\n",
29 m->mpc_busid, name, quad);
30}
31
32int quad_local_to_mp_bus_id [NR_CPUS/4][4];
33#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) 19#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
34void mpc_oem_pci_bus(struct mpc_config_bus *m,
35 struct mpc_config_translation *translation)
36{
37 int quad = translation->trans_quad;
38 int local = translation->trans_local;
39
40 quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
41}
42 20
43/* Where the IO area was mapped on multiquad, always 0 otherwise */ 21/* Where the IO area was mapped on multiquad, always 0 otherwise */
44void *xquad_portio; 22void *xquad_portio;
45#ifdef CONFIG_X86_NUMAQ
46EXPORT_SYMBOL(xquad_portio); 23EXPORT_SYMBOL(xquad_portio);
47#endif
48 24
49#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) 25#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
50 26
@@ -179,6 +155,9 @@ static int __init pci_numa_init(void)
179{ 155{
180 int quad; 156 int quad;
181 157
158 if (!found_numaq)
159 return 0;
160
182 raw_pci_ops = &pci_direct_conf1_mq; 161 raw_pci_ops = &pci_direct_conf1_mq;
183 162
184 if (pcibios_scanned++) 163 if (pcibios_scanned++)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bd74229081c..fe60aa9fed0 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1273,6 +1273,7 @@ asmlinkage void __init xen_start_kernel(void)
1273 1273
1274 pgd = (pgd_t *)xen_start_info->pt_base; 1274 pgd = (pgd_t *)xen_start_info->pt_base;
1275 1275
1276 init_pg_tables_start = __pa(pgd);
1276 init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; 1277 init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
1277 1278
1278 init_mm.pgd = pgd; /* use the Xen pagetables to start */ 1279 init_mm.pgd = pgd; /* use the Xen pagetables to start */
@@ -1316,5 +1317,5 @@ asmlinkage void __init xen_start_kernel(void)
1316 } 1317 }
1317 1318
1318 /* Start the world */ 1319 /* Start the world */
1319 start_kernel(); 1320 i386_start_kernel();
1320} 1321}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 488447878a9..a2957580320 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -40,8 +40,8 @@ char * __init xen_memory_setup(void)
40 max_pfn = min(MAX_DOMAIN_PAGES, max_pfn); 40 max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
41 41
42 e820.nr_map = 0; 42 e820.nr_map = 0;
43 add_memory_region(0, LOWMEMSIZE(), E820_RAM); 43 e820_add_region(0, LOWMEMSIZE(), E820_RAM);
44 add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); 44 e820_add_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM);
45 45
46 return "Xen"; 46 return "Xen";
47} 47}