diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-07-08 05:14:58 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-07-08 05:14:58 -0400 |
commit | 3de352bbd86f890dd0c5e1c09a6a1b0b29e0f8ce (patch) | |
tree | d4c5eba8cd2abefd7c9f16d089393f0f5999cf63 /arch/x86 | |
parent | 1b8ba39a3fad9c58532f6dad12c94d6e675be656 (diff) | |
parent | 9340e1ccdf7b9b22a2be7f51cd74e8b5e11961bf (diff) |
Merge branch 'x86/mpparse' into x86/devel
Conflicts:
arch/x86/Kconfig
arch/x86/kernel/io_apic_32.c
arch/x86/kernel/setup_64.c
arch/x86/mm/init_32.c
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
55 files changed, 3588 insertions, 2689 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7dc46ba26fbf..640dc62a7fa0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -230,6 +230,27 @@ config SMP | |||
230 | 230 | ||
231 | If you don't know what to do here, say N. | 231 | If you don't know what to do here, say N. |
232 | 232 | ||
233 | config X86_FIND_SMP_CONFIG | ||
234 | def_bool y | ||
235 | depends on X86_MPPARSE || X86_VOYAGER || X86_VISWS | ||
236 | depends on X86_32 | ||
237 | |||
238 | if ACPI | ||
239 | config X86_MPPARSE | ||
240 | def_bool y | ||
241 | bool "Enable MPS table" | ||
242 | depends on X86_LOCAL_APIC && !X86_VISWS | ||
243 | help | ||
244 | For old SMP systems that do not have proper ACPI support. On newer systems | ||
245 | (esp. with 64-bit CPUs) that have ACPI support, the MADT and DSDT override it. | ||
246 | endif | ||
247 | |||
248 | if !ACPI | ||
249 | config X86_MPPARSE | ||
250 | def_bool y | ||
251 | depends on X86_LOCAL_APIC && !X86_VISWS | ||
252 | endif | ||
253 | |||
233 | choice | 254 | choice |
234 | prompt "Subarchitecture Type" | 255 | prompt "Subarchitecture Type" |
235 | default X86_PC | 256 | default X86_PC |
@@ -261,36 +282,6 @@ config X86_VOYAGER | |||
261 | If you do not specifically know you have a Voyager based machine, | 282 | If you do not specifically know you have a Voyager based machine, |
262 | say N here, otherwise the kernel you build will not be bootable. | 283 | say N here, otherwise the kernel you build will not be bootable. |
263 | 284 | ||
264 | config X86_NUMAQ | ||
265 | bool "NUMAQ (IBM/Sequent)" | ||
266 | depends on SMP && X86_32 && PCI | ||
267 | select NUMA | ||
268 | help | ||
269 | This option is used for getting Linux to run on a (IBM/Sequent) NUMA | ||
270 | multiquad box. This changes the way that processors are bootstrapped, | ||
271 | and uses Clustered Logical APIC addressing mode instead of Flat Logical. | ||
272 | You will need a new lynxer.elf file to flash your firmware with - send | ||
273 | email to <Martin.Bligh@us.ibm.com>. | ||
274 | |||
275 | config X86_SUMMIT | ||
276 | bool "Summit/EXA (IBM x440)" | ||
277 | depends on X86_32 && SMP | ||
278 | help | ||
279 | This option is needed for IBM systems that use the Summit/EXA chipset. | ||
280 | In particular, it is needed for the x440. | ||
281 | |||
282 | If you don't have one of these computers, you should say N here. | ||
283 | If you want to build a NUMA kernel, you must select ACPI. | ||
284 | |||
285 | config X86_BIGSMP | ||
286 | bool "Support for other sub-arch SMP systems with more than 8 CPUs" | ||
287 | depends on X86_32 && SMP | ||
288 | help | ||
289 | This option is needed for the systems that have more than 8 CPUs | ||
290 | and if the system is not of any sub-arch type above. | ||
291 | |||
292 | If you don't have such a system, you should say N here. | ||
293 | |||
294 | config X86_VISWS | 285 | config X86_VISWS |
295 | bool "SGI 320/540 (Visual Workstation)" | 286 | bool "SGI 320/540 (Visual Workstation)" |
296 | depends on X86_32 && !PCI | 287 | depends on X86_32 && !PCI |
@@ -304,12 +295,33 @@ config X86_VISWS | |||
304 | and vice versa. See <file:Documentation/sgi-visws.txt> for details. | 295 | and vice versa. See <file:Documentation/sgi-visws.txt> for details. |
305 | 296 | ||
306 | config X86_GENERICARCH | 297 | config X86_GENERICARCH |
307 | bool "Generic architecture (Summit, bigsmp, ES7000, default)" | 298 | bool "Generic architecture" |
308 | depends on X86_32 | 299 | depends on X86_32 |
309 | help | 300 | help |
310 | This option compiles in the Summit, bigsmp, ES7000, default subarchitectures. | 301 | This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default |
311 | It is intended for a generic binary kernel. | 302 | subarchitectures. It is intended for a generic binary kernel. |
312 | If you want a NUMA kernel, select ACPI. We need SRAT for NUMA. | 303 | If you select them all, the kernel will probe them one by one and will |
304 | fall back to the default. | ||
305 | |||
306 | if X86_GENERICARCH | ||
307 | |||
308 | config X86_NUMAQ | ||
309 | bool "NUMAQ (IBM/Sequent)" | ||
310 | depends on SMP && X86_32 && PCI && X86_MPPARSE | ||
311 | select NUMA | ||
312 | help | ||
313 | This option is used for getting Linux to run on a NUMAQ (IBM/Sequent) | ||
314 | NUMA multiquad box. This changes the way that processors are | ||
315 | bootstrapped, and uses Clustered Logical APIC addressing mode instead | ||
316 | of Flat Logical. You will need a new lynxer.elf file to flash your | ||
317 | firmware with - send email to <Martin.Bligh@us.ibm.com>. | ||
318 | |||
319 | config X86_SUMMIT | ||
320 | bool "Summit/EXA (IBM x440)" | ||
321 | depends on X86_32 && SMP | ||
322 | help | ||
323 | This option is needed for IBM systems that use the Summit/EXA chipset. | ||
324 | In particular, it is needed for the x440. | ||
313 | 325 | ||
314 | config X86_ES7000 | 326 | config X86_ES7000 |
315 | bool "Support for Unisys ES7000 IA32 series" | 327 | bool "Support for Unisys ES7000 IA32 series" |
@@ -317,8 +329,15 @@ config X86_ES7000 | |||
317 | help | 329 | help |
318 | Support for Unisys ES7000 systems. Say 'Y' here if this kernel is | 330 | Support for Unisys ES7000 systems. Say 'Y' here if this kernel is |
319 | supposed to run on an IA32-based Unisys ES7000 system. | 331 | supposed to run on an IA32-based Unisys ES7000 system. |
320 | Only choose this option if you have such a system, otherwise you | 332 | |
321 | should say N here. | 333 | config X86_BIGSMP |
334 | bool "Support for big SMP systems with more than 8 CPUs" | ||
335 | depends on X86_32 && SMP | ||
336 | help | ||
337 | This option is needed for the systems that have more than 8 CPUs | ||
338 | and if the system is not of any sub-arch type above. | ||
339 | |||
340 | endif | ||
322 | 341 | ||
323 | config X86_RDC321X | 342 | config X86_RDC321X |
324 | bool "RDC R-321x SoC" | 343 | bool "RDC R-321x SoC" |
@@ -432,7 +451,7 @@ config MEMTEST | |||
432 | 451 | ||
433 | config ACPI_SRAT | 452 | config ACPI_SRAT |
434 | def_bool y | 453 | def_bool y |
435 | depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH) | 454 | depends on X86_32 && ACPI && NUMA && X86_GENERICARCH |
436 | select ACPI_NUMA | 455 | select ACPI_NUMA |
437 | 456 | ||
438 | config HAVE_ARCH_PARSE_SRAT | 457 | config HAVE_ARCH_PARSE_SRAT |
@@ -441,11 +460,11 @@ config HAVE_ARCH_PARSE_SRAT | |||
441 | 460 | ||
442 | config X86_SUMMIT_NUMA | 461 | config X86_SUMMIT_NUMA |
443 | def_bool y | 462 | def_bool y |
444 | depends on X86_32 && NUMA && (X86_SUMMIT || X86_GENERICARCH) | 463 | depends on X86_32 && NUMA && X86_GENERICARCH |
445 | 464 | ||
446 | config X86_CYCLONE_TIMER | 465 | config X86_CYCLONE_TIMER |
447 | def_bool y | 466 | def_bool y |
448 | depends on X86_32 && X86_SUMMIT || X86_GENERICARCH | 467 | depends on X86_GENERICARCH |
449 | 468 | ||
450 | config ES7000_CLUSTERED_APIC | 469 | config ES7000_CLUSTERED_APIC |
451 | def_bool y | 470 | def_bool y |
@@ -910,9 +929,9 @@ config X86_PAE | |||
910 | config NUMA | 929 | config NUMA |
911 | bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" | 930 | bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" |
912 | depends on SMP | 931 | depends on SMP |
913 | depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) && EXPERIMENTAL) | 932 | depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) |
914 | default n if X86_PC | 933 | default n if X86_PC |
915 | default y if (X86_NUMAQ || X86_SUMMIT) | 934 | default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) |
916 | help | 935 | help |
917 | Enable NUMA (Non Uniform Memory Access) support. | 936 | Enable NUMA (Non Uniform Memory Access) support. |
918 | The kernel will try to allocate memory used by a CPU on the | 937 | The kernel will try to allocate memory used by a CPU on the |
@@ -1089,6 +1108,40 @@ config MTRR | |||
1089 | 1108 | ||
1090 | See <file:Documentation/mtrr.txt> for more information. | 1109 | See <file:Documentation/mtrr.txt> for more information. |
1091 | 1110 | ||
1111 | config MTRR_SANITIZER | ||
1112 | def_bool y | ||
1113 | prompt "MTRR cleanup support" | ||
1114 | depends on MTRR | ||
1115 | help | ||
1116 | Convert MTRR layout from continuous to discrete, so some X driver | ||
1117 | could add WB entries. | ||
1118 | |||
1119 | Say N here if you see bootup problems (boot crash, boot hang, | ||
1120 | spontaneous reboots). | ||
1121 | |||
1122 | Can be disabled with disable_mtrr_cleanup. Also, mtrr_chunk_size | ||
1123 | can be used to set the largest MTRR entry size for a continuous block | ||
1124 | to hold holes (a.k.a. UC entries). | ||
1125 | |||
1126 | If unsure, say Y. | ||
1127 | |||
1128 | config MTRR_SANITIZER_ENABLE_DEFAULT | ||
1129 | int "MTRR cleanup enable value (0-1)" | ||
1130 | range 0 1 | ||
1131 | default "0" | ||
1132 | depends on MTRR_SANITIZER | ||
1133 | help | ||
1134 | Enable mtrr cleanup default value | ||
1135 | |||
1136 | config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT | ||
1137 | int "MTRR cleanup spare reg num (0-7)" | ||
1138 | range 0 7 | ||
1139 | default "1" | ||
1140 | depends on MTRR_SANITIZER | ||
1141 | help | ||
1142 | mtrr cleanup spare entries default, it can be changed via | ||
1143 | mtrr_spare_reg_nr= | ||
1144 | |||
1092 | config X86_PAT | 1145 | config X86_PAT |
1093 | bool | 1146 | bool |
1094 | prompt "x86 PAT support" | 1147 | prompt "x86 PAT support" |
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 38a15333f725..f0684bb74faf 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug | |||
@@ -137,15 +137,6 @@ config 4KSTACKS | |||
137 | on the VM subsystem for higher order allocations. This option | 137 | on the VM subsystem for higher order allocations. This option |
138 | will also use IRQ stacks to compensate for the reduced stackspace. | 138 | will also use IRQ stacks to compensate for the reduced stackspace. |
139 | 139 | ||
140 | config X86_FIND_SMP_CONFIG | ||
141 | def_bool y | ||
142 | depends on X86_LOCAL_APIC || X86_VOYAGER | ||
143 | depends on X86_32 | ||
144 | |||
145 | config X86_MPPARSE | ||
146 | def_bool y | ||
147 | depends on (X86_32 && (X86_LOCAL_APIC && !X86_VISWS)) || X86_64 | ||
148 | |||
149 | config DOUBLEFAULT | 140 | config DOUBLEFAULT |
150 | default y | 141 | default y |
151 | bool "Enable doublefault exception handler" if EMBEDDED | 142 | bool "Enable doublefault exception handler" if EMBEDDED |
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 5df0d1e330b1..b03d24b44bf9 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile | |||
@@ -117,29 +117,11 @@ mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/ | |||
117 | mflags-$(CONFIG_X86_VISWS) := -Iinclude/asm-x86/mach-visws | 117 | mflags-$(CONFIG_X86_VISWS) := -Iinclude/asm-x86/mach-visws |
118 | mcore-$(CONFIG_X86_VISWS) := arch/x86/mach-visws/ | 118 | mcore-$(CONFIG_X86_VISWS) := arch/x86/mach-visws/ |
119 | 119 | ||
120 | # NUMAQ subarch support | ||
121 | mflags-$(CONFIG_X86_NUMAQ) := -Iinclude/asm-x86/mach-numaq | ||
122 | mcore-$(CONFIG_X86_NUMAQ) := arch/x86/mach-default/ | ||
123 | |||
124 | # BIGSMP subarch support | ||
125 | mflags-$(CONFIG_X86_BIGSMP) := -Iinclude/asm-x86/mach-bigsmp | ||
126 | mcore-$(CONFIG_X86_BIGSMP) := arch/x86/mach-default/ | ||
127 | |||
128 | #Summit subarch support | ||
129 | mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-x86/mach-summit | ||
130 | mcore-$(CONFIG_X86_SUMMIT) := arch/x86/mach-default/ | ||
131 | |||
132 | # generic subarchitecture | 120 | # generic subarchitecture |
133 | mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic | 121 | mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic |
134 | fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ | 122 | fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ |
135 | mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/ | 123 | mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/ |
136 | 124 | ||
137 | |||
138 | # ES7000 subarch support | ||
139 | mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-x86/mach-es7000 | ||
140 | fcore-$(CONFIG_X86_ES7000) := arch/x86/mach-es7000/ | ||
141 | mcore-$(CONFIG_X86_ES7000) := arch/x86/mach-default/ | ||
142 | |||
143 | # RDC R-321x subarch support | 125 | # RDC R-321x subarch support |
144 | mflags-$(CONFIG_X86_RDC321X) := -Iinclude/asm-x86/mach-rdc321x | 126 | mflags-$(CONFIG_X86_RDC321X) := -Iinclude/asm-x86/mach-rdc321x |
145 | mcore-$(CONFIG_X86_RDC321X) := arch/x86/mach-default/ | 127 | mcore-$(CONFIG_X86_RDC321X) := arch/x86/mach-default/ |
@@ -160,6 +142,7 @@ KBUILD_AFLAGS += $(mflags-y) | |||
160 | 142 | ||
161 | head-y := arch/x86/kernel/head_$(BITS).o | 143 | head-y := arch/x86/kernel/head_$(BITS).o |
162 | head-y += arch/x86/kernel/head$(BITS).o | 144 | head-y += arch/x86/kernel/head$(BITS).o |
145 | head-y += arch/x86/kernel/head.o | ||
163 | head-y += arch/x86/kernel/init_task.o | 146 | head-y += arch/x86/kernel/init_task.o |
164 | 147 | ||
165 | libs-y += arch/x86/lib/ | 148 | libs-y += arch/x86/lib/ |
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 11629e903aa5..bc5553b496f7 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c | |||
@@ -218,10 +218,6 @@ static char *vidmem; | |||
218 | static int vidport; | 218 | static int vidport; |
219 | static int lines, cols; | 219 | static int lines, cols; |
220 | 220 | ||
221 | #ifdef CONFIG_X86_NUMAQ | ||
222 | void *xquad_portio; | ||
223 | #endif | ||
224 | |||
225 | #include "../../../../lib/inflate.c" | 221 | #include "../../../../lib/inflate.c" |
226 | 222 | ||
227 | static void *malloc(int size) | 223 | static void *malloc(int size) |
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index acad32eb4290..53165c97336b 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c | |||
@@ -13,6 +13,7 @@ | |||
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include "boot.h" | 15 | #include "boot.h" |
16 | #include <linux/kernel.h> | ||
16 | 17 | ||
17 | #define SMAP 0x534d4150 /* ASCII "SMAP" */ | 18 | #define SMAP 0x534d4150 /* ASCII "SMAP" */ |
18 | 19 | ||
@@ -53,7 +54,7 @@ static int detect_memory_e820(void) | |||
53 | 54 | ||
54 | count++; | 55 | count++; |
55 | desc++; | 56 | desc++; |
56 | } while (next && count < E820MAX); | 57 | } while (next && count < ARRAY_SIZE(boot_params.e820_map)); |
57 | 58 | ||
58 | return boot_params.e820_entries = count; | 59 | return boot_params.e820_entries = count; |
59 | } | 60 | } |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 53557cbe4bfa..d1d4ee895270 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -2,7 +2,7 @@ | |||
2 | # Makefile for the linux kernel. | 2 | # Makefile for the linux kernel. |
3 | # | 3 | # |
4 | 4 | ||
5 | extra-y := head_$(BITS).o head$(BITS).o init_task.o vmlinux.lds | 5 | extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinux.lds |
6 | 6 | ||
7 | CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) | 7 | CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) |
8 | 8 | ||
@@ -22,7 +22,7 @@ obj-y += setup_$(BITS).o i8259.o irqinit_$(BITS).o setup.o | |||
22 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o | 22 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o |
23 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | 23 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o |
24 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o | 24 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o |
25 | obj-y += bootflag.o e820_$(BITS).o | 25 | obj-y += bootflag.o e820.o |
26 | obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o | 26 | obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o |
27 | obj-y += alternative.o i8253.o pci-nommu.o | 27 | obj-y += alternative.o i8253.o pci-nommu.o |
28 | obj-y += tsc_$(BITS).o io_delay.o rtc.o | 28 | obj-y += tsc_$(BITS).o io_delay.o rtc.o |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index ff1a7b49a460..6516359922ba 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -83,6 +83,8 @@ int acpi_lapic; | |||
83 | int acpi_ioapic; | 83 | int acpi_ioapic; |
84 | int acpi_strict; | 84 | int acpi_strict; |
85 | 85 | ||
86 | static int disable_irq0_through_ioapic __initdata; | ||
87 | |||
86 | u8 acpi_sci_flags __initdata; | 88 | u8 acpi_sci_flags __initdata; |
87 | int acpi_sci_override_gsi __initdata; | 89 | int acpi_sci_override_gsi __initdata; |
88 | int acpi_skip_timer_override __initdata; | 90 | int acpi_skip_timer_override __initdata; |
@@ -338,8 +340,6 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e | |||
338 | 340 | ||
339 | #ifdef CONFIG_X86_IO_APIC | 341 | #ifdef CONFIG_X86_IO_APIC |
340 | 342 | ||
341 | struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; | ||
342 | |||
343 | static int __init | 343 | static int __init |
344 | acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) | 344 | acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) |
345 | { | 345 | { |
@@ -858,6 +858,372 @@ static int __init acpi_parse_madt_lapic_entries(void) | |||
858 | #endif /* CONFIG_X86_LOCAL_APIC */ | 858 | #endif /* CONFIG_X86_LOCAL_APIC */ |
859 | 859 | ||
860 | #ifdef CONFIG_X86_IO_APIC | 860 | #ifdef CONFIG_X86_IO_APIC |
861 | #define MP_ISA_BUS 0 | ||
862 | |||
863 | #ifdef CONFIG_X86_ES7000 | ||
864 | extern int es7000_plat; | ||
865 | #endif | ||
866 | |||
867 | static struct { | ||
868 | int apic_id; | ||
869 | int gsi_base; | ||
870 | int gsi_end; | ||
871 | DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); | ||
872 | } mp_ioapic_routing[MAX_IO_APICS]; | ||
873 | |||
874 | static int mp_find_ioapic(int gsi) | ||
875 | { | ||
876 | int i = 0; | ||
877 | |||
878 | /* Find the IOAPIC that manages this GSI. */ | ||
879 | for (i = 0; i < nr_ioapics; i++) { | ||
880 | if ((gsi >= mp_ioapic_routing[i].gsi_base) | ||
881 | && (gsi <= mp_ioapic_routing[i].gsi_end)) | ||
882 | return i; | ||
883 | } | ||
884 | |||
885 | printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); | ||
886 | return -1; | ||
887 | } | ||
888 | |||
889 | static u8 __init uniq_ioapic_id(u8 id) | ||
890 | { | ||
891 | #ifdef CONFIG_X86_32 | ||
892 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && | ||
893 | !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) | ||
894 | return io_apic_get_unique_id(nr_ioapics, id); | ||
895 | else | ||
896 | return id; | ||
897 | #else | ||
898 | int i; | ||
899 | DECLARE_BITMAP(used, 256); | ||
900 | bitmap_zero(used, 256); | ||
901 | for (i = 0; i < nr_ioapics; i++) { | ||
902 | struct mp_config_ioapic *ia = &mp_ioapics[i]; | ||
903 | __set_bit(ia->mp_apicid, used); | ||
904 | } | ||
905 | if (!test_bit(id, used)) | ||
906 | return id; | ||
907 | return find_first_zero_bit(used, 256); | ||
908 | #endif | ||
909 | } | ||
910 | |||
911 | static int bad_ioapic(unsigned long address) | ||
912 | { | ||
913 | if (nr_ioapics >= MAX_IO_APICS) { | ||
914 | printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " | ||
915 | "(found %d)\n", MAX_IO_APICS, nr_ioapics); | ||
916 | panic("Recompile kernel with bigger MAX_IO_APICS!\n"); | ||
917 | } | ||
918 | if (!address) { | ||
919 | printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" | ||
920 | " found in table, skipping!\n"); | ||
921 | return 1; | ||
922 | } | ||
923 | return 0; | ||
924 | } | ||
925 | |||
926 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | ||
927 | { | ||
928 | int idx = 0; | ||
929 | |||
930 | if (bad_ioapic(address)) | ||
931 | return; | ||
932 | |||
933 | idx = nr_ioapics; | ||
934 | |||
935 | mp_ioapics[idx].mp_type = MP_IOAPIC; | ||
936 | mp_ioapics[idx].mp_flags = MPC_APIC_USABLE; | ||
937 | mp_ioapics[idx].mp_apicaddr = address; | ||
938 | |||
939 | set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); | ||
940 | mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id); | ||
941 | #ifdef CONFIG_X86_32 | ||
942 | mp_ioapics[idx].mp_apicver = io_apic_get_version(idx); | ||
943 | #else | ||
944 | mp_ioapics[idx].mp_apicver = 0; | ||
945 | #endif | ||
946 | /* | ||
947 | * Build basic GSI lookup table to facilitate gsi->io_apic lookups | ||
948 | * and to prevent reprogramming of IOAPIC pins (PCI GSIs). | ||
949 | */ | ||
950 | mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid; | ||
951 | mp_ioapic_routing[idx].gsi_base = gsi_base; | ||
952 | mp_ioapic_routing[idx].gsi_end = gsi_base + | ||
953 | io_apic_get_redir_entries(idx); | ||
954 | |||
955 | printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " | ||
956 | "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid, | ||
957 | mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr, | ||
958 | mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); | ||
959 | |||
960 | nr_ioapics++; | ||
961 | } | ||
962 | |||
963 | static void assign_to_mp_irq(struct mp_config_intsrc *m, | ||
964 | struct mp_config_intsrc *mp_irq) | ||
965 | { | ||
966 | memcpy(mp_irq, m, sizeof(struct mp_config_intsrc)); | ||
967 | } | ||
968 | |||
969 | static int mp_irq_cmp(struct mp_config_intsrc *mp_irq, | ||
970 | struct mp_config_intsrc *m) | ||
971 | { | ||
972 | return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc)); | ||
973 | } | ||
974 | |||
975 | static void save_mp_irq(struct mp_config_intsrc *m) | ||
976 | { | ||
977 | int i; | ||
978 | |||
979 | for (i = 0; i < mp_irq_entries; i++) { | ||
980 | if (!mp_irq_cmp(&mp_irqs[i], m)) | ||
981 | return; | ||
982 | } | ||
983 | |||
984 | assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]); | ||
985 | if (++mp_irq_entries == MAX_IRQ_SOURCES) | ||
986 | panic("Max # of irq sources exceeded!!\n"); | ||
987 | } | ||
988 | |||
989 | void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) | ||
990 | { | ||
991 | int ioapic; | ||
992 | int pin; | ||
993 | struct mp_config_intsrc mp_irq; | ||
994 | |||
995 | /* Skip the 8254 timer interrupt (IRQ 0) if requested. */ | ||
996 | if (bus_irq == 0 && disable_irq0_through_ioapic) | ||
997 | return; | ||
998 | |||
999 | /* | ||
1000 | * Convert 'gsi' to 'ioapic.pin'. | ||
1001 | */ | ||
1002 | ioapic = mp_find_ioapic(gsi); | ||
1003 | if (ioapic < 0) | ||
1004 | return; | ||
1005 | pin = gsi - mp_ioapic_routing[ioapic].gsi_base; | ||
1006 | |||
1007 | /* | ||
1008 | * TBD: This check is for faulty timer entries, where the override | ||
1009 | * erroneously sets the trigger to level, resulting in a HUGE | ||
1010 | * increase of timer interrupts! | ||
1011 | */ | ||
1012 | if ((bus_irq == 0) && (trigger == 3)) | ||
1013 | trigger = 1; | ||
1014 | |||
1015 | mp_irq.mp_type = MP_INTSRC; | ||
1016 | mp_irq.mp_irqtype = mp_INT; | ||
1017 | mp_irq.mp_irqflag = (trigger << 2) | polarity; | ||
1018 | mp_irq.mp_srcbus = MP_ISA_BUS; | ||
1019 | mp_irq.mp_srcbusirq = bus_irq; /* IRQ */ | ||
1020 | mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */ | ||
1021 | mp_irq.mp_dstirq = pin; /* INTIN# */ | ||
1022 | |||
1023 | save_mp_irq(&mp_irq); | ||
1024 | } | ||
1025 | |||
1026 | void __init mp_config_acpi_legacy_irqs(void) | ||
1027 | { | ||
1028 | int i; | ||
1029 | int ioapic; | ||
1030 | unsigned int dstapic; | ||
1031 | struct mp_config_intsrc mp_irq; | ||
1032 | |||
1033 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) | ||
1034 | /* | ||
1035 | * Fabricate the legacy ISA bus (bus #0, i.e. MP_ISA_BUS). | ||
1036 | */ | ||
1037 | mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; | ||
1038 | #endif | ||
1039 | set_bit(MP_ISA_BUS, mp_bus_not_pci); | ||
1040 | Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); | ||
1041 | |||
1042 | #ifdef CONFIG_X86_ES7000 | ||
1043 | /* | ||
1044 | * Older generations of ES7000 have no legacy identity mappings | ||
1045 | */ | ||
1046 | if (es7000_plat == 1) | ||
1047 | return; | ||
1048 | #endif | ||
1049 | |||
1050 | /* | ||
1051 | * Locate the IOAPIC that manages the ISA IRQs (0-15). | ||
1052 | */ | ||
1053 | ioapic = mp_find_ioapic(0); | ||
1054 | if (ioapic < 0) | ||
1055 | return; | ||
1056 | dstapic = mp_ioapics[ioapic].mp_apicid; | ||
1057 | |||
1058 | /* | ||
1059 | * Use the default configuration for the IRQs 0-15. Unless | ||
1060 | * overridden by (MADT) interrupt source override entries. | ||
1061 | */ | ||
1062 | for (i = 0; i < 16; i++) { | ||
1063 | int idx; | ||
1064 | |||
1065 | /* Skip the 8254 timer interrupt (IRQ 0) if requested. */ | ||
1066 | if (i == 0 && disable_irq0_through_ioapic) | ||
1067 | continue; | ||
1068 | |||
1069 | for (idx = 0; idx < mp_irq_entries; idx++) { | ||
1070 | struct mp_config_intsrc *irq = mp_irqs + idx; | ||
1071 | |||
1072 | /* Do we already have a mapping for this ISA IRQ? */ | ||
1073 | if (irq->mp_srcbus == MP_ISA_BUS | ||
1074 | && irq->mp_srcbusirq == i) | ||
1075 | break; | ||
1076 | |||
1077 | /* Do we already have a mapping for this IOAPIC pin */ | ||
1078 | if (irq->mp_dstapic == dstapic && | ||
1079 | irq->mp_dstirq == i) | ||
1080 | break; | ||
1081 | } | ||
1082 | |||
1083 | if (idx != mp_irq_entries) { | ||
1084 | printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); | ||
1085 | continue; /* IRQ already used */ | ||
1086 | } | ||
1087 | |||
1088 | mp_irq.mp_type = MP_INTSRC; | ||
1089 | mp_irq.mp_irqflag = 0; /* Conforming */ | ||
1090 | mp_irq.mp_srcbus = MP_ISA_BUS; | ||
1091 | mp_irq.mp_dstapic = dstapic; | ||
1092 | mp_irq.mp_irqtype = mp_INT; | ||
1093 | mp_irq.mp_srcbusirq = i; /* Identity mapped */ | ||
1094 | mp_irq.mp_dstirq = i; | ||
1095 | |||
1096 | save_mp_irq(&mp_irq); | ||
1097 | } | ||
1098 | } | ||
1099 | |||
1100 | int mp_register_gsi(u32 gsi, int triggering, int polarity) | ||
1101 | { | ||
1102 | int ioapic; | ||
1103 | int ioapic_pin; | ||
1104 | #ifdef CONFIG_X86_32 | ||
1105 | #define MAX_GSI_NUM 4096 | ||
1106 | #define IRQ_COMPRESSION_START 64 | ||
1107 | |||
1108 | static int pci_irq = IRQ_COMPRESSION_START; | ||
1109 | /* | ||
1110 | * Mapping between Global System Interrupts, which | ||
1111 | * represent all possible interrupts, and IRQs | ||
1112 | * assigned to actual devices. | ||
1113 | */ | ||
1114 | static int gsi_to_irq[MAX_GSI_NUM]; | ||
1115 | #else | ||
1116 | |||
1117 | if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) | ||
1118 | return gsi; | ||
1119 | #endif | ||
1120 | |||
1121 | /* Don't set up the ACPI SCI because it's already set up */ | ||
1122 | if (acpi_gbl_FADT.sci_interrupt == gsi) | ||
1123 | return gsi; | ||
1124 | |||
1125 | ioapic = mp_find_ioapic(gsi); | ||
1126 | if (ioapic < 0) { | ||
1127 | printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); | ||
1128 | return gsi; | ||
1129 | } | ||
1130 | |||
1131 | ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; | ||
1132 | |||
1133 | #ifdef CONFIG_X86_32 | ||
1134 | if (ioapic_renumber_irq) | ||
1135 | gsi = ioapic_renumber_irq(ioapic, gsi); | ||
1136 | #endif | ||
1137 | |||
1138 | /* | ||
1139 | * Avoid pin reprogramming. PRTs typically include entries | ||
1140 | * with redundant pin->gsi mappings (but unique PCI devices); | ||
1141 | * we only program the IOAPIC on the first. | ||
1142 | */ | ||
1143 | if (ioapic_pin > MP_MAX_IOAPIC_PIN) { | ||
1144 | printk(KERN_ERR "Invalid reference to IOAPIC pin " | ||
1145 | "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, | ||
1146 | ioapic_pin); | ||
1147 | return gsi; | ||
1148 | } | ||
1149 | if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { | ||
1150 | Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", | ||
1151 | mp_ioapic_routing[ioapic].apic_id, ioapic_pin); | ||
1152 | #ifdef CONFIG_X86_32 | ||
1153 | return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); | ||
1154 | #else | ||
1155 | return gsi; | ||
1156 | #endif | ||
1157 | } | ||
1158 | |||
1159 | set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed); | ||
1160 | #ifdef CONFIG_X86_32 | ||
1161 | /* | ||
1162 | * For GSI >= 64, use IRQ compression | ||
1163 | */ | ||
1164 | if ((gsi >= IRQ_COMPRESSION_START) | ||
1165 | && (triggering == ACPI_LEVEL_SENSITIVE)) { | ||
1166 | /* | ||
1167 | * For PCI devices assign IRQs in order, avoiding gaps | ||
1168 | * due to unused I/O APIC pins. | ||
1169 | */ | ||
1170 | int irq = gsi; | ||
1171 | if (gsi < MAX_GSI_NUM) { | ||
1172 | /* | ||
1173 | * Retain the VIA chipset work-around (gsi > 15), but | ||
1174 | * avoid a problem where the 8254 timer (IRQ0) is setup | ||
1175 | * via an override (so it's not on pin 0 of the ioapic), | ||
1176 | * and at the same time, the pin 0 interrupt is a PCI | ||
1177 | * type. The gsi > 15 test could cause these two pins | ||
1178 | * to be shared as IRQ0, and they are not shareable. | ||
1179 | * So test for this condition, and if necessary, avoid | ||
1180 | * the pin collision. | ||
1181 | */ | ||
1182 | gsi = pci_irq++; | ||
1183 | /* | ||
1184 | * Don't assign IRQ used by ACPI SCI | ||
1185 | */ | ||
1186 | if (gsi == acpi_gbl_FADT.sci_interrupt) | ||
1187 | gsi = pci_irq++; | ||
1188 | gsi_to_irq[irq] = gsi; | ||
1189 | } else { | ||
1190 | printk(KERN_ERR "GSI %u is too high\n", gsi); | ||
1191 | return gsi; | ||
1192 | } | ||
1193 | } | ||
1194 | #endif | ||
1195 | io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, | ||
1196 | triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, | ||
1197 | polarity == ACPI_ACTIVE_HIGH ? 0 : 1); | ||
1198 | return gsi; | ||
1199 | } | ||
1200 | |||
1201 | int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, | ||
1202 | u32 gsi, int triggering, int polarity) | ||
1203 | { | ||
1204 | #ifdef CONFIG_X86_MPPARSE | ||
1205 | struct mp_config_intsrc mp_irq; | ||
1206 | int ioapic; | ||
1207 | |||
1208 | if (!acpi_ioapic) | ||
1209 | return 0; | ||
1210 | |||
1211 | /* print the entry should happen on mptable identically */ | ||
1212 | mp_irq.mp_type = MP_INTSRC; | ||
1213 | mp_irq.mp_irqtype = mp_INT; | ||
1214 | mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | | ||
1215 | (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); | ||
1216 | mp_irq.mp_srcbus = number; | ||
1217 | mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); | ||
1218 | ioapic = mp_find_ioapic(gsi); | ||
1219 | mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id; | ||
1220 | mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; | ||
1221 | |||
1222 | save_mp_irq(&mp_irq); | ||
1223 | #endif | ||
1224 | return 0; | ||
1225 | } | ||
1226 | |||
861 | /* | 1227 | /* |
862 | * Parse IOAPIC related entries in MADT | 1228 | * Parse IOAPIC related entries in MADT |
863 | * returns 0 on success, < 0 on error | 1229 | * returns 0 on success, < 0 on error |
@@ -1059,6 +1425,17 @@ static int __init force_acpi_ht(const struct dmi_system_id *d) | |||
1059 | } | 1425 | } |
1060 | 1426 | ||
1061 | /* | 1427 | /* |
1428 | * Don't register any I/O APIC entries for the 8254 timer IRQ. | ||
1429 | */ | ||
1430 | static int __init | ||
1431 | dmi_disable_irq0_through_ioapic(const struct dmi_system_id *d) | ||
1432 | { | ||
1433 | pr_notice("%s detected: disabling IRQ 0 through I/O APIC\n", d->ident); | ||
1434 | disable_irq0_through_ioapic = 1; | ||
1435 | return 0; | ||
1436 | } | ||
1437 | |||
1438 | /* | ||
1062 | * If your system is blacklisted here, but you find that acpi=force | 1439 | * If your system is blacklisted here, but you find that acpi=force |
1063 | * works for you, please contact acpi-devel@sourceforge.net | 1440 | * works for you, please contact acpi-devel@sourceforge.net |
1064 | */ | 1441 | */ |
@@ -1225,6 +1602,32 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { | |||
1225 | DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), | 1602 | DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), |
1226 | }, | 1603 | }, |
1227 | }, | 1604 | }, |
1605 | /* | ||
1606 | * HP laptops which use a DSDT reporting as HP/SB400/10000, | ||
1607 | * which includes some code which overrides all temperature | ||
1608 | * trip points to 16C if the INTIN2 input of the I/O APIC | ||
1609 | * is enabled. This input is incorrectly designated the | ||
1610 | * ISA IRQ 0 via an interrupt source override even though | ||
1611 | * it is wired to the output of the master 8259A and INTIN0 | ||
1612 | * is not connected at all. Abandon any attempts to route | ||
1613 | * IRQ 0 through the I/O APIC therefore. | ||
1614 | */ | ||
1615 | { | ||
1616 | .callback = dmi_disable_irq0_through_ioapic, | ||
1617 | .ident = "HP NX6125 laptop", | ||
1618 | .matches = { | ||
1619 | DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), | ||
1620 | DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6125"), | ||
1621 | }, | ||
1622 | }, | ||
1623 | { | ||
1624 | .callback = dmi_disable_irq0_through_ioapic, | ||
1625 | .ident = "HP NX6325 laptop", | ||
1626 | .matches = { | ||
1627 | DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), | ||
1628 | DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"), | ||
1629 | }, | ||
1630 | }, | ||
1228 | {} | 1631 | {} |
1229 | }; | 1632 | }; |
1230 | 1633 | ||
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index e819362c7068..600470d464fa 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -328,7 +328,7 @@ void __init early_gart_iommu_check(void) | |||
328 | E820_RAM)) { | 328 | E820_RAM)) { |
329 | /* reserve it, so we can reuse it in second kernel */ | 329 | /* reserve it, so we can reuse it in second kernel */ |
330 | printk(KERN_INFO "update e820 for GART\n"); | 330 | printk(KERN_INFO "update e820 for GART\n"); |
331 | add_memory_region(aper_base, aper_size, E820_RESERVED); | 331 | e820_add_region(aper_base, aper_size, E820_RESERVED); |
332 | update_e820(); | 332 | update_e820(); |
333 | } | 333 | } |
334 | } | 334 | } |
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index ce4538ebb7fe..570c362eca8c 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c | |||
@@ -79,6 +79,11 @@ char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; | |||
79 | */ | 79 | */ |
80 | int apic_verbosity; | 80 | int apic_verbosity; |
81 | 81 | ||
82 | int pic_mode; | ||
83 | |||
84 | /* Have we found an MP table */ | ||
85 | int smp_found_config; | ||
86 | |||
82 | static unsigned int calibration_result; | 87 | static unsigned int calibration_result; |
83 | 88 | ||
84 | static int lapic_next_event(unsigned long delta, | 89 | static int lapic_next_event(unsigned long delta, |
@@ -1202,7 +1207,7 @@ void __init init_apic_mappings(void) | |||
1202 | 1207 | ||
1203 | for (i = 0; i < nr_ioapics; i++) { | 1208 | for (i = 0; i < nr_ioapics; i++) { |
1204 | if (smp_found_config) { | 1209 | if (smp_found_config) { |
1205 | ioapic_phys = mp_ioapics[i].mpc_apicaddr; | 1210 | ioapic_phys = mp_ioapics[i].mp_apicaddr; |
1206 | if (!ioapic_phys) { | 1211 | if (!ioapic_phys) { |
1207 | printk(KERN_ERR | 1212 | printk(KERN_ERR |
1208 | "WARNING: bogus zero IO-APIC " | 1213 | "WARNING: bogus zero IO-APIC " |
@@ -1517,6 +1522,9 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
1517 | */ | 1522 | */ |
1518 | cpu = 0; | 1523 | cpu = 0; |
1519 | 1524 | ||
1525 | if (apicid > max_physical_apicid) | ||
1526 | max_physical_apicid = apicid; | ||
1527 | |||
1520 | /* | 1528 | /* |
1521 | * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y | 1529 | * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y |
1522 | * but we need to work other dependencies like SMP_SUSPEND etc | 1530 | * but we need to work other dependencies like SMP_SUSPEND etc |
@@ -1524,7 +1532,7 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
1524 | * if (CPU_HOTPLUG_ENABLED || num_processors > 8) | 1532 | * if (CPU_HOTPLUG_ENABLED || num_processors > 8) |
1525 | * - Ashok Raj <ashok.raj@intel.com> | 1533 | * - Ashok Raj <ashok.raj@intel.com> |
1526 | */ | 1534 | */ |
1527 | if (num_processors > 8) { | 1535 | if (max_physical_apicid >= 8) { |
1528 | switch (boot_cpu_data.x86_vendor) { | 1536 | switch (boot_cpu_data.x86_vendor) { |
1529 | case X86_VENDOR_INTEL: | 1537 | case X86_VENDOR_INTEL: |
1530 | if (!APIC_XAPIC(version)) { | 1538 | if (!APIC_XAPIC(version)) { |
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 3ef7752aa8e5..d7406aa1c985 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c | |||
@@ -56,6 +56,9 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); | |||
56 | */ | 56 | */ |
57 | int apic_verbosity; | 57 | int apic_verbosity; |
58 | 58 | ||
59 | /* Have we found an MP table */ | ||
60 | int smp_found_config; | ||
61 | |||
59 | static struct resource lapic_resource = { | 62 | static struct resource lapic_resource = { |
60 | .name = "Local APIC", | 63 | .name = "Local APIC", |
61 | .flags = IORESOURCE_MEM | IORESOURCE_BUSY, | 64 | .flags = IORESOURCE_MEM | IORESOURCE_BUSY, |
@@ -1068,6 +1071,9 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
1068 | */ | 1071 | */ |
1069 | cpu = 0; | 1072 | cpu = 0; |
1070 | } | 1073 | } |
1074 | if (apicid > max_physical_apicid) | ||
1075 | max_physical_apicid = apicid; | ||
1076 | |||
1071 | /* are we being called early in kernel startup? */ | 1077 | /* are we being called early in kernel startup? */ |
1072 | if (x86_cpu_to_apicid_early_ptr) { | 1078 | if (x86_cpu_to_apicid_early_ptr) { |
1073 | u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; | 1079 | u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 5d241ce94a44..509bd3d9eacd 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -37,7 +37,7 @@ static struct fixed_range_block fixed_range_blocks[] = { | |||
37 | static unsigned long smp_changes_mask; | 37 | static unsigned long smp_changes_mask; |
38 | static struct mtrr_state mtrr_state = {}; | 38 | static struct mtrr_state mtrr_state = {}; |
39 | static int mtrr_state_set; | 39 | static int mtrr_state_set; |
40 | static u64 tom2; | 40 | u64 mtrr_tom2; |
41 | 41 | ||
42 | #undef MODULE_PARAM_PREFIX | 42 | #undef MODULE_PARAM_PREFIX |
43 | #define MODULE_PARAM_PREFIX "mtrr." | 43 | #define MODULE_PARAM_PREFIX "mtrr." |
@@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end) | |||
139 | } | 139 | } |
140 | } | 140 | } |
141 | 141 | ||
142 | if (tom2) { | 142 | if (mtrr_tom2) { |
143 | if (start >= (1ULL<<32) && (end < tom2)) | 143 | if (start >= (1ULL<<32) && (end < mtrr_tom2)) |
144 | return MTRR_TYPE_WRBACK; | 144 | return MTRR_TYPE_WRBACK; |
145 | } | 145 | } |
146 | 146 | ||
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) | |||
158 | rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); | 158 | rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); |
159 | } | 159 | } |
160 | 160 | ||
161 | /* fill the MSR pair relating to a var range */ | ||
162 | void fill_mtrr_var_range(unsigned int index, | ||
163 | u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi) | ||
164 | { | ||
165 | struct mtrr_var_range *vr; | ||
166 | |||
167 | vr = mtrr_state.var_ranges; | ||
168 | |||
169 | vr[index].base_lo = base_lo; | ||
170 | vr[index].base_hi = base_hi; | ||
171 | vr[index].mask_lo = mask_lo; | ||
172 | vr[index].mask_hi = mask_hi; | ||
173 | } | ||
174 | |||
161 | static void | 175 | static void |
162 | get_fixed_ranges(mtrr_type * frs) | 176 | get_fixed_ranges(mtrr_type * frs) |
163 | { | 177 | { |
@@ -213,13 +227,13 @@ void __init get_mtrr_state(void) | |||
213 | mtrr_state.enabled = (lo & 0xc00) >> 10; | 227 | mtrr_state.enabled = (lo & 0xc00) >> 10; |
214 | 228 | ||
215 | if (amd_special_default_mtrr()) { | 229 | if (amd_special_default_mtrr()) { |
216 | unsigned lo, hi; | 230 | unsigned low, high; |
217 | /* TOP_MEM2 */ | 231 | /* TOP_MEM2 */ |
218 | rdmsr(MSR_K8_TOP_MEM2, lo, hi); | 232 | rdmsr(MSR_K8_TOP_MEM2, low, high); |
219 | tom2 = hi; | 233 | mtrr_tom2 = high; |
220 | tom2 <<= 32; | 234 | mtrr_tom2 <<= 32; |
221 | tom2 |= lo; | 235 | mtrr_tom2 |= low; |
222 | tom2 &= 0xffffff8000000ULL; | 236 | mtrr_tom2 &= 0xffffff800000ULL; |
223 | } | 237 | } |
224 | if (mtrr_show) { | 238 | if (mtrr_show) { |
225 | int high_width; | 239 | int high_width; |
@@ -251,9 +265,9 @@ void __init get_mtrr_state(void) | |||
251 | else | 265 | else |
252 | printk(KERN_INFO "MTRR %u disabled\n", i); | 266 | printk(KERN_INFO "MTRR %u disabled\n", i); |
253 | } | 267 | } |
254 | if (tom2) { | 268 | if (mtrr_tom2) { |
255 | printk(KERN_INFO "TOM2: %016llx aka %lldM\n", | 269 | printk(KERN_INFO "TOM2: %016llx aka %lldM\n", |
256 | tom2, tom2>>20); | 270 | mtrr_tom2, mtrr_tom2>>20); |
257 | } | 271 | } |
258 | } | 272 | } |
259 | mtrr_state_set = 1; | 273 | mtrr_state_set = 1; |
@@ -328,7 +342,7 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) | |||
328 | 342 | ||
329 | if (lo != msrwords[0] || hi != msrwords[1]) { | 343 | if (lo != msrwords[0] || hi != msrwords[1]) { |
330 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | 344 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && |
331 | boot_cpu_data.x86 == 15 && | 345 | (boot_cpu_data.x86 >= 0x0f && boot_cpu_data.x86 <= 0x11) && |
332 | ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) | 346 | ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) |
333 | k8_enable_fixed_iorrs(); | 347 | k8_enable_fixed_iorrs(); |
334 | mtrr_wrmsr(msr, msrwords[0], msrwords[1]); | 348 | mtrr_wrmsr(msr, msrwords[0], msrwords[1]); |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 6a1e278d9323..105afe12beb0 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/smp.h> | 37 | #include <linux/smp.h> |
38 | #include <linux/cpu.h> | 38 | #include <linux/cpu.h> |
39 | #include <linux/mutex.h> | 39 | #include <linux/mutex.h> |
40 | #include <linux/sort.h> | ||
40 | 41 | ||
41 | #include <asm/e820.h> | 42 | #include <asm/e820.h> |
42 | #include <asm/mtrr.h> | 43 | #include <asm/mtrr.h> |
@@ -609,6 +610,787 @@ static struct sysdev_driver mtrr_sysdev_driver = { | |||
609 | .resume = mtrr_restore, | 610 | .resume = mtrr_restore, |
610 | }; | 611 | }; |
611 | 612 | ||
613 | /* should be related to MTRR_VAR_RANGES nums */ | ||
614 | #define RANGE_NUM 256 | ||
615 | |||
/*
 * One entry of a range table.  Users in this file store an inclusive
 * [start, end] interval and treat an entry with end == 0 as unused.
 */
struct res_range {
	unsigned long start, end;
};
620 | |||
621 | static int __init | ||
622 | add_range(struct res_range *range, int nr_range, unsigned long start, | ||
623 | unsigned long end) | ||
624 | { | ||
625 | /* out of slots */ | ||
626 | if (nr_range >= RANGE_NUM) | ||
627 | return nr_range; | ||
628 | |||
629 | range[nr_range].start = start; | ||
630 | range[nr_range].end = end; | ||
631 | |||
632 | nr_range++; | ||
633 | |||
634 | return nr_range; | ||
635 | } | ||
636 | |||
637 | static int __init | ||
638 | add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, | ||
639 | unsigned long end) | ||
640 | { | ||
641 | int i; | ||
642 | |||
643 | /* try to merge it with old one */ | ||
644 | for (i = 0; i < nr_range; i++) { | ||
645 | unsigned long final_start, final_end; | ||
646 | unsigned long common_start, common_end; | ||
647 | |||
648 | if (!range[i].end) | ||
649 | continue; | ||
650 | |||
651 | common_start = max(range[i].start, start); | ||
652 | common_end = min(range[i].end, end); | ||
653 | if (common_start > common_end + 1) | ||
654 | continue; | ||
655 | |||
656 | final_start = min(range[i].start, start); | ||
657 | final_end = max(range[i].end, end); | ||
658 | |||
659 | range[i].start = final_start; | ||
660 | range[i].end = final_end; | ||
661 | return nr_range; | ||
662 | } | ||
663 | |||
664 | /* need to add that */ | ||
665 | return add_range(range, nr_range, start, end); | ||
666 | } | ||
667 | |||
668 | static void __init | ||
669 | subtract_range(struct res_range *range, unsigned long start, unsigned long end) | ||
670 | { | ||
671 | int i, j; | ||
672 | |||
673 | for (j = 0; j < RANGE_NUM; j++) { | ||
674 | if (!range[j].end) | ||
675 | continue; | ||
676 | |||
677 | if (start <= range[j].start && end >= range[j].end) { | ||
678 | range[j].start = 0; | ||
679 | range[j].end = 0; | ||
680 | continue; | ||
681 | } | ||
682 | |||
683 | if (start <= range[j].start && end < range[j].end && | ||
684 | range[j].start < end + 1) { | ||
685 | range[j].start = end + 1; | ||
686 | continue; | ||
687 | } | ||
688 | |||
689 | |||
690 | if (start > range[j].start && end >= range[j].end && | ||
691 | range[j].end > start - 1) { | ||
692 | range[j].end = start - 1; | ||
693 | continue; | ||
694 | } | ||
695 | |||
696 | if (start > range[j].start && end < range[j].end) { | ||
697 | /* find the new spare */ | ||
698 | for (i = 0; i < RANGE_NUM; i++) { | ||
699 | if (range[i].end == 0) | ||
700 | break; | ||
701 | } | ||
702 | if (i < RANGE_NUM) { | ||
703 | range[i].end = range[j].end; | ||
704 | range[i].start = end + 1; | ||
705 | } else { | ||
706 | printk(KERN_ERR "run of slot in ranges\n"); | ||
707 | } | ||
708 | range[j].end = start - 1; | ||
709 | continue; | ||
710 | } | ||
711 | } | ||
712 | } | ||
713 | |||
714 | static int __init cmp_range(const void *x1, const void *x2) | ||
715 | { | ||
716 | const struct res_range *r1 = x1; | ||
717 | const struct res_range *r2 = x2; | ||
718 | long start1, start2; | ||
719 | |||
720 | start1 = r1->start; | ||
721 | start2 = r2->start; | ||
722 | |||
723 | return start1 - start2; | ||
724 | } | ||
725 | |||
726 | struct var_mtrr_range_state { | ||
727 | unsigned long base_pfn; | ||
728 | unsigned long size_pfn; | ||
729 | mtrr_type type; | ||
730 | }; | ||
731 | |||
732 | struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; | ||
733 | static int __initdata debug_print; | ||
734 | |||
735 | static int __init | ||
736 | x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | ||
737 | unsigned long extra_remove_base, | ||
738 | unsigned long extra_remove_size) | ||
739 | { | ||
740 | unsigned long i, base, size; | ||
741 | mtrr_type type; | ||
742 | |||
743 | for (i = 0; i < num_var_ranges; i++) { | ||
744 | type = range_state[i].type; | ||
745 | if (type != MTRR_TYPE_WRBACK) | ||
746 | continue; | ||
747 | base = range_state[i].base_pfn; | ||
748 | size = range_state[i].size_pfn; | ||
749 | nr_range = add_range_with_merge(range, nr_range, base, | ||
750 | base + size - 1); | ||
751 | } | ||
752 | if (debug_print) { | ||
753 | printk(KERN_DEBUG "After WB checking\n"); | ||
754 | for (i = 0; i < nr_range; i++) | ||
755 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | ||
756 | range[i].start, range[i].end + 1); | ||
757 | } | ||
758 | |||
759 | /* take out UC ranges */ | ||
760 | for (i = 0; i < num_var_ranges; i++) { | ||
761 | type = range_state[i].type; | ||
762 | if (type != MTRR_TYPE_UNCACHABLE) | ||
763 | continue; | ||
764 | size = range_state[i].size_pfn; | ||
765 | if (!size) | ||
766 | continue; | ||
767 | base = range_state[i].base_pfn; | ||
768 | subtract_range(range, base, base + size - 1); | ||
769 | } | ||
770 | if (extra_remove_size) | ||
771 | subtract_range(range, extra_remove_base, | ||
772 | extra_remove_base + extra_remove_size - 1); | ||
773 | |||
774 | /* get new range num */ | ||
775 | nr_range = 0; | ||
776 | for (i = 0; i < RANGE_NUM; i++) { | ||
777 | if (!range[i].end) | ||
778 | continue; | ||
779 | nr_range++; | ||
780 | } | ||
781 | if (debug_print) { | ||
782 | printk(KERN_DEBUG "After UC checking\n"); | ||
783 | for (i = 0; i < nr_range; i++) | ||
784 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | ||
785 | range[i].start, range[i].end + 1); | ||
786 | } | ||
787 | |||
788 | /* sort the ranges */ | ||
789 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | ||
790 | if (debug_print) { | ||
791 | printk(KERN_DEBUG "After sorting\n"); | ||
792 | for (i = 0; i < nr_range; i++) | ||
793 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | ||
794 | range[i].start, range[i].end + 1); | ||
795 | } | ||
796 | |||
797 | /* clear those is not used */ | ||
798 | for (i = nr_range; i < RANGE_NUM; i++) | ||
799 | memset(&range[i], 0, sizeof(range[i])); | ||
800 | |||
801 | return nr_range; | ||
802 | } | ||
803 | |||
804 | static struct res_range __initdata range[RANGE_NUM]; | ||
805 | |||
806 | #ifdef CONFIG_MTRR_SANITIZER | ||
807 | |||
808 | static unsigned long __init sum_ranges(struct res_range *range, int nr_range) | ||
809 | { | ||
810 | unsigned long sum; | ||
811 | int i; | ||
812 | |||
813 | sum = 0; | ||
814 | for (i = 0; i < nr_range; i++) | ||
815 | sum += range[i].end + 1 - range[i].start; | ||
816 | |||
817 | return sum; | ||
818 | } | ||
819 | |||
820 | static int enable_mtrr_cleanup __initdata = | ||
821 | CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT; | ||
822 | |||
823 | static int __init disable_mtrr_cleanup_setup(char *str) | ||
824 | { | ||
825 | if (enable_mtrr_cleanup != -1) | ||
826 | enable_mtrr_cleanup = 0; | ||
827 | return 0; | ||
828 | } | ||
829 | early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); | ||
830 | |||
831 | static int __init enable_mtrr_cleanup_setup(char *str) | ||
832 | { | ||
833 | if (enable_mtrr_cleanup != -1) | ||
834 | enable_mtrr_cleanup = 1; | ||
835 | return 0; | ||
836 | } | ||
837 | early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); | ||
838 | |||
/*
 * Running state while laying out variable MTRRs: the pending range
 * (start and size), the chunk and granularity sizes to honour, and the
 * next register number to use.  The *k fields are shifted by 10 when
 * converted to or from byte values elsewhere in this file.
 */
struct var_mtrr_state {
	unsigned long range_startk, range_sizek;
	unsigned long chunk_sizek, gran_sizek;
	unsigned int reg;
};
846 | |||
847 | static void __init | ||
848 | set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, | ||
849 | unsigned char type, unsigned int address_bits) | ||
850 | { | ||
851 | u32 base_lo, base_hi, mask_lo, mask_hi; | ||
852 | u64 base, mask; | ||
853 | |||
854 | if (!sizek) { | ||
855 | fill_mtrr_var_range(reg, 0, 0, 0, 0); | ||
856 | return; | ||
857 | } | ||
858 | |||
859 | mask = (1ULL << address_bits) - 1; | ||
860 | mask &= ~((((u64)sizek) << 10) - 1); | ||
861 | |||
862 | base = ((u64)basek) << 10; | ||
863 | |||
864 | base |= type; | ||
865 | mask |= 0x800; | ||
866 | |||
867 | base_lo = base & ((1ULL<<32) - 1); | ||
868 | base_hi = base >> 32; | ||
869 | |||
870 | mask_lo = mask & ((1ULL<<32) - 1); | ||
871 | mask_hi = mask >> 32; | ||
872 | |||
873 | fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi); | ||
874 | } | ||
875 | |||
876 | static void __init | ||
877 | save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, | ||
878 | unsigned char type) | ||
879 | { | ||
880 | range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10); | ||
881 | range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); | ||
882 | range_state[reg].type = type; | ||
883 | } | ||
884 | |||
885 | static void __init | ||
886 | set_var_mtrr_all(unsigned int address_bits) | ||
887 | { | ||
888 | unsigned long basek, sizek; | ||
889 | unsigned char type; | ||
890 | unsigned int reg; | ||
891 | |||
892 | for (reg = 0; reg < num_var_ranges; reg++) { | ||
893 | basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10); | ||
894 | sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10); | ||
895 | type = range_state[reg].type; | ||
896 | |||
897 | set_var_mtrr(reg, basek, sizek, type, address_bits); | ||
898 | } | ||
899 | } | ||
900 | |||
901 | static unsigned int __init | ||
902 | range_to_mtrr(unsigned int reg, unsigned long range_startk, | ||
903 | unsigned long range_sizek, unsigned char type) | ||
904 | { | ||
905 | if (!range_sizek || (reg >= num_var_ranges)) | ||
906 | return reg; | ||
907 | |||
908 | while (range_sizek) { | ||
909 | unsigned long max_align, align; | ||
910 | unsigned long sizek; | ||
911 | |||
912 | /* Compute the maximum size I can make a range */ | ||
913 | if (range_startk) | ||
914 | max_align = ffs(range_startk) - 1; | ||
915 | else | ||
916 | max_align = 32; | ||
917 | align = fls(range_sizek) - 1; | ||
918 | if (align > max_align) | ||
919 | align = max_align; | ||
920 | |||
921 | sizek = 1 << align; | ||
922 | if (debug_print) | ||
923 | printk(KERN_DEBUG "Setting variable MTRR %d, " | ||
924 | "base: %ldMB, range: %ldMB, type %s\n", | ||
925 | reg, range_startk >> 10, sizek >> 10, | ||
926 | (type == MTRR_TYPE_UNCACHABLE)?"UC": | ||
927 | ((type == MTRR_TYPE_WRBACK)?"WB":"Other") | ||
928 | ); | ||
929 | save_var_mtrr(reg++, range_startk, sizek, type); | ||
930 | range_startk += sizek; | ||
931 | range_sizek -= sizek; | ||
932 | if (reg >= num_var_ranges) | ||
933 | break; | ||
934 | } | ||
935 | return reg; | ||
936 | } | ||
937 | |||
938 | static unsigned __init | ||
939 | range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, | ||
940 | unsigned long sizek) | ||
941 | { | ||
942 | unsigned long hole_basek, hole_sizek; | ||
943 | unsigned long second_basek, second_sizek; | ||
944 | unsigned long range0_basek, range0_sizek; | ||
945 | unsigned long range_basek, range_sizek; | ||
946 | unsigned long chunk_sizek; | ||
947 | unsigned long gran_sizek; | ||
948 | |||
949 | hole_basek = 0; | ||
950 | hole_sizek = 0; | ||
951 | second_basek = 0; | ||
952 | second_sizek = 0; | ||
953 | chunk_sizek = state->chunk_sizek; | ||
954 | gran_sizek = state->gran_sizek; | ||
955 | |||
956 | /* align with gran size, prevent small block used up MTRRs */ | ||
957 | range_basek = ALIGN(state->range_startk, gran_sizek); | ||
958 | if ((range_basek > basek) && basek) | ||
959 | return second_sizek; | ||
960 | state->range_sizek -= (range_basek - state->range_startk); | ||
961 | range_sizek = ALIGN(state->range_sizek, gran_sizek); | ||
962 | |||
963 | while (range_sizek > state->range_sizek) { | ||
964 | range_sizek -= gran_sizek; | ||
965 | if (!range_sizek) | ||
966 | return 0; | ||
967 | } | ||
968 | state->range_sizek = range_sizek; | ||
969 | |||
970 | /* try to append some small hole */ | ||
971 | range0_basek = state->range_startk; | ||
972 | range0_sizek = ALIGN(state->range_sizek, chunk_sizek); | ||
973 | if (range0_sizek == state->range_sizek) { | ||
974 | if (debug_print) | ||
975 | printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", | ||
976 | range0_basek<<10, | ||
977 | (range0_basek + state->range_sizek)<<10); | ||
978 | state->reg = range_to_mtrr(state->reg, range0_basek, | ||
979 | state->range_sizek, MTRR_TYPE_WRBACK); | ||
980 | return 0; | ||
981 | } | ||
982 | |||
983 | range0_sizek -= chunk_sizek; | ||
984 | if (range0_sizek && sizek) { | ||
985 | while (range0_basek + range0_sizek > (basek + sizek)) { | ||
986 | range0_sizek -= chunk_sizek; | ||
987 | if (!range0_sizek) | ||
988 | break; | ||
989 | } | ||
990 | } | ||
991 | |||
992 | if (range0_sizek) { | ||
993 | if (debug_print) | ||
994 | printk(KERN_DEBUG "range0: %016lx - %016lx\n", | ||
995 | range0_basek<<10, | ||
996 | (range0_basek + range0_sizek)<<10); | ||
997 | state->reg = range_to_mtrr(state->reg, range0_basek, | ||
998 | range0_sizek, MTRR_TYPE_WRBACK); | ||
999 | |||
1000 | } | ||
1001 | |||
1002 | range_basek = range0_basek + range0_sizek; | ||
1003 | range_sizek = chunk_sizek; | ||
1004 | |||
1005 | if (range_basek + range_sizek > basek && | ||
1006 | range_basek + range_sizek <= (basek + sizek)) { | ||
1007 | /* one hole */ | ||
1008 | second_basek = basek; | ||
1009 | second_sizek = range_basek + range_sizek - basek; | ||
1010 | } | ||
1011 | |||
1012 | /* if last piece, only could one hole near end */ | ||
1013 | if ((second_basek || !basek) && | ||
1014 | range_sizek - (state->range_sizek - range0_sizek) - second_sizek < | ||
1015 | (chunk_sizek >> 1)) { | ||
1016 | /* | ||
1017 | * one hole in middle (second_sizek is 0) or at end | ||
1018 | * (second_sizek is 0 ) | ||
1019 | */ | ||
1020 | hole_sizek = range_sizek - (state->range_sizek - range0_sizek) | ||
1021 | - second_sizek; | ||
1022 | hole_basek = range_basek + range_sizek - hole_sizek | ||
1023 | - second_sizek; | ||
1024 | } else { | ||
1025 | /* fallback for big hole, or several holes */ | ||
1026 | range_sizek = state->range_sizek - range0_sizek; | ||
1027 | second_basek = 0; | ||
1028 | second_sizek = 0; | ||
1029 | } | ||
1030 | |||
1031 | if (debug_print) | ||
1032 | printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10, | ||
1033 | (range_basek + range_sizek)<<10); | ||
1034 | state->reg = range_to_mtrr(state->reg, range_basek, range_sizek, | ||
1035 | MTRR_TYPE_WRBACK); | ||
1036 | if (hole_sizek) { | ||
1037 | if (debug_print) | ||
1038 | printk(KERN_DEBUG "hole: %016lx - %016lx\n", | ||
1039 | hole_basek<<10, (hole_basek + hole_sizek)<<10); | ||
1040 | state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek, | ||
1041 | MTRR_TYPE_UNCACHABLE); | ||
1042 | |||
1043 | } | ||
1044 | |||
1045 | return second_sizek; | ||
1046 | } | ||
1047 | |||
1048 | static void __init | ||
1049 | set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, | ||
1050 | unsigned long size_pfn) | ||
1051 | { | ||
1052 | unsigned long basek, sizek; | ||
1053 | unsigned long second_sizek = 0; | ||
1054 | |||
1055 | if (state->reg >= num_var_ranges) | ||
1056 | return; | ||
1057 | |||
1058 | basek = base_pfn << (PAGE_SHIFT - 10); | ||
1059 | sizek = size_pfn << (PAGE_SHIFT - 10); | ||
1060 | |||
1061 | /* See if I can merge with the last range */ | ||
1062 | if ((basek <= 1024) || | ||
1063 | (state->range_startk + state->range_sizek == basek)) { | ||
1064 | unsigned long endk = basek + sizek; | ||
1065 | state->range_sizek = endk - state->range_startk; | ||
1066 | return; | ||
1067 | } | ||
1068 | /* Write the range mtrrs */ | ||
1069 | if (state->range_sizek != 0) | ||
1070 | second_sizek = range_to_mtrr_with_hole(state, basek, sizek); | ||
1071 | |||
1072 | /* Allocate an msr */ | ||
1073 | state->range_startk = basek + second_sizek; | ||
1074 | state->range_sizek = sizek - second_sizek; | ||
1075 | } | ||
1076 | |||
1077 | /* minimum size of mtrr block that can take hole */ | ||
1078 | static u64 mtrr_chunk_size __initdata = (256ULL<<20); | ||
1079 | |||
1080 | static int __init parse_mtrr_chunk_size_opt(char *p) | ||
1081 | { | ||
1082 | if (!p) | ||
1083 | return -EINVAL; | ||
1084 | mtrr_chunk_size = memparse(p, &p); | ||
1085 | return 0; | ||
1086 | } | ||
1087 | early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); | ||
1088 | |||
1089 | /* granularity of mtrr block */ | ||
1090 | static u64 mtrr_gran_size __initdata; | ||
1091 | |||
1092 | static int __init parse_mtrr_gran_size_opt(char *p) | ||
1093 | { | ||
1094 | if (!p) | ||
1095 | return -EINVAL; | ||
1096 | mtrr_gran_size = memparse(p, &p); | ||
1097 | return 0; | ||
1098 | } | ||
1099 | early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); | ||
1100 | |||
1101 | static int nr_mtrr_spare_reg __initdata = | ||
1102 | CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; | ||
1103 | |||
1104 | static int __init parse_mtrr_spare_reg(char *arg) | ||
1105 | { | ||
1106 | if (arg) | ||
1107 | nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); | ||
1108 | return 0; | ||
1109 | } | ||
1110 | |||
1111 | early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); | ||
1112 | |||
1113 | static int __init | ||
1114 | x86_setup_var_mtrrs(struct res_range *range, int nr_range, | ||
1115 | u64 chunk_size, u64 gran_size) | ||
1116 | { | ||
1117 | struct var_mtrr_state var_state; | ||
1118 | int i; | ||
1119 | int num_reg; | ||
1120 | |||
1121 | var_state.range_startk = 0; | ||
1122 | var_state.range_sizek = 0; | ||
1123 | var_state.reg = 0; | ||
1124 | var_state.chunk_sizek = chunk_size >> 10; | ||
1125 | var_state.gran_sizek = gran_size >> 10; | ||
1126 | |||
1127 | memset(range_state, 0, sizeof(range_state)); | ||
1128 | |||
1129 | /* Write the range etc */ | ||
1130 | for (i = 0; i < nr_range; i++) | ||
1131 | set_var_mtrr_range(&var_state, range[i].start, | ||
1132 | range[i].end - range[i].start + 1); | ||
1133 | |||
1134 | /* Write the last range */ | ||
1135 | if (var_state.range_sizek != 0) | ||
1136 | range_to_mtrr_with_hole(&var_state, 0, 0); | ||
1137 | |||
1138 | num_reg = var_state.reg; | ||
1139 | /* Clear out the extra MTRR's */ | ||
1140 | while (var_state.reg < num_var_ranges) { | ||
1141 | save_var_mtrr(var_state.reg, 0, 0, 0); | ||
1142 | var_state.reg++; | ||
1143 | } | ||
1144 | |||
1145 | return num_reg; | ||
1146 | } | ||
1147 | |||
/*
 * Outcome of trying one gran_size/chunk_size combination: the sizes used,
 * how much coverage differs from the original layout, the number of
 * registers needed, and a flag marking the combination as unusable.
 */
struct mtrr_cleanup_result {
	unsigned long gran_sizek, chunk_sizek;
	unsigned long lose_cover_sizek;
	unsigned int num_reg;
	int bad;
};
1155 | |||
1156 | /* | ||
1157 | * gran_size: 1M, 2M, ..., 2G | ||
1158 | * chunk size: gran_size, ..., 4G | ||
1159 | * so we need (2+13)*6 | ||
1160 | */ | ||
1161 | #define NUM_RESULT 90 | ||
1162 | #define PSHIFT (PAGE_SHIFT - 10) | ||
1163 | |||
1164 | static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; | ||
1165 | static struct res_range __initdata range_new[RANGE_NUM]; | ||
1166 | static unsigned long __initdata min_loss_pfn[RANGE_NUM]; | ||
1167 | |||
1168 | static int __init mtrr_cleanup(unsigned address_bits) | ||
1169 | { | ||
1170 | unsigned long extra_remove_base, extra_remove_size; | ||
1171 | unsigned long i, base, size, def, dummy; | ||
1172 | mtrr_type type; | ||
1173 | int nr_range, nr_range_new; | ||
1174 | u64 chunk_size, gran_size; | ||
1175 | unsigned long range_sums, range_sums_new; | ||
1176 | int index_good; | ||
1177 | int num_reg_good; | ||
1178 | |||
1179 | /* extra one for all 0 */ | ||
1180 | int num[MTRR_NUM_TYPES + 1]; | ||
1181 | |||
1182 | if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) | ||
1183 | return 0; | ||
1184 | rdmsr(MTRRdefType_MSR, def, dummy); | ||
1185 | def &= 0xff; | ||
1186 | if (def != MTRR_TYPE_UNCACHABLE) | ||
1187 | return 0; | ||
1188 | |||
1189 | /* get it and store it aside */ | ||
1190 | memset(range_state, 0, sizeof(range_state)); | ||
1191 | for (i = 0; i < num_var_ranges; i++) { | ||
1192 | mtrr_if->get(i, &base, &size, &type); | ||
1193 | range_state[i].base_pfn = base; | ||
1194 | range_state[i].size_pfn = size; | ||
1195 | range_state[i].type = type; | ||
1196 | } | ||
1197 | |||
1198 | /* check entries number */ | ||
1199 | memset(num, 0, sizeof(num)); | ||
1200 | for (i = 0; i < num_var_ranges; i++) { | ||
1201 | type = range_state[i].type; | ||
1202 | size = range_state[i].size_pfn; | ||
1203 | if (type >= MTRR_NUM_TYPES) | ||
1204 | continue; | ||
1205 | if (!size) | ||
1206 | type = MTRR_NUM_TYPES; | ||
1207 | num[type]++; | ||
1208 | } | ||
1209 | |||
1210 | /* check if we got UC entries */ | ||
1211 | if (!num[MTRR_TYPE_UNCACHABLE]) | ||
1212 | return 0; | ||
1213 | |||
1214 | /* check if we only had WB and UC */ | ||
1215 | if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != | ||
1216 | num_var_ranges - num[MTRR_NUM_TYPES]) | ||
1217 | return 0; | ||
1218 | |||
1219 | memset(range, 0, sizeof(range)); | ||
1220 | extra_remove_size = 0; | ||
1221 | if (mtrr_tom2) { | ||
1222 | extra_remove_base = 1 << (32 - PAGE_SHIFT); | ||
1223 | extra_remove_size = | ||
1224 | (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; | ||
1225 | } | ||
1226 | nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, | ||
1227 | extra_remove_size); | ||
1228 | range_sums = sum_ranges(range, nr_range); | ||
1229 | printk(KERN_INFO "total RAM coverred: %ldM\n", | ||
1230 | range_sums >> (20 - PAGE_SHIFT)); | ||
1231 | |||
1232 | if (mtrr_chunk_size && mtrr_gran_size) { | ||
1233 | int num_reg; | ||
1234 | |||
1235 | debug_print = 1; | ||
1236 | /* convert ranges to var ranges state */ | ||
1237 | num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, | ||
1238 | mtrr_gran_size); | ||
1239 | |||
1240 | /* we got new setting in range_state, check it */ | ||
1241 | memset(range_new, 0, sizeof(range_new)); | ||
1242 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, | ||
1243 | extra_remove_base, | ||
1244 | extra_remove_size); | ||
1245 | range_sums_new = sum_ranges(range_new, nr_range_new); | ||
1246 | |||
1247 | i = 0; | ||
1248 | result[i].chunk_sizek = mtrr_chunk_size >> 10; | ||
1249 | result[i].gran_sizek = mtrr_gran_size >> 10; | ||
1250 | result[i].num_reg = num_reg; | ||
1251 | if (range_sums < range_sums_new) { | ||
1252 | result[i].lose_cover_sizek = | ||
1253 | (range_sums_new - range_sums) << PSHIFT; | ||
1254 | result[i].bad = 1; | ||
1255 | } else | ||
1256 | result[i].lose_cover_sizek = | ||
1257 | (range_sums - range_sums_new) << PSHIFT; | ||
1258 | |||
1259 | printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", | ||
1260 | result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10, | ||
1261 | result[i].chunk_sizek >> 10); | ||
1262 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", | ||
1263 | result[i].num_reg, result[i].bad?"-":"", | ||
1264 | result[i].lose_cover_sizek >> 10); | ||
1265 | if (!result[i].bad) { | ||
1266 | set_var_mtrr_all(address_bits); | ||
1267 | return 1; | ||
1268 | } | ||
1269 | printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " | ||
1270 | "will find optimal one\n"); | ||
1271 | debug_print = 0; | ||
1272 | memset(result, 0, sizeof(result[0])); | ||
1273 | } | ||
1274 | |||
1275 | i = 0; | ||
1276 | memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); | ||
1277 | memset(result, 0, sizeof(result)); | ||
1278 | for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { | ||
1279 | for (chunk_size = gran_size; chunk_size < (1ULL<<33); | ||
1280 | chunk_size <<= 1) { | ||
1281 | int num_reg; | ||
1282 | |||
1283 | if (debug_print) | ||
1284 | printk(KERN_INFO | ||
1285 | "\ngran_size: %lldM chunk_size_size: %lldM\n", | ||
1286 | gran_size >> 20, chunk_size >> 20); | ||
1287 | if (i >= NUM_RESULT) | ||
1288 | continue; | ||
1289 | |||
1290 | /* convert ranges to var ranges state */ | ||
1291 | num_reg = x86_setup_var_mtrrs(range, nr_range, | ||
1292 | chunk_size, gran_size); | ||
1293 | |||
1294 | /* we got new setting in range_state, check it */ | ||
1295 | memset(range_new, 0, sizeof(range_new)); | ||
1296 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, | ||
1297 | extra_remove_base, extra_remove_size); | ||
1298 | range_sums_new = sum_ranges(range_new, nr_range_new); | ||
1299 | |||
1300 | result[i].chunk_sizek = chunk_size >> 10; | ||
1301 | result[i].gran_sizek = gran_size >> 10; | ||
1302 | result[i].num_reg = num_reg; | ||
1303 | if (range_sums < range_sums_new) { | ||
1304 | result[i].lose_cover_sizek = | ||
1305 | (range_sums_new - range_sums) << PSHIFT; | ||
1306 | result[i].bad = 1; | ||
1307 | } else | ||
1308 | result[i].lose_cover_sizek = | ||
1309 | (range_sums - range_sums_new) << PSHIFT; | ||
1310 | |||
1311 | /* double check it */ | ||
1312 | if (!result[i].bad && !result[i].lose_cover_sizek) { | ||
1313 | if (nr_range_new != nr_range || | ||
1314 | memcmp(range, range_new, sizeof(range))) | ||
1315 | result[i].bad = 1; | ||
1316 | } | ||
1317 | |||
1318 | if (!result[i].bad && (range_sums - range_sums_new < | ||
1319 | min_loss_pfn[num_reg])) { | ||
1320 | min_loss_pfn[num_reg] = | ||
1321 | range_sums - range_sums_new; | ||
1322 | } | ||
1323 | i++; | ||
1324 | } | ||
1325 | } | ||
1326 | |||
1327 | /* print out all */ | ||
1328 | for (i = 0; i < NUM_RESULT; i++) { | ||
1329 | printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", | ||
1330 | result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10, | ||
1331 | result[i].chunk_sizek >> 10); | ||
1332 | printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n", | ||
1333 | result[i].num_reg, result[i].bad?"-":"", | ||
1334 | result[i].lose_cover_sizek >> 10); | ||
1335 | } | ||
1336 | |||
1337 | /* try to find the optimal index */ | ||
1338 | if (nr_mtrr_spare_reg >= num_var_ranges) | ||
1339 | nr_mtrr_spare_reg = num_var_ranges - 1; | ||
1340 | num_reg_good = -1; | ||
1341 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { | ||
1342 | if (!min_loss_pfn[i]) { | ||
1343 | num_reg_good = i; | ||
1344 | break; | ||
1345 | } | ||
1346 | } | ||
1347 | |||
1348 | index_good = -1; | ||
1349 | if (num_reg_good != -1) { | ||
1350 | for (i = 0; i < NUM_RESULT; i++) { | ||
1351 | if (!result[i].bad && | ||
1352 | result[i].num_reg == num_reg_good && | ||
1353 | !result[i].lose_cover_sizek) { | ||
1354 | index_good = i; | ||
1355 | break; | ||
1356 | } | ||
1357 | } | ||
1358 | } | ||
1359 | |||
1360 | if (index_good != -1) { | ||
1361 | printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); | ||
1362 | i = index_good; | ||
1363 | printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t", | ||
1364 | result[i].gran_sizek >> 10, | ||
1365 | result[i].chunk_sizek >> 10); | ||
1366 | printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n", | ||
1367 | result[i].num_reg, | ||
1368 | result[i].lose_cover_sizek >> 10); | ||
1369 | /* convert ranges to var ranges state */ | ||
1370 | chunk_size = result[i].chunk_sizek; | ||
1371 | chunk_size <<= 10; | ||
1372 | gran_size = result[i].gran_sizek; | ||
1373 | gran_size <<= 10; | ||
1374 | debug_print = 1; | ||
1375 | x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); | ||
1376 | set_var_mtrr_all(address_bits); | ||
1377 | return 1; | ||
1378 | } | ||
1379 | |||
1380 | printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); | ||
1381 | printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n"); | ||
1382 | |||
1383 | return 0; | ||
1384 | } | ||
1385 | #else | ||
1386 | static int __init mtrr_cleanup(unsigned address_bits) | ||
1387 | { | ||
1388 | return 0; | ||
1389 | } | ||
1390 | #endif | ||
1391 | |||
1392 | static int __initdata changed_by_mtrr_cleanup; | ||
1393 | |||
612 | static int disable_mtrr_trim; | 1394 | static int disable_mtrr_trim; |
613 | 1395 | ||
614 | static int __init disable_mtrr_trim_setup(char *str) | 1396 | static int __init disable_mtrr_trim_setup(char *str) |
@@ -648,6 +1430,19 @@ int __init amd_special_default_mtrr(void) | |||
648 | return 0; | 1430 | return 0; |
649 | } | 1431 | } |
650 | 1432 | ||
1433 | static u64 __init real_trim_memory(unsigned long start_pfn, | ||
1434 | unsigned long limit_pfn) | ||
1435 | { | ||
1436 | u64 trim_start, trim_size; | ||
1437 | trim_start = start_pfn; | ||
1438 | trim_start <<= PAGE_SHIFT; | ||
1439 | trim_size = limit_pfn; | ||
1440 | trim_size <<= PAGE_SHIFT; | ||
1441 | trim_size -= trim_start; | ||
1442 | |||
1443 | return e820_update_range(trim_start, trim_size, E820_RAM, | ||
1444 | E820_RESERVED); | ||
1445 | } | ||
651 | /** | 1446 | /** |
652 | * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs | 1447 | * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs |
653 | * @end_pfn: ending page frame number | 1448 | * @end_pfn: ending page frame number |
@@ -663,8 +1458,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
663 | { | 1458 | { |
664 | unsigned long i, base, size, highest_pfn = 0, def, dummy; | 1459 | unsigned long i, base, size, highest_pfn = 0, def, dummy; |
665 | mtrr_type type; | 1460 | mtrr_type type; |
666 | u64 trim_start, trim_size; | 1461 | int nr_range; |
1462 | u64 total_trim_size; | ||
667 | 1463 | ||
1464 | /* extra one for all 0 */ | ||
1465 | int num[MTRR_NUM_TYPES + 1]; | ||
668 | /* | 1466 | /* |
669 | * Make sure we only trim uncachable memory on machines that | 1467 | * Make sure we only trim uncachable memory on machines that |
670 | * support the Intel MTRR architecture: | 1468 | * support the Intel MTRR architecture: |
@@ -676,14 +1474,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
676 | if (def != MTRR_TYPE_UNCACHABLE) | 1474 | if (def != MTRR_TYPE_UNCACHABLE) |
677 | return 0; | 1475 | return 0; |
678 | 1476 | ||
679 | if (amd_special_default_mtrr()) | 1477 | /* get it and store it aside */ |
680 | return 0; | 1478 | memset(range_state, 0, sizeof(range_state)); |
1479 | for (i = 0; i < num_var_ranges; i++) { | ||
1480 | mtrr_if->get(i, &base, &size, &type); | ||
1481 | range_state[i].base_pfn = base; | ||
1482 | range_state[i].size_pfn = size; | ||
1483 | range_state[i].type = type; | ||
1484 | } | ||
681 | 1485 | ||
682 | /* Find highest cached pfn */ | 1486 | /* Find highest cached pfn */ |
683 | for (i = 0; i < num_var_ranges; i++) { | 1487 | for (i = 0; i < num_var_ranges; i++) { |
684 | mtrr_if->get(i, &base, &size, &type); | 1488 | type = range_state[i].type; |
685 | if (type != MTRR_TYPE_WRBACK) | 1489 | if (type != MTRR_TYPE_WRBACK) |
686 | continue; | 1490 | continue; |
1491 | base = range_state[i].base_pfn; | ||
1492 | size = range_state[i].size_pfn; | ||
687 | if (highest_pfn < base + size) | 1493 | if (highest_pfn < base + size) |
688 | highest_pfn = base + size; | 1494 | highest_pfn = base + size; |
689 | } | 1495 | } |
@@ -698,22 +1504,65 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
698 | return 0; | 1504 | return 0; |
699 | } | 1505 | } |
700 | 1506 | ||
701 | if (highest_pfn < end_pfn) { | 1507 | /* check entries number */ |
1508 | memset(num, 0, sizeof(num)); | ||
1509 | for (i = 0; i < num_var_ranges; i++) { | ||
1510 | type = range_state[i].type; | ||
1511 | if (type >= MTRR_NUM_TYPES) | ||
1512 | continue; | ||
1513 | size = range_state[i].size_pfn; | ||
1514 | if (!size) | ||
1515 | type = MTRR_NUM_TYPES; | ||
1516 | num[type]++; | ||
1517 | } | ||
1518 | |||
1519 | /* no entry for WB? */ | ||
1520 | if (!num[MTRR_TYPE_WRBACK]) | ||
1521 | return 0; | ||
1522 | |||
1523 | /* check if we only had WB and UC */ | ||
1524 | if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != | ||
1525 | num_var_ranges - num[MTRR_NUM_TYPES]) | ||
1526 | return 0; | ||
1527 | |||
1528 | memset(range, 0, sizeof(range)); | ||
1529 | nr_range = 0; | ||
1530 | if (mtrr_tom2) { | ||
1531 | range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); | ||
1532 | range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; | ||
1533 | if (highest_pfn < range[nr_range].end + 1) | ||
1534 | highest_pfn = range[nr_range].end + 1; | ||
1535 | nr_range++; | ||
1536 | } | ||
1537 | nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); | ||
1538 | |||
1539 | total_trim_size = 0; | ||
1540 | /* check the head */ | ||
1541 | if (range[0].start) | ||
1542 | total_trim_size += real_trim_memory(0, range[0].start); | ||
1543 | /* check the holes */ | ||
1544 | for (i = 0; i < nr_range - 1; i++) { | ||
1545 | if (range[i].end + 1 < range[i+1].start) | ||
1546 | total_trim_size += real_trim_memory(range[i].end + 1, | ||
1547 | range[i+1].start); | ||
1548 | } | ||
1549 | /* check the top */ | ||
1550 | i = nr_range - 1; | ||
1551 | if (range[i].end + 1 < end_pfn) | ||
1552 | total_trim_size += real_trim_memory(range[i].end + 1, | ||
1553 | end_pfn); | ||
1554 | |||
1555 | if (total_trim_size) { | ||
702 | printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" | 1556 | printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" |
703 | " all of memory, losing %luMB of RAM.\n", | 1557 | " all of memory, losing %lluMB of RAM.\n", |
704 | (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT)); | 1558 | total_trim_size >> 20); |
705 | 1559 | ||
706 | WARN_ON(1); | 1560 | if (!changed_by_mtrr_cleanup) |
1561 | WARN_ON(1); | ||
707 | 1562 | ||
708 | printk(KERN_INFO "update e820 for mtrr\n"); | 1563 | printk(KERN_INFO "update e820 for mtrr\n"); |
709 | trim_start = highest_pfn; | ||
710 | trim_start <<= PAGE_SHIFT; | ||
711 | trim_size = end_pfn; | ||
712 | trim_size <<= PAGE_SHIFT; | ||
713 | trim_size -= trim_start; | ||
714 | update_memory_range(trim_start, trim_size, E820_RAM, | ||
715 | E820_RESERVED); | ||
716 | update_e820(); | 1564 | update_e820(); |
1565 | |||
717 | return 1; | 1566 | return 1; |
718 | } | 1567 | } |
719 | 1568 | ||
@@ -729,18 +1578,21 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
729 | */ | 1578 | */ |
730 | void __init mtrr_bp_init(void) | 1579 | void __init mtrr_bp_init(void) |
731 | { | 1580 | { |
1581 | u32 phys_addr; | ||
732 | init_ifs(); | 1582 | init_ifs(); |
733 | 1583 | ||
1584 | phys_addr = 32; | ||
1585 | |||
734 | if (cpu_has_mtrr) { | 1586 | if (cpu_has_mtrr) { |
735 | mtrr_if = &generic_mtrr_ops; | 1587 | mtrr_if = &generic_mtrr_ops; |
736 | size_or_mask = 0xff000000; /* 36 bits */ | 1588 | size_or_mask = 0xff000000; /* 36 bits */ |
737 | size_and_mask = 0x00f00000; | 1589 | size_and_mask = 0x00f00000; |
1590 | phys_addr = 36; | ||
738 | 1591 | ||
739 | /* This is an AMD specific MSR, but we assume(hope?) that | 1592 | /* This is an AMD specific MSR, but we assume(hope?) that |
740 | Intel will implement it to when they extend the address | 1593 | Intel will implement it to when they extend the address |
741 | bus of the Xeon. */ | 1594 | bus of the Xeon. */ |
742 | if (cpuid_eax(0x80000000) >= 0x80000008) { | 1595 | if (cpuid_eax(0x80000000) >= 0x80000008) { |
743 | u32 phys_addr; | ||
744 | phys_addr = cpuid_eax(0x80000008) & 0xff; | 1596 | phys_addr = cpuid_eax(0x80000008) & 0xff; |
745 | /* CPUID workaround for Intel 0F33/0F34 CPU */ | 1597 | /* CPUID workaround for Intel 0F33/0F34 CPU */ |
746 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && | 1598 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && |
@@ -758,6 +1610,7 @@ void __init mtrr_bp_init(void) | |||
758 | don't support PAE */ | 1610 | don't support PAE */ |
759 | size_or_mask = 0xfff00000; /* 32 bits */ | 1611 | size_or_mask = 0xfff00000; /* 32 bits */ |
760 | size_and_mask = 0; | 1612 | size_and_mask = 0; |
1613 | phys_addr = 32; | ||
761 | } | 1614 | } |
762 | } else { | 1615 | } else { |
763 | switch (boot_cpu_data.x86_vendor) { | 1616 | switch (boot_cpu_data.x86_vendor) { |
@@ -791,8 +1644,15 @@ void __init mtrr_bp_init(void) | |||
791 | if (mtrr_if) { | 1644 | if (mtrr_if) { |
792 | set_num_var_ranges(); | 1645 | set_num_var_ranges(); |
793 | init_table(); | 1646 | init_table(); |
794 | if (use_intel()) | 1647 | if (use_intel()) { |
795 | get_mtrr_state(); | 1648 | get_mtrr_state(); |
1649 | |||
1650 | if (mtrr_cleanup(phys_addr)) { | ||
1651 | changed_by_mtrr_cleanup = 1; | ||
1652 | mtrr_if->set_all(); | ||
1653 | } | ||
1654 | |||
1655 | } | ||
796 | } | 1656 | } |
797 | } | 1657 | } |
798 | 1658 | ||
@@ -829,9 +1689,10 @@ static int __init mtrr_init_finialize(void) | |||
829 | { | 1689 | { |
830 | if (!mtrr_if) | 1690 | if (!mtrr_if) |
831 | return 0; | 1691 | return 0; |
832 | if (use_intel()) | 1692 | if (use_intel()) { |
833 | mtrr_state_warn(); | 1693 | if (!changed_by_mtrr_cleanup) |
834 | else { | 1694 | mtrr_state_warn(); |
1695 | } else { | ||
835 | /* The CPUs haven't MTRR and seem to not support SMP. They have | 1696 | /* The CPUs haven't MTRR and seem to not support SMP. They have |
836 | * specific drivers, we use a tricky method to support | 1697 | * specific drivers, we use a tricky method to support |
837 | * suspend/resume for them. | 1698 | * suspend/resume for them. |
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 2cc77eb6fea3..2dc4ec656b23 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h | |||
@@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_context *ctxt); | |||
81 | void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); | 81 | void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); |
82 | void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); | 82 | void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); |
83 | 83 | ||
84 | void fill_mtrr_var_range(unsigned int index, | ||
85 | u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); | ||
84 | void get_mtrr_state(void); | 86 | void get_mtrr_state(void); |
85 | 87 | ||
86 | extern void set_mtrr_ops(struct mtrr_ops * ops); | 88 | extern void set_mtrr_ops(struct mtrr_ops * ops); |
@@ -92,6 +94,7 @@ extern struct mtrr_ops * mtrr_if; | |||
92 | #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) | 94 | #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) |
93 | 95 | ||
94 | extern unsigned int num_var_ranges; | 96 | extern unsigned int num_var_ranges; |
97 | extern u64 mtrr_tom2; | ||
95 | 98 | ||
96 | void mtrr_state_warn(void); | 99 | void mtrr_state_warn(void); |
97 | const char *mtrr_attrib_to_str(int x); | 100 | const char *mtrr_attrib_to_str(int x); |
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820.c index 124480c0008d..7b613d2efb04 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820.c | |||
@@ -17,171 +17,30 @@ | |||
17 | #include <linux/kexec.h> | 17 | #include <linux/kexec.h> |
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | #include <linux/mm.h> | 19 | #include <linux/mm.h> |
20 | #include <linux/suspend.h> | ||
21 | #include <linux/pfn.h> | 20 | #include <linux/pfn.h> |
21 | #include <linux/suspend.h> | ||
22 | 22 | ||
23 | #include <asm/pgtable.h> | 23 | #include <asm/pgtable.h> |
24 | #include <asm/page.h> | 24 | #include <asm/page.h> |
25 | #include <asm/e820.h> | 25 | #include <asm/e820.h> |
26 | #include <asm/proto.h> | 26 | #include <asm/proto.h> |
27 | #include <asm/setup.h> | 27 | #include <asm/setup.h> |
28 | #include <asm/sections.h> | ||
29 | #include <asm/kdebug.h> | ||
30 | #include <asm/trampoline.h> | 28 | #include <asm/trampoline.h> |
31 | 29 | ||
32 | struct e820map e820; | 30 | struct e820map e820; |
33 | 31 | ||
34 | /* | 32 | /* For PCI or other memory-mapped resources */ |
35 | * PFN of last memory page. | 33 | unsigned long pci_mem_start = 0xaeedbabe; |
36 | */ | 34 | #ifdef CONFIG_PCI |
37 | unsigned long end_pfn; | 35 | EXPORT_SYMBOL(pci_mem_start); |
38 | |||
39 | /* | ||
40 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | ||
41 | * The direct mapping extends to max_pfn_mapped, so that we can directly access | ||
42 | * apertures, ACPI and other tables without having to play with fixmaps. | ||
43 | */ | ||
44 | unsigned long max_pfn_mapped; | ||
45 | |||
46 | /* | ||
47 | * Last pfn which the user wants to use. | ||
48 | */ | ||
49 | static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; | ||
50 | |||
51 | /* | ||
52 | * Early reserved memory areas. | ||
53 | */ | ||
54 | #define MAX_EARLY_RES 20 | ||
55 | |||
/*
 * One early reservation: an address range plus a short label.  An entry
 * whose end is 0 is what the table walkers treat as the end of the list.
 */
struct early_res {
	unsigned long start;
	unsigned long end;
	char name[16];
};
60 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { | ||
61 | { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ | ||
62 | #ifdef CONFIG_X86_TRAMPOLINE | ||
63 | { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, | ||
64 | #endif | 36 | #endif |
65 | {} | ||
66 | }; | ||
67 | |||
68 | void __init reserve_early(unsigned long start, unsigned long end, char *name) | ||
69 | { | ||
70 | int i; | ||
71 | struct early_res *r; | ||
72 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
73 | r = &early_res[i]; | ||
74 | if (end > r->start && start < r->end) | ||
75 | panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n", | ||
76 | start, end - 1, name?name:"", r->start, r->end - 1, r->name); | ||
77 | } | ||
78 | if (i >= MAX_EARLY_RES) | ||
79 | panic("Too many early reservations"); | ||
80 | r = &early_res[i]; | ||
81 | r->start = start; | ||
82 | r->end = end; | ||
83 | if (name) | ||
84 | strncpy(r->name, name, sizeof(r->name) - 1); | ||
85 | } | ||
86 | |||
87 | void __init free_early(unsigned long start, unsigned long end) | ||
88 | { | ||
89 | struct early_res *r; | ||
90 | int i, j; | ||
91 | |||
92 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
93 | r = &early_res[i]; | ||
94 | if (start == r->start && end == r->end) | ||
95 | break; | ||
96 | } | ||
97 | if (i >= MAX_EARLY_RES || !early_res[i].end) | ||
98 | panic("free_early on not reserved area: %lx-%lx!", start, end); | ||
99 | 37 | ||
100 | for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) | ||
101 | ; | ||
102 | |||
103 | memmove(&early_res[i], &early_res[i + 1], | ||
104 | (j - 1 - i) * sizeof(struct early_res)); | ||
105 | |||
106 | early_res[j - 1].end = 0; | ||
107 | } | ||
108 | |||
109 | void __init early_res_to_bootmem(unsigned long start, unsigned long end) | ||
110 | { | ||
111 | int i; | ||
112 | unsigned long final_start, final_end; | ||
113 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
114 | struct early_res *r = &early_res[i]; | ||
115 | final_start = max(start, r->start); | ||
116 | final_end = min(end, r->end); | ||
117 | if (final_start >= final_end) | ||
118 | continue; | ||
119 | printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i, | ||
120 | final_start, final_end - 1, r->name); | ||
121 | reserve_bootmem_generic(final_start, final_end - final_start); | ||
122 | } | ||
123 | } | ||
124 | |||
125 | /* Check for already reserved areas */ | ||
126 | static inline int __init | ||
127 | bad_addr(unsigned long *addrp, unsigned long size, unsigned long align) | ||
128 | { | ||
129 | int i; | ||
130 | unsigned long addr = *addrp, last; | ||
131 | int changed = 0; | ||
132 | again: | ||
133 | last = addr + size; | ||
134 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
135 | struct early_res *r = &early_res[i]; | ||
136 | if (last >= r->start && addr < r->end) { | ||
137 | *addrp = addr = round_up(r->end, align); | ||
138 | changed = 1; | ||
139 | goto again; | ||
140 | } | ||
141 | } | ||
142 | return changed; | ||
143 | } | ||
144 | |||
145 | /* Check for already reserved areas */ | ||
146 | static inline int __init | ||
147 | bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align) | ||
148 | { | ||
149 | int i; | ||
150 | unsigned long addr = *addrp, last; | ||
151 | unsigned long size = *sizep; | ||
152 | int changed = 0; | ||
153 | again: | ||
154 | last = addr + size; | ||
155 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
156 | struct early_res *r = &early_res[i]; | ||
157 | if (last > r->start && addr < r->start) { | ||
158 | size = r->start - addr; | ||
159 | changed = 1; | ||
160 | goto again; | ||
161 | } | ||
162 | if (last > r->end && addr < r->end) { | ||
163 | addr = round_up(r->end, align); | ||
164 | size = last - addr; | ||
165 | changed = 1; | ||
166 | goto again; | ||
167 | } | ||
168 | if (last <= r->end && addr >= r->start) { | ||
169 | (*sizep)++; | ||
170 | return 0; | ||
171 | } | ||
172 | } | ||
173 | if (changed) { | ||
174 | *addrp = addr; | ||
175 | *sizep = size; | ||
176 | } | ||
177 | return changed; | ||
178 | } | ||
179 | /* | 38 | /* |
180 | * This function checks if any part of the range <start,end> is mapped | 39 | * This function checks if any part of the range <start,end> is mapped |
181 | * with type. | 40 | * with type. |
182 | */ | 41 | */ |
183 | int | 42 | int |
184 | e820_any_mapped(unsigned long start, unsigned long end, unsigned type) | 43 | e820_any_mapped(u64 start, u64 end, unsigned type) |
185 | { | 44 | { |
186 | int i; | 45 | int i; |
187 | 46 | ||
@@ -204,8 +63,7 @@ EXPORT_SYMBOL_GPL(e820_any_mapped); | |||
204 | * Note: this function only works correct if the e820 table is sorted and | 63 | * Note: this function only works correct if the e820 table is sorted and |
205 | * not-overlapping, which is the case | 64 | * not-overlapping, which is the case |
206 | */ | 65 | */ |
207 | int __init e820_all_mapped(unsigned long start, unsigned long end, | 66 | int __init e820_all_mapped(u64 start, u64 end, unsigned type) |
208 | unsigned type) | ||
209 | { | 67 | { |
210 | int i; | 68 | int i; |
211 | 69 | ||
@@ -234,214 +92,13 @@ int __init e820_all_mapped(unsigned long start, unsigned long end, | |||
234 | } | 92 | } |
235 | 93 | ||
236 | /* | 94 | /* |
237 | * Find a free area with specified alignment in a specific range. | ||
238 | */ | ||
239 | unsigned long __init find_e820_area(unsigned long start, unsigned long end, | ||
240 | unsigned long size, unsigned long align) | ||
241 | { | ||
242 | int i; | ||
243 | |||
244 | for (i = 0; i < e820.nr_map; i++) { | ||
245 | struct e820entry *ei = &e820.map[i]; | ||
246 | unsigned long addr, last; | ||
247 | unsigned long ei_last; | ||
248 | |||
249 | if (ei->type != E820_RAM) | ||
250 | continue; | ||
251 | addr = round_up(ei->addr, align); | ||
252 | ei_last = ei->addr + ei->size; | ||
253 | if (addr < start) | ||
254 | addr = round_up(start, align); | ||
255 | if (addr >= ei_last) | ||
256 | continue; | ||
257 | while (bad_addr(&addr, size, align) && addr+size <= ei_last) | ||
258 | ; | ||
259 | last = addr + size; | ||
260 | if (last > ei_last) | ||
261 | continue; | ||
262 | if (last > end) | ||
263 | continue; | ||
264 | return addr; | ||
265 | } | ||
266 | return -1UL; | ||
267 | } | ||
268 | |||
269 | /* | ||
270 | * Find next free range after *start | ||
271 | */ | ||
272 | unsigned long __init find_e820_area_size(unsigned long start, | ||
273 | unsigned long *sizep, | ||
274 | unsigned long align) | ||
275 | { | ||
276 | int i; | ||
277 | |||
278 | for (i = 0; i < e820.nr_map; i++) { | ||
279 | struct e820entry *ei = &e820.map[i]; | ||
280 | unsigned long addr, last; | ||
281 | unsigned long ei_last; | ||
282 | |||
283 | if (ei->type != E820_RAM) | ||
284 | continue; | ||
285 | addr = round_up(ei->addr, align); | ||
286 | ei_last = ei->addr + ei->size; | ||
287 | if (addr < start) | ||
288 | addr = round_up(start, align); | ||
289 | if (addr >= ei_last) | ||
290 | continue; | ||
291 | *sizep = ei_last - addr; | ||
292 | while (bad_addr_size(&addr, sizep, align) && | ||
293 | addr + *sizep <= ei_last) | ||
294 | ; | ||
295 | last = addr + *sizep; | ||
296 | if (last > ei_last) | ||
297 | continue; | ||
298 | return addr; | ||
299 | } | ||
300 | return -1UL; | ||
301 | |||
302 | } | ||
303 | /* | ||
304 | * Find the highest page frame number we have available | ||
305 | */ | ||
306 | unsigned long __init e820_end_of_ram(void) | ||
307 | { | ||
308 | unsigned long end_pfn; | ||
309 | |||
310 | end_pfn = find_max_pfn_with_active_regions(); | ||
311 | |||
312 | if (end_pfn > max_pfn_mapped) | ||
313 | max_pfn_mapped = end_pfn; | ||
314 | if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT) | ||
315 | max_pfn_mapped = MAXMEM>>PAGE_SHIFT; | ||
316 | if (end_pfn > end_user_pfn) | ||
317 | end_pfn = end_user_pfn; | ||
318 | if (end_pfn > max_pfn_mapped) | ||
319 | end_pfn = max_pfn_mapped; | ||
320 | |||
321 | printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped); | ||
322 | return end_pfn; | ||
323 | } | ||
324 | |||
325 | /* | ||
326 | * Mark e820 reserved areas as busy for the resource manager. | ||
327 | */ | ||
328 | void __init e820_reserve_resources(void) | ||
329 | { | ||
330 | int i; | ||
331 | struct resource *res; | ||
332 | |||
333 | res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); | ||
334 | for (i = 0; i < e820.nr_map; i++) { | ||
335 | switch (e820.map[i].type) { | ||
336 | case E820_RAM: res->name = "System RAM"; break; | ||
337 | case E820_ACPI: res->name = "ACPI Tables"; break; | ||
338 | case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; | ||
339 | default: res->name = "reserved"; | ||
340 | } | ||
341 | res->start = e820.map[i].addr; | ||
342 | res->end = res->start + e820.map[i].size - 1; | ||
343 | res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; | ||
344 | insert_resource(&iomem_resource, res); | ||
345 | res++; | ||
346 | } | ||
347 | } | ||
348 | |||
349 | /* | ||
350 | * Find the ranges of physical addresses that do not correspond to | ||
351 | * e820 RAM areas and mark the corresponding pages as nosave for software | ||
352 | * suspend and suspend to RAM. | ||
353 | * | ||
354 | * This function requires the e820 map to be sorted and without any | ||
355 | * overlapping entries and assumes the first e820 area to be RAM. | ||
356 | */ | ||
357 | void __init e820_mark_nosave_regions(void) | ||
358 | { | ||
359 | int i; | ||
360 | unsigned long paddr; | ||
361 | |||
362 | paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE); | ||
363 | for (i = 1; i < e820.nr_map; i++) { | ||
364 | struct e820entry *ei = &e820.map[i]; | ||
365 | |||
366 | if (paddr < ei->addr) | ||
367 | register_nosave_region(PFN_DOWN(paddr), | ||
368 | PFN_UP(ei->addr)); | ||
369 | |||
370 | paddr = round_down(ei->addr + ei->size, PAGE_SIZE); | ||
371 | if (ei->type != E820_RAM) | ||
372 | register_nosave_region(PFN_UP(ei->addr), | ||
373 | PFN_DOWN(paddr)); | ||
374 | |||
375 | if (paddr >= (end_pfn << PAGE_SHIFT)) | ||
376 | break; | ||
377 | } | ||
378 | } | ||
379 | |||
380 | /* | ||
381 | * Finds an active region in the address range from start_pfn to end_pfn and | ||
382 | * returns its range in ei_startpfn and ei_endpfn for the e820 entry. | ||
383 | */ | ||
384 | static int __init e820_find_active_region(const struct e820entry *ei, | ||
385 | unsigned long start_pfn, | ||
386 | unsigned long end_pfn, | ||
387 | unsigned long *ei_startpfn, | ||
388 | unsigned long *ei_endpfn) | ||
389 | { | ||
390 | *ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT; | ||
391 | *ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT; | ||
392 | |||
393 | /* Skip map entries smaller than a page */ | ||
394 | if (*ei_startpfn >= *ei_endpfn) | ||
395 | return 0; | ||
396 | |||
397 | /* Check if max_pfn_mapped should be updated */ | ||
398 | if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped) | ||
399 | max_pfn_mapped = *ei_endpfn; | ||
400 | |||
401 | /* Skip if map is outside the node */ | ||
402 | if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || | ||
403 | *ei_startpfn >= end_pfn) | ||
404 | return 0; | ||
405 | |||
406 | /* Check for overlaps */ | ||
407 | if (*ei_startpfn < start_pfn) | ||
408 | *ei_startpfn = start_pfn; | ||
409 | if (*ei_endpfn > end_pfn) | ||
410 | *ei_endpfn = end_pfn; | ||
411 | |||
412 | /* Obey end_user_pfn to save on memmap */ | ||
413 | if (*ei_startpfn >= end_user_pfn) | ||
414 | return 0; | ||
415 | if (*ei_endpfn > end_user_pfn) | ||
416 | *ei_endpfn = end_user_pfn; | ||
417 | |||
418 | return 1; | ||
419 | } | ||
420 | |||
421 | /* Walk the e820 map and register active regions within a node */ | ||
422 | void __init | ||
423 | e820_register_active_regions(int nid, unsigned long start_pfn, | ||
424 | unsigned long end_pfn) | ||
425 | { | ||
426 | unsigned long ei_startpfn; | ||
427 | unsigned long ei_endpfn; | ||
428 | int i; | ||
429 | |||
430 | for (i = 0; i < e820.nr_map; i++) | ||
431 | if (e820_find_active_region(&e820.map[i], | ||
432 | start_pfn, end_pfn, | ||
433 | &ei_startpfn, &ei_endpfn)) | ||
434 | add_active_range(nid, ei_startpfn, ei_endpfn); | ||
435 | } | ||
436 | |||
437 | /* | ||
438 | * Add a memory region to the kernel e820 map. | 95 | * Add a memory region to the kernel e820 map. |
439 | */ | 96 | */ |
440 | void __init add_memory_region(unsigned long start, unsigned long size, int type) | 97 | void __init e820_add_region(u64 start, u64 size, int type) |
441 | { | 98 | { |
442 | int x = e820.nr_map; | 99 | int x = e820.nr_map; |
443 | 100 | ||
444 | if (x == E820MAX) { | 101 | if (x == ARRAY_SIZE(e820.map)) { |
445 | printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); | 102 | printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); |
446 | return; | 103 | return; |
447 | } | 104 | } |
@@ -452,28 +109,7 @@ void __init add_memory_region(unsigned long start, unsigned long size, int type) | |||
452 | e820.nr_map++; | 109 | e820.nr_map++; |
453 | } | 110 | } |
454 | 111 | ||
455 | /* | 112 | void __init e820_print_map(char *who) |
456 | * Find the hole size (in bytes) in the memory range. | ||
457 | * @start: starting address of the memory range to scan | ||
458 | * @end: ending address of the memory range to scan | ||
459 | */ | ||
460 | unsigned long __init e820_hole_size(unsigned long start, unsigned long end) | ||
461 | { | ||
462 | unsigned long start_pfn = start >> PAGE_SHIFT; | ||
463 | unsigned long end_pfn = end >> PAGE_SHIFT; | ||
464 | unsigned long ei_startpfn, ei_endpfn, ram = 0; | ||
465 | int i; | ||
466 | |||
467 | for (i = 0; i < e820.nr_map; i++) { | ||
468 | if (e820_find_active_region(&e820.map[i], | ||
469 | start_pfn, end_pfn, | ||
470 | &ei_startpfn, &ei_endpfn)) | ||
471 | ram += ei_endpfn - ei_startpfn; | ||
472 | } | ||
473 | return end - start - (ram << PAGE_SHIFT); | ||
474 | } | ||
475 | |||
476 | static void __init e820_print_map(char *who) | ||
477 | { | 113 | { |
478 | int i; | 114 | int i; |
479 | 115 | ||
@@ -506,19 +142,75 @@ static void __init e820_print_map(char *who) | |||
506 | * Sanitize the BIOS e820 map. | 142 | * Sanitize the BIOS e820 map. |
507 | * | 143 | * |
508 | * Some e820 responses include overlapping entries. The following | 144 | * Some e820 responses include overlapping entries. The following |
509 | * replaces the original e820 map with a new one, removing overlaps. | 145 | * replaces the original e820 map with a new one, removing overlaps, |
146 | * and resolving conflicting memory types in favor of highest | ||
147 | * numbered type. | ||
510 | * | 148 | * |
149 | * The input parameter biosmap points to an array of 'struct | ||
150 | * e820entry' which on entry has elements in the range [0, *pnr_map) | ||
151 | * valid, and which has space for up to max_nr_map entries. | ||
152 | * On return, the resulting sanitized e820 map entries will be | ||
153 | * overwritten in the same location, starting at biosmap. | ||
154 | * | ||
155 | * The integer pointed to by pnr_map must be valid on entry (the | ||
156 | * current number of valid entries located at biosmap) and will | ||
157 | * be updated on return, with the new number of valid entries | ||
158 | * (something no more than max_nr_map.) | ||
159 | * | ||
160 | * The return value from sanitize_e820_map() is zero if it | ||
161 | * successfully 'sanitized' the map entries passed in, and is -1 | ||
162 | * if it did nothing, which can happen if either of (1) it was | ||
163 | * only passed one map entry, or (2) any of the input map entries | ||
164 | * were invalid (start + size < start, meaning that the size was | ||
165 | * so big the described memory range wrapped around through zero.) | ||
166 | * | ||
167 | * Visually we're performing the following | ||
168 | * (1,2,3,4 = memory types)... | ||
169 | * | ||
170 | * Sample memory map (w/overlaps): | ||
171 | * ____22__________________ | ||
172 | * ______________________4_ | ||
173 | * ____1111________________ | ||
174 | * _44_____________________ | ||
175 | * 11111111________________ | ||
176 | * ____________________33__ | ||
177 | * ___________44___________ | ||
178 | * __________33333_________ | ||
179 | * ______________22________ | ||
180 | * ___________________2222_ | ||
181 | * _________111111111______ | ||
182 | * _____________________11_ | ||
183 | * _________________4______ | ||
184 | * | ||
185 | * Sanitized equivalent (no overlap): | ||
186 | * 1_______________________ | ||
187 | * _44_____________________ | ||
188 | * ___1____________________ | ||
189 | * ____22__________________ | ||
190 | * ______11________________ | ||
191 | * _________1______________ | ||
192 | * __________3_____________ | ||
193 | * ___________44___________ | ||
194 | * _____________33_________ | ||
195 | * _______________2________ | ||
196 | * ________________1_______ | ||
197 | * _________________4______ | ||
198 | * ___________________2____ | ||
199 | * ____________________33__ | ||
200 | * ______________________4_ | ||
511 | */ | 201 | */ |
512 | static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) | 202 | |
203 | int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, | ||
204 | int *pnr_map) | ||
513 | { | 205 | { |
514 | struct change_member { | 206 | struct change_member { |
515 | struct e820entry *pbios; /* pointer to original bios entry */ | 207 | struct e820entry *pbios; /* pointer to original bios entry */ |
516 | unsigned long long addr; /* address for this change point */ | 208 | unsigned long long addr; /* address for this change point */ |
517 | }; | 209 | }; |
518 | static struct change_member change_point_list[2*E820MAX] __initdata; | 210 | static struct change_member change_point_list[2*E820_X_MAX] __initdata; |
519 | static struct change_member *change_point[2*E820MAX] __initdata; | 211 | static struct change_member *change_point[2*E820_X_MAX] __initdata; |
520 | static struct e820entry *overlap_list[E820MAX] __initdata; | 212 | static struct e820entry *overlap_list[E820_X_MAX] __initdata; |
521 | static struct e820entry new_bios[E820MAX] __initdata; | 213 | static struct e820entry new_bios[E820_X_MAX] __initdata; |
522 | struct change_member *change_tmp; | 214 | struct change_member *change_tmp; |
523 | unsigned long current_type, last_type; | 215 | unsigned long current_type, last_type; |
524 | unsigned long long last_addr; | 216 | unsigned long long last_addr; |
@@ -528,48 +220,12 @@ static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) | |||
528 | int old_nr, new_nr, chg_nr; | 220 | int old_nr, new_nr, chg_nr; |
529 | int i; | 221 | int i; |
530 | 222 | ||
531 | /* | ||
532 | Visually we're performing the following | ||
533 | (1,2,3,4 = memory types)... | ||
534 | |||
535 | Sample memory map (w/overlaps): | ||
536 | ____22__________________ | ||
537 | ______________________4_ | ||
538 | ____1111________________ | ||
539 | _44_____________________ | ||
540 | 11111111________________ | ||
541 | ____________________33__ | ||
542 | ___________44___________ | ||
543 | __________33333_________ | ||
544 | ______________22________ | ||
545 | ___________________2222_ | ||
546 | _________111111111______ | ||
547 | _____________________11_ | ||
548 | _________________4______ | ||
549 | |||
550 | Sanitized equivalent (no overlap): | ||
551 | 1_______________________ | ||
552 | _44_____________________ | ||
553 | ___1____________________ | ||
554 | ____22__________________ | ||
555 | ______11________________ | ||
556 | _________1______________ | ||
557 | __________3_____________ | ||
558 | ___________44___________ | ||
559 | _____________33_________ | ||
560 | _______________2________ | ||
561 | ________________1_______ | ||
562 | _________________4______ | ||
563 | ___________________2____ | ||
564 | ____________________33__ | ||
565 | ______________________4_ | ||
566 | */ | ||
567 | |||
568 | /* if there's only one memory region, don't bother */ | 223 | /* if there's only one memory region, don't bother */ |
569 | if (*pnr_map < 2) | 224 | if (*pnr_map < 2) |
570 | return -1; | 225 | return -1; |
571 | 226 | ||
572 | old_nr = *pnr_map; | 227 | old_nr = *pnr_map; |
228 | BUG_ON(old_nr > max_nr_map); | ||
573 | 229 | ||
574 | /* bail out if we find any unreasonable addresses in bios map */ | 230 | /* bail out if we find any unreasonable addresses in bios map */ |
575 | for (i = 0; i < old_nr; i++) | 231 | for (i = 0; i < old_nr; i++) |
@@ -681,7 +337,7 @@ static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) | |||
681 | * no more space left for new | 337 | * no more space left for new |
682 | * bios entries ? | 338 | * bios entries ? |
683 | */ | 339 | */ |
684 | if (++new_bios_entry >= E820MAX) | 340 | if (++new_bios_entry >= max_nr_map) |
685 | break; | 341 | break; |
686 | } | 342 | } |
687 | if (current_type != 0) { | 343 | if (current_type != 0) { |
@@ -703,22 +359,9 @@ static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) | |||
703 | return 0; | 359 | return 0; |
704 | } | 360 | } |
705 | 361 | ||
706 | /* | 362 | static int __init __copy_e820_map(struct e820entry *biosmap, int nr_map) |
707 | * Copy the BIOS e820 map into a safe place. | ||
708 | * | ||
709 | * Sanity-check it while we're at it.. | ||
710 | * | ||
711 | * If we're lucky and live on a modern system, the setup code | ||
712 | * will have given us a memory map that we can use to properly | ||
713 | * set up memory. If we aren't, we'll fake a memory map. | ||
714 | */ | ||
715 | static int __init copy_e820_map(struct e820entry *biosmap, int nr_map) | ||
716 | { | 363 | { |
717 | /* Only one memory region (or negative)? Ignore it */ | 364 | while (nr_map) { |
718 | if (nr_map < 2) | ||
719 | return -1; | ||
720 | |||
721 | do { | ||
722 | u64 start = biosmap->addr; | 365 | u64 start = biosmap->addr; |
723 | u64 size = biosmap->size; | 366 | u64 size = biosmap->size; |
724 | u64 end = start + size; | 367 | u64 end = start + size; |
@@ -728,111 +371,37 @@ static int __init copy_e820_map(struct e820entry *biosmap, int nr_map) | |||
728 | if (start > end) | 371 | if (start > end) |
729 | return -1; | 372 | return -1; |
730 | 373 | ||
731 | add_memory_region(start, size, type); | 374 | e820_add_region(start, size, type); |
732 | } while (biosmap++, --nr_map); | ||
733 | return 0; | ||
734 | } | ||
735 | |||
736 | static void early_panic(char *msg) | ||
737 | { | ||
738 | early_printk(msg); | ||
739 | panic(msg); | ||
740 | } | ||
741 | |||
742 | /* We're not void only for x86 32-bit compat */ | ||
743 | char * __init machine_specific_memory_setup(void) | ||
744 | { | ||
745 | char *who = "BIOS-e820"; | ||
746 | /* | ||
747 | * Try to copy the BIOS-supplied E820-map. | ||
748 | * | ||
749 | * Otherwise fake a memory map; one section from 0k->640k, | ||
750 | * the next section from 1mb->appropriate_mem_k | ||
751 | */ | ||
752 | sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); | ||
753 | if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) | ||
754 | early_panic("Cannot find a valid memory map"); | ||
755 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | ||
756 | e820_print_map(who); | ||
757 | |||
758 | /* In case someone cares... */ | ||
759 | return who; | ||
760 | } | ||
761 | |||
762 | static int __init parse_memopt(char *p) | ||
763 | { | ||
764 | if (!p) | ||
765 | return -EINVAL; | ||
766 | end_user_pfn = memparse(p, &p); | ||
767 | end_user_pfn >>= PAGE_SHIFT; | ||
768 | return 0; | ||
769 | } | ||
770 | early_param("mem", parse_memopt); | ||
771 | |||
772 | static int userdef __initdata; | ||
773 | |||
774 | static int __init parse_memmap_opt(char *p) | ||
775 | { | ||
776 | char *oldp; | ||
777 | unsigned long long start_at, mem_size; | ||
778 | |||
779 | if (!strcmp(p, "exactmap")) { | ||
780 | #ifdef CONFIG_CRASH_DUMP | ||
781 | /* | ||
782 | * If we are doing a crash dump, we still need to know | ||
783 | * the real mem size before original memory map is | ||
784 | * reset. | ||
785 | */ | ||
786 | e820_register_active_regions(0, 0, -1UL); | ||
787 | saved_max_pfn = e820_end_of_ram(); | ||
788 | remove_all_active_ranges(); | ||
789 | #endif | ||
790 | max_pfn_mapped = 0; | ||
791 | e820.nr_map = 0; | ||
792 | userdef = 1; | ||
793 | return 0; | ||
794 | } | ||
795 | |||
796 | oldp = p; | ||
797 | mem_size = memparse(p, &p); | ||
798 | if (p == oldp) | ||
799 | return -EINVAL; | ||
800 | 375 | ||
801 | userdef = 1; | 376 | biosmap++; |
802 | if (*p == '@') { | 377 | nr_map--; |
803 | start_at = memparse(p+1, &p); | ||
804 | add_memory_region(start_at, mem_size, E820_RAM); | ||
805 | } else if (*p == '#') { | ||
806 | start_at = memparse(p+1, &p); | ||
807 | add_memory_region(start_at, mem_size, E820_ACPI); | ||
808 | } else if (*p == '$') { | ||
809 | start_at = memparse(p+1, &p); | ||
810 | add_memory_region(start_at, mem_size, E820_RESERVED); | ||
811 | } else { | ||
812 | end_user_pfn = (mem_size >> PAGE_SHIFT); | ||
813 | } | 378 | } |
814 | return *p == '\0' ? 0 : -EINVAL; | 379 | return 0; |
815 | } | 380 | } |
816 | early_param("memmap", parse_memmap_opt); | ||
817 | 381 | ||
818 | void __init finish_e820_parsing(void) | 382 | /* |
383 | * Copy the BIOS e820 map into a safe place. | ||
384 | * | ||
385 | * Sanity-check it while we're at it.. | ||
386 | * | ||
387 | * If we're lucky and live on a modern system, the setup code | ||
388 | * will have given us a memory map that we can use to properly | ||
389 | * set up memory. If we aren't, we'll fake a memory map. | ||
390 | */ | ||
391 | int __init copy_e820_map(struct e820entry *biosmap, int nr_map) | ||
819 | { | 392 | { |
820 | if (userdef) { | 393 | /* Only one memory region (or negative)? Ignore it */ |
821 | char nr = e820.nr_map; | 394 | if (nr_map < 2) |
822 | 395 | return -1; | |
823 | if (sanitize_e820_map(e820.map, &nr) < 0) | ||
824 | early_panic("Invalid user supplied memory map"); | ||
825 | e820.nr_map = nr; | ||
826 | 396 | ||
827 | printk(KERN_INFO "user-defined physical RAM map:\n"); | 397 | return __copy_e820_map(biosmap, nr_map); |
828 | e820_print_map("user"); | ||
829 | } | ||
830 | } | 398 | } |
831 | 399 | ||
832 | void __init update_memory_range(u64 start, u64 size, unsigned old_type, | 400 | u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, |
833 | unsigned new_type) | 401 | unsigned new_type) |
834 | { | 402 | { |
835 | int i; | 403 | int i; |
404 | u64 real_updated_size = 0; | ||
836 | 405 | ||
837 | BUG_ON(old_type == new_type); | 406 | BUG_ON(old_type == new_type); |
838 | 407 | ||
@@ -842,8 +411,10 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type, | |||
842 | if (ei->type != old_type) | 411 | if (ei->type != old_type) |
843 | continue; | 412 | continue; |
844 | /* totally covered? */ | 413 | /* totally covered? */ |
845 | if (ei->addr >= start && ei->size <= size) { | 414 | if (ei->addr >= start && |
415 | (ei->addr + ei->size) <= (start + size)) { | ||
846 | ei->type = new_type; | 416 | ei->type = new_type; |
417 | real_updated_size += ei->size; | ||
847 | continue; | 418 | continue; |
848 | } | 419 | } |
849 | /* partially covered */ | 420 | /* partially covered */ |
@@ -851,26 +422,25 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type, | |||
851 | final_end = min(start + size, ei->addr + ei->size); | 422 | final_end = min(start + size, ei->addr + ei->size); |
852 | if (final_start >= final_end) | 423 | if (final_start >= final_end) |
853 | continue; | 424 | continue; |
854 | add_memory_region(final_start, final_end - final_start, | 425 | e820_add_region(final_start, final_end - final_start, |
855 | new_type); | 426 | new_type); |
427 | real_updated_size += final_end - final_start; | ||
856 | } | 428 | } |
429 | return real_updated_size; | ||
857 | } | 430 | } |
858 | 431 | ||
859 | void __init update_e820(void) | 432 | void __init update_e820(void) |
860 | { | 433 | { |
861 | u8 nr_map; | 434 | int nr_map; |
862 | 435 | ||
863 | nr_map = e820.nr_map; | 436 | nr_map = e820.nr_map; |
864 | if (sanitize_e820_map(e820.map, &nr_map)) | 437 | if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) |
865 | return; | 438 | return; |
866 | e820.nr_map = nr_map; | 439 | e820.nr_map = nr_map; |
867 | printk(KERN_INFO "modified physical RAM map:\n"); | 440 | printk(KERN_INFO "modified physical RAM map:\n"); |
868 | e820_print_map("modified"); | 441 | e820_print_map("modified"); |
869 | } | 442 | } |
870 | 443 | ||
871 | unsigned long pci_mem_start = 0xaeedbabe; | ||
872 | EXPORT_SYMBOL(pci_mem_start); | ||
873 | |||
874 | /* | 444 | /* |
875 | * Search for the biggest gap in the low 32 bits of the e820 | 445 | * Search for the biggest gap in the low 32 bits of the e820 |
876 | * memory space. We pass this space to PCI to assign MMIO resources | 446 | * memory space. We pass this space to PCI to assign MMIO resources |
@@ -880,7 +450,7 @@ EXPORT_SYMBOL(pci_mem_start); | |||
880 | __init void e820_setup_gap(void) | 450 | __init void e820_setup_gap(void) |
881 | { | 451 | { |
882 | unsigned long gapstart, gapsize, round; | 452 | unsigned long gapstart, gapsize, round; |
883 | unsigned long last; | 453 | unsigned long long last; |
884 | int i; | 454 | int i; |
885 | int found = 0; | 455 | int found = 0; |
886 | 456 | ||
@@ -909,6 +479,7 @@ __init void e820_setup_gap(void) | |||
909 | last = start; | 479 | last = start; |
910 | } | 480 | } |
911 | 481 | ||
482 | #ifdef CONFIG_X86_64 | ||
912 | if (!found) { | 483 | if (!found) { |
913 | gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; | 484 | gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; |
914 | printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " | 485 | printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " |
@@ -916,6 +487,7 @@ __init void e820_setup_gap(void) | |||
916 | KERN_ERR "PCI: Unassigned devices with 32bit resource " | 487 | KERN_ERR "PCI: Unassigned devices with 32bit resource " |
917 | "registers may break!\n"); | 488 | "registers may break!\n"); |
918 | } | 489 | } |
490 | #endif | ||
919 | 491 | ||
920 | /* | 492 | /* |
921 | * See how much we want to round up: start off with | 493 | * See how much we want to round up: start off with |
@@ -932,6 +504,586 @@ __init void e820_setup_gap(void) | |||
932 | pci_mem_start, gapstart, gapsize); | 504 | pci_mem_start, gapstart, gapsize); |
933 | } | 505 | } |
934 | 506 | ||
507 | /** | ||
508 | * Because of the size limitation of struct boot_params, only first | ||
509 | * 128 E820 memory entries are passed to kernel via | ||
510 | * boot_params.e820_map, others are passed via SETUP_E820_EXT node of | ||
511 | * linked list of struct setup_data, which is parsed here. | ||
512 | */ | ||
513 | void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data) | ||
514 | { | ||
515 | u32 map_len; | ||
516 | int entries; | ||
517 | struct e820entry *extmap; | ||
518 | |||
519 | entries = sdata->len / sizeof(struct e820entry); | ||
520 | map_len = sdata->len + sizeof(struct setup_data); | ||
521 | if (map_len > PAGE_SIZE) | ||
522 | sdata = early_ioremap(pa_data, map_len); | ||
523 | extmap = (struct e820entry *)(sdata->data); | ||
524 | __copy_e820_map(extmap, entries); | ||
525 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | ||
526 | if (map_len > PAGE_SIZE) | ||
527 | early_iounmap(sdata, map_len); | ||
528 | printk(KERN_INFO "extended physical RAM map:\n"); | ||
529 | e820_print_map("extended"); | ||
530 | } | ||
531 | |||
#if defined(CONFIG_X86_64) || \
	(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
/*
 * Register as "nosave" every page frame range that is not e820 RAM, so
 * that hibernation (32 bit) or software suspend / suspend to RAM
 * (64 bit) leaves those pages alone.  Both the holes between
 * consecutive entries and the non-RAM entries themselves are
 * registered.
 *
 * The e820 map must be sorted and free of overlaps, and its first
 * entry is assumed to be RAM.  The walk stops once an entry ends at or
 * beyond @limit_pfn.
 */
void __init e820_mark_nosave_regions(unsigned long limit_pfn)
{
	unsigned long prev_end;
	int idx;

	/* Entry 0 is taken to be RAM: only remember where it ends. */
	prev_end = PFN_DOWN(e820.map[0].addr + e820.map[0].size);

	for (idx = 1; idx < e820.nr_map; idx++) {
		struct e820entry *entry = &e820.map[idx];

		/* Hole between the previous entry and this one. */
		if (prev_end < PFN_UP(entry->addr))
			register_nosave_region(prev_end, PFN_UP(entry->addr));

		prev_end = PFN_DOWN(entry->addr + entry->size);

		/* The entry itself, unless it is usable RAM. */
		if (entry->type != E820_RAM)
			register_nosave_region(PFN_UP(entry->addr), prev_end);

		if (prev_end >= limit_pfn)
			break;
	}
}
#endif
563 | |||
564 | /* | ||
565 | * Early reserved memory areas. | ||
566 | */ | ||
567 | #define MAX_EARLY_RES 20 | ||
568 | |||
569 | struct early_res { | ||
570 | u64 start, end; | ||
571 | char name[16]; | ||
572 | }; | ||
573 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { | ||
574 | { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ | ||
575 | #if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE) | ||
576 | { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, | ||
577 | #endif | ||
578 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) | ||
579 | /* | ||
580 | * But first pinch a few for the stack/trampoline stuff | ||
581 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
582 | * trampoline before removing it. (see the GDT stuff) | ||
583 | */ | ||
584 | { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" }, | ||
585 | /* | ||
586 | * Has to be in very low memory so we can execute | ||
587 | * real-mode AP code. | ||
588 | */ | ||
589 | { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" }, | ||
590 | #endif | ||
591 | {} | ||
592 | }; | ||
593 | |||
594 | static int __init find_overlapped_early(u64 start, u64 end) | ||
595 | { | ||
596 | int i; | ||
597 | struct early_res *r; | ||
598 | |||
599 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
600 | r = &early_res[i]; | ||
601 | if (end > r->start && start < r->end) | ||
602 | break; | ||
603 | } | ||
604 | |||
605 | return i; | ||
606 | } | ||
607 | |||
608 | void __init reserve_early(u64 start, u64 end, char *name) | ||
609 | { | ||
610 | int i; | ||
611 | struct early_res *r; | ||
612 | |||
613 | i = find_overlapped_early(start, end); | ||
614 | if (i >= MAX_EARLY_RES) | ||
615 | panic("Too many early reservations"); | ||
616 | r = &early_res[i]; | ||
617 | if (r->end) | ||
618 | panic("Overlapping early reservations " | ||
619 | "%llx-%llx %s to %llx-%llx %s\n", | ||
620 | start, end - 1, name?name:"", r->start, | ||
621 | r->end - 1, r->name); | ||
622 | r->start = start; | ||
623 | r->end = end; | ||
624 | if (name) | ||
625 | strncpy(r->name, name, sizeof(r->name) - 1); | ||
626 | } | ||
627 | |||
628 | void __init free_early(u64 start, u64 end) | ||
629 | { | ||
630 | struct early_res *r; | ||
631 | int i, j; | ||
632 | |||
633 | i = find_overlapped_early(start, end); | ||
634 | r = &early_res[i]; | ||
635 | if (i >= MAX_EARLY_RES || r->end != end || r->start != start) | ||
636 | panic("free_early on not reserved area: %llx-%llx!", | ||
637 | start, end - 1); | ||
638 | |||
639 | for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) | ||
640 | ; | ||
641 | |||
642 | memmove(&early_res[i], &early_res[i + 1], | ||
643 | (j - 1 - i) * sizeof(struct early_res)); | ||
644 | |||
645 | early_res[j - 1].end = 0; | ||
646 | } | ||
647 | |||
648 | void __init early_res_to_bootmem(u64 start, u64 end) | ||
649 | { | ||
650 | int i; | ||
651 | u64 final_start, final_end; | ||
652 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
653 | struct early_res *r = &early_res[i]; | ||
654 | final_start = max(start, r->start); | ||
655 | final_end = min(end, r->end); | ||
656 | if (final_start >= final_end) | ||
657 | continue; | ||
658 | printk(KERN_INFO " early res: %d [%llx-%llx] %s\n", i, | ||
659 | final_start, final_end - 1, r->name); | ||
660 | reserve_bootmem_generic(final_start, final_end - final_start, | ||
661 | BOOTMEM_DEFAULT); | ||
662 | } | ||
663 | } | ||
664 | |||
665 | /* Check for already reserved areas */ | ||
666 | static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) | ||
667 | { | ||
668 | int i; | ||
669 | u64 addr = *addrp; | ||
670 | int changed = 0; | ||
671 | struct early_res *r; | ||
672 | again: | ||
673 | i = find_overlapped_early(addr, addr + size); | ||
674 | r = &early_res[i]; | ||
675 | if (i < MAX_EARLY_RES && r->end) { | ||
676 | *addrp = addr = round_up(r->end, align); | ||
677 | changed = 1; | ||
678 | goto again; | ||
679 | } | ||
680 | return changed; | ||
681 | } | ||
682 | |||
683 | /* Check for already reserved areas */ | ||
684 | static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) | ||
685 | { | ||
686 | int i; | ||
687 | u64 addr = *addrp, last; | ||
688 | u64 size = *sizep; | ||
689 | int changed = 0; | ||
690 | again: | ||
691 | last = addr + size; | ||
692 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
693 | struct early_res *r = &early_res[i]; | ||
694 | if (last > r->start && addr < r->start) { | ||
695 | size = r->start - addr; | ||
696 | changed = 1; | ||
697 | goto again; | ||
698 | } | ||
699 | if (last > r->end && addr < r->end) { | ||
700 | addr = round_up(r->end, align); | ||
701 | size = last - addr; | ||
702 | changed = 1; | ||
703 | goto again; | ||
704 | } | ||
705 | if (last <= r->end && addr >= r->start) { | ||
706 | (*sizep)++; | ||
707 | return 0; | ||
708 | } | ||
709 | } | ||
710 | if (changed) { | ||
711 | *addrp = addr; | ||
712 | *sizep = size; | ||
713 | } | ||
714 | return changed; | ||
715 | } | ||
716 | |||
717 | /* | ||
718 | * Find a free area with specified alignment in a specific range. | ||
719 | */ | ||
720 | u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) | ||
721 | { | ||
722 | int i; | ||
723 | |||
724 | for (i = 0; i < e820.nr_map; i++) { | ||
725 | struct e820entry *ei = &e820.map[i]; | ||
726 | u64 addr, last; | ||
727 | u64 ei_last; | ||
728 | |||
729 | if (ei->type != E820_RAM) | ||
730 | continue; | ||
731 | addr = round_up(ei->addr, align); | ||
732 | ei_last = ei->addr + ei->size; | ||
733 | if (addr < start) | ||
734 | addr = round_up(start, align); | ||
735 | if (addr >= ei_last) | ||
736 | continue; | ||
737 | while (bad_addr(&addr, size, align) && addr+size <= ei_last) | ||
738 | ; | ||
739 | last = addr + size; | ||
740 | if (last > ei_last) | ||
741 | continue; | ||
742 | if (last > end) | ||
743 | continue; | ||
744 | return addr; | ||
745 | } | ||
746 | return -1ULL; | ||
747 | } | ||
748 | |||
749 | /* | ||
750 | * Find next free range after *start | ||
751 | */ | ||
752 | u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) | ||
753 | { | ||
754 | int i; | ||
755 | |||
756 | for (i = 0; i < e820.nr_map; i++) { | ||
757 | struct e820entry *ei = &e820.map[i]; | ||
758 | u64 addr, last; | ||
759 | u64 ei_last; | ||
760 | |||
761 | if (ei->type != E820_RAM) | ||
762 | continue; | ||
763 | addr = round_up(ei->addr, align); | ||
764 | ei_last = ei->addr + ei->size; | ||
765 | if (addr < start) | ||
766 | addr = round_up(start, align); | ||
767 | if (addr >= ei_last) | ||
768 | continue; | ||
769 | *sizep = ei_last - addr; | ||
770 | while (bad_addr_size(&addr, sizep, align) && | ||
771 | addr + *sizep <= ei_last) | ||
772 | ; | ||
773 | last = addr + *sizep; | ||
774 | if (last > ei_last) | ||
775 | continue; | ||
776 | return addr; | ||
777 | } | ||
778 | return -1UL; | ||
779 | |||
780 | } | ||
781 | |||
782 | /* | ||
783 | * pre allocated 4k and reserved it in e820 | ||
784 | */ | ||
785 | u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) | ||
786 | { | ||
787 | u64 size = 0; | ||
788 | u64 addr; | ||
789 | u64 start; | ||
790 | |||
791 | start = startt; | ||
792 | while (size < sizet) | ||
793 | start = find_e820_area_size(start, &size, align); | ||
794 | |||
795 | if (size < sizet) | ||
796 | return 0; | ||
797 | |||
798 | addr = round_down(start + size - sizet, align); | ||
799 | e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); | ||
800 | printk(KERN_INFO "update e820 for early_reserve_e820\n"); | ||
801 | update_e820(); | ||
802 | |||
803 | return addr; | ||
804 | } | ||
805 | |||
806 | #ifdef CONFIG_X86_32 | ||
807 | # ifdef CONFIG_X86_PAE | ||
808 | # define MAX_ARCH_PFN (1ULL<<(36-PAGE_SHIFT)) | ||
809 | # else | ||
810 | # define MAX_ARCH_PFN (1ULL<<(32-PAGE_SHIFT)) | ||
811 | # endif | ||
812 | #else /* CONFIG_X86_32 */ | ||
813 | # define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT | ||
814 | #endif | ||
815 | |||
816 | /* | ||
817 | * Last pfn which the user wants to use. | ||
818 | */ | ||
819 | unsigned long __initdata end_user_pfn = MAX_ARCH_PFN; | ||
820 | |||
821 | /* | ||
822 | * Find the highest page frame number we have available | ||
823 | */ | ||
824 | unsigned long __init e820_end_of_ram(void) | ||
825 | { | ||
826 | unsigned long last_pfn; | ||
827 | unsigned long max_arch_pfn = MAX_ARCH_PFN; | ||
828 | |||
829 | last_pfn = find_max_pfn_with_active_regions(); | ||
830 | |||
831 | if (last_pfn > max_arch_pfn) | ||
832 | last_pfn = max_arch_pfn; | ||
833 | if (last_pfn > end_user_pfn) | ||
834 | last_pfn = end_user_pfn; | ||
835 | |||
836 | printk(KERN_INFO "last_pfn = %lu max_arch_pfn = %lu\n", | ||
837 | last_pfn, max_arch_pfn); | ||
838 | return last_pfn; | ||
839 | } | ||
840 | |||
841 | /* | ||
842 | * Finds an active region in the address range from start_pfn to last_pfn and | ||
843 | * returns its range in ei_startpfn and ei_endpfn for the e820 entry. | ||
844 | */ | ||
845 | int __init e820_find_active_region(const struct e820entry *ei, | ||
846 | unsigned long start_pfn, | ||
847 | unsigned long last_pfn, | ||
848 | unsigned long *ei_startpfn, | ||
849 | unsigned long *ei_endpfn) | ||
850 | { | ||
851 | u64 align = PAGE_SIZE; | ||
852 | |||
853 | *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT; | ||
854 | *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT; | ||
855 | |||
856 | /* Skip map entries smaller than a page */ | ||
857 | if (*ei_startpfn >= *ei_endpfn) | ||
858 | return 0; | ||
859 | |||
860 | /* Skip if map is outside the node */ | ||
861 | if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || | ||
862 | *ei_startpfn >= last_pfn) | ||
863 | return 0; | ||
864 | |||
865 | /* Check for overlaps */ | ||
866 | if (*ei_startpfn < start_pfn) | ||
867 | *ei_startpfn = start_pfn; | ||
868 | if (*ei_endpfn > last_pfn) | ||
869 | *ei_endpfn = last_pfn; | ||
870 | |||
871 | /* Obey end_user_pfn to save on memmap */ | ||
872 | if (*ei_startpfn >= end_user_pfn) | ||
873 | return 0; | ||
874 | if (*ei_endpfn > end_user_pfn) | ||
875 | *ei_endpfn = end_user_pfn; | ||
876 | |||
877 | return 1; | ||
878 | } | ||
879 | |||
880 | /* Walk the e820 map and register active regions within a node */ | ||
881 | void __init e820_register_active_regions(int nid, unsigned long start_pfn, | ||
882 | unsigned long last_pfn) | ||
883 | { | ||
884 | unsigned long ei_startpfn; | ||
885 | unsigned long ei_endpfn; | ||
886 | int i; | ||
887 | |||
888 | for (i = 0; i < e820.nr_map; i++) | ||
889 | if (e820_find_active_region(&e820.map[i], | ||
890 | start_pfn, last_pfn, | ||
891 | &ei_startpfn, &ei_endpfn)) | ||
892 | add_active_range(nid, ei_startpfn, ei_endpfn); | ||
893 | } | ||
894 | |||
895 | /* | ||
896 | * Find the hole size (in bytes) in the memory range. | ||
897 | * @start: starting address of the memory range to scan | ||
898 | * @end: ending address of the memory range to scan | ||
899 | */ | ||
900 | u64 __init e820_hole_size(u64 start, u64 end) | ||
901 | { | ||
902 | unsigned long start_pfn = start >> PAGE_SHIFT; | ||
903 | unsigned long last_pfn = end >> PAGE_SHIFT; | ||
904 | unsigned long ei_startpfn, ei_endpfn, ram = 0; | ||
905 | int i; | ||
906 | |||
907 | for (i = 0; i < e820.nr_map; i++) { | ||
908 | if (e820_find_active_region(&e820.map[i], | ||
909 | start_pfn, last_pfn, | ||
910 | &ei_startpfn, &ei_endpfn)) | ||
911 | ram += ei_endpfn - ei_startpfn; | ||
912 | } | ||
913 | return end - start - ((u64)ram << PAGE_SHIFT); | ||
914 | } | ||
915 | |||
/*
 * Print @msg through early_printk() and then panic with it.
 *
 * @msg is passed as data to a "%s" format rather than used as the
 * format itself, so any '%' characters in it are printed literally.
 */
static void early_panic(char *msg)
{
	early_printk("%s", msg);
	panic("%s", msg);
}
921 | |||
922 | /* "mem=nopentium" disables the 4MB page tables. */ | ||
923 | static int __init parse_memopt(char *p) | ||
924 | { | ||
925 | u64 mem_size; | ||
926 | |||
927 | if (!p) | ||
928 | return -EINVAL; | ||
929 | |||
930 | #ifdef CONFIG_X86_32 | ||
931 | if (!strcmp(p, "nopentium")) { | ||
932 | setup_clear_cpu_cap(X86_FEATURE_PSE); | ||
933 | return 0; | ||
934 | } | ||
935 | #endif | ||
936 | |||
937 | mem_size = memparse(p, &p); | ||
938 | end_user_pfn = mem_size>>PAGE_SHIFT; | ||
939 | return 0; | ||
940 | } | ||
941 | early_param("mem", parse_memopt); | ||
942 | |||
943 | static int userdef __initdata; | ||
944 | |||
945 | static int __init parse_memmap_opt(char *p) | ||
946 | { | ||
947 | char *oldp; | ||
948 | u64 start_at, mem_size; | ||
949 | |||
950 | if (!strcmp(p, "exactmap")) { | ||
951 | #ifdef CONFIG_CRASH_DUMP | ||
952 | /* | ||
953 | * If we are doing a crash dump, we still need to know | ||
954 | * the real mem size before original memory map is | ||
955 | * reset. | ||
956 | */ | ||
957 | e820_register_active_regions(0, 0, -1UL); | ||
958 | saved_max_pfn = e820_end_of_ram(); | ||
959 | remove_all_active_ranges(); | ||
960 | #endif | ||
961 | e820.nr_map = 0; | ||
962 | userdef = 1; | ||
963 | return 0; | ||
964 | } | ||
965 | |||
966 | oldp = p; | ||
967 | mem_size = memparse(p, &p); | ||
968 | if (p == oldp) | ||
969 | return -EINVAL; | ||
970 | |||
971 | userdef = 1; | ||
972 | if (*p == '@') { | ||
973 | start_at = memparse(p+1, &p); | ||
974 | e820_add_region(start_at, mem_size, E820_RAM); | ||
975 | } else if (*p == '#') { | ||
976 | start_at = memparse(p+1, &p); | ||
977 | e820_add_region(start_at, mem_size, E820_ACPI); | ||
978 | } else if (*p == '$') { | ||
979 | start_at = memparse(p+1, &p); | ||
980 | e820_add_region(start_at, mem_size, E820_RESERVED); | ||
981 | } else { | ||
982 | end_user_pfn = (mem_size >> PAGE_SHIFT); | ||
983 | } | ||
984 | return *p == '\0' ? 0 : -EINVAL; | ||
985 | } | ||
986 | early_param("memmap", parse_memmap_opt); | ||
987 | |||
988 | void __init finish_e820_parsing(void) | ||
989 | { | ||
990 | if (userdef) { | ||
991 | int nr = e820.nr_map; | ||
992 | |||
993 | if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) | ||
994 | early_panic("Invalid user supplied memory map"); | ||
995 | e820.nr_map = nr; | ||
996 | |||
997 | printk(KERN_INFO "user-defined physical RAM map:\n"); | ||
998 | e820_print_map("user"); | ||
999 | } | ||
1000 | } | ||
1001 | |||
1002 | /* | ||
1003 | * Mark e820 reserved areas as busy for the resource manager. | ||
1004 | */ | ||
1005 | void __init e820_reserve_resources(void) | ||
1006 | { | ||
1007 | int i; | ||
1008 | struct resource *res; | ||
1009 | |||
1010 | res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); | ||
1011 | for (i = 0; i < e820.nr_map; i++) { | ||
1012 | switch (e820.map[i].type) { | ||
1013 | case E820_RAM: res->name = "System RAM"; break; | ||
1014 | case E820_ACPI: res->name = "ACPI Tables"; break; | ||
1015 | case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; | ||
1016 | default: res->name = "reserved"; | ||
1017 | } | ||
1018 | res->start = e820.map[i].addr; | ||
1019 | res->end = res->start + e820.map[i].size - 1; | ||
1020 | #ifndef CONFIG_RESOURCES_64BIT | ||
1021 | if (res->end > 0x100000000ULL) { | ||
1022 | res++; | ||
1023 | continue; | ||
1024 | } | ||
1025 | #endif | ||
1026 | res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; | ||
1027 | insert_resource(&iomem_resource, res); | ||
1028 | res++; | ||
1029 | } | ||
1030 | } | ||
1031 | |||
1032 | char *__init default_machine_specific_memory_setup(void) | ||
1033 | { | ||
1034 | char *who = "BIOS-e820"; | ||
1035 | int new_nr; | ||
1036 | /* | ||
1037 | * Try to copy the BIOS-supplied E820-map. | ||
1038 | * | ||
1039 | * Otherwise fake a memory map; one section from 0k->640k, | ||
1040 | * the next section from 1mb->appropriate_mem_k | ||
1041 | */ | ||
1042 | new_nr = boot_params.e820_entries; | ||
1043 | sanitize_e820_map(boot_params.e820_map, | ||
1044 | ARRAY_SIZE(boot_params.e820_map), | ||
1045 | &new_nr); | ||
1046 | boot_params.e820_entries = new_nr; | ||
1047 | if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) { | ||
1048 | u64 mem_size; | ||
1049 | |||
1050 | /* compare results from other methods and take the greater */ | ||
1051 | if (boot_params.alt_mem_k | ||
1052 | < boot_params.screen_info.ext_mem_k) { | ||
1053 | mem_size = boot_params.screen_info.ext_mem_k; | ||
1054 | who = "BIOS-88"; | ||
1055 | } else { | ||
1056 | mem_size = boot_params.alt_mem_k; | ||
1057 | who = "BIOS-e801"; | ||
1058 | } | ||
1059 | |||
1060 | e820.nr_map = 0; | ||
1061 | e820_add_region(0, LOWMEMSIZE(), E820_RAM); | ||
1062 | e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM); | ||
1063 | } | ||
1064 | |||
1065 | /* In case someone cares... */ | ||
1066 | return who; | ||
1067 | } | ||
1068 | |||
1069 | char *__init __attribute__((weak)) machine_specific_memory_setup(void) | ||
1070 | { | ||
1071 | return default_machine_specific_memory_setup(); | ||
1072 | } | ||
1073 | |||
1074 | /* Overridden in paravirt.c if CONFIG_PARAVIRT */ | ||
1075 | char * __init __attribute__((weak)) memory_setup(void) | ||
1076 | { | ||
1077 | return machine_specific_memory_setup(); | ||
1078 | } | ||
1079 | |||
1080 | void __init setup_memory_map(void) | ||
1081 | { | ||
1082 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | ||
1083 | e820_print_map(memory_setup()); | ||
1084 | } | ||
1085 | |||
1086 | #ifdef CONFIG_X86_64 | ||
935 | int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) | 1087 | int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) |
936 | { | 1088 | { |
937 | int i; | 1089 | int i; |
@@ -950,3 +1102,4 @@ int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) | |||
950 | max_pfn << PAGE_SHIFT) - *addr; | 1102 | max_pfn << PAGE_SHIFT) - *addr; |
951 | return i + 1; | 1103 | return i + 1; |
952 | } | 1104 | } |
1105 | #endif | ||
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c deleted file mode 100644 index ed733e7cf4e6..000000000000 --- a/arch/x86/kernel/e820_32.c +++ /dev/null | |||
@@ -1,775 +0,0 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/types.h> | ||
3 | #include <linux/init.h> | ||
4 | #include <linux/bootmem.h> | ||
5 | #include <linux/ioport.h> | ||
6 | #include <linux/string.h> | ||
7 | #include <linux/kexec.h> | ||
8 | #include <linux/module.h> | ||
9 | #include <linux/mm.h> | ||
10 | #include <linux/pfn.h> | ||
11 | #include <linux/uaccess.h> | ||
12 | #include <linux/suspend.h> | ||
13 | |||
14 | #include <asm/pgtable.h> | ||
15 | #include <asm/page.h> | ||
16 | #include <asm/e820.h> | ||
17 | #include <asm/setup.h> | ||
18 | |||
19 | struct e820map e820; | ||
20 | struct change_member { | ||
21 | struct e820entry *pbios; /* pointer to original bios entry */ | ||
22 | unsigned long long addr; /* address for this change point */ | ||
23 | }; | ||
24 | static struct change_member change_point_list[2*E820MAX] __initdata; | ||
25 | static struct change_member *change_point[2*E820MAX] __initdata; | ||
26 | static struct e820entry *overlap_list[E820MAX] __initdata; | ||
27 | static struct e820entry new_bios[E820MAX] __initdata; | ||
28 | /* For PCI or other memory-mapped resources */ | ||
29 | unsigned long pci_mem_start = 0x10000000; | ||
30 | #ifdef CONFIG_PCI | ||
31 | EXPORT_SYMBOL(pci_mem_start); | ||
32 | #endif | ||
33 | extern int user_defined_memmap; | ||
34 | |||
35 | static struct resource system_rom_resource = { | ||
36 | .name = "System ROM", | ||
37 | .start = 0xf0000, | ||
38 | .end = 0xfffff, | ||
39 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
40 | }; | ||
41 | |||
42 | static struct resource extension_rom_resource = { | ||
43 | .name = "Extension ROM", | ||
44 | .start = 0xe0000, | ||
45 | .end = 0xeffff, | ||
46 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
47 | }; | ||
48 | |||
49 | static struct resource adapter_rom_resources[] = { { | ||
50 | .name = "Adapter ROM", | ||
51 | .start = 0xc8000, | ||
52 | .end = 0, | ||
53 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
54 | }, { | ||
55 | .name = "Adapter ROM", | ||
56 | .start = 0, | ||
57 | .end = 0, | ||
58 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
59 | }, { | ||
60 | .name = "Adapter ROM", | ||
61 | .start = 0, | ||
62 | .end = 0, | ||
63 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
64 | }, { | ||
65 | .name = "Adapter ROM", | ||
66 | .start = 0, | ||
67 | .end = 0, | ||
68 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
69 | }, { | ||
70 | .name = "Adapter ROM", | ||
71 | .start = 0, | ||
72 | .end = 0, | ||
73 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
74 | }, { | ||
75 | .name = "Adapter ROM", | ||
76 | .start = 0, | ||
77 | .end = 0, | ||
78 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
79 | } }; | ||
80 | |||
81 | static struct resource video_rom_resource = { | ||
82 | .name = "Video ROM", | ||
83 | .start = 0xc0000, | ||
84 | .end = 0xc7fff, | ||
85 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
86 | }; | ||
87 | |||
88 | #define ROMSIGNATURE 0xaa55 | ||
89 | |||
90 | static int __init romsignature(const unsigned char *rom) | ||
91 | { | ||
92 | const unsigned short * const ptr = (const unsigned short *)rom; | ||
93 | unsigned short sig; | ||
94 | |||
95 | return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE; | ||
96 | } | ||
97 | |||
98 | static int __init romchecksum(const unsigned char *rom, unsigned long length) | ||
99 | { | ||
100 | unsigned char sum, c; | ||
101 | |||
102 | for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--) | ||
103 | sum += c; | ||
104 | return !length && !sum; | ||
105 | } | ||
106 | |||
107 | static void __init probe_roms(void) | ||
108 | { | ||
109 | const unsigned char *rom; | ||
110 | unsigned long start, length, upper; | ||
111 | unsigned char c; | ||
112 | int i; | ||
113 | |||
114 | /* video rom */ | ||
115 | upper = adapter_rom_resources[0].start; | ||
116 | for (start = video_rom_resource.start; start < upper; start += 2048) { | ||
117 | rom = isa_bus_to_virt(start); | ||
118 | if (!romsignature(rom)) | ||
119 | continue; | ||
120 | |||
121 | video_rom_resource.start = start; | ||
122 | |||
123 | if (probe_kernel_address(rom + 2, c) != 0) | ||
124 | continue; | ||
125 | |||
126 | /* 0 < length <= 0x7f * 512, historically */ | ||
127 | length = c * 512; | ||
128 | |||
129 | /* if checksum okay, trust length byte */ | ||
130 | if (length && romchecksum(rom, length)) | ||
131 | video_rom_resource.end = start + length - 1; | ||
132 | |||
133 | request_resource(&iomem_resource, &video_rom_resource); | ||
134 | break; | ||
135 | } | ||
136 | |||
137 | start = (video_rom_resource.end + 1 + 2047) & ~2047UL; | ||
138 | if (start < upper) | ||
139 | start = upper; | ||
140 | |||
141 | /* system rom */ | ||
142 | request_resource(&iomem_resource, &system_rom_resource); | ||
143 | upper = system_rom_resource.start; | ||
144 | |||
145 | /* check for extension rom (ignore length byte!) */ | ||
146 | rom = isa_bus_to_virt(extension_rom_resource.start); | ||
147 | if (romsignature(rom)) { | ||
148 | length = extension_rom_resource.end - extension_rom_resource.start + 1; | ||
149 | if (romchecksum(rom, length)) { | ||
150 | request_resource(&iomem_resource, &extension_rom_resource); | ||
151 | upper = extension_rom_resource.start; | ||
152 | } | ||
153 | } | ||
154 | |||
155 | /* check for adapter roms on 2k boundaries */ | ||
156 | for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { | ||
157 | rom = isa_bus_to_virt(start); | ||
158 | if (!romsignature(rom)) | ||
159 | continue; | ||
160 | |||
161 | if (probe_kernel_address(rom + 2, c) != 0) | ||
162 | continue; | ||
163 | |||
164 | /* 0 < length <= 0x7f * 512, historically */ | ||
165 | length = c * 512; | ||
166 | |||
167 | /* but accept any length that fits if checksum okay */ | ||
168 | if (!length || start + length > upper || !romchecksum(rom, length)) | ||
169 | continue; | ||
170 | |||
171 | adapter_rom_resources[i].start = start; | ||
172 | adapter_rom_resources[i].end = start + length - 1; | ||
173 | request_resource(&iomem_resource, &adapter_rom_resources[i]); | ||
174 | |||
175 | start = adapter_rom_resources[i++].end & ~2047UL; | ||
176 | } | ||
177 | } | ||
178 | |||
179 | /* | ||
180 | * Request address space for all standard RAM and ROM resources | ||
181 | * and also for regions reported as reserved by the e820. | ||
182 | */ | ||
183 | void __init init_iomem_resources(struct resource *code_resource, | ||
184 | struct resource *data_resource, | ||
185 | struct resource *bss_resource) | ||
186 | { | ||
187 | int i; | ||
188 | |||
189 | probe_roms(); | ||
190 | for (i = 0; i < e820.nr_map; i++) { | ||
191 | struct resource *res; | ||
192 | #ifndef CONFIG_RESOURCES_64BIT | ||
193 | if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) | ||
194 | continue; | ||
195 | #endif | ||
196 | res = kzalloc(sizeof(struct resource), GFP_ATOMIC); | ||
197 | switch (e820.map[i].type) { | ||
198 | case E820_RAM: res->name = "System RAM"; break; | ||
199 | case E820_ACPI: res->name = "ACPI Tables"; break; | ||
200 | case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; | ||
201 | default: res->name = "reserved"; | ||
202 | } | ||
203 | res->start = e820.map[i].addr; | ||
204 | res->end = res->start + e820.map[i].size - 1; | ||
205 | res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; | ||
206 | if (request_resource(&iomem_resource, res)) { | ||
207 | kfree(res); | ||
208 | continue; | ||
209 | } | ||
210 | if (e820.map[i].type == E820_RAM) { | ||
211 | /* | ||
212 | * We don't know which RAM region contains kernel data, | ||
213 | * so we try it repeatedly and let the resource manager | ||
214 | * test it. | ||
215 | */ | ||
216 | request_resource(res, code_resource); | ||
217 | request_resource(res, data_resource); | ||
218 | request_resource(res, bss_resource); | ||
219 | #ifdef CONFIG_KEXEC | ||
220 | if (crashk_res.start != crashk_res.end) | ||
221 | request_resource(res, &crashk_res); | ||
222 | #endif | ||
223 | } | ||
224 | } | ||
225 | } | ||
226 | |||
#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
/**
 * e820_mark_nosave_regions - Find the ranges of physical addresses that do not
 * correspond to e820 RAM areas and mark the corresponding pages as nosave for
 * hibernation.
 *
 * Both the gaps between consecutive entries and the non-RAM entries
 * themselves are registered.  The walk stops once max_low_pfn is reached.
 *
 * This function requires the e820 map to be sorted and without any
 * overlapping entries and assumes the first e820 area to be RAM.
 */
void __init e820_mark_nosave_regions(void)
{
	unsigned long prev_end_pfn;
	int idx;

	prev_end_pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
	for (idx = 1; idx < e820.nr_map; idx++) {
		struct e820entry *entry = &e820.map[idx];

		/* hole between the previous entry and this one */
		if (prev_end_pfn < PFN_UP(entry->addr))
			register_nosave_region(prev_end_pfn,
					       PFN_UP(entry->addr));

		prev_end_pfn = PFN_DOWN(entry->addr + entry->size);
		if (entry->type != E820_RAM)
			register_nosave_region(PFN_UP(entry->addr),
					       prev_end_pfn);

		if (prev_end_pfn >= max_low_pfn)
			break;
	}
}
#endif
257 | |||
258 | void __init add_memory_region(unsigned long long start, | ||
259 | unsigned long long size, int type) | ||
260 | { | ||
261 | int x; | ||
262 | |||
263 | x = e820.nr_map; | ||
264 | |||
265 | if (x == E820MAX) { | ||
266 | printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); | ||
267 | return; | ||
268 | } | ||
269 | |||
270 | e820.map[x].addr = start; | ||
271 | e820.map[x].size = size; | ||
272 | e820.map[x].type = type; | ||
273 | e820.nr_map++; | ||
274 | } /* add_memory_region */ | ||
275 | |||
276 | /* | ||
277 | * Sanitize the BIOS e820 map. | ||
278 | * | ||
279 | * Some e820 responses include overlapping entries. The following | ||
280 | * replaces the original e820 map with a new one, removing overlaps. | ||
281 | * | ||
282 | */ | ||
283 | int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | ||
284 | { | ||
285 | struct change_member *change_tmp; | ||
286 | unsigned long current_type, last_type; | ||
287 | unsigned long long last_addr; | ||
288 | int chgidx, still_changing; | ||
289 | int overlap_entries; | ||
290 | int new_bios_entry; | ||
291 | int old_nr, new_nr, chg_nr; | ||
292 | int i; | ||
293 | |||
294 | /* | ||
295 | Visually we're performing the following (1,2,3,4 = memory types)... | ||
296 | |||
297 | Sample memory map (w/overlaps): | ||
298 | ____22__________________ | ||
299 | ______________________4_ | ||
300 | ____1111________________ | ||
301 | _44_____________________ | ||
302 | 11111111________________ | ||
303 | ____________________33__ | ||
304 | ___________44___________ | ||
305 | __________33333_________ | ||
306 | ______________22________ | ||
307 | ___________________2222_ | ||
308 | _________111111111______ | ||
309 | _____________________11_ | ||
310 | _________________4______ | ||
311 | |||
312 | Sanitized equivalent (no overlap): | ||
313 | 1_______________________ | ||
314 | _44_____________________ | ||
315 | ___1____________________ | ||
316 | ____22__________________ | ||
317 | ______11________________ | ||
318 | _________1______________ | ||
319 | __________3_____________ | ||
320 | ___________44___________ | ||
321 | _____________33_________ | ||
322 | _______________2________ | ||
323 | ________________1_______ | ||
324 | _________________4______ | ||
325 | ___________________2____ | ||
326 | ____________________33__ | ||
327 | ______________________4_ | ||
328 | */ | ||
329 | /* if there's only one memory region, don't bother */ | ||
330 | if (*pnr_map < 2) { | ||
331 | return -1; | ||
332 | } | ||
333 | |||
334 | old_nr = *pnr_map; | ||
335 | |||
336 | /* bail out if we find any unreasonable addresses in bios map */ | ||
337 | for (i=0; i<old_nr; i++) | ||
338 | if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) { | ||
339 | return -1; | ||
340 | } | ||
341 | |||
342 | /* create pointers for initial change-point information (for sorting) */ | ||
343 | for (i=0; i < 2*old_nr; i++) | ||
344 | change_point[i] = &change_point_list[i]; | ||
345 | |||
346 | /* record all known change-points (starting and ending addresses), | ||
347 | omitting those that are for empty memory regions */ | ||
348 | chgidx = 0; | ||
349 | for (i=0; i < old_nr; i++) { | ||
350 | if (biosmap[i].size != 0) { | ||
351 | change_point[chgidx]->addr = biosmap[i].addr; | ||
352 | change_point[chgidx++]->pbios = &biosmap[i]; | ||
353 | change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; | ||
354 | change_point[chgidx++]->pbios = &biosmap[i]; | ||
355 | } | ||
356 | } | ||
357 | chg_nr = chgidx; /* true number of change-points */ | ||
358 | |||
359 | /* sort change-point list by memory addresses (low -> high) */ | ||
360 | still_changing = 1; | ||
361 | while (still_changing) { | ||
362 | still_changing = 0; | ||
363 | for (i=1; i < chg_nr; i++) { | ||
364 | /* if <current_addr> > <last_addr>, swap */ | ||
365 | /* or, if current=<start_addr> & last=<end_addr>, swap */ | ||
366 | if ((change_point[i]->addr < change_point[i-1]->addr) || | ||
367 | ((change_point[i]->addr == change_point[i-1]->addr) && | ||
368 | (change_point[i]->addr == change_point[i]->pbios->addr) && | ||
369 | (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) | ||
370 | ) | ||
371 | { | ||
372 | change_tmp = change_point[i]; | ||
373 | change_point[i] = change_point[i-1]; | ||
374 | change_point[i-1] = change_tmp; | ||
375 | still_changing=1; | ||
376 | } | ||
377 | } | ||
378 | } | ||
379 | |||
380 | /* create a new bios memory map, removing overlaps */ | ||
381 | overlap_entries=0; /* number of entries in the overlap table */ | ||
382 | new_bios_entry=0; /* index for creating new bios map entries */ | ||
383 | last_type = 0; /* start with undefined memory type */ | ||
384 | last_addr = 0; /* start with 0 as last starting address */ | ||
385 | /* loop through change-points, determining affect on the new bios map */ | ||
386 | for (chgidx=0; chgidx < chg_nr; chgidx++) | ||
387 | { | ||
388 | /* keep track of all overlapping bios entries */ | ||
389 | if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) | ||
390 | { | ||
391 | /* add map entry to overlap list (> 1 entry implies an overlap) */ | ||
392 | overlap_list[overlap_entries++]=change_point[chgidx]->pbios; | ||
393 | } | ||
394 | else | ||
395 | { | ||
396 | /* remove entry from list (order independent, so swap with last) */ | ||
397 | for (i=0; i<overlap_entries; i++) | ||
398 | { | ||
399 | if (overlap_list[i] == change_point[chgidx]->pbios) | ||
400 | overlap_list[i] = overlap_list[overlap_entries-1]; | ||
401 | } | ||
402 | overlap_entries--; | ||
403 | } | ||
404 | /* if there are overlapping entries, decide which "type" to use */ | ||
405 | /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ | ||
406 | current_type = 0; | ||
407 | for (i=0; i<overlap_entries; i++) | ||
408 | if (overlap_list[i]->type > current_type) | ||
409 | current_type = overlap_list[i]->type; | ||
410 | /* continue building up new bios map based on this information */ | ||
411 | if (current_type != last_type) { | ||
412 | if (last_type != 0) { | ||
413 | new_bios[new_bios_entry].size = | ||
414 | change_point[chgidx]->addr - last_addr; | ||
415 | /* move forward only if the new size was non-zero */ | ||
416 | if (new_bios[new_bios_entry].size != 0) | ||
417 | if (++new_bios_entry >= E820MAX) | ||
418 | break; /* no more space left for new bios entries */ | ||
419 | } | ||
420 | if (current_type != 0) { | ||
421 | new_bios[new_bios_entry].addr = change_point[chgidx]->addr; | ||
422 | new_bios[new_bios_entry].type = current_type; | ||
423 | last_addr=change_point[chgidx]->addr; | ||
424 | } | ||
425 | last_type = current_type; | ||
426 | } | ||
427 | } | ||
428 | new_nr = new_bios_entry; /* retain count for new bios entries */ | ||
429 | |||
430 | /* copy new bios mapping into original location */ | ||
431 | memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); | ||
432 | *pnr_map = new_nr; | ||
433 | |||
434 | return 0; | ||
435 | } | ||
436 | |||
437 | /* | ||
438 | * Copy the BIOS e820 map into a safe place. | ||
439 | * | ||
440 | * Sanity-check it while we're at it.. | ||
441 | * | ||
442 | * If we're lucky and live on a modern system, the setup code | ||
443 | * will have given us a memory map that we can use to properly | ||
444 | * set up memory. If we aren't, we'll fake a memory map. | ||
445 | * | ||
446 | * We check to see that the memory map contains at least 2 elements | ||
447 | * before we'll use it, because the detection code in setup.S may | ||
448 | * not be perfect and most every PC known to man has two memory | ||
449 | * regions: one from 0 to 640k, and one from 1mb up. (The IBM | ||
450 | * thinkpad 560x, for example, does not cooperate with the memory | ||
451 | * detection code.) | ||
452 | */ | ||
453 | int __init copy_e820_map(struct e820entry *biosmap, int nr_map) | ||
454 | { | ||
455 | /* Only one memory region (or negative)? Ignore it */ | ||
456 | if (nr_map < 2) | ||
457 | return -1; | ||
458 | |||
459 | do { | ||
460 | u64 start = biosmap->addr; | ||
461 | u64 size = biosmap->size; | ||
462 | u64 end = start + size; | ||
463 | u32 type = biosmap->type; | ||
464 | |||
465 | /* Overflow in 64 bits? Ignore the memory map. */ | ||
466 | if (start > end) | ||
467 | return -1; | ||
468 | |||
469 | add_memory_region(start, size, type); | ||
470 | } while (biosmap++, --nr_map); | ||
471 | |||
472 | return 0; | ||
473 | } | ||
474 | |||
475 | /* | ||
476 | * Find the highest page frame number we have available | ||
477 | */ | ||
478 | void __init propagate_e820_map(void) | ||
479 | { | ||
480 | int i; | ||
481 | |||
482 | max_pfn = 0; | ||
483 | |||
484 | for (i = 0; i < e820.nr_map; i++) { | ||
485 | unsigned long start, end; | ||
486 | /* RAM? */ | ||
487 | if (e820.map[i].type != E820_RAM) | ||
488 | continue; | ||
489 | start = PFN_UP(e820.map[i].addr); | ||
490 | end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); | ||
491 | if (start >= end) | ||
492 | continue; | ||
493 | if (end > max_pfn) | ||
494 | max_pfn = end; | ||
495 | memory_present(0, start, end); | ||
496 | } | ||
497 | } | ||
498 | |||
499 | /* | ||
500 | * Register fully available low RAM pages with the bootmem allocator. | ||
501 | */ | ||
502 | void __init register_bootmem_low_pages(unsigned long max_low_pfn) | ||
503 | { | ||
504 | int i; | ||
505 | |||
506 | for (i = 0; i < e820.nr_map; i++) { | ||
507 | unsigned long curr_pfn, last_pfn, size; | ||
508 | /* | ||
509 | * Reserve usable low memory | ||
510 | */ | ||
511 | if (e820.map[i].type != E820_RAM) | ||
512 | continue; | ||
513 | /* | ||
514 | * We are rounding up the start address of usable memory: | ||
515 | */ | ||
516 | curr_pfn = PFN_UP(e820.map[i].addr); | ||
517 | if (curr_pfn >= max_low_pfn) | ||
518 | continue; | ||
519 | /* | ||
520 | * ... and at the end of the usable range downwards: | ||
521 | */ | ||
522 | last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); | ||
523 | |||
524 | if (last_pfn > max_low_pfn) | ||
525 | last_pfn = max_low_pfn; | ||
526 | |||
527 | /* | ||
528 | * .. finally, did all the rounding and playing | ||
529 | * around just make the area go away? | ||
530 | */ | ||
531 | if (last_pfn <= curr_pfn) | ||
532 | continue; | ||
533 | |||
534 | size = last_pfn - curr_pfn; | ||
535 | free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); | ||
536 | } | ||
537 | } | ||
538 | |||
539 | void __init e820_register_memory(void) | ||
540 | { | ||
541 | unsigned long gapstart, gapsize, round; | ||
542 | unsigned long long last; | ||
543 | int i; | ||
544 | |||
545 | /* | ||
546 | * Search for the biggest gap in the low 32 bits of the e820 | ||
547 | * memory space. | ||
548 | */ | ||
549 | last = 0x100000000ull; | ||
550 | gapstart = 0x10000000; | ||
551 | gapsize = 0x400000; | ||
552 | i = e820.nr_map; | ||
553 | while (--i >= 0) { | ||
554 | unsigned long long start = e820.map[i].addr; | ||
555 | unsigned long long end = start + e820.map[i].size; | ||
556 | |||
557 | /* | ||
558 | * Since "last" is at most 4GB, we know we'll | ||
559 | * fit in 32 bits if this condition is true | ||
560 | */ | ||
561 | if (last > end) { | ||
562 | unsigned long gap = last - end; | ||
563 | |||
564 | if (gap > gapsize) { | ||
565 | gapsize = gap; | ||
566 | gapstart = end; | ||
567 | } | ||
568 | } | ||
569 | if (start < last) | ||
570 | last = start; | ||
571 | } | ||
572 | |||
573 | /* | ||
574 | * See how much we want to round up: start off with | ||
575 | * rounding to the next 1MB area. | ||
576 | */ | ||
577 | round = 0x100000; | ||
578 | while ((gapsize >> 4) > round) | ||
579 | round += round; | ||
580 | /* Fun with two's complement */ | ||
581 | pci_mem_start = (gapstart + round) & -round; | ||
582 | |||
583 | printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", | ||
584 | pci_mem_start, gapstart, gapsize); | ||
585 | } | ||
586 | |||
587 | void __init print_memory_map(char *who) | ||
588 | { | ||
589 | int i; | ||
590 | |||
591 | for (i = 0; i < e820.nr_map; i++) { | ||
592 | printk(" %s: %016Lx - %016Lx ", who, | ||
593 | e820.map[i].addr, | ||
594 | e820.map[i].addr + e820.map[i].size); | ||
595 | switch (e820.map[i].type) { | ||
596 | case E820_RAM: printk("(usable)\n"); | ||
597 | break; | ||
598 | case E820_RESERVED: | ||
599 | printk("(reserved)\n"); | ||
600 | break; | ||
601 | case E820_ACPI: | ||
602 | printk("(ACPI data)\n"); | ||
603 | break; | ||
604 | case E820_NVS: | ||
605 | printk("(ACPI NVS)\n"); | ||
606 | break; | ||
607 | default: printk("type %u\n", e820.map[i].type); | ||
608 | break; | ||
609 | } | ||
610 | } | ||
611 | } | ||
612 | |||
613 | void __init limit_regions(unsigned long long size) | ||
614 | { | ||
615 | unsigned long long current_addr; | ||
616 | int i; | ||
617 | |||
618 | print_memory_map("limit_regions start"); | ||
619 | for (i = 0; i < e820.nr_map; i++) { | ||
620 | current_addr = e820.map[i].addr + e820.map[i].size; | ||
621 | if (current_addr < size) | ||
622 | continue; | ||
623 | |||
624 | if (e820.map[i].type != E820_RAM) | ||
625 | continue; | ||
626 | |||
627 | if (e820.map[i].addr >= size) { | ||
628 | /* | ||
629 | * This region starts past the end of the | ||
630 | * requested size, skip it completely. | ||
631 | */ | ||
632 | e820.nr_map = i; | ||
633 | } else { | ||
634 | e820.nr_map = i + 1; | ||
635 | e820.map[i].size -= current_addr - size; | ||
636 | } | ||
637 | print_memory_map("limit_regions endfor"); | ||
638 | return; | ||
639 | } | ||
640 | print_memory_map("limit_regions endfunc"); | ||
641 | } | ||
642 | |||
643 | /* | ||
644 | * This function checks if any part of the range <start,end> is mapped | ||
645 | * with type. | ||
646 | */ | ||
647 | int | ||
648 | e820_any_mapped(u64 start, u64 end, unsigned type) | ||
649 | { | ||
650 | int i; | ||
651 | for (i = 0; i < e820.nr_map; i++) { | ||
652 | const struct e820entry *ei = &e820.map[i]; | ||
653 | if (type && ei->type != type) | ||
654 | continue; | ||
655 | if (ei->addr >= end || ei->addr + ei->size <= start) | ||
656 | continue; | ||
657 | return 1; | ||
658 | } | ||
659 | return 0; | ||
660 | } | ||
661 | EXPORT_SYMBOL_GPL(e820_any_mapped); | ||
662 | |||
663 | /* | ||
664 | * This function checks if the entire range <start,end> is mapped with type. | ||
665 | * | ||
666 | * Note: this function only works correct if the e820 table is sorted and | ||
667 | * not-overlapping, which is the case | ||
668 | */ | ||
669 | int __init | ||
670 | e820_all_mapped(unsigned long s, unsigned long e, unsigned type) | ||
671 | { | ||
672 | u64 start = s; | ||
673 | u64 end = e; | ||
674 | int i; | ||
675 | for (i = 0; i < e820.nr_map; i++) { | ||
676 | struct e820entry *ei = &e820.map[i]; | ||
677 | if (type && ei->type != type) | ||
678 | continue; | ||
679 | /* is the region (part) in overlap with the current region ?*/ | ||
680 | if (ei->addr >= end || ei->addr + ei->size <= start) | ||
681 | continue; | ||
682 | /* if the region is at the beginning of <start,end> we move | ||
683 | * start to the end of the region since it's ok until there | ||
684 | */ | ||
685 | if (ei->addr <= start) | ||
686 | start = ei->addr + ei->size; | ||
687 | /* if start is now at or beyond end, we're done, full | ||
688 | * coverage */ | ||
689 | if (start >= end) | ||
690 | return 1; /* we're done */ | ||
691 | } | ||
692 | return 0; | ||
693 | } | ||
694 | |||
695 | static int __init parse_memmap(char *arg) | ||
696 | { | ||
697 | if (!arg) | ||
698 | return -EINVAL; | ||
699 | |||
700 | if (strcmp(arg, "exactmap") == 0) { | ||
701 | #ifdef CONFIG_CRASH_DUMP | ||
702 | /* If we are doing a crash dump, we | ||
703 | * still need to know the real mem | ||
704 | * size before original memory map is | ||
705 | * reset. | ||
706 | */ | ||
707 | propagate_e820_map(); | ||
708 | saved_max_pfn = max_pfn; | ||
709 | #endif | ||
710 | e820.nr_map = 0; | ||
711 | user_defined_memmap = 1; | ||
712 | } else { | ||
713 | /* If the user specifies memory size, we | ||
714 | * limit the BIOS-provided memory map to | ||
715 | * that size. exactmap can be used to specify | ||
716 | * the exact map. mem=number can be used to | ||
717 | * trim the existing memory map. | ||
718 | */ | ||
719 | unsigned long long start_at, mem_size; | ||
720 | |||
721 | mem_size = memparse(arg, &arg); | ||
722 | if (*arg == '@') { | ||
723 | start_at = memparse(arg+1, &arg); | ||
724 | add_memory_region(start_at, mem_size, E820_RAM); | ||
725 | } else if (*arg == '#') { | ||
726 | start_at = memparse(arg+1, &arg); | ||
727 | add_memory_region(start_at, mem_size, E820_ACPI); | ||
728 | } else if (*arg == '$') { | ||
729 | start_at = memparse(arg+1, &arg); | ||
730 | add_memory_region(start_at, mem_size, E820_RESERVED); | ||
731 | } else { | ||
732 | limit_regions(mem_size); | ||
733 | user_defined_memmap = 1; | ||
734 | } | ||
735 | } | ||
736 | return 0; | ||
737 | } | ||
738 | early_param("memmap", parse_memmap); | ||
739 | void __init update_memory_range(u64 start, u64 size, unsigned old_type, | ||
740 | unsigned new_type) | ||
741 | { | ||
742 | int i; | ||
743 | |||
744 | BUG_ON(old_type == new_type); | ||
745 | |||
746 | for (i = 0; i < e820.nr_map; i++) { | ||
747 | struct e820entry *ei = &e820.map[i]; | ||
748 | u64 final_start, final_end; | ||
749 | if (ei->type != old_type) | ||
750 | continue; | ||
751 | /* totally covered? */ | ||
752 | if (ei->addr >= start && ei->size <= size) { | ||
753 | ei->type = new_type; | ||
754 | continue; | ||
755 | } | ||
756 | /* partially covered */ | ||
757 | final_start = max(start, ei->addr); | ||
758 | final_end = min(start + size, ei->addr + ei->size); | ||
759 | if (final_start >= final_end) | ||
760 | continue; | ||
761 | add_memory_region(final_start, final_end - final_start, | ||
762 | new_type); | ||
763 | } | ||
764 | } | ||
765 | void __init update_e820(void) | ||
766 | { | ||
767 | u8 nr_map; | ||
768 | |||
769 | nr_map = e820.nr_map; | ||
770 | if (sanitize_e820_map(e820.map, &nr_map)) | ||
771 | return; | ||
772 | e820.nr_map = nr_map; | ||
773 | printk(KERN_INFO "modified physical RAM map:\n"); | ||
774 | print_memory_map("modified"); | ||
775 | } | ||
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 77d424cf68b3..473c89fe5073 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c | |||
@@ -213,6 +213,48 @@ unsigned long efi_get_time(void) | |||
213 | eft.minute, eft.second); | 213 | eft.minute, eft.second); |
214 | } | 214 | } |
215 | 215 | ||
216 | /* | ||
217 | * Tell the kernel about the EFI memory map. This might include | ||
218 | * more than the max 128 entries that can fit in the e820 legacy | ||
219 | * (zeropage) memory map. | ||
220 | */ | ||
221 | |||
222 | static void __init add_efi_memmap(void) | ||
223 | { | ||
224 | void *p; | ||
225 | |||
226 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
227 | efi_memory_desc_t *md = p; | ||
228 | unsigned long long start = md->phys_addr; | ||
229 | unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; | ||
230 | int e820_type; | ||
231 | |||
232 | if (md->attribute & EFI_MEMORY_WB) | ||
233 | e820_type = E820_RAM; | ||
234 | else | ||
235 | e820_type = E820_RESERVED; | ||
236 | e820_add_region(start, size, e820_type); | ||
237 | } | ||
238 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | ||
239 | } | ||
240 | |||
241 | void __init efi_reserve_early(void) | ||
242 | { | ||
243 | unsigned long pmap; | ||
244 | |||
245 | pmap = boot_params.efi_info.efi_memmap; | ||
246 | #ifdef CONFIG_X86_64 | ||
247 | pmap += (__u64)boot_params.efi_info.efi_memmap_hi << 32; | ||
248 | #endif | ||
249 | memmap.phys_map = (void *)pmap; | ||
250 | memmap.nr_map = boot_params.efi_info.efi_memmap_size / | ||
251 | boot_params.efi_info.efi_memdesc_size; | ||
252 | memmap.desc_version = boot_params.efi_info.efi_memdesc_version; | ||
253 | memmap.desc_size = boot_params.efi_info.efi_memdesc_size; | ||
254 | reserve_early(pmap, pmap + memmap.nr_map * memmap.desc_size, | ||
255 | "EFI memmap"); | ||
256 | } | ||
257 | |||
216 | #if EFI_DEBUG | 258 | #if EFI_DEBUG |
217 | static void __init print_efi_memmap(void) | 259 | static void __init print_efi_memmap(void) |
218 | { | 260 | { |
@@ -242,21 +284,11 @@ void __init efi_init(void) | |||
242 | int i = 0; | 284 | int i = 0; |
243 | void *tmp; | 285 | void *tmp; |
244 | 286 | ||
245 | #ifdef CONFIG_X86_32 | ||
246 | efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; | 287 | efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; |
247 | memmap.phys_map = (void *)boot_params.efi_info.efi_memmap; | 288 | #ifdef CONFIG_X86_64 |
248 | #else | 289 | efi_phys.systab = (void *)efi_phys.systab + |
249 | efi_phys.systab = (efi_system_table_t *) | 290 | ((__u64)boot_params.efi_info.efi_systab_hi<<32); |
250 | (boot_params.efi_info.efi_systab | | ||
251 | ((__u64)boot_params.efi_info.efi_systab_hi<<32)); | ||
252 | memmap.phys_map = (void *) | ||
253 | (boot_params.efi_info.efi_memmap | | ||
254 | ((__u64)boot_params.efi_info.efi_memmap_hi<<32)); | ||
255 | #endif | 291 | #endif |
256 | memmap.nr_map = boot_params.efi_info.efi_memmap_size / | ||
257 | boot_params.efi_info.efi_memdesc_size; | ||
258 | memmap.desc_version = boot_params.efi_info.efi_memdesc_version; | ||
259 | memmap.desc_size = boot_params.efi_info.efi_memdesc_size; | ||
260 | 292 | ||
261 | efi.systab = early_ioremap((unsigned long)efi_phys.systab, | 293 | efi.systab = early_ioremap((unsigned long)efi_phys.systab, |
262 | sizeof(efi_system_table_t)); | 294 | sizeof(efi_system_table_t)); |
@@ -370,6 +402,7 @@ void __init efi_init(void) | |||
370 | if (memmap.desc_size != sizeof(efi_memory_desc_t)) | 402 | if (memmap.desc_size != sizeof(efi_memory_desc_t)) |
371 | printk(KERN_WARNING "Kernel-defined memdesc" | 403 | printk(KERN_WARNING "Kernel-defined memdesc" |
372 | "doesn't match the one from EFI!\n"); | 404 | "doesn't match the one from EFI!\n"); |
405 | add_efi_memmap(); | ||
373 | 406 | ||
374 | /* Setup for EFI runtime service */ | 407 | /* Setup for EFI runtime service */ |
375 | reboot_type = BOOT_EFI; | 408 | reboot_type = BOOT_EFI; |
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index d0060fdcccac..652c5287215f 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c | |||
@@ -97,13 +97,7 @@ void __init efi_call_phys_epilog(void) | |||
97 | early_runtime_code_mapping_set_exec(0); | 97 | early_runtime_code_mapping_set_exec(0); |
98 | } | 98 | } |
99 | 99 | ||
100 | void __init efi_reserve_bootmem(void) | 100 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size) |
101 | { | ||
102 | reserve_bootmem_generic((unsigned long)memmap.phys_map, | ||
103 | memmap.nr_map * memmap.desc_size); | ||
104 | } | ||
105 | |||
106 | void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size) | ||
107 | { | 101 | { |
108 | static unsigned pages_mapped __initdata; | 102 | static unsigned pages_mapped __initdata; |
109 | unsigned i, pages; | 103 | unsigned i, pages; |
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index cbaaf69bedb2..1fa8be5bd217 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c | |||
@@ -51,7 +51,7 @@ void __init setup_apic_routing(void) | |||
51 | else | 51 | else |
52 | #endif | 52 | #endif |
53 | 53 | ||
54 | if (num_possible_cpus() <= 8) | 54 | if (max_physical_apicid < 8) |
55 | genapic = &apic_flat; | 55 | genapic = &apic_flat; |
56 | else | 56 | else |
57 | genapic = &apic_physflat; | 57 | genapic = &apic_physflat; |
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c new file mode 100644 index 000000000000..a727c0b9819c --- /dev/null +++ b/arch/x86/kernel/head.c | |||
@@ -0,0 +1,73 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/init.h> | ||
3 | |||
4 | #include <asm/setup.h> | ||
5 | #include <asm/bios_ebda.h> | ||
6 | |||
7 | #define BIOS_LOWMEM_KILOBYTES 0x413 | ||
8 | |||
9 | /* | ||
10 | * The BIOS places the EBDA/XBDA at the top of conventional | ||
11 | * memory, and usually decreases the reported amount of | ||
12 | * conventional memory (int 0x12) too. This also contains a | ||
13 | * workaround for Dell systems that neglect to reserve EBDA. | ||
14 | * The same workaround also avoids a problem with the AMD768MPX | ||
15 | * chipset: reserve a page before VGA to prevent PCI prefetch | ||
16 | * into it (errata #56). Usually the page is reserved anyways, | ||
17 | * unless you have no PS/2 mouse plugged in. | ||
18 | */ | ||
19 | void __init reserve_ebda_region(void) | ||
20 | { | ||
21 | unsigned int lowmem, ebda_addr; | ||
22 | |||
23 | /* To determine the position of the EBDA and the */ | ||
24 | /* end of conventional memory, we need to look at */ | ||
25 | /* the BIOS data area. In a paravirtual environment */ | ||
26 | /* that area is absent. We'll just have to assume */ | ||
27 | /* that the paravirt case can handle memory setup */ | ||
28 | /* correctly, without our help. */ | ||
29 | if (paravirt_enabled()) | ||
30 | return; | ||
31 | |||
32 | /* end of low (conventional) memory */ | ||
33 | lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); | ||
34 | lowmem <<= 10; | ||
35 | |||
36 | /* start of EBDA area */ | ||
37 | ebda_addr = get_bios_ebda(); | ||
38 | |||
39 | /* Fixup: bios puts an EBDA in the top 64K segment */ | ||
40 | /* of conventional memory, but does not adjust lowmem. */ | ||
41 | if ((lowmem - ebda_addr) <= 0x10000) | ||
42 | lowmem = ebda_addr; | ||
43 | |||
44 | /* Fixup: bios does not report an EBDA at all. */ | ||
45 | /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ | ||
46 | if ((ebda_addr == 0) && (lowmem >= 0x9f000)) | ||
47 | lowmem = 0x9f000; | ||
48 | |||
49 | /* Paranoia: should never happen, but... */ | ||
50 | if ((lowmem == 0) || (lowmem >= 0x100000)) | ||
51 | lowmem = 0x9f000; | ||
52 | |||
53 | /* reserve all memory between lowmem and the 1MB mark */ | ||
54 | reserve_early(lowmem, 0x100000, "BIOS reserved"); | ||
55 | } | ||
56 | |||
57 | void __init reserve_setup_data(void) | ||
58 | { | ||
59 | struct setup_data *data; | ||
60 | u64 pa_data; | ||
61 | char buf[32]; | ||
62 | |||
63 | if (boot_params.hdr.version < 0x0209) | ||
64 | return; | ||
65 | pa_data = boot_params.hdr.setup_data; | ||
66 | while (pa_data) { | ||
67 | data = early_ioremap(pa_data, sizeof(*data)); | ||
68 | sprintf(buf, "setup data %x", data->type); | ||
69 | reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); | ||
70 | pa_data = data->next; | ||
71 | early_iounmap(data, sizeof(*data)); | ||
72 | } | ||
73 | } | ||
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 3db059058927..fa1d25dd83e3 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -8,7 +8,34 @@ | |||
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/start_kernel.h> | 9 | #include <linux/start_kernel.h> |
10 | 10 | ||
11 | #include <asm/setup.h> | ||
12 | #include <asm/sections.h> | ||
13 | #include <asm/e820.h> | ||
14 | #include <asm/bios_ebda.h> | ||
15 | |||
11 | void __init i386_start_kernel(void) | 16 | void __init i386_start_kernel(void) |
12 | { | 17 | { |
18 | reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); | ||
19 | |||
20 | #ifdef CONFIG_BLK_DEV_INITRD | ||
21 | /* Reserve INITRD */ | ||
22 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { | ||
23 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | ||
24 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | ||
25 | u64 ramdisk_end = ramdisk_image + ramdisk_size; | ||
26 | reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); | ||
27 | } | ||
28 | #endif | ||
29 | reserve_early(init_pg_tables_start, init_pg_tables_end, | ||
30 | "INIT_PG_TABLE"); | ||
31 | |||
32 | reserve_ebda_region(); | ||
33 | |||
34 | /* | ||
35 | * At this point everything still needed from the boot loader | ||
36 | * or BIOS or kernel text should be early reserved or marked not | ||
37 | * RAM in e820. All other memory is free game. | ||
38 | */ | ||
39 | |||
13 | start_kernel(); | 40 | start_kernel(); |
14 | } | 41 | } |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index e25c57b8aa84..5fbed459ff3b 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -51,74 +51,6 @@ static void __init copy_bootdata(char *real_mode_data) | |||
51 | } | 51 | } |
52 | } | 52 | } |
53 | 53 | ||
54 | #define BIOS_LOWMEM_KILOBYTES 0x413 | ||
55 | |||
56 | /* | ||
57 | * The BIOS places the EBDA/XBDA at the top of conventional | ||
58 | * memory, and usually decreases the reported amount of | ||
59 | * conventional memory (int 0x12) too. This also contains a | ||
60 | * workaround for Dell systems that neglect to reserve EBDA. | ||
61 | * The same workaround also avoids a problem with the AMD768MPX | ||
62 | * chipset: reserve a page before VGA to prevent PCI prefetch | ||
63 | * into it (errata #56). Usually the page is reserved anyways, | ||
64 | * unless you have no PS/2 mouse plugged in. | ||
65 | */ | ||
66 | static void __init reserve_ebda_region(void) | ||
67 | { | ||
68 | unsigned int lowmem, ebda_addr; | ||
69 | |||
70 | /* To determine the position of the EBDA and the */ | ||
71 | /* end of conventional memory, we need to look at */ | ||
72 | /* the BIOS data area. In a paravirtual environment */ | ||
73 | /* that area is absent. We'll just have to assume */ | ||
74 | /* that the paravirt case can handle memory setup */ | ||
75 | /* correctly, without our help. */ | ||
76 | if (paravirt_enabled()) | ||
77 | return; | ||
78 | |||
79 | /* end of low (conventional) memory */ | ||
80 | lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); | ||
81 | lowmem <<= 10; | ||
82 | |||
83 | /* start of EBDA area */ | ||
84 | ebda_addr = get_bios_ebda(); | ||
85 | |||
86 | /* Fixup: bios puts an EBDA in the top 64K segment */ | ||
87 | /* of conventional memory, but does not adjust lowmem. */ | ||
88 | if ((lowmem - ebda_addr) <= 0x10000) | ||
89 | lowmem = ebda_addr; | ||
90 | |||
91 | /* Fixup: bios does not report an EBDA at all. */ | ||
92 | /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ | ||
93 | if ((ebda_addr == 0) && (lowmem >= 0x9f000)) | ||
94 | lowmem = 0x9f000; | ||
95 | |||
96 | /* Paranoia: should never happen, but... */ | ||
97 | if ((lowmem == 0) || (lowmem >= 0x100000)) | ||
98 | lowmem = 0x9f000; | ||
99 | |||
100 | /* reserve all memory between lowmem and the 1MB mark */ | ||
101 | reserve_early(lowmem, 0x100000, "BIOS reserved"); | ||
102 | } | ||
103 | |||
104 | static void __init reserve_setup_data(void) | ||
105 | { | ||
106 | struct setup_data *data; | ||
107 | unsigned long pa_data; | ||
108 | char buf[32]; | ||
109 | |||
110 | if (boot_params.hdr.version < 0x0209) | ||
111 | return; | ||
112 | pa_data = boot_params.hdr.setup_data; | ||
113 | while (pa_data) { | ||
114 | data = early_ioremap(pa_data, sizeof(*data)); | ||
115 | sprintf(buf, "setup data %x", data->type); | ||
116 | reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); | ||
117 | pa_data = data->next; | ||
118 | early_iounmap(data, sizeof(*data)); | ||
119 | } | ||
120 | } | ||
121 | |||
122 | void __init x86_64_start_kernel(char * real_mode_data) | 54 | void __init x86_64_start_kernel(char * real_mode_data) |
123 | { | 55 | { |
124 | int i; | 56 | int i; |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index f7357cc0162c..b98b338aae1a 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -194,6 +194,7 @@ default_entry: | |||
194 | xorl %ebx,%ebx /* %ebx is kept at zero */ | 194 | xorl %ebx,%ebx /* %ebx is kept at zero */ |
195 | 195 | ||
196 | movl $pa(pg0), %edi | 196 | movl $pa(pg0), %edi |
197 | movl %edi, pa(init_pg_tables_start) | ||
197 | movl $pa(swapper_pg_pmd), %edx | 198 | movl $pa(swapper_pg_pmd), %edx |
198 | movl $PTE_ATTR, %eax | 199 | movl $PTE_ATTR, %eax |
199 | 10: | 200 | 10: |
@@ -219,6 +220,8 @@ default_entry: | |||
219 | jb 10b | 220 | jb 10b |
220 | 1: | 221 | 1: |
221 | movl %edi,pa(init_pg_tables_end) | 222 | movl %edi,pa(init_pg_tables_end) |
223 | shrl $12, %eax | ||
224 | movl %eax, pa(max_pfn_mapped) | ||
222 | 225 | ||
223 | /* Do early initialization of the fixmap area */ | 226 | /* Do early initialization of the fixmap area */ |
224 | movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax | 227 | movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax |
@@ -228,6 +231,7 @@ default_entry: | |||
228 | page_pde_offset = (__PAGE_OFFSET >> 20); | 231 | page_pde_offset = (__PAGE_OFFSET >> 20); |
229 | 232 | ||
230 | movl $pa(pg0), %edi | 233 | movl $pa(pg0), %edi |
234 | movl %edi, pa(init_pg_tables_start) | ||
231 | movl $pa(swapper_pg_dir), %edx | 235 | movl $pa(swapper_pg_dir), %edx |
232 | movl $PTE_ATTR, %eax | 236 | movl $PTE_ATTR, %eax |
233 | 10: | 237 | 10: |
@@ -249,6 +253,8 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
249 | cmpl %ebp,%eax | 253 | cmpl %ebp,%eax |
250 | jb 10b | 254 | jb 10b |
251 | movl %edi,pa(init_pg_tables_end) | 255 | movl %edi,pa(init_pg_tables_end) |
256 | shrl $12, %eax | ||
257 | movl %eax, pa(max_pfn_mapped) | ||
252 | 258 | ||
253 | /* Do early initialization of the fixmap area */ | 259 | /* Do early initialization of the fixmap area */ |
254 | movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax | 260 | movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax |
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index dac47d61d2be..fedb3b113ace 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c | |||
@@ -72,15 +72,21 @@ int sis_apic_bug = -1; | |||
72 | int nr_ioapic_registers[MAX_IO_APICS]; | 72 | int nr_ioapic_registers[MAX_IO_APICS]; |
73 | 73 | ||
74 | /* I/O APIC entries */ | 74 | /* I/O APIC entries */ |
75 | struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; | 75 | struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; |
76 | int nr_ioapics; | 76 | int nr_ioapics; |
77 | 77 | ||
78 | /* MP IRQ source entries */ | 78 | /* MP IRQ source entries */ |
79 | struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; | 79 | struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; |
80 | 80 | ||
81 | /* # of MP IRQ source entries */ | 81 | /* # of MP IRQ source entries */ |
82 | int mp_irq_entries; | 82 | int mp_irq_entries; |
83 | 83 | ||
84 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) | ||
85 | int mp_bus_id_to_type[MAX_MP_BUSSES]; | ||
86 | #endif | ||
87 | |||
88 | DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); | ||
89 | |||
84 | static int disable_timer_pin_1 __initdata; | 90 | static int disable_timer_pin_1 __initdata; |
85 | 91 | ||
86 | /* | 92 | /* |
@@ -110,7 +116,7 @@ struct io_apic { | |||
110 | static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) | 116 | static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) |
111 | { | 117 | { |
112 | return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) | 118 | return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) |
113 | + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); | 119 | + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); |
114 | } | 120 | } |
115 | 121 | ||
116 | static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) | 122 | static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) |
@@ -802,10 +808,10 @@ static int find_irq_entry(int apic, int pin, int type) | |||
802 | int i; | 808 | int i; |
803 | 809 | ||
804 | for (i = 0; i < mp_irq_entries; i++) | 810 | for (i = 0; i < mp_irq_entries; i++) |
805 | if (mp_irqs[i].mpc_irqtype == type && | 811 | if (mp_irqs[i].mp_irqtype == type && |
806 | (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || | 812 | (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || |
807 | mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && | 813 | mp_irqs[i].mp_dstapic == MP_APIC_ALL) && |
808 | mp_irqs[i].mpc_dstirq == pin) | 814 | mp_irqs[i].mp_dstirq == pin) |
809 | return i; | 815 | return i; |
810 | 816 | ||
811 | return -1; | 817 | return -1; |
@@ -819,13 +825,13 @@ static int __init find_isa_irq_pin(int irq, int type) | |||
819 | int i; | 825 | int i; |
820 | 826 | ||
821 | for (i = 0; i < mp_irq_entries; i++) { | 827 | for (i = 0; i < mp_irq_entries; i++) { |
822 | int lbus = mp_irqs[i].mpc_srcbus; | 828 | int lbus = mp_irqs[i].mp_srcbus; |
823 | 829 | ||
824 | if (test_bit(lbus, mp_bus_not_pci) && | 830 | if (test_bit(lbus, mp_bus_not_pci) && |
825 | (mp_irqs[i].mpc_irqtype == type) && | 831 | (mp_irqs[i].mp_irqtype == type) && |
826 | (mp_irqs[i].mpc_srcbusirq == irq)) | 832 | (mp_irqs[i].mp_srcbusirq == irq)) |
827 | 833 | ||
828 | return mp_irqs[i].mpc_dstirq; | 834 | return mp_irqs[i].mp_dstirq; |
829 | } | 835 | } |
830 | return -1; | 836 | return -1; |
831 | } | 837 | } |
@@ -835,17 +841,17 @@ static int __init find_isa_irq_apic(int irq, int type) | |||
835 | int i; | 841 | int i; |
836 | 842 | ||
837 | for (i = 0; i < mp_irq_entries; i++) { | 843 | for (i = 0; i < mp_irq_entries; i++) { |
838 | int lbus = mp_irqs[i].mpc_srcbus; | 844 | int lbus = mp_irqs[i].mp_srcbus; |
839 | 845 | ||
840 | if (test_bit(lbus, mp_bus_not_pci) && | 846 | if (test_bit(lbus, mp_bus_not_pci) && |
841 | (mp_irqs[i].mpc_irqtype == type) && | 847 | (mp_irqs[i].mp_irqtype == type) && |
842 | (mp_irqs[i].mpc_srcbusirq == irq)) | 848 | (mp_irqs[i].mp_srcbusirq == irq)) |
843 | break; | 849 | break; |
844 | } | 850 | } |
845 | if (i < mp_irq_entries) { | 851 | if (i < mp_irq_entries) { |
846 | int apic; | 852 | int apic; |
847 | for (apic = 0; apic < nr_ioapics; apic++) { | 853 | for (apic = 0; apic < nr_ioapics; apic++) { |
848 | if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) | 854 | if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) |
849 | return apic; | 855 | return apic; |
850 | } | 856 | } |
851 | } | 857 | } |
@@ -865,28 +871,28 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) | |||
865 | 871 | ||
866 | apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " | 872 | apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " |
867 | "slot:%d, pin:%d.\n", bus, slot, pin); | 873 | "slot:%d, pin:%d.\n", bus, slot, pin); |
868 | if (mp_bus_id_to_pci_bus[bus] == -1) { | 874 | if (test_bit(bus, mp_bus_not_pci)) { |
869 | printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); | 875 | printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); |
870 | return -1; | 876 | return -1; |
871 | } | 877 | } |
872 | for (i = 0; i < mp_irq_entries; i++) { | 878 | for (i = 0; i < mp_irq_entries; i++) { |
873 | int lbus = mp_irqs[i].mpc_srcbus; | 879 | int lbus = mp_irqs[i].mp_srcbus; |
874 | 880 | ||
875 | for (apic = 0; apic < nr_ioapics; apic++) | 881 | for (apic = 0; apic < nr_ioapics; apic++) |
876 | if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || | 882 | if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || |
877 | mp_irqs[i].mpc_dstapic == MP_APIC_ALL) | 883 | mp_irqs[i].mp_dstapic == MP_APIC_ALL) |
878 | break; | 884 | break; |
879 | 885 | ||
880 | if (!test_bit(lbus, mp_bus_not_pci) && | 886 | if (!test_bit(lbus, mp_bus_not_pci) && |
881 | !mp_irqs[i].mpc_irqtype && | 887 | !mp_irqs[i].mp_irqtype && |
882 | (bus == lbus) && | 888 | (bus == lbus) && |
883 | (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { | 889 | (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { |
884 | int irq = pin_2_irq(i, apic, mp_irqs[i].mpc_dstirq); | 890 | int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq); |
885 | 891 | ||
886 | if (!(apic || IO_APIC_IRQ(irq))) | 892 | if (!(apic || IO_APIC_IRQ(irq))) |
887 | continue; | 893 | continue; |
888 | 894 | ||
889 | if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) | 895 | if (pin == (mp_irqs[i].mp_srcbusirq & 3)) |
890 | return irq; | 896 | return irq; |
891 | /* | 897 | /* |
892 | * Use the first all-but-pin matching entry as a | 898 | * Use the first all-but-pin matching entry as a |
@@ -953,7 +959,7 @@ static int EISA_ELCR(unsigned int irq) | |||
953 | * EISA conforming in the MP table, that means its trigger type must | 959 | * EISA conforming in the MP table, that means its trigger type must |
954 | * be read in from the ELCR */ | 960 | * be read in from the ELCR */ |
955 | 961 | ||
956 | #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) | 962 | #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) |
957 | #define default_EISA_polarity(idx) default_ISA_polarity(idx) | 963 | #define default_EISA_polarity(idx) default_ISA_polarity(idx) |
958 | 964 | ||
959 | /* PCI interrupts are always polarity one level triggered, | 965 | /* PCI interrupts are always polarity one level triggered, |
@@ -970,13 +976,13 @@ static int EISA_ELCR(unsigned int irq) | |||
970 | 976 | ||
971 | static int MPBIOS_polarity(int idx) | 977 | static int MPBIOS_polarity(int idx) |
972 | { | 978 | { |
973 | int bus = mp_irqs[idx].mpc_srcbus; | 979 | int bus = mp_irqs[idx].mp_srcbus; |
974 | int polarity; | 980 | int polarity; |
975 | 981 | ||
976 | /* | 982 | /* |
977 | * Determine IRQ line polarity (high active or low active): | 983 | * Determine IRQ line polarity (high active or low active): |
978 | */ | 984 | */ |
979 | switch (mp_irqs[idx].mpc_irqflag & 3) { | 985 | switch (mp_irqs[idx].mp_irqflag & 3) { |
980 | case 0: /* conforms, ie. bus-type dependent polarity */ | 986 | case 0: /* conforms, ie. bus-type dependent polarity */ |
981 | { | 987 | { |
982 | polarity = test_bit(bus, mp_bus_not_pci)? | 988 | polarity = test_bit(bus, mp_bus_not_pci)? |
@@ -1012,13 +1018,13 @@ static int MPBIOS_polarity(int idx) | |||
1012 | 1018 | ||
1013 | static int MPBIOS_trigger(int idx) | 1019 | static int MPBIOS_trigger(int idx) |
1014 | { | 1020 | { |
1015 | int bus = mp_irqs[idx].mpc_srcbus; | 1021 | int bus = mp_irqs[idx].mp_srcbus; |
1016 | int trigger; | 1022 | int trigger; |
1017 | 1023 | ||
1018 | /* | 1024 | /* |
1019 | * Determine IRQ trigger mode (edge or level sensitive): | 1025 | * Determine IRQ trigger mode (edge or level sensitive): |
1020 | */ | 1026 | */ |
1021 | switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) { | 1027 | switch ((mp_irqs[idx].mp_irqflag>>2) & 3) { |
1022 | case 0: /* conforms, ie. bus-type dependent */ | 1028 | case 0: /* conforms, ie. bus-type dependent */ |
1023 | { | 1029 | { |
1024 | trigger = test_bit(bus, mp_bus_not_pci)? | 1030 | trigger = test_bit(bus, mp_bus_not_pci)? |
@@ -1095,16 +1101,16 @@ static inline int irq_trigger(int idx) | |||
1095 | static int pin_2_irq(int idx, int apic, int pin) | 1101 | static int pin_2_irq(int idx, int apic, int pin) |
1096 | { | 1102 | { |
1097 | int irq, i; | 1103 | int irq, i; |
1098 | int bus = mp_irqs[idx].mpc_srcbus; | 1104 | int bus = mp_irqs[idx].mp_srcbus; |
1099 | 1105 | ||
1100 | /* | 1106 | /* |
1101 | * Debugging check, we are in big trouble if this message pops up! | 1107 | * Debugging check, we are in big trouble if this message pops up! |
1102 | */ | 1108 | */ |
1103 | if (mp_irqs[idx].mpc_dstirq != pin) | 1109 | if (mp_irqs[idx].mp_dstirq != pin) |
1104 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); | 1110 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); |
1105 | 1111 | ||
1106 | if (test_bit(bus, mp_bus_not_pci)) | 1112 | if (test_bit(bus, mp_bus_not_pci)) |
1107 | irq = mp_irqs[idx].mpc_srcbusirq; | 1113 | irq = mp_irqs[idx].mp_srcbusirq; |
1108 | else { | 1114 | else { |
1109 | /* | 1115 | /* |
1110 | * PCI IRQs are mapped in order | 1116 | * PCI IRQs are mapped in order |
@@ -1248,12 +1254,12 @@ static void __init setup_IO_APIC_irqs(void) | |||
1248 | if (first_notcon) { | 1254 | if (first_notcon) { |
1249 | apic_printk(APIC_VERBOSE, KERN_DEBUG | 1255 | apic_printk(APIC_VERBOSE, KERN_DEBUG |
1250 | " IO-APIC (apicid-pin) %d-%d", | 1256 | " IO-APIC (apicid-pin) %d-%d", |
1251 | mp_ioapics[apic].mpc_apicid, | 1257 | mp_ioapics[apic].mp_apicid, |
1252 | pin); | 1258 | pin); |
1253 | first_notcon = 0; | 1259 | first_notcon = 0; |
1254 | } else | 1260 | } else |
1255 | apic_printk(APIC_VERBOSE, ", %d-%d", | 1261 | apic_printk(APIC_VERBOSE, ", %d-%d", |
1256 | mp_ioapics[apic].mpc_apicid, pin); | 1262 | mp_ioapics[apic].mp_apicid, pin); |
1257 | continue; | 1263 | continue; |
1258 | } | 1264 | } |
1259 | 1265 | ||
@@ -1348,7 +1354,7 @@ void __init print_IO_APIC(void) | |||
1348 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); | 1354 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); |
1349 | for (i = 0; i < nr_ioapics; i++) | 1355 | for (i = 0; i < nr_ioapics; i++) |
1350 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", | 1356 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", |
1351 | mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); | 1357 | mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); |
1352 | 1358 | ||
1353 | /* | 1359 | /* |
1354 | * We are a bit conservative about what we expect. We have to | 1360 | * We are a bit conservative about what we expect. We have to |
@@ -1367,7 +1373,7 @@ void __init print_IO_APIC(void) | |||
1367 | reg_03.raw = io_apic_read(apic, 3); | 1373 | reg_03.raw = io_apic_read(apic, 3); |
1368 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1374 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1369 | 1375 | ||
1370 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); | 1376 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); |
1371 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); | 1377 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); |
1372 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); | 1378 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); |
1373 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); | 1379 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); |
@@ -1708,7 +1714,6 @@ void disable_IO_APIC(void) | |||
1708 | * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 | 1714 | * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 |
1709 | */ | 1715 | */ |
1710 | 1716 | ||
1711 | #ifndef CONFIG_X86_NUMAQ | ||
1712 | static void __init setup_ioapic_ids_from_mpc(void) | 1717 | static void __init setup_ioapic_ids_from_mpc(void) |
1713 | { | 1718 | { |
1714 | union IO_APIC_reg_00 reg_00; | 1719 | union IO_APIC_reg_00 reg_00; |
@@ -1718,6 +1723,11 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1718 | unsigned char old_id; | 1723 | unsigned char old_id; |
1719 | unsigned long flags; | 1724 | unsigned long flags; |
1720 | 1725 | ||
1726 | #ifdef CONFIG_X86_NUMAQ | ||
1727 | if (found_numaq) | ||
1728 | return; | ||
1729 | #endif | ||
1730 | |||
1721 | /* | 1731 | /* |
1722 | * Don't check I/O APIC IDs for xAPIC systems. They have | 1732 | * Don't check I/O APIC IDs for xAPIC systems. They have |
1723 | * no meaning without the serial APIC bus. | 1733 | * no meaning without the serial APIC bus. |
@@ -1741,14 +1751,14 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1741 | reg_00.raw = io_apic_read(apic, 0); | 1751 | reg_00.raw = io_apic_read(apic, 0); |
1742 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1752 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1743 | 1753 | ||
1744 | old_id = mp_ioapics[apic].mpc_apicid; | 1754 | old_id = mp_ioapics[apic].mp_apicid; |
1745 | 1755 | ||
1746 | if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) { | 1756 | if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { |
1747 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", | 1757 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", |
1748 | apic, mp_ioapics[apic].mpc_apicid); | 1758 | apic, mp_ioapics[apic].mp_apicid); |
1749 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", | 1759 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", |
1750 | reg_00.bits.ID); | 1760 | reg_00.bits.ID); |
1751 | mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; | 1761 | mp_ioapics[apic].mp_apicid = reg_00.bits.ID; |
1752 | } | 1762 | } |
1753 | 1763 | ||
1754 | /* | 1764 | /* |
@@ -1757,9 +1767,9 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1757 | * 'stuck on smp_invalidate_needed IPI wait' messages. | 1767 | * 'stuck on smp_invalidate_needed IPI wait' messages. |
1758 | */ | 1768 | */ |
1759 | if (check_apicid_used(phys_id_present_map, | 1769 | if (check_apicid_used(phys_id_present_map, |
1760 | mp_ioapics[apic].mpc_apicid)) { | 1770 | mp_ioapics[apic].mp_apicid)) { |
1761 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", | 1771 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", |
1762 | apic, mp_ioapics[apic].mpc_apicid); | 1772 | apic, mp_ioapics[apic].mp_apicid); |
1763 | for (i = 0; i < get_physical_broadcast(); i++) | 1773 | for (i = 0; i < get_physical_broadcast(); i++) |
1764 | if (!physid_isset(i, phys_id_present_map)) | 1774 | if (!physid_isset(i, phys_id_present_map)) |
1765 | break; | 1775 | break; |
@@ -1768,13 +1778,13 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1768 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", | 1778 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", |
1769 | i); | 1779 | i); |
1770 | physid_set(i, phys_id_present_map); | 1780 | physid_set(i, phys_id_present_map); |
1771 | mp_ioapics[apic].mpc_apicid = i; | 1781 | mp_ioapics[apic].mp_apicid = i; |
1772 | } else { | 1782 | } else { |
1773 | physid_mask_t tmp; | 1783 | physid_mask_t tmp; |
1774 | tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid); | 1784 | tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); |
1775 | apic_printk(APIC_VERBOSE, "Setting %d in the " | 1785 | apic_printk(APIC_VERBOSE, "Setting %d in the " |
1776 | "phys_id_present_map\n", | 1786 | "phys_id_present_map\n", |
1777 | mp_ioapics[apic].mpc_apicid); | 1787 | mp_ioapics[apic].mp_apicid); |
1778 | physids_or(phys_id_present_map, phys_id_present_map, tmp); | 1788 | physids_or(phys_id_present_map, phys_id_present_map, tmp); |
1779 | } | 1789 | } |
1780 | 1790 | ||
@@ -1783,11 +1793,11 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1783 | * We need to adjust the IRQ routing table | 1793 | * We need to adjust the IRQ routing table |
1784 | * if the ID changed. | 1794 | * if the ID changed. |
1785 | */ | 1795 | */ |
1786 | if (old_id != mp_ioapics[apic].mpc_apicid) | 1796 | if (old_id != mp_ioapics[apic].mp_apicid) |
1787 | for (i = 0; i < mp_irq_entries; i++) | 1797 | for (i = 0; i < mp_irq_entries; i++) |
1788 | if (mp_irqs[i].mpc_dstapic == old_id) | 1798 | if (mp_irqs[i].mp_dstapic == old_id) |
1789 | mp_irqs[i].mpc_dstapic | 1799 | mp_irqs[i].mp_dstapic |
1790 | = mp_ioapics[apic].mpc_apicid; | 1800 | = mp_ioapics[apic].mp_apicid; |
1791 | 1801 | ||
1792 | /* | 1802 | /* |
1793 | * Read the right value from the MPC table and | 1803 | * Read the right value from the MPC table and |
@@ -1795,9 +1805,9 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1795 | */ | 1805 | */ |
1796 | apic_printk(APIC_VERBOSE, KERN_INFO | 1806 | apic_printk(APIC_VERBOSE, KERN_INFO |
1797 | "...changing IO-APIC physical APIC ID to %d ...", | 1807 | "...changing IO-APIC physical APIC ID to %d ...", |
1798 | mp_ioapics[apic].mpc_apicid); | 1808 | mp_ioapics[apic].mp_apicid); |
1799 | 1809 | ||
1800 | reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; | 1810 | reg_00.bits.ID = mp_ioapics[apic].mp_apicid; |
1801 | spin_lock_irqsave(&ioapic_lock, flags); | 1811 | spin_lock_irqsave(&ioapic_lock, flags); |
1802 | io_apic_write(apic, 0, reg_00.raw); | 1812 | io_apic_write(apic, 0, reg_00.raw); |
1803 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1813 | spin_unlock_irqrestore(&ioapic_lock, flags); |
@@ -1808,15 +1818,12 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1808 | spin_lock_irqsave(&ioapic_lock, flags); | 1818 | spin_lock_irqsave(&ioapic_lock, flags); |
1809 | reg_00.raw = io_apic_read(apic, 0); | 1819 | reg_00.raw = io_apic_read(apic, 0); |
1810 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1820 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1811 | if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) | 1821 | if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) |
1812 | printk("could not set ID!\n"); | 1822 | printk("could not set ID!\n"); |
1813 | else | 1823 | else |
1814 | apic_printk(APIC_VERBOSE, " ok.\n"); | 1824 | apic_printk(APIC_VERBOSE, " ok.\n"); |
1815 | } | 1825 | } |
1816 | } | 1826 | } |
1817 | #else | ||
1818 | static void __init setup_ioapic_ids_from_mpc(void) { } | ||
1819 | #endif | ||
1820 | 1827 | ||
1821 | int no_timer_check __initdata; | 1828 | int no_timer_check __initdata; |
1822 | 1829 | ||
@@ -2352,8 +2359,8 @@ static int ioapic_resume(struct sys_device *dev) | |||
2352 | 2359 | ||
2353 | spin_lock_irqsave(&ioapic_lock, flags); | 2360 | spin_lock_irqsave(&ioapic_lock, flags); |
2354 | reg_00.raw = io_apic_read(dev->id, 0); | 2361 | reg_00.raw = io_apic_read(dev->id, 0); |
2355 | if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { | 2362 | if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { |
2356 | reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; | 2363 | reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; |
2357 | io_apic_write(dev->id, 0, reg_00.raw); | 2364 | io_apic_write(dev->id, 0, reg_00.raw); |
2358 | } | 2365 | } |
2359 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2366 | spin_unlock_irqrestore(&ioapic_lock, flags); |
@@ -2785,7 +2792,7 @@ int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int ac | |||
2785 | 2792 | ||
2786 | apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " | 2793 | apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " |
2787 | "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, | 2794 | "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, |
2788 | mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, | 2795 | mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq, |
2789 | edge_level, active_high_low); | 2796 | edge_level, active_high_low); |
2790 | 2797 | ||
2791 | ioapic_register_intr(irq, entry.vector, edge_level); | 2798 | ioapic_register_intr(irq, entry.vector, edge_level); |
@@ -2806,8 +2813,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) | |||
2806 | return -1; | 2813 | return -1; |
2807 | 2814 | ||
2808 | for (i = 0; i < mp_irq_entries; i++) | 2815 | for (i = 0; i < mp_irq_entries; i++) |
2809 | if (mp_irqs[i].mpc_irqtype == mp_INT && | 2816 | if (mp_irqs[i].mp_irqtype == mp_INT && |
2810 | mp_irqs[i].mpc_srcbusirq == bus_irq) | 2817 | mp_irqs[i].mp_srcbusirq == bus_irq) |
2811 | break; | 2818 | break; |
2812 | if (i >= mp_irq_entries) | 2819 | if (i >= mp_irq_entries) |
2813 | return -1; | 2820 | return -1; |
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 78a3866ab367..2eba4f4c14ba 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c | |||
@@ -108,15 +108,17 @@ DEFINE_SPINLOCK(vector_lock); | |||
108 | int nr_ioapic_registers[MAX_IO_APICS]; | 108 | int nr_ioapic_registers[MAX_IO_APICS]; |
109 | 109 | ||
110 | /* I/O APIC entries */ | 110 | /* I/O APIC entries */ |
111 | struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; | 111 | struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; |
112 | int nr_ioapics; | 112 | int nr_ioapics; |
113 | 113 | ||
114 | /* MP IRQ source entries */ | 114 | /* MP IRQ source entries */ |
115 | struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; | 115 | struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; |
116 | 116 | ||
117 | /* # of MP IRQ source entries */ | 117 | /* # of MP IRQ source entries */ |
118 | int mp_irq_entries; | 118 | int mp_irq_entries; |
119 | 119 | ||
120 | DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); | ||
121 | |||
120 | /* | 122 | /* |
121 | * Rough estimation of how many shared IRQs there are, can | 123 | * Rough estimation of how many shared IRQs there are, can |
122 | * be changed anytime. | 124 | * be changed anytime. |
@@ -144,7 +146,7 @@ struct io_apic { | |||
144 | static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) | 146 | static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) |
145 | { | 147 | { |
146 | return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) | 148 | return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) |
147 | + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); | 149 | + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); |
148 | } | 150 | } |
149 | 151 | ||
150 | static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) | 152 | static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) |
@@ -464,10 +466,10 @@ static int find_irq_entry(int apic, int pin, int type) | |||
464 | int i; | 466 | int i; |
465 | 467 | ||
466 | for (i = 0; i < mp_irq_entries; i++) | 468 | for (i = 0; i < mp_irq_entries; i++) |
467 | if (mp_irqs[i].mpc_irqtype == type && | 469 | if (mp_irqs[i].mp_irqtype == type && |
468 | (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || | 470 | (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || |
469 | mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && | 471 | mp_irqs[i].mp_dstapic == MP_APIC_ALL) && |
470 | mp_irqs[i].mpc_dstirq == pin) | 472 | mp_irqs[i].mp_dstirq == pin) |
471 | return i; | 473 | return i; |
472 | 474 | ||
473 | return -1; | 475 | return -1; |
@@ -481,13 +483,13 @@ static int __init find_isa_irq_pin(int irq, int type) | |||
481 | int i; | 483 | int i; |
482 | 484 | ||
483 | for (i = 0; i < mp_irq_entries; i++) { | 485 | for (i = 0; i < mp_irq_entries; i++) { |
484 | int lbus = mp_irqs[i].mpc_srcbus; | 486 | int lbus = mp_irqs[i].mp_srcbus; |
485 | 487 | ||
486 | if (test_bit(lbus, mp_bus_not_pci) && | 488 | if (test_bit(lbus, mp_bus_not_pci) && |
487 | (mp_irqs[i].mpc_irqtype == type) && | 489 | (mp_irqs[i].mp_irqtype == type) && |
488 | (mp_irqs[i].mpc_srcbusirq == irq)) | 490 | (mp_irqs[i].mp_srcbusirq == irq)) |
489 | 491 | ||
490 | return mp_irqs[i].mpc_dstirq; | 492 | return mp_irqs[i].mp_dstirq; |
491 | } | 493 | } |
492 | return -1; | 494 | return -1; |
493 | } | 495 | } |
@@ -497,17 +499,17 @@ static int __init find_isa_irq_apic(int irq, int type) | |||
497 | int i; | 499 | int i; |
498 | 500 | ||
499 | for (i = 0; i < mp_irq_entries; i++) { | 501 | for (i = 0; i < mp_irq_entries; i++) { |
500 | int lbus = mp_irqs[i].mpc_srcbus; | 502 | int lbus = mp_irqs[i].mp_srcbus; |
501 | 503 | ||
502 | if (test_bit(lbus, mp_bus_not_pci) && | 504 | if (test_bit(lbus, mp_bus_not_pci) && |
503 | (mp_irqs[i].mpc_irqtype == type) && | 505 | (mp_irqs[i].mp_irqtype == type) && |
504 | (mp_irqs[i].mpc_srcbusirq == irq)) | 506 | (mp_irqs[i].mp_srcbusirq == irq)) |
505 | break; | 507 | break; |
506 | } | 508 | } |
507 | if (i < mp_irq_entries) { | 509 | if (i < mp_irq_entries) { |
508 | int apic; | 510 | int apic; |
509 | for(apic = 0; apic < nr_ioapics; apic++) { | 511 | for(apic = 0; apic < nr_ioapics; apic++) { |
510 | if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) | 512 | if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) |
511 | return apic; | 513 | return apic; |
512 | } | 514 | } |
513 | } | 515 | } |
@@ -527,28 +529,28 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) | |||
527 | 529 | ||
528 | apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", | 530 | apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", |
529 | bus, slot, pin); | 531 | bus, slot, pin); |
530 | if (mp_bus_id_to_pci_bus[bus] == -1) { | 532 | if (test_bit(bus, mp_bus_not_pci)) { |
531 | apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); | 533 | apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); |
532 | return -1; | 534 | return -1; |
533 | } | 535 | } |
534 | for (i = 0; i < mp_irq_entries; i++) { | 536 | for (i = 0; i < mp_irq_entries; i++) { |
535 | int lbus = mp_irqs[i].mpc_srcbus; | 537 | int lbus = mp_irqs[i].mp_srcbus; |
536 | 538 | ||
537 | for (apic = 0; apic < nr_ioapics; apic++) | 539 | for (apic = 0; apic < nr_ioapics; apic++) |
538 | if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || | 540 | if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || |
539 | mp_irqs[i].mpc_dstapic == MP_APIC_ALL) | 541 | mp_irqs[i].mp_dstapic == MP_APIC_ALL) |
540 | break; | 542 | break; |
541 | 543 | ||
542 | if (!test_bit(lbus, mp_bus_not_pci) && | 544 | if (!test_bit(lbus, mp_bus_not_pci) && |
543 | !mp_irqs[i].mpc_irqtype && | 545 | !mp_irqs[i].mp_irqtype && |
544 | (bus == lbus) && | 546 | (bus == lbus) && |
545 | (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { | 547 | (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { |
546 | int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); | 548 | int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); |
547 | 549 | ||
548 | if (!(apic || IO_APIC_IRQ(irq))) | 550 | if (!(apic || IO_APIC_IRQ(irq))) |
549 | continue; | 551 | continue; |
550 | 552 | ||
551 | if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) | 553 | if (pin == (mp_irqs[i].mp_srcbusirq & 3)) |
552 | return irq; | 554 | return irq; |
553 | /* | 555 | /* |
554 | * Use the first all-but-pin matching entry as a | 556 | * Use the first all-but-pin matching entry as a |
@@ -576,13 +578,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) | |||
576 | 578 | ||
577 | static int MPBIOS_polarity(int idx) | 579 | static int MPBIOS_polarity(int idx) |
578 | { | 580 | { |
579 | int bus = mp_irqs[idx].mpc_srcbus; | 581 | int bus = mp_irqs[idx].mp_srcbus; |
580 | int polarity; | 582 | int polarity; |
581 | 583 | ||
582 | /* | 584 | /* |
583 | * Determine IRQ line polarity (high active or low active): | 585 | * Determine IRQ line polarity (high active or low active): |
584 | */ | 586 | */ |
585 | switch (mp_irqs[idx].mpc_irqflag & 3) | 587 | switch (mp_irqs[idx].mp_irqflag & 3) |
586 | { | 588 | { |
587 | case 0: /* conforms, ie. bus-type dependent polarity */ | 589 | case 0: /* conforms, ie. bus-type dependent polarity */ |
588 | if (test_bit(bus, mp_bus_not_pci)) | 590 | if (test_bit(bus, mp_bus_not_pci)) |
@@ -618,13 +620,13 @@ static int MPBIOS_polarity(int idx) | |||
618 | 620 | ||
619 | static int MPBIOS_trigger(int idx) | 621 | static int MPBIOS_trigger(int idx) |
620 | { | 622 | { |
621 | int bus = mp_irqs[idx].mpc_srcbus; | 623 | int bus = mp_irqs[idx].mp_srcbus; |
622 | int trigger; | 624 | int trigger; |
623 | 625 | ||
624 | /* | 626 | /* |
625 | * Determine IRQ trigger mode (edge or level sensitive): | 627 | * Determine IRQ trigger mode (edge or level sensitive): |
626 | */ | 628 | */ |
627 | switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) | 629 | switch ((mp_irqs[idx].mp_irqflag>>2) & 3) |
628 | { | 630 | { |
629 | case 0: /* conforms, ie. bus-type dependent */ | 631 | case 0: /* conforms, ie. bus-type dependent */ |
630 | if (test_bit(bus, mp_bus_not_pci)) | 632 | if (test_bit(bus, mp_bus_not_pci)) |
@@ -671,16 +673,16 @@ static inline int irq_trigger(int idx) | |||
671 | static int pin_2_irq(int idx, int apic, int pin) | 673 | static int pin_2_irq(int idx, int apic, int pin) |
672 | { | 674 | { |
673 | int irq, i; | 675 | int irq, i; |
674 | int bus = mp_irqs[idx].mpc_srcbus; | 676 | int bus = mp_irqs[idx].mp_srcbus; |
675 | 677 | ||
676 | /* | 678 | /* |
677 | * Debugging check, we are in big trouble if this message pops up! | 679 | * Debugging check, we are in big trouble if this message pops up! |
678 | */ | 680 | */ |
679 | if (mp_irqs[idx].mpc_dstirq != pin) | 681 | if (mp_irqs[idx].mp_dstirq != pin) |
680 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); | 682 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); |
681 | 683 | ||
682 | if (test_bit(bus, mp_bus_not_pci)) { | 684 | if (test_bit(bus, mp_bus_not_pci)) { |
683 | irq = mp_irqs[idx].mpc_srcbusirq; | 685 | irq = mp_irqs[idx].mp_srcbusirq; |
684 | } else { | 686 | } else { |
685 | /* | 687 | /* |
686 | * PCI IRQs are mapped in order | 688 | * PCI IRQs are mapped in order |
@@ -857,7 +859,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, | |||
857 | apic_printk(APIC_VERBOSE,KERN_DEBUG | 859 | apic_printk(APIC_VERBOSE,KERN_DEBUG |
858 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " | 860 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " |
859 | "IRQ %d Mode:%i Active:%i)\n", | 861 | "IRQ %d Mode:%i Active:%i)\n", |
860 | apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector, | 862 | apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, |
861 | irq, trigger, polarity); | 863 | irq, trigger, polarity); |
862 | 864 | ||
863 | /* | 865 | /* |
@@ -898,10 +900,10 @@ static void __init setup_IO_APIC_irqs(void) | |||
898 | idx = find_irq_entry(apic,pin,mp_INT); | 900 | idx = find_irq_entry(apic,pin,mp_INT); |
899 | if (idx == -1) { | 901 | if (idx == -1) { |
900 | if (first_notcon) { | 902 | if (first_notcon) { |
901 | apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin); | 903 | apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin); |
902 | first_notcon = 0; | 904 | first_notcon = 0; |
903 | } else | 905 | } else |
904 | apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin); | 906 | apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin); |
905 | continue; | 907 | continue; |
906 | } | 908 | } |
907 | if (!first_notcon) { | 909 | if (!first_notcon) { |
@@ -969,7 +971,7 @@ void __apicdebuginit print_IO_APIC(void) | |||
969 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); | 971 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); |
970 | for (i = 0; i < nr_ioapics; i++) | 972 | for (i = 0; i < nr_ioapics; i++) |
971 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", | 973 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", |
972 | mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); | 974 | mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); |
973 | 975 | ||
974 | /* | 976 | /* |
975 | * We are a bit conservative about what we expect. We have to | 977 | * We are a bit conservative about what we expect. We have to |
@@ -987,7 +989,7 @@ void __apicdebuginit print_IO_APIC(void) | |||
987 | spin_unlock_irqrestore(&ioapic_lock, flags); | 989 | spin_unlock_irqrestore(&ioapic_lock, flags); |
988 | 990 | ||
989 | printk("\n"); | 991 | printk("\n"); |
990 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); | 992 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); |
991 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); | 993 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); |
992 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); | 994 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); |
993 | 995 | ||
@@ -1873,8 +1875,8 @@ static int ioapic_resume(struct sys_device *dev) | |||
1873 | 1875 | ||
1874 | spin_lock_irqsave(&ioapic_lock, flags); | 1876 | spin_lock_irqsave(&ioapic_lock, flags); |
1875 | reg_00.raw = io_apic_read(dev->id, 0); | 1877 | reg_00.raw = io_apic_read(dev->id, 0); |
1876 | if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { | 1878 | if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { |
1877 | reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; | 1879 | reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; |
1878 | io_apic_write(dev->id, 0, reg_00.raw); | 1880 | io_apic_write(dev->id, 0, reg_00.raw); |
1879 | } | 1881 | } |
1880 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1882 | spin_unlock_irqrestore(&ioapic_lock, flags); |
@@ -2274,8 +2276,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) | |||
2274 | return -1; | 2276 | return -1; |
2275 | 2277 | ||
2276 | for (i = 0; i < mp_irq_entries; i++) | 2278 | for (i = 0; i < mp_irq_entries; i++) |
2277 | if (mp_irqs[i].mpc_irqtype == mp_INT && | 2279 | if (mp_irqs[i].mp_irqtype == mp_INT && |
2278 | mp_irqs[i].mpc_srcbusirq == bus_irq) | 2280 | mp_irqs[i].mp_srcbusirq == bus_irq) |
2279 | break; | 2281 | break; |
2280 | if (i >= mp_irq_entries) | 2282 | if (i >= mp_irq_entries) |
2281 | return -1; | 2283 | return -1; |
@@ -2368,7 +2370,7 @@ void __init ioapic_init_mappings(void) | |||
2368 | ioapic_res = ioapic_setup_resources(); | 2370 | ioapic_res = ioapic_setup_resources(); |
2369 | for (i = 0; i < nr_ioapics; i++) { | 2371 | for (i = 0; i < nr_ioapics; i++) { |
2370 | if (smp_found_config) { | 2372 | if (smp_found_config) { |
2371 | ioapic_phys = mp_ioapics[i].mpc_apicaddr; | 2373 | ioapic_phys = mp_ioapics[i].mp_apicaddr; |
2372 | } else { | 2374 | } else { |
2373 | ioapic_phys = (unsigned long) | 2375 | ioapic_phys = (unsigned long) |
2374 | alloc_bootmem_pages(PAGE_SIZE); | 2376 | alloc_bootmem_pages(PAGE_SIZE); |
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 404683b94e79..8b6b1e05c306 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -25,6 +25,8 @@ | |||
25 | #include <asm/proto.h> | 25 | #include <asm/proto.h> |
26 | #include <asm/acpi.h> | 26 | #include <asm/acpi.h> |
27 | #include <asm/bios_ebda.h> | 27 | #include <asm/bios_ebda.h> |
28 | #include <asm/e820.h> | ||
29 | #include <asm/trampoline.h> | ||
28 | 30 | ||
29 | #include <mach_apic.h> | 31 | #include <mach_apic.h> |
30 | #ifdef CONFIG_X86_32 | 32 | #ifdef CONFIG_X86_32 |
@@ -32,28 +34,6 @@ | |||
32 | #include <mach_mpparse.h> | 34 | #include <mach_mpparse.h> |
33 | #endif | 35 | #endif |
34 | 36 | ||
35 | /* Have we found an MP table */ | ||
36 | int smp_found_config; | ||
37 | |||
38 | /* | ||
39 | * Various Linux-internal data structures created from the | ||
40 | * MP-table. | ||
41 | */ | ||
42 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) | ||
43 | int mp_bus_id_to_type[MAX_MP_BUSSES]; | ||
44 | #endif | ||
45 | |||
46 | DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); | ||
47 | int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 }; | ||
48 | |||
49 | static int mp_current_pci_id; | ||
50 | |||
51 | int pic_mode; | ||
52 | |||
53 | /* | ||
54 | * Intel MP BIOS table parsing routines: | ||
55 | */ | ||
56 | |||
57 | /* | 37 | /* |
58 | * Checksum an MP configuration block. | 38 | * Checksum an MP configuration block. |
59 | */ | 39 | */ |
@@ -69,15 +49,73 @@ static int __init mpf_checksum(unsigned char *mp, int len) | |||
69 | } | 49 | } |
70 | 50 | ||
71 | #ifdef CONFIG_X86_NUMAQ | 51 | #ifdef CONFIG_X86_NUMAQ |
52 | int found_numaq; | ||
72 | /* | 53 | /* |
73 | * Have to match translation table entries to main table entries by counter | 54 | * Have to match translation table entries to main table entries by counter |
74 | * hence the mpc_record variable .... can't see a less disgusting way of | 55 | * hence the mpc_record variable .... can't see a less disgusting way of |
75 | * doing this .... | 56 | * doing this .... |
76 | */ | 57 | */ |
58 | struct mpc_config_translation { | ||
59 | unsigned char mpc_type; | ||
60 | unsigned char trans_len; | ||
61 | unsigned char trans_type; | ||
62 | unsigned char trans_quad; | ||
63 | unsigned char trans_global; | ||
64 | unsigned char trans_local; | ||
65 | unsigned short trans_reserved; | ||
66 | }; | ||
67 | |||
77 | 68 | ||
78 | static int mpc_record; | 69 | static int mpc_record; |
79 | static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] | 70 | static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] |
80 | __cpuinitdata; | 71 | __cpuinitdata; |
72 | |||
73 | static inline int generate_logical_apicid(int quad, int phys_apicid) | ||
74 | { | ||
75 | return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); | ||
76 | } | ||
77 | |||
78 | |||
79 | static inline int mpc_apic_id(struct mpc_config_processor *m, | ||
80 | struct mpc_config_translation *translation_record) | ||
81 | { | ||
82 | int quad = translation_record->trans_quad; | ||
83 | int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid); | ||
84 | |||
85 | printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", | ||
86 | m->mpc_apicid, | ||
87 | (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, | ||
88 | (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, | ||
89 | m->mpc_apicver, quad, logical_apicid); | ||
90 | return logical_apicid; | ||
91 | } | ||
92 | |||
93 | int mp_bus_id_to_node[MAX_MP_BUSSES]; | ||
94 | |||
95 | int mp_bus_id_to_local[MAX_MP_BUSSES]; | ||
96 | |||
97 | static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name, | ||
98 | struct mpc_config_translation *translation) | ||
99 | { | ||
100 | int quad = translation->trans_quad; | ||
101 | int local = translation->trans_local; | ||
102 | |||
103 | mp_bus_id_to_node[m->mpc_busid] = quad; | ||
104 | mp_bus_id_to_local[m->mpc_busid] = local; | ||
105 | printk(KERN_INFO "Bus #%d is %s (node %d)\n", | ||
106 | m->mpc_busid, name, quad); | ||
107 | } | ||
108 | |||
109 | int quad_local_to_mp_bus_id [NR_CPUS/4][4]; | ||
110 | static void mpc_oem_pci_bus(struct mpc_config_bus *m, | ||
111 | struct mpc_config_translation *translation) | ||
112 | { | ||
113 | int quad = translation->trans_quad; | ||
114 | int local = translation->trans_local; | ||
115 | |||
116 | quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; | ||
117 | } | ||
118 | |||
81 | #endif | 119 | #endif |
82 | 120 | ||
83 | static void __cpuinit MP_processor_info(struct mpc_config_processor *m) | 121 | static void __cpuinit MP_processor_info(struct mpc_config_processor *m) |
@@ -90,7 +128,10 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) | |||
90 | return; | 128 | return; |
91 | } | 129 | } |
92 | #ifdef CONFIG_X86_NUMAQ | 130 | #ifdef CONFIG_X86_NUMAQ |
93 | apicid = mpc_apic_id(m, translation_table[mpc_record]); | 131 | if (found_numaq) |
132 | apicid = mpc_apic_id(m, translation_table[mpc_record]); | ||
133 | else | ||
134 | apicid = m->mpc_apicid; | ||
94 | #else | 135 | #else |
95 | apicid = m->mpc_apicid; | 136 | apicid = m->mpc_apicid; |
96 | #endif | 137 | #endif |
@@ -103,17 +144,18 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) | |||
103 | generic_processor_info(apicid, m->mpc_apicver); | 144 | generic_processor_info(apicid, m->mpc_apicver); |
104 | } | 145 | } |
105 | 146 | ||
147 | #ifdef CONFIG_X86_IO_APIC | ||
106 | static void __init MP_bus_info(struct mpc_config_bus *m) | 148 | static void __init MP_bus_info(struct mpc_config_bus *m) |
107 | { | 149 | { |
108 | char str[7]; | 150 | char str[7]; |
109 | |||
110 | memcpy(str, m->mpc_bustype, 6); | 151 | memcpy(str, m->mpc_bustype, 6); |
111 | str[6] = 0; | 152 | str[6] = 0; |
112 | 153 | ||
113 | #ifdef CONFIG_X86_NUMAQ | 154 | #ifdef CONFIG_X86_NUMAQ |
114 | mpc_oem_bus_info(m, str, translation_table[mpc_record]); | 155 | if (found_numaq) |
156 | mpc_oem_bus_info(m, str, translation_table[mpc_record]); | ||
115 | #else | 157 | #else |
116 | Dprintk("Bus #%d is %s\n", m->mpc_busid, str); | 158 | printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); |
117 | #endif | 159 | #endif |
118 | 160 | ||
119 | #if MAX_MP_BUSSES < 256 | 161 | #if MAX_MP_BUSSES < 256 |
@@ -132,11 +174,10 @@ static void __init MP_bus_info(struct mpc_config_bus *m) | |||
132 | #endif | 174 | #endif |
133 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { | 175 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { |
134 | #ifdef CONFIG_X86_NUMAQ | 176 | #ifdef CONFIG_X86_NUMAQ |
135 | mpc_oem_pci_bus(m, translation_table[mpc_record]); | 177 | if (found_numaq) |
178 | mpc_oem_pci_bus(m, translation_table[mpc_record]); | ||
136 | #endif | 179 | #endif |
137 | clear_bit(m->mpc_busid, mp_bus_not_pci); | 180 | clear_bit(m->mpc_busid, mp_bus_not_pci); |
138 | mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; | ||
139 | mp_current_pci_id++; | ||
140 | #if defined(CONFIG_EISA) || defined (CONFIG_MCA) | 181 | #if defined(CONFIG_EISA) || defined (CONFIG_MCA) |
141 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; | 182 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; |
142 | } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { | 183 | } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { |
@@ -147,6 +188,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m) | |||
147 | } else | 188 | } else |
148 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); | 189 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); |
149 | } | 190 | } |
191 | #endif | ||
150 | 192 | ||
151 | #ifdef CONFIG_X86_IO_APIC | 193 | #ifdef CONFIG_X86_IO_APIC |
152 | 194 | ||
@@ -176,18 +218,89 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m) | |||
176 | if (bad_ioapic(m->mpc_apicaddr)) | 218 | if (bad_ioapic(m->mpc_apicaddr)) |
177 | return; | 219 | return; |
178 | 220 | ||
179 | mp_ioapics[nr_ioapics] = *m; | 221 | mp_ioapics[nr_ioapics].mp_apicaddr = m->mpc_apicaddr; |
222 | mp_ioapics[nr_ioapics].mp_apicid = m->mpc_apicid; | ||
223 | mp_ioapics[nr_ioapics].mp_type = m->mpc_type; | ||
224 | mp_ioapics[nr_ioapics].mp_apicver = m->mpc_apicver; | ||
225 | mp_ioapics[nr_ioapics].mp_flags = m->mpc_flags; | ||
180 | nr_ioapics++; | 226 | nr_ioapics++; |
181 | } | 227 | } |
182 | 228 | ||
183 | static void __init MP_intsrc_info(struct mpc_config_intsrc *m) | 229 | static void print_MP_intsrc_info(struct mpc_config_intsrc *m) |
184 | { | 230 | { |
185 | mp_irqs[mp_irq_entries] = *m; | 231 | printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," |
186 | Dprintk("Int: type %d, pol %d, trig %d, bus %d," | ||
187 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", | 232 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", |
188 | m->mpc_irqtype, m->mpc_irqflag & 3, | 233 | m->mpc_irqtype, m->mpc_irqflag & 3, |
189 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, | 234 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, |
190 | m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); | 235 | m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); |
236 | } | ||
237 | |||
238 | static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) | ||
239 | { | ||
240 | printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," | ||
241 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", | ||
242 | mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, | ||
243 | (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, | ||
244 | mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); | ||
245 | } | ||
246 | |||
247 | static void __init assign_to_mp_irq(struct mpc_config_intsrc *m, | ||
248 | struct mp_config_intsrc *mp_irq) | ||
249 | { | ||
250 | mp_irq->mp_dstapic = m->mpc_dstapic; | ||
251 | mp_irq->mp_type = m->mpc_type; | ||
252 | mp_irq->mp_irqtype = m->mpc_irqtype; | ||
253 | mp_irq->mp_irqflag = m->mpc_irqflag; | ||
254 | mp_irq->mp_srcbus = m->mpc_srcbus; | ||
255 | mp_irq->mp_srcbusirq = m->mpc_srcbusirq; | ||
256 | mp_irq->mp_dstirq = m->mpc_dstirq; | ||
257 | } | ||
258 | |||
259 | static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, | ||
260 | struct mpc_config_intsrc *m) | ||
261 | { | ||
262 | m->mpc_dstapic = mp_irq->mp_dstapic; | ||
263 | m->mpc_type = mp_irq->mp_type; | ||
264 | m->mpc_irqtype = mp_irq->mp_irqtype; | ||
265 | m->mpc_irqflag = mp_irq->mp_irqflag; | ||
266 | m->mpc_srcbus = mp_irq->mp_srcbus; | ||
267 | m->mpc_srcbusirq = mp_irq->mp_srcbusirq; | ||
268 | m->mpc_dstirq = mp_irq->mp_dstirq; | ||
269 | } | ||
270 | |||
271 | static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, | ||
272 | struct mpc_config_intsrc *m) | ||
273 | { | ||
274 | if (mp_irq->mp_dstapic != m->mpc_dstapic) | ||
275 | return 1; | ||
276 | if (mp_irq->mp_type != m->mpc_type) | ||
277 | return 2; | ||
278 | if (mp_irq->mp_irqtype != m->mpc_irqtype) | ||
279 | return 3; | ||
280 | if (mp_irq->mp_irqflag != m->mpc_irqflag) | ||
281 | return 4; | ||
282 | if (mp_irq->mp_srcbus != m->mpc_srcbus) | ||
283 | return 5; | ||
284 | if (mp_irq->mp_srcbusirq != m->mpc_srcbusirq) | ||
285 | return 6; | ||
286 | if (mp_irq->mp_dstirq != m->mpc_dstirq) | ||
287 | return 7; | ||
288 | |||
289 | return 0; | ||
290 | } | ||
291 | |||
292 | static void __init MP_intsrc_info(struct mpc_config_intsrc *m) | ||
293 | { | ||
294 | int i; | ||
295 | |||
296 | print_MP_intsrc_info(m); | ||
297 | |||
298 | for (i = 0; i < mp_irq_entries; i++) { | ||
299 | if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m)) | ||
300 | return; | ||
301 | } | ||
302 | |||
303 | assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]); | ||
191 | if (++mp_irq_entries == MAX_IRQ_SOURCES) | 304 | if (++mp_irq_entries == MAX_IRQ_SOURCES) |
192 | panic("Max # of irq sources exceeded!!\n"); | 305 | panic("Max # of irq sources exceeded!!\n"); |
193 | } | 306 | } |
@@ -196,7 +309,7 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m) | |||
196 | 309 | ||
197 | static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) | 310 | static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) |
198 | { | 311 | { |
199 | Dprintk("Lint: type %d, pol %d, trig %d, bus %d," | 312 | printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x," |
200 | " IRQ %02x, APIC ID %x, APIC LINT %02x\n", | 313 | " IRQ %02x, APIC ID %x, APIC LINT %02x\n", |
201 | m->mpc_irqtype, m->mpc_irqflag & 3, | 314 | m->mpc_irqtype, m->mpc_irqflag & 3, |
202 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, | 315 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, |
@@ -266,11 +379,14 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, | |||
266 | } | 379 | } |
267 | } | 380 | } |
268 | 381 | ||
269 | static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, | 382 | void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, |
270 | char *productid) | 383 | char *productid) |
271 | { | 384 | { |
272 | if (strncmp(oem, "IBM NUMA", 8)) | 385 | if (strncmp(oem, "IBM NUMA", 8)) |
273 | printk("Warning! May not be a NUMA-Q system!\n"); | 386 | printk("Warning! Not a NUMA-Q system!\n"); |
387 | else | ||
388 | found_numaq = 1; | ||
389 | |||
274 | if (mpc->mpc_oemptr) | 390 | if (mpc->mpc_oemptr) |
275 | smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr, | 391 | smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr, |
276 | mpc->mpc_oemsize); | 392 | mpc->mpc_oemsize); |
@@ -281,12 +397,9 @@ static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, | |||
281 | * Read/parse the MPC | 397 | * Read/parse the MPC |
282 | */ | 398 | */ |
283 | 399 | ||
284 | static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | 400 | static int __init smp_check_mpc(struct mp_config_table *mpc, char *oem, |
401 | char *str) | ||
285 | { | 402 | { |
286 | char str[16]; | ||
287 | char oem[10]; | ||
288 | int count = sizeof(*mpc); | ||
289 | unsigned char *mpt = ((unsigned char *)mpc) + count; | ||
290 | 403 | ||
291 | if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) { | 404 | if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) { |
292 | printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", | 405 | printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", |
@@ -309,19 +422,42 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |||
309 | } | 422 | } |
310 | memcpy(oem, mpc->mpc_oem, 8); | 423 | memcpy(oem, mpc->mpc_oem, 8); |
311 | oem[8] = 0; | 424 | oem[8] = 0; |
312 | printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem); | 425 | printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem); |
313 | 426 | ||
314 | memcpy(str, mpc->mpc_productid, 12); | 427 | memcpy(str, mpc->mpc_productid, 12); |
315 | str[12] = 0; | 428 | str[12] = 0; |
316 | printk("Product ID: %s ", str); | ||
317 | 429 | ||
318 | #ifdef CONFIG_X86_32 | 430 | printk(KERN_INFO "MPTABLE: Product ID: %s\n", str); |
319 | mps_oem_check(mpc, oem, str); | ||
320 | #endif | ||
321 | printk(KERN_INFO "MPTABLE: Product ID: %s ", str); | ||
322 | 431 | ||
323 | printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic); | 432 | printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic); |
324 | 433 | ||
434 | return 1; | ||
435 | } | ||
436 | |||
437 | static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | ||
438 | { | ||
439 | char str[16]; | ||
440 | char oem[10]; | ||
441 | |||
442 | int count = sizeof(*mpc); | ||
443 | unsigned char *mpt = ((unsigned char *)mpc) + count; | ||
444 | |||
445 | if (!smp_check_mpc(mpc, oem, str)) | ||
446 | return 0; | ||
447 | |||
448 | #ifdef CONFIG_X86_32 | ||
449 | /* | ||
450 | * need to make sure summit and es7000's mps_oem_check is safe to be | ||
451 | * called early via genericarch's mps_oem_check | ||
452 | */ | ||
453 | if (early) { | ||
454 | #ifdef CONFIG_X86_NUMAQ | ||
455 | numaq_mps_oem_check(mpc, oem, str); | ||
456 | #endif | ||
457 | } else | ||
458 | mps_oem_check(mpc, oem, str); | ||
459 | #endif | ||
460 | |||
325 | /* save the local APIC address, it might be non-default */ | 461 | /* save the local APIC address, it might be non-default */ |
326 | if (!acpi_lapic) | 462 | if (!acpi_lapic) |
327 | mp_lapic_addr = mpc->mpc_lapic; | 463 | mp_lapic_addr = mpc->mpc_lapic; |
@@ -352,7 +488,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |||
352 | { | 488 | { |
353 | struct mpc_config_bus *m = | 489 | struct mpc_config_bus *m = |
354 | (struct mpc_config_bus *)mpt; | 490 | (struct mpc_config_bus *)mpt; |
491 | #ifdef CONFIG_X86_IO_APIC | ||
355 | MP_bus_info(m); | 492 | MP_bus_info(m); |
493 | #endif | ||
356 | mpt += sizeof(*m); | 494 | mpt += sizeof(*m); |
357 | count += sizeof(*m); | 495 | count += sizeof(*m); |
358 | break; | 496 | break; |
@@ -402,6 +540,11 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |||
402 | ++mpc_record; | 540 | ++mpc_record; |
403 | #endif | 541 | #endif |
404 | } | 542 | } |
543 | |||
544 | #ifdef CONFIG_X86_GENERICARCH | ||
545 | generic_bigsmp_probe(); | ||
546 | #endif | ||
547 | |||
405 | setup_apic_routing(); | 548 | setup_apic_routing(); |
406 | if (!num_processors) | 549 | if (!num_processors) |
407 | printk(KERN_ERR "MPTABLE: no processors registered!\n"); | 550 | printk(KERN_ERR "MPTABLE: no processors registered!\n"); |
@@ -427,7 +570,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) | |||
427 | intsrc.mpc_type = MP_INTSRC; | 570 | intsrc.mpc_type = MP_INTSRC; |
428 | intsrc.mpc_irqflag = 0; /* conforming */ | 571 | intsrc.mpc_irqflag = 0; /* conforming */ |
429 | intsrc.mpc_srcbus = 0; | 572 | intsrc.mpc_srcbus = 0; |
430 | intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; | 573 | intsrc.mpc_dstapic = mp_ioapics[0].mp_apicid; |
431 | 574 | ||
432 | intsrc.mpc_irqtype = mp_INT; | 575 | intsrc.mpc_irqtype = mp_INT; |
433 | 576 | ||
@@ -488,40 +631,11 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) | |||
488 | MP_intsrc_info(&intsrc); | 631 | MP_intsrc_info(&intsrc); |
489 | } | 632 | } |
490 | 633 | ||
491 | #endif | ||
492 | 634 | ||
493 | static inline void __init construct_default_ISA_mptable(int mpc_default_type) | 635 | static void construct_ioapic_table(int mpc_default_type) |
494 | { | 636 | { |
495 | struct mpc_config_processor processor; | ||
496 | struct mpc_config_bus bus; | ||
497 | #ifdef CONFIG_X86_IO_APIC | ||
498 | struct mpc_config_ioapic ioapic; | 637 | struct mpc_config_ioapic ioapic; |
499 | #endif | 638 | struct mpc_config_bus bus; |
500 | struct mpc_config_lintsrc lintsrc; | ||
501 | int linttypes[2] = { mp_ExtINT, mp_NMI }; | ||
502 | int i; | ||
503 | |||
504 | /* | ||
505 | * local APIC has default address | ||
506 | */ | ||
507 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | ||
508 | |||
509 | /* | ||
510 | * 2 CPUs, numbered 0 & 1. | ||
511 | */ | ||
512 | processor.mpc_type = MP_PROCESSOR; | ||
513 | /* Either an integrated APIC or a discrete 82489DX. */ | ||
514 | processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; | ||
515 | processor.mpc_cpuflag = CPU_ENABLED; | ||
516 | processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | | ||
517 | (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; | ||
518 | processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; | ||
519 | processor.mpc_reserved[0] = 0; | ||
520 | processor.mpc_reserved[1] = 0; | ||
521 | for (i = 0; i < 2; i++) { | ||
522 | processor.mpc_apicid = i; | ||
523 | MP_processor_info(&processor); | ||
524 | } | ||
525 | 639 | ||
526 | bus.mpc_type = MP_BUS; | 640 | bus.mpc_type = MP_BUS; |
527 | bus.mpc_busid = 0; | 641 | bus.mpc_busid = 0; |
@@ -550,7 +664,6 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) | |||
550 | MP_bus_info(&bus); | 664 | MP_bus_info(&bus); |
551 | } | 665 | } |
552 | 666 | ||
553 | #ifdef CONFIG_X86_IO_APIC | ||
554 | ioapic.mpc_type = MP_IOAPIC; | 667 | ioapic.mpc_type = MP_IOAPIC; |
555 | ioapic.mpc_apicid = 2; | 668 | ioapic.mpc_apicid = 2; |
556 | ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; | 669 | ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; |
@@ -562,7 +675,42 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) | |||
562 | * We set up most of the low 16 IO-APIC pins according to MPS rules. | 675 | * We set up most of the low 16 IO-APIC pins according to MPS rules. |
563 | */ | 676 | */ |
564 | construct_default_ioirq_mptable(mpc_default_type); | 677 | construct_default_ioirq_mptable(mpc_default_type); |
678 | } | ||
679 | #else | ||
680 | static inline void construct_ioapic_table(int mpc_default_type) { } | ||
565 | #endif | 681 | #endif |
682 | |||
683 | static inline void __init construct_default_ISA_mptable(int mpc_default_type) | ||
684 | { | ||
685 | struct mpc_config_processor processor; | ||
686 | struct mpc_config_lintsrc lintsrc; | ||
687 | int linttypes[2] = { mp_ExtINT, mp_NMI }; | ||
688 | int i; | ||
689 | |||
690 | /* | ||
691 | * local APIC has default address | ||
692 | */ | ||
693 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | ||
694 | |||
695 | /* | ||
696 | * 2 CPUs, numbered 0 & 1. | ||
697 | */ | ||
698 | processor.mpc_type = MP_PROCESSOR; | ||
699 | /* Either an integrated APIC or a discrete 82489DX. */ | ||
700 | processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; | ||
701 | processor.mpc_cpuflag = CPU_ENABLED; | ||
702 | processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | | ||
703 | (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; | ||
704 | processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; | ||
705 | processor.mpc_reserved[0] = 0; | ||
706 | processor.mpc_reserved[1] = 0; | ||
707 | for (i = 0; i < 2; i++) { | ||
708 | processor.mpc_apicid = i; | ||
709 | MP_processor_info(&processor); | ||
710 | } | ||
711 | |||
712 | construct_ioapic_table(mpc_default_type); | ||
713 | |||
566 | lintsrc.mpc_type = MP_LINTSRC; | 714 | lintsrc.mpc_type = MP_LINTSRC; |
567 | lintsrc.mpc_irqflag = 0; /* conforming */ | 715 | lintsrc.mpc_irqflag = 0; /* conforming */ |
568 | lintsrc.mpc_srcbusid = 0; | 716 | lintsrc.mpc_srcbusid = 0; |
@@ -600,7 +748,7 @@ static void __init __get_smp_config(unsigned early) | |||
600 | 748 | ||
601 | printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", | 749 | printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", |
602 | mpf->mpf_specification); | 750 | mpf->mpf_specification); |
603 | #ifdef CONFIG_X86_32 | 751 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) |
604 | if (mpf->mpf_feature2 & (1 << 7)) { | 752 | if (mpf->mpf_feature2 & (1 << 7)) { |
605 | printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); | 753 | printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); |
606 | pic_mode = 1; | 754 | pic_mode = 1; |
@@ -632,7 +780,9 @@ static void __init __get_smp_config(unsigned early) | |||
632 | * override the defaults. | 780 | * override the defaults. |
633 | */ | 781 | */ |
634 | if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { | 782 | if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { |
783 | #ifdef CONFIG_X86_LOCAL_APIC | ||
635 | smp_found_config = 0; | 784 | smp_found_config = 0; |
785 | #endif | ||
636 | printk(KERN_ERR | 786 | printk(KERN_ERR |
637 | "BIOS bug, MP table errors detected!...\n"); | 787 | "BIOS bug, MP table errors detected!...\n"); |
638 | printk(KERN_ERR "... disabling SMP support. " | 788 | printk(KERN_ERR "... disabling SMP support. " |
@@ -689,7 +839,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
689 | unsigned int *bp = phys_to_virt(base); | 839 | unsigned int *bp = phys_to_virt(base); |
690 | struct intel_mp_floating *mpf; | 840 | struct intel_mp_floating *mpf; |
691 | 841 | ||
692 | Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length); | 842 | printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length); |
693 | BUILD_BUG_ON(sizeof(*mpf) != 16); | 843 | BUILD_BUG_ON(sizeof(*mpf) != 16); |
694 | 844 | ||
695 | while (length > 0) { | 845 | while (length > 0) { |
@@ -699,15 +849,21 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
699 | !mpf_checksum((unsigned char *)bp, 16) && | 849 | !mpf_checksum((unsigned char *)bp, 16) && |
700 | ((mpf->mpf_specification == 1) | 850 | ((mpf->mpf_specification == 1) |
701 | || (mpf->mpf_specification == 4))) { | 851 | || (mpf->mpf_specification == 4))) { |
702 | 852 | #ifdef CONFIG_X86_LOCAL_APIC | |
703 | smp_found_config = 1; | 853 | smp_found_config = 1; |
854 | #endif | ||
704 | mpf_found = mpf; | 855 | mpf_found = mpf; |
705 | #ifdef CONFIG_X86_32 | 856 | |
706 | printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", | 857 | printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", |
707 | mpf, virt_to_phys(mpf)); | 858 | mpf, virt_to_phys(mpf)); |
708 | reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE, | 859 | |
860 | if (!reserve) | ||
861 | return 1; | ||
862 | reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, | ||
709 | BOOTMEM_DEFAULT); | 863 | BOOTMEM_DEFAULT); |
710 | if (mpf->mpf_physptr) { | 864 | if (mpf->mpf_physptr) { |
865 | unsigned long size = PAGE_SIZE; | ||
866 | #ifdef CONFIG_X86_32 | ||
711 | /* | 867 | /* |
712 | * We cannot access the MPC table to compute | 868 | * We cannot access the MPC table to compute |
713 | * table size yet, as only few megabytes from | 869 | * table size yet, as only few megabytes from |
@@ -717,24 +873,15 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
717 | * PAGE_SIZE from mpf->mpf_physptr yields BUG() | 873 | * PAGE_SIZE from mpf->mpf_physptr yields BUG() |
718 | * in reserve_bootmem. | 874 | * in reserve_bootmem. |
719 | */ | 875 | */ |
720 | unsigned long size = PAGE_SIZE; | ||
721 | unsigned long end = max_low_pfn * PAGE_SIZE; | 876 | unsigned long end = max_low_pfn * PAGE_SIZE; |
722 | if (mpf->mpf_physptr + size > end) | 877 | if (mpf->mpf_physptr + size > end) |
723 | size = end - mpf->mpf_physptr; | 878 | size = end - mpf->mpf_physptr; |
724 | reserve_bootmem(mpf->mpf_physptr, size, | 879 | #endif |
880 | reserve_bootmem_generic(mpf->mpf_physptr, size, | ||
725 | BOOTMEM_DEFAULT); | 881 | BOOTMEM_DEFAULT); |
726 | } | 882 | } |
727 | 883 | ||
728 | #else | 884 | return 1; |
729 | if (!reserve) | ||
730 | return 1; | ||
731 | |||
732 | reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE); | ||
733 | if (mpf->mpf_physptr) | ||
734 | reserve_bootmem_generic(mpf->mpf_physptr, | ||
735 | PAGE_SIZE); | ||
736 | #endif | ||
737 | return 1; | ||
738 | } | 885 | } |
739 | bp += 4; | 886 | bp += 4; |
740 | length -= 16; | 887 | length -= 16; |
@@ -790,298 +937,294 @@ void __init find_smp_config(void) | |||
790 | __find_smp_config(1); | 937 | __find_smp_config(1); |
791 | } | 938 | } |
792 | 939 | ||
793 | /* -------------------------------------------------------------------------- | 940 | #ifdef CONFIG_X86_IO_APIC |
794 | ACPI-based MP Configuration | 941 | static u8 __initdata irq_used[MAX_IRQ_SOURCES]; |
795 | -------------------------------------------------------------------------- */ | ||
796 | 942 | ||
797 | /* | 943 | static int __init get_MP_intsrc_index(struct mpc_config_intsrc *m) |
798 | * Keep this outside and initialized to 0, for !CONFIG_ACPI builds: | 944 | { |
799 | */ | 945 | int i; |
800 | int es7000_plat; | ||
801 | 946 | ||
802 | #ifdef CONFIG_ACPI | 947 | if (m->mpc_irqtype != mp_INT) |
948 | return 0; | ||
803 | 949 | ||
804 | #ifdef CONFIG_X86_IO_APIC | 950 | if (m->mpc_irqflag != 0x0f) |
951 | return 0; | ||
805 | 952 | ||
806 | #define MP_ISA_BUS 0 | 953 | /* not legacy */ |
807 | 954 | ||
808 | extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; | 955 | for (i = 0; i < mp_irq_entries; i++) { |
956 | if (mp_irqs[i].mp_irqtype != mp_INT) | ||
957 | continue; | ||
809 | 958 | ||
810 | static int mp_find_ioapic(int gsi) | 959 | if (mp_irqs[i].mp_irqflag != 0x0f) |
811 | { | 960 | continue; |
812 | int i = 0; | ||
813 | 961 | ||
814 | /* Find the IOAPIC that manages this GSI. */ | 962 | if (mp_irqs[i].mp_srcbus != m->mpc_srcbus) |
815 | for (i = 0; i < nr_ioapics; i++) { | 963 | continue; |
816 | if ((gsi >= mp_ioapic_routing[i].gsi_base) | 964 | if (mp_irqs[i].mp_srcbusirq != m->mpc_srcbusirq) |
817 | && (gsi <= mp_ioapic_routing[i].gsi_end)) | 965 | continue; |
818 | return i; | 966 | if (irq_used[i]) { |
967 | /* already claimed */ | ||
968 | return -2; | ||
969 | } | ||
970 | irq_used[i] = 1; | ||
971 | return i; | ||
819 | } | 972 | } |
820 | 973 | ||
821 | printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); | 974 | /* not found */ |
822 | return -1; | 975 | return -1; |
823 | } | 976 | } |
824 | 977 | ||
825 | static u8 __init uniq_ioapic_id(u8 id) | 978 | #define SPARE_SLOT_NUM 20 |
826 | { | 979 | |
827 | #ifdef CONFIG_X86_32 | 980 | static struct mpc_config_intsrc __initdata *m_spare[SPARE_SLOT_NUM]; |
828 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && | ||
829 | !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) | ||
830 | return io_apic_get_unique_id(nr_ioapics, id); | ||
831 | else | ||
832 | return id; | ||
833 | #else | ||
834 | int i; | ||
835 | DECLARE_BITMAP(used, 256); | ||
836 | bitmap_zero(used, 256); | ||
837 | for (i = 0; i < nr_ioapics; i++) { | ||
838 | struct mpc_config_ioapic *ia = &mp_ioapics[i]; | ||
839 | __set_bit(ia->mpc_apicid, used); | ||
840 | } | ||
841 | if (!test_bit(id, used)) | ||
842 | return id; | ||
843 | return find_first_zero_bit(used, 256); | ||
844 | #endif | 981 | #endif |
845 | } | ||
846 | 982 | ||
847 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | 983 | static int __init replace_intsrc_all(struct mp_config_table *mpc, |
984 | unsigned long mpc_new_phys, | ||
985 | unsigned long mpc_new_length) | ||
848 | { | 986 | { |
849 | int idx = 0; | 987 | #ifdef CONFIG_X86_IO_APIC |
850 | 988 | int i; | |
851 | if (bad_ioapic(address)) | 989 | int nr_m_spare = 0; |
852 | return; | 990 | #endif |
853 | 991 | ||
854 | idx = nr_ioapics; | 992 | int count = sizeof(*mpc); |
993 | unsigned char *mpt = ((unsigned char *)mpc) + count; | ||
855 | 994 | ||
856 | mp_ioapics[idx].mpc_type = MP_IOAPIC; | 995 | printk(KERN_INFO "mpc_length %x\n", mpc->mpc_length); |
857 | mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; | 996 | while (count < mpc->mpc_length) { |
858 | mp_ioapics[idx].mpc_apicaddr = address; | 997 | switch (*mpt) { |
998 | case MP_PROCESSOR: | ||
999 | { | ||
1000 | struct mpc_config_processor *m = | ||
1001 | (struct mpc_config_processor *)mpt; | ||
1002 | mpt += sizeof(*m); | ||
1003 | count += sizeof(*m); | ||
1004 | break; | ||
1005 | } | ||
1006 | case MP_BUS: | ||
1007 | { | ||
1008 | struct mpc_config_bus *m = | ||
1009 | (struct mpc_config_bus *)mpt; | ||
1010 | mpt += sizeof(*m); | ||
1011 | count += sizeof(*m); | ||
1012 | break; | ||
1013 | } | ||
1014 | case MP_IOAPIC: | ||
1015 | { | ||
1016 | mpt += sizeof(struct mpc_config_ioapic); | ||
1017 | count += sizeof(struct mpc_config_ioapic); | ||
1018 | break; | ||
1019 | } | ||
1020 | case MP_INTSRC: | ||
1021 | { | ||
1022 | #ifdef CONFIG_X86_IO_APIC | ||
1023 | struct mpc_config_intsrc *m = | ||
1024 | (struct mpc_config_intsrc *)mpt; | ||
859 | 1025 | ||
860 | set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); | 1026 | printk(KERN_INFO "OLD "); |
861 | mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id); | 1027 | print_MP_intsrc_info(m); |
862 | #ifdef CONFIG_X86_32 | 1028 | i = get_MP_intsrc_index(m); |
863 | mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); | 1029 | if (i > 0) { |
864 | #else | 1030 | assign_to_mpc_intsrc(&mp_irqs[i], m); |
865 | mp_ioapics[idx].mpc_apicver = 0; | 1031 | printk(KERN_INFO "NEW "); |
1032 | print_mp_irq_info(&mp_irqs[i]); | ||
1033 | } else if (!i) { | ||
1034 | /* legacy, do nothing */ | ||
1035 | } else if (nr_m_spare < SPARE_SLOT_NUM) { | ||
1036 | /* | ||
1037 | * not found (-1), or duplicated (-2) | ||
1038 | * are invalid entries, | ||
1039 | * we need to use the slot later | ||
1040 | */ | ||
1041 | m_spare[nr_m_spare] = m; | ||
1042 | nr_m_spare++; | ||
1043 | } | ||
866 | #endif | 1044 | #endif |
867 | /* | 1045 | mpt += sizeof(struct mpc_config_intsrc); |
868 | * Build basic GSI lookup table to facilitate gsi->io_apic lookups | 1046 | count += sizeof(struct mpc_config_intsrc); |
869 | * and to prevent reprogramming of IOAPIC pins (PCI GSIs). | 1047 | break; |
870 | */ | 1048 | } |
871 | mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; | 1049 | case MP_LINTSRC: |
872 | mp_ioapic_routing[idx].gsi_base = gsi_base; | 1050 | { |
873 | mp_ioapic_routing[idx].gsi_end = gsi_base + | 1051 | struct mpc_config_lintsrc *m = |
874 | io_apic_get_redir_entries(idx); | 1052 | (struct mpc_config_lintsrc *)mpt; |
875 | 1053 | mpt += sizeof(*m); | |
876 | printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " | 1054 | count += sizeof(*m); |
877 | "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, | 1055 | break; |
878 | mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, | 1056 | } |
879 | mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); | 1057 | default: |
880 | 1058 | /* wrong mptable */ | |
881 | nr_ioapics++; | 1059 | printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); |
882 | } | 1060 | printk(KERN_ERR "type %x\n", *mpt); |
1061 | print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, | ||
1062 | 1, mpc, mpc->mpc_length, 1); | ||
1063 | goto out; | ||
1064 | } | ||
1065 | } | ||
883 | 1066 | ||
884 | void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) | 1067 | #ifdef CONFIG_X86_IO_APIC |
885 | { | 1068 | for (i = 0; i < mp_irq_entries; i++) { |
886 | struct mpc_config_intsrc intsrc; | 1069 | if (irq_used[i]) |
887 | int ioapic = -1; | 1070 | continue; |
888 | int pin = -1; | ||
889 | 1071 | ||
890 | /* | 1072 | if (mp_irqs[i].mp_irqtype != mp_INT) |
891 | * Convert 'gsi' to 'ioapic.pin'. | 1073 | continue; |
892 | */ | ||
893 | ioapic = mp_find_ioapic(gsi); | ||
894 | if (ioapic < 0) | ||
895 | return; | ||
896 | pin = gsi - mp_ioapic_routing[ioapic].gsi_base; | ||
897 | 1074 | ||
898 | /* | 1075 | if (mp_irqs[i].mp_irqflag != 0x0f) |
899 | * TBD: This check is for faulty timer entries, where the override | 1076 | continue; |
900 | * erroneously sets the trigger to level, resulting in a HUGE | ||
901 | * increase of timer interrupts! | ||
902 | */ | ||
903 | if ((bus_irq == 0) && (trigger == 3)) | ||
904 | trigger = 1; | ||
905 | 1077 | ||
906 | intsrc.mpc_type = MP_INTSRC; | 1078 | if (nr_m_spare > 0) { |
907 | intsrc.mpc_irqtype = mp_INT; | 1079 | printk(KERN_INFO "*NEW* found "); |
908 | intsrc.mpc_irqflag = (trigger << 2) | polarity; | 1080 | nr_m_spare--; |
909 | intsrc.mpc_srcbus = MP_ISA_BUS; | 1081 | assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]); |
910 | intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ | 1082 | m_spare[nr_m_spare] = NULL; |
911 | intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ | 1083 | } else { |
912 | intsrc.mpc_dstirq = pin; /* INTIN# */ | 1084 | struct mpc_config_intsrc *m = |
1085 | (struct mpc_config_intsrc *)mpt; | ||
1086 | count += sizeof(struct mpc_config_intsrc); | ||
1087 | if (!mpc_new_phys) { | ||
1088 | printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count); | ||
1089 | } else { | ||
1090 | if (count <= mpc_new_length) | ||
1091 | printk(KERN_INFO "No spare slots, try to append..., new mpc_length %x\n", count); | ||
1092 | else { | ||
1093 | printk(KERN_ERR "mpc_new_length %lx is too small\n", mpc_new_length); | ||
1094 | goto out; | ||
1095 | } | ||
1096 | } | ||
1097 | assign_to_mpc_intsrc(&mp_irqs[i], m); | ||
1098 | mpc->mpc_length = count; | ||
1099 | mpt += sizeof(struct mpc_config_intsrc); | ||
1100 | } | ||
1101 | print_mp_irq_info(&mp_irqs[i]); | ||
1102 | } | ||
1103 | #endif | ||
1104 | out: | ||
1105 | /* update checksum */ | ||
1106 | mpc->mpc_checksum = 0; | ||
1107 | mpc->mpc_checksum -= mpf_checksum((unsigned char *)mpc, | ||
1108 | mpc->mpc_length); | ||
913 | 1109 | ||
914 | MP_intsrc_info(&intsrc); | 1110 | return 0; |
915 | } | 1111 | } |
916 | 1112 | ||
917 | void __init mp_config_acpi_legacy_irqs(void) | 1113 | static int __initdata enable_update_mptable; |
918 | { | ||
919 | struct mpc_config_intsrc intsrc; | ||
920 | int i = 0; | ||
921 | int ioapic = -1; | ||
922 | 1114 | ||
923 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) | 1115 | static int __init update_mptable_setup(char *str) |
924 | /* | 1116 | { |
925 | * Fabricate the legacy ISA bus (bus #31). | 1117 | enable_update_mptable = 1; |
926 | */ | 1118 | return 0; |
927 | mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; | 1119 | } |
928 | #endif | 1120 | early_param("update_mptable", update_mptable_setup); |
929 | set_bit(MP_ISA_BUS, mp_bus_not_pci); | ||
930 | Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); | ||
931 | 1121 | ||
932 | /* | 1122 | static unsigned long __initdata mpc_new_phys; |
933 | * Older generations of ES7000 have no legacy identity mappings | 1123 | static unsigned long mpc_new_length __initdata = 4096; |
934 | */ | ||
935 | if (es7000_plat == 1) | ||
936 | return; | ||
937 | 1124 | ||
938 | /* | 1125 | /* alloc_mptable or alloc_mptable=4k */ |
939 | * Locate the IOAPIC that manages the ISA IRQs (0-15). | 1126 | static int __initdata alloc_mptable; |
940 | */ | 1127 | static int __init parse_alloc_mptable_opt(char *p) |
941 | ioapic = mp_find_ioapic(0); | 1128 | { |
942 | if (ioapic < 0) | 1129 | enable_update_mptable = 1; |
943 | return; | 1130 | alloc_mptable = 1; |
1131 | if (!p) | ||
1132 | return 0; | ||
1133 | mpc_new_length = memparse(p, &p); | ||
1134 | return 0; | ||
1135 | } | ||
1136 | early_param("alloc_mptable", parse_alloc_mptable_opt); | ||
944 | 1137 | ||
945 | intsrc.mpc_type = MP_INTSRC; | 1138 | void __init early_reserve_e820_mpc_new(void) |
946 | intsrc.mpc_irqflag = 0; /* Conforming */ | 1139 | { |
947 | intsrc.mpc_srcbus = MP_ISA_BUS; | 1140 | if (enable_update_mptable && alloc_mptable) { |
948 | #ifdef CONFIG_X86_IO_APIC | 1141 | u64 startt = 0; |
949 | intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; | 1142 | #ifdef CONFIG_X86_TRAMPOLINE |
1143 | startt = TRAMPOLINE_BASE; | ||
950 | #endif | 1144 | #endif |
951 | /* | 1145 | mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4); |
952 | * Use the default configuration for the IRQs 0-15. Unless | ||
953 | * overridden by (MADT) interrupt source override entries. | ||
954 | */ | ||
955 | for (i = 0; i < 16; i++) { | ||
956 | int idx; | ||
957 | |||
958 | for (idx = 0; idx < mp_irq_entries; idx++) { | ||
959 | struct mpc_config_intsrc *irq = mp_irqs + idx; | ||
960 | |||
961 | /* Do we already have a mapping for this ISA IRQ? */ | ||
962 | if (irq->mpc_srcbus == MP_ISA_BUS | ||
963 | && irq->mpc_srcbusirq == i) | ||
964 | break; | ||
965 | |||
966 | /* Do we already have a mapping for this IOAPIC pin */ | ||
967 | if ((irq->mpc_dstapic == intsrc.mpc_dstapic) && | ||
968 | (irq->mpc_dstirq == i)) | ||
969 | break; | ||
970 | } | ||
971 | |||
972 | if (idx != mp_irq_entries) { | ||
973 | printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); | ||
974 | continue; /* IRQ already used */ | ||
975 | } | ||
976 | |||
977 | intsrc.mpc_irqtype = mp_INT; | ||
978 | intsrc.mpc_srcbusirq = i; /* Identity mapped */ | ||
979 | intsrc.mpc_dstirq = i; | ||
980 | |||
981 | MP_intsrc_info(&intsrc); | ||
982 | } | 1146 | } |
983 | } | 1147 | } |
984 | 1148 | ||
985 | int mp_register_gsi(u32 gsi, int triggering, int polarity) | 1149 | static int __init update_mp_table(void) |
986 | { | 1150 | { |
987 | int ioapic; | 1151 | char str[16]; |
988 | int ioapic_pin; | 1152 | char oem[10]; |
989 | #ifdef CONFIG_X86_32 | 1153 | struct intel_mp_floating *mpf; |
990 | #define MAX_GSI_NUM 4096 | 1154 | struct mp_config_table *mpc; |
991 | #define IRQ_COMPRESSION_START 64 | 1155 | struct mp_config_table *mpc_new; |
1156 | |||
1157 | if (!enable_update_mptable) | ||
1158 | return 0; | ||
1159 | |||
1160 | mpf = mpf_found; | ||
1161 | if (!mpf) | ||
1162 | return 0; | ||
992 | 1163 | ||
993 | static int pci_irq = IRQ_COMPRESSION_START; | ||
994 | /* | 1164 | /* |
995 | * Mapping between Global System Interrupts, which | 1165 | * Now see if we need to go further. |
996 | * represent all possible interrupts, and IRQs | ||
997 | * assigned to actual devices. | ||
998 | */ | 1166 | */ |
999 | static int gsi_to_irq[MAX_GSI_NUM]; | 1167 | if (mpf->mpf_feature1 != 0) |
1000 | #else | 1168 | return 0; |
1001 | |||
1002 | if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) | ||
1003 | return gsi; | ||
1004 | #endif | ||
1005 | 1169 | ||
1006 | /* Don't set up the ACPI SCI because it's already set up */ | 1170 | if (!mpf->mpf_physptr) |
1007 | if (acpi_gbl_FADT.sci_interrupt == gsi) | 1171 | return 0; |
1008 | return gsi; | ||
1009 | 1172 | ||
1010 | ioapic = mp_find_ioapic(gsi); | 1173 | mpc = phys_to_virt(mpf->mpf_physptr); |
1011 | if (ioapic < 0) { | ||
1012 | printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); | ||
1013 | return gsi; | ||
1014 | } | ||
1015 | 1174 | ||
1016 | ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; | 1175 | if (!smp_check_mpc(mpc, oem, str)) |
1176 | return 0; | ||
1017 | 1177 | ||
1018 | #ifdef CONFIG_X86_32 | 1178 | printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); |
1019 | if (ioapic_renumber_irq) | 1179 | printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); |
1020 | gsi = ioapic_renumber_irq(ioapic, gsi); | ||
1021 | #endif | ||
1022 | 1180 | ||
1023 | /* | 1181 | if (mpc_new_phys && mpc->mpc_length > mpc_new_length) { |
1024 | * Avoid pin reprogramming. PRTs typically include entries | 1182 | mpc_new_phys = 0; |
1025 | * with redundant pin->gsi mappings (but unique PCI devices); | 1183 | printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n", |
1026 | * we only program the IOAPIC on the first. | 1184 | mpc_new_length); |
1027 | */ | ||
1028 | if (ioapic_pin > MP_MAX_IOAPIC_PIN) { | ||
1029 | printk(KERN_ERR "Invalid reference to IOAPIC pin " | ||
1030 | "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, | ||
1031 | ioapic_pin); | ||
1032 | return gsi; | ||
1033 | } | 1185 | } |
1034 | if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { | 1186 | |
1035 | Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", | 1187 | if (!mpc_new_phys) { |
1036 | mp_ioapic_routing[ioapic].apic_id, ioapic_pin); | 1188 | unsigned char old, new; |
1037 | #ifdef CONFIG_X86_32 | 1189 | /* check if we can change the postion */ |
1038 | return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); | 1189 | /* check if we can change the position */ |
1039 | #else | 1191 | old = mpf_checksum((unsigned char *)mpc, mpc->mpc_length); |
1040 | return gsi; | 1192 | mpc->mpc_checksum = 0xff; |
1041 | #endif | 1193 | new = mpf_checksum((unsigned char *)mpc, mpc->mpc_length); |
1194 | if (old == new) { | ||
1195 | printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n"); | ||
1196 | return 0; | ||
1197 | } | ||
1198 | printk(KERN_INFO "use in-positon replacing\n"); | ||
1199 | } else { | ||
1200 | mpf->mpf_physptr = mpc_new_phys; | ||
1201 | mpc_new = phys_to_virt(mpc_new_phys); | ||
1202 | memcpy(mpc_new, mpc, mpc->mpc_length); | ||
1203 | mpc = mpc_new; | ||
1204 | /* check if we can modify that */ | ||
1205 | if (mpc_new_phys - mpf->mpf_physptr) { | ||
1206 | struct intel_mp_floating *mpf_new; | ||
1207 | /* steal 16 bytes from [0, 1k) */ | ||
1208 | printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); | ||
1209 | mpf_new = phys_to_virt(0x400 - 16); | ||
1210 | memcpy(mpf_new, mpf, 16); | ||
1211 | mpf = mpf_new; | ||
1212 | mpf->mpf_physptr = mpc_new_phys; | ||
1213 | } | ||
1214 | mpf->mpf_checksum = 0; | ||
1215 | mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); | ||
1216 | printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); | ||
1042 | } | 1217 | } |
1043 | 1218 | ||
1044 | set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed); | ||
1045 | #ifdef CONFIG_X86_32 | ||
1046 | /* | 1219 | /* |
1047 | * For GSI >= 64, use IRQ compression | 1220 | * only replace the one with mp_INT and |
1221 | * MP_IRQ_TRIGGER_LEVEL|MP_IRQ_POLARITY_LOW, | ||
1222 | * already in mp_irqs, stored by ... and mp_config_acpi_gsi, | ||
1223 | * may need pci=routeirq for all coverage | ||
1048 | */ | 1224 | */ |
1049 | if ((gsi >= IRQ_COMPRESSION_START) | 1225 | replace_intsrc_all(mpc, mpc_new_phys, mpc_new_length); |
1050 | && (triggering == ACPI_LEVEL_SENSITIVE)) { | 1226 | |
1051 | /* | 1227 | return 0; |
1052 | * For PCI devices assign IRQs in order, avoiding gaps | ||
1053 | * due to unused I/O APIC pins. | ||
1054 | */ | ||
1055 | int irq = gsi; | ||
1056 | if (gsi < MAX_GSI_NUM) { | ||
1057 | /* | ||
1058 | * Retain the VIA chipset work-around (gsi > 15), but | ||
1059 | * avoid a problem where the 8254 timer (IRQ0) is setup | ||
1060 | * via an override (so it's not on pin 0 of the ioapic), | ||
1061 | * and at the same time, the pin 0 interrupt is a PCI | ||
1062 | * type. The gsi > 15 test could cause these two pins | ||
1063 | * to be shared as IRQ0, and they are not shareable. | ||
1064 | * So test for this condition, and if necessary, avoid | ||
1065 | * the pin collision. | ||
1066 | */ | ||
1067 | gsi = pci_irq++; | ||
1068 | /* | ||
1069 | * Don't assign IRQ used by ACPI SCI | ||
1070 | */ | ||
1071 | if (gsi == acpi_gbl_FADT.sci_interrupt) | ||
1072 | gsi = pci_irq++; | ||
1073 | gsi_to_irq[irq] = gsi; | ||
1074 | } else { | ||
1075 | printk(KERN_ERR "GSI %u is too high\n", gsi); | ||
1076 | return gsi; | ||
1077 | } | ||
1078 | } | ||
1079 | #endif | ||
1080 | io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, | ||
1081 | triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, | ||
1082 | polarity == ACPI_ACTIVE_HIGH ? 0 : 1); | ||
1083 | return gsi; | ||
1084 | } | 1228 | } |
1085 | 1229 | ||
1086 | #endif /* CONFIG_X86_IO_APIC */ | 1230 | late_initcall(update_mp_table); |
1087 | #endif /* CONFIG_ACPI */ | ||
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index e65281b1634b..f0f1de1c4a1d 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c | |||
@@ -31,6 +31,8 @@ | |||
31 | #include <asm/numaq.h> | 31 | #include <asm/numaq.h> |
32 | #include <asm/topology.h> | 32 | #include <asm/topology.h> |
33 | #include <asm/processor.h> | 33 | #include <asm/processor.h> |
34 | #include <asm/mpspec.h> | ||
35 | #include <asm/e820.h> | ||
34 | 36 | ||
35 | #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) | 37 | #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) |
36 | 38 | ||
@@ -58,6 +60,8 @@ static void __init smp_dump_qct(void) | |||
58 | node_end_pfn[node] = MB_TO_PAGES( | 60 | node_end_pfn[node] = MB_TO_PAGES( |
59 | eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); | 61 | eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); |
60 | 62 | ||
63 | e820_register_active_regions(node, node_start_pfn[node], | ||
64 | node_end_pfn[node]); | ||
61 | memory_present(node, | 65 | memory_present(node, |
62 | node_start_pfn[node], node_end_pfn[node]); | 66 | node_start_pfn[node], node_end_pfn[node]); |
63 | node_remap_size[node] = node_memmap_size_bytes(node, | 67 | node_remap_size[node] = node_memmap_size_bytes(node, |
@@ -67,13 +71,24 @@ static void __init smp_dump_qct(void) | |||
67 | } | 71 | } |
68 | } | 72 | } |
69 | 73 | ||
70 | /* | 74 | static __init void early_check_numaq(void) |
71 | * Unlike Summit, we don't really care to let the NUMA-Q | 75 | { |
72 | * fall back to flat mode. Don't compile for NUMA-Q | 76 | /* |
73 | * unless you really need it! | 77 | * Find possible boot-time SMP configuration: |
74 | */ | 78 | */ |
79 | early_find_smp_config(); | ||
80 | /* | ||
81 | * get boot-time SMP configuration: | ||
82 | */ | ||
83 | if (smp_found_config) | ||
84 | early_get_smp_config(); | ||
85 | } | ||
86 | |||
75 | int __init get_memcfg_numaq(void) | 87 | int __init get_memcfg_numaq(void) |
76 | { | 88 | { |
89 | early_check_numaq(); | ||
90 | if (!found_numaq) | ||
91 | return 0; | ||
77 | smp_dump_qct(); | 92 | smp_dump_qct(); |
78 | return 1; | 93 | return 1; |
79 | } | 94 | } |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6f80b852a196..5b0de38cde48 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -17,6 +17,7 @@ unsigned int num_processors; | |||
17 | unsigned disabled_cpus __cpuinitdata; | 17 | unsigned disabled_cpus __cpuinitdata; |
18 | /* Processor that is doing the boot up */ | 18 | /* Processor that is doing the boot up */ |
19 | unsigned int boot_cpu_physical_apicid = -1U; | 19 | unsigned int boot_cpu_physical_apicid = -1U; |
20 | unsigned int max_physical_apicid; | ||
20 | EXPORT_SYMBOL(boot_cpu_physical_apicid); | 21 | EXPORT_SYMBOL(boot_cpu_physical_apicid); |
21 | 22 | ||
22 | DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; | 23 | DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; |
@@ -137,3 +138,28 @@ void __init setup_per_cpu_areas(void) | |||
137 | } | 138 | } |
138 | 139 | ||
139 | #endif | 140 | #endif |
141 | |||
142 | void __init parse_setup_data(void) | ||
143 | { | ||
144 | struct setup_data *data; | ||
145 | u64 pa_data; | ||
146 | |||
147 | if (boot_params.hdr.version < 0x0209) | ||
148 | return; | ||
149 | pa_data = boot_params.hdr.setup_data; | ||
150 | while (pa_data) { | ||
151 | data = early_ioremap(pa_data, PAGE_SIZE); | ||
152 | switch (data->type) { | ||
153 | case SETUP_E820_EXT: | ||
154 | parse_e820_ext(data, pa_data); | ||
155 | break; | ||
156 | default: | ||
157 | break; | ||
158 | } | ||
159 | #ifndef CONFIG_DEBUG_BOOT_PARAMS | ||
160 | free_early(pa_data, pa_data+sizeof(*data)+data->len); | ||
161 | #endif | ||
162 | pa_data = data->next; | ||
163 | early_iounmap(data, PAGE_SIZE); | ||
164 | } | ||
165 | } | ||
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 5a2f8e063887..7e06ecd83174 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c | |||
@@ -59,6 +59,7 @@ | |||
59 | #include <asm/setup.h> | 59 | #include <asm/setup.h> |
60 | #include <asm/arch_hooks.h> | 60 | #include <asm/arch_hooks.h> |
61 | #include <asm/sections.h> | 61 | #include <asm/sections.h> |
62 | #include <asm/dmi.h> | ||
62 | #include <asm/io_apic.h> | 63 | #include <asm/io_apic.h> |
63 | #include <asm/ist.h> | 64 | #include <asm/ist.h> |
64 | #include <asm/io.h> | 65 | #include <asm/io.h> |
@@ -67,10 +68,13 @@ | |||
67 | #include <asm/bios_ebda.h> | 68 | #include <asm/bios_ebda.h> |
68 | #include <asm/cacheflush.h> | 69 | #include <asm/cacheflush.h> |
69 | #include <asm/processor.h> | 70 | #include <asm/processor.h> |
71 | #include <asm/efi.h> | ||
72 | #include <asm/bugs.h> | ||
70 | 73 | ||
71 | /* This value is set up by the early boot code to point to the value | 74 | /* This value is set up by the early boot code to point to the value |
72 | immediately after the boot time page tables. It contains a *physical* | 75 | immediately after the boot time page tables. It contains a *physical* |
73 | address, and must not be in the .bss segment! */ | 76 | address, and must not be in the .bss segment! */ |
77 | unsigned long init_pg_tables_start __initdata = ~0UL; | ||
74 | unsigned long init_pg_tables_end __initdata = ~0UL; | 78 | unsigned long init_pg_tables_end __initdata = ~0UL; |
75 | 79 | ||
76 | /* | 80 | /* |
@@ -182,6 +186,12 @@ int bootloader_type; | |||
182 | static unsigned int highmem_pages = -1; | 186 | static unsigned int highmem_pages = -1; |
183 | 187 | ||
184 | /* | 188 | /* |
189 | * Early DMI memory | ||
190 | */ | ||
191 | int dmi_alloc_index; | ||
192 | char dmi_alloc_data[DMI_MAX_DATA]; | ||
193 | |||
194 | /* | ||
185 | * Setup options | 195 | * Setup options |
186 | */ | 196 | */ |
187 | struct screen_info screen_info; | 197 | struct screen_info screen_info; |
@@ -237,42 +247,6 @@ static inline void copy_edd(void) | |||
237 | } | 247 | } |
238 | #endif | 248 | #endif |
239 | 249 | ||
240 | int __initdata user_defined_memmap; | ||
241 | |||
242 | /* | ||
243 | * "mem=nopentium" disables the 4MB page tables. | ||
244 | * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM | ||
245 | * to <mem>, overriding the bios size. | ||
246 | * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from | ||
247 | * <start> to <start>+<mem>, overriding the bios size. | ||
248 | * | ||
249 | * HPA tells me bootloaders need to parse mem=, so no new | ||
250 | * option should be mem= [also see Documentation/i386/boot.txt] | ||
251 | */ | ||
252 | static int __init parse_mem(char *arg) | ||
253 | { | ||
254 | if (!arg) | ||
255 | return -EINVAL; | ||
256 | |||
257 | if (strcmp(arg, "nopentium") == 0) { | ||
258 | setup_clear_cpu_cap(X86_FEATURE_PSE); | ||
259 | } else { | ||
260 | /* If the user specifies memory size, we | ||
261 | * limit the BIOS-provided memory map to | ||
262 | * that size. exactmap can be used to specify | ||
263 | * the exact map. mem=number can be used to | ||
264 | * trim the existing memory map. | ||
265 | */ | ||
266 | unsigned long long mem_size; | ||
267 | |||
268 | mem_size = memparse(arg, &arg); | ||
269 | limit_regions(mem_size); | ||
270 | user_defined_memmap = 1; | ||
271 | } | ||
272 | return 0; | ||
273 | } | ||
274 | early_param("mem", parse_mem); | ||
275 | |||
276 | #ifdef CONFIG_PROC_VMCORE | 250 | #ifdef CONFIG_PROC_VMCORE |
277 | /* elfcorehdr= specifies the location of elf core header | 251 | /* elfcorehdr= specifies the location of elf core header |
278 | * stored by the crashed kernel. | 252 | * stored by the crashed kernel. |
@@ -395,56 +369,6 @@ unsigned long __init find_max_low_pfn(void) | |||
395 | return max_low_pfn; | 369 | return max_low_pfn; |
396 | } | 370 | } |
397 | 371 | ||
398 | #define BIOS_LOWMEM_KILOBYTES 0x413 | ||
399 | |||
400 | /* | ||
401 | * The BIOS places the EBDA/XBDA at the top of conventional | ||
402 | * memory, and usually decreases the reported amount of | ||
403 | * conventional memory (int 0x12) too. This also contains a | ||
404 | * workaround for Dell systems that neglect to reserve EBDA. | ||
405 | * The same workaround also avoids a problem with the AMD768MPX | ||
406 | * chipset: reserve a page before VGA to prevent PCI prefetch | ||
407 | * into it (errata #56). Usually the page is reserved anyways, | ||
408 | * unless you have no PS/2 mouse plugged in. | ||
409 | */ | ||
410 | static void __init reserve_ebda_region(void) | ||
411 | { | ||
412 | unsigned int lowmem, ebda_addr; | ||
413 | |||
414 | /* To determine the position of the EBDA and the */ | ||
415 | /* end of conventional memory, we need to look at */ | ||
416 | /* the BIOS data area. In a paravirtual environment */ | ||
417 | /* that area is absent. We'll just have to assume */ | ||
418 | /* that the paravirt case can handle memory setup */ | ||
419 | /* correctly, without our help. */ | ||
420 | if (paravirt_enabled()) | ||
421 | return; | ||
422 | |||
423 | /* end of low (conventional) memory */ | ||
424 | lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); | ||
425 | lowmem <<= 10; | ||
426 | |||
427 | /* start of EBDA area */ | ||
428 | ebda_addr = get_bios_ebda(); | ||
429 | |||
430 | /* Fixup: bios puts an EBDA in the top 64K segment */ | ||
431 | /* of conventional memory, but does not adjust lowmem. */ | ||
432 | if ((lowmem - ebda_addr) <= 0x10000) | ||
433 | lowmem = ebda_addr; | ||
434 | |||
435 | /* Fixup: bios does not report an EBDA at all. */ | ||
436 | /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ | ||
437 | if ((ebda_addr == 0) && (lowmem >= 0x9f000)) | ||
438 | lowmem = 0x9f000; | ||
439 | |||
440 | /* Paranoia: should never happen, but... */ | ||
441 | if ((lowmem == 0) || (lowmem >= 0x100000)) | ||
442 | lowmem = 0x9f000; | ||
443 | |||
444 | /* reserve all memory between lowmem and the 1MB mark */ | ||
445 | reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT); | ||
446 | } | ||
447 | |||
448 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 372 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
449 | static void __init setup_bootmem_allocator(void); | 373 | static void __init setup_bootmem_allocator(void); |
450 | static unsigned long __init setup_memory(void) | 374 | static unsigned long __init setup_memory(void) |
@@ -462,11 +386,13 @@ static unsigned long __init setup_memory(void) | |||
462 | if (max_pfn > max_low_pfn) { | 386 | if (max_pfn > max_low_pfn) { |
463 | highstart_pfn = max_low_pfn; | 387 | highstart_pfn = max_low_pfn; |
464 | } | 388 | } |
389 | memory_present(0, 0, highend_pfn); | ||
465 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", | 390 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", |
466 | pages_to_mb(highend_pfn - highstart_pfn)); | 391 | pages_to_mb(highend_pfn - highstart_pfn)); |
467 | num_physpages = highend_pfn; | 392 | num_physpages = highend_pfn; |
468 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; | 393 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; |
469 | #else | 394 | #else |
395 | memory_present(0, 0, max_low_pfn); | ||
470 | num_physpages = max_low_pfn; | 396 | num_physpages = max_low_pfn; |
471 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; | 397 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; |
472 | #endif | 398 | #endif |
@@ -488,11 +414,12 @@ static void __init zone_sizes_init(void) | |||
488 | max_zone_pfns[ZONE_DMA] = | 414 | max_zone_pfns[ZONE_DMA] = |
489 | virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; | 415 | virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; |
490 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; | 416 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; |
417 | remove_all_active_ranges(); | ||
491 | #ifdef CONFIG_HIGHMEM | 418 | #ifdef CONFIG_HIGHMEM |
492 | max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; | 419 | max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; |
493 | add_active_range(0, 0, highend_pfn); | 420 | e820_register_active_regions(0, 0, highend_pfn); |
494 | #else | 421 | #else |
495 | add_active_range(0, 0, max_low_pfn); | 422 | e820_register_active_regions(0, 0, max_low_pfn); |
496 | #endif | 423 | #endif |
497 | 424 | ||
498 | free_area_init_nodes(max_zone_pfns); | 425 | free_area_init_nodes(max_zone_pfns); |
@@ -526,25 +453,28 @@ static void __init reserve_crashkernel(void) | |||
526 | ret = parse_crashkernel(boot_command_line, total_mem, | 453 | ret = parse_crashkernel(boot_command_line, total_mem, |
527 | &crash_size, &crash_base); | 454 | &crash_size, &crash_base); |
528 | if (ret == 0 && crash_size > 0) { | 455 | if (ret == 0 && crash_size > 0) { |
529 | if (crash_base > 0) { | 456 | if (crash_base <= 0) { |
530 | printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " | ||
531 | "for crashkernel (System RAM: %ldMB)\n", | ||
532 | (unsigned long)(crash_size >> 20), | ||
533 | (unsigned long)(crash_base >> 20), | ||
534 | (unsigned long)(total_mem >> 20)); | ||
535 | |||
536 | if (reserve_bootmem(crash_base, crash_size, | ||
537 | BOOTMEM_EXCLUSIVE) < 0) { | ||
538 | printk(KERN_INFO "crashkernel reservation " | ||
539 | "failed - memory is in use\n"); | ||
540 | return; | ||
541 | } | ||
542 | |||
543 | crashk_res.start = crash_base; | ||
544 | crashk_res.end = crash_base + crash_size - 1; | ||
545 | } else | ||
546 | printk(KERN_INFO "crashkernel reservation failed - " | 457 | printk(KERN_INFO "crashkernel reservation failed - " |
547 | "you have to specify a base address\n"); | 458 | "you have to specify a base address\n"); |
459 | return; | ||
460 | } | ||
461 | |||
462 | if (reserve_bootmem_generic(crash_base, crash_size, | ||
463 | BOOTMEM_EXCLUSIVE) < 0) { | ||
464 | printk(KERN_INFO "crashkernel reservation failed - " | ||
465 | "memory is in use\n"); | ||
466 | return; | ||
467 | } | ||
468 | |||
469 | printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " | ||
470 | "for crashkernel (System RAM: %ldMB)\n", | ||
471 | (unsigned long)(crash_size >> 20), | ||
472 | (unsigned long)(crash_base >> 20), | ||
473 | (unsigned long)(total_mem >> 20)); | ||
474 | |||
475 | crashk_res.start = crash_base; | ||
476 | crashk_res.end = crash_base + crash_size - 1; | ||
477 | insert_resource(&iomem_resource, &crashk_res); | ||
548 | } | 478 | } |
549 | } | 479 | } |
550 | #else | 480 | #else |
@@ -558,44 +488,57 @@ static bool do_relocate_initrd = false; | |||
558 | 488 | ||
559 | static void __init reserve_initrd(void) | 489 | static void __init reserve_initrd(void) |
560 | { | 490 | { |
561 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | 491 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; |
562 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | 492 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; |
563 | unsigned long ramdisk_end = ramdisk_image + ramdisk_size; | 493 | u64 ramdisk_end = ramdisk_image + ramdisk_size; |
564 | unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; | 494 | u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT; |
565 | unsigned long ramdisk_here; | 495 | u64 ramdisk_here; |
566 | |||
567 | initrd_start = 0; | ||
568 | 496 | ||
569 | if (!boot_params.hdr.type_of_loader || | 497 | if (!boot_params.hdr.type_of_loader || |
570 | !ramdisk_image || !ramdisk_size) | 498 | !ramdisk_image || !ramdisk_size) |
571 | return; /* No initrd provided by bootloader */ | 499 | return; /* No initrd provided by bootloader */ |
572 | 500 | ||
573 | if (ramdisk_end < ramdisk_image) { | 501 | initrd_start = 0; |
574 | printk(KERN_ERR "initrd wraps around end of memory, " | 502 | |
575 | "disabling initrd\n"); | ||
576 | return; | ||
577 | } | ||
578 | if (ramdisk_size >= end_of_lowmem/2) { | 503 | if (ramdisk_size >= end_of_lowmem/2) { |
504 | free_early(ramdisk_image, ramdisk_end); | ||
579 | printk(KERN_ERR "initrd too large to handle, " | 505 | printk(KERN_ERR "initrd too large to handle, " |
580 | "disabling initrd\n"); | 506 | "disabling initrd\n"); |
581 | return; | 507 | return; |
582 | } | 508 | } |
509 | |||
510 | printk(KERN_INFO "old RAMDISK: %08llx - %08llx\n", ramdisk_image, | ||
511 | ramdisk_end); | ||
512 | |||
513 | |||
583 | if (ramdisk_end <= end_of_lowmem) { | 514 | if (ramdisk_end <= end_of_lowmem) { |
584 | /* All in lowmem, easy case */ | 515 | /* All in lowmem, easy case */ |
585 | reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT); | 516 | /* |
517 | * don't need to reserve again, already reserved early | ||
518 | * in i386_start_kernel | ||
519 | */ | ||
586 | initrd_start = ramdisk_image + PAGE_OFFSET; | 520 | initrd_start = ramdisk_image + PAGE_OFFSET; |
587 | initrd_end = initrd_start+ramdisk_size; | 521 | initrd_end = initrd_start+ramdisk_size; |
588 | return; | 522 | return; |
589 | } | 523 | } |
590 | 524 | ||
591 | /* We need to move the initrd down into lowmem */ | 525 | /* We need to move the initrd down into lowmem */ |
592 | ramdisk_here = (end_of_lowmem - ramdisk_size) & PAGE_MASK; | 526 | ramdisk_here = find_e820_area(min_low_pfn<<PAGE_SHIFT, |
527 | end_of_lowmem, ramdisk_size, | ||
528 | PAGE_SIZE); | ||
529 | |||
530 | if (ramdisk_here == -1ULL) | ||
531 | panic("Cannot find place for new RAMDISK of size %lld\n", | ||
532 | ramdisk_size); | ||
593 | 533 | ||
594 | /* Note: this includes all the lowmem currently occupied by | 534 | /* Note: this includes all the lowmem currently occupied by |
595 | the initrd, we rely on that fact to keep the data intact. */ | 535 | the initrd, we rely on that fact to keep the data intact. */ |
596 | reserve_bootmem(ramdisk_here, ramdisk_size, BOOTMEM_DEFAULT); | 536 | reserve_early(ramdisk_here, ramdisk_here + ramdisk_size, |
537 | "NEW RAMDISK"); | ||
597 | initrd_start = ramdisk_here + PAGE_OFFSET; | 538 | initrd_start = ramdisk_here + PAGE_OFFSET; |
598 | initrd_end = initrd_start + ramdisk_size; | 539 | initrd_end = initrd_start + ramdisk_size; |
540 | printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", | ||
541 | ramdisk_here, ramdisk_here + ramdisk_size); | ||
599 | 542 | ||
600 | do_relocate_initrd = true; | 543 | do_relocate_initrd = true; |
601 | } | 544 | } |
@@ -604,10 +547,10 @@ static void __init reserve_initrd(void) | |||
604 | 547 | ||
605 | static void __init relocate_initrd(void) | 548 | static void __init relocate_initrd(void) |
606 | { | 549 | { |
607 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | 550 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; |
608 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | 551 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; |
609 | unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; | 552 | u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT; |
610 | unsigned long ramdisk_here; | 553 | u64 ramdisk_here; |
611 | unsigned long slop, clen, mapaddr; | 554 | unsigned long slop, clen, mapaddr; |
612 | char *p, *q; | 555 | char *p, *q; |
613 | 556 | ||
@@ -624,6 +567,10 @@ static void __init relocate_initrd(void) | |||
624 | p = (char *)__va(ramdisk_image); | 567 | p = (char *)__va(ramdisk_image); |
625 | memcpy(q, p, clen); | 568 | memcpy(q, p, clen); |
626 | q += clen; | 569 | q += clen; |
570 | /* need to free these low pages...*/ | ||
571 | printk(KERN_INFO "Freeing old partial RAMDISK %08llx-%08llx\n", | ||
572 | ramdisk_image, ramdisk_image + clen - 1); | ||
573 | free_bootmem(ramdisk_image, clen); | ||
627 | ramdisk_image += clen; | 574 | ramdisk_image += clen; |
628 | ramdisk_size -= clen; | 575 | ramdisk_size -= clen; |
629 | } | 576 | } |
@@ -642,66 +589,47 @@ static void __init relocate_initrd(void) | |||
642 | ramdisk_image += clen; | 589 | ramdisk_image += clen; |
643 | ramdisk_size -= clen; | 590 | ramdisk_size -= clen; |
644 | } | 591 | } |
592 | /* high pages is not converted by early_res_to_bootmem */ | ||
593 | ramdisk_image = boot_params.hdr.ramdisk_image; | ||
594 | ramdisk_size = boot_params.hdr.ramdisk_size; | ||
595 | printk(KERN_INFO "Copied RAMDISK from %016llx - %016llx to %08llx - %08llx\n", | ||
596 | ramdisk_image, ramdisk_image + ramdisk_size - 1, | ||
597 | ramdisk_here, ramdisk_here + ramdisk_size - 1); | ||
598 | |||
599 | /* need to free that, otherwise init highmem will reserve it again */ | ||
600 | free_early(ramdisk_image, ramdisk_image+ramdisk_size); | ||
645 | } | 601 | } |
646 | 602 | ||
647 | #endif /* CONFIG_BLK_DEV_INITRD */ | 603 | #endif /* CONFIG_BLK_DEV_INITRD */ |
648 | 604 | ||
649 | void __init setup_bootmem_allocator(void) | 605 | void __init setup_bootmem_allocator(void) |
650 | { | 606 | { |
651 | unsigned long bootmap_size; | 607 | int i; |
608 | unsigned long bootmap_size, bootmap; | ||
652 | /* | 609 | /* |
653 | * Initialize the boot-time allocator (with low memory only): | 610 | * Initialize the boot-time allocator (with low memory only): |
654 | */ | 611 | */ |
655 | bootmap_size = init_bootmem(min_low_pfn, max_low_pfn); | 612 | bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT; |
656 | 613 | bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT, | |
657 | register_bootmem_low_pages(max_low_pfn); | 614 | max_pfn_mapped<<PAGE_SHIFT, bootmap_size, |
658 | 615 | PAGE_SIZE); | |
659 | /* | 616 | if (bootmap == -1L) |
660 | * Reserve the bootmem bitmap itself as well. We do this in two | 617 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); |
661 | * steps (first step was init_bootmem()) because this catches | 618 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); |
662 | * the (very unlikely) case of us accidentally initializing the | ||
663 | * bootmem allocator with an invalid RAM area. | ||
664 | */ | ||
665 | reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) + | ||
666 | bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text), | ||
667 | BOOTMEM_DEFAULT); | ||
668 | |||
669 | /* | ||
670 | * reserve physical page 0 - it's a special BIOS page on many boxes, | ||
671 | * enabling clean reboots, SMP operation, laptop functions. | ||
672 | */ | ||
673 | reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT); | ||
674 | |||
675 | /* reserve EBDA region */ | ||
676 | reserve_ebda_region(); | ||
677 | |||
678 | #ifdef CONFIG_SMP | ||
679 | /* | ||
680 | * But first pinch a few for the stack/trampoline stuff | ||
681 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
682 | * trampoline before removing it. (see the GDT stuff) | ||
683 | */ | ||
684 | reserve_bootmem(PAGE_SIZE, PAGE_SIZE, BOOTMEM_DEFAULT); | ||
685 | #endif | ||
686 | #ifdef CONFIG_ACPI_SLEEP | ||
687 | /* | ||
688 | * Reserve low memory region for sleep support. | ||
689 | */ | ||
690 | acpi_reserve_bootmem(); | ||
691 | #endif | ||
692 | #ifdef CONFIG_X86_FIND_SMP_CONFIG | ||
693 | /* | ||
694 | * Find and reserve possible boot-time SMP configuration: | ||
695 | */ | ||
696 | find_smp_config(); | ||
697 | #endif | ||
698 | #ifdef CONFIG_BLK_DEV_INITRD | 619 | #ifdef CONFIG_BLK_DEV_INITRD |
699 | reserve_initrd(); | 620 | reserve_initrd(); |
700 | #endif | 621 | #endif |
701 | numa_kva_reserve(); | 622 | bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn); |
702 | reserve_crashkernel(); | 623 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", |
624 | max_pfn_mapped<<PAGE_SHIFT); | ||
625 | printk(KERN_INFO " low ram: %08lx - %08lx\n", | ||
626 | min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT); | ||
627 | printk(KERN_INFO " bootmap %08lx - %08lx\n", | ||
628 | bootmap, bootmap + bootmap_size); | ||
629 | for_each_online_node(i) | ||
630 | free_bootmem_with_active_regions(i, max_low_pfn); | ||
631 | early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); | ||
703 | 632 | ||
704 | reserve_ibft_region(); | ||
705 | } | 633 | } |
706 | 634 | ||
707 | /* | 635 | /* |
@@ -731,12 +659,6 @@ static void set_mca_bus(int x) | |||
731 | static void set_mca_bus(int x) { } | 659 | static void set_mca_bus(int x) { } |
732 | #endif | 660 | #endif |
733 | 661 | ||
734 | /* Overridden in paravirt.c if CONFIG_PARAVIRT */ | ||
735 | char * __init __attribute__((weak)) memory_setup(void) | ||
736 | { | ||
737 | return machine_specific_memory_setup(); | ||
738 | } | ||
739 | |||
740 | #ifdef CONFIG_NUMA | 662 | #ifdef CONFIG_NUMA |
741 | /* | 663 | /* |
742 | * In the golden day, when everything among i386 and x86_64 will be | 664 | * In the golden day, when everything among i386 and x86_64 will be |
@@ -749,6 +671,8 @@ int x86_cpu_to_node_map_init[NR_CPUS] = { | |||
749 | DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE; | 671 | DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE; |
750 | #endif | 672 | #endif |
751 | 673 | ||
674 | static void probe_roms(void); | ||
675 | |||
752 | /* | 676 | /* |
753 | * Determine if we were loaded by an EFI loader. If so, then we have also been | 677 | * Determine if we were loaded by an EFI loader. If so, then we have also been |
754 | * passed the efi memmap, systab, etc., so we should use these data structures | 678 | * passed the efi memmap, systab, etc., so we should use these data structures |
@@ -758,17 +682,21 @@ DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE; | |||
758 | */ | 682 | */ |
759 | void __init setup_arch(char **cmdline_p) | 683 | void __init setup_arch(char **cmdline_p) |
760 | { | 684 | { |
685 | int i; | ||
761 | unsigned long max_low_pfn; | 686 | unsigned long max_low_pfn; |
762 | 687 | ||
763 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 688 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
764 | pre_setup_arch_hook(); | 689 | pre_setup_arch_hook(); |
765 | early_cpu_init(); | 690 | early_cpu_init(); |
766 | early_ioremap_init(); | 691 | early_ioremap_init(); |
692 | reserve_setup_data(); | ||
767 | 693 | ||
768 | #ifdef CONFIG_EFI | 694 | #ifdef CONFIG_EFI |
769 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, | 695 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, |
770 | "EL32", 4)) | 696 | "EL32", 4)) { |
771 | efi_enabled = 1; | 697 | efi_enabled = 1; |
698 | efi_reserve_early(); | ||
699 | } | ||
772 | #endif | 700 | #endif |
773 | 701 | ||
774 | ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); | 702 | ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); |
@@ -792,8 +720,7 @@ void __init setup_arch(char **cmdline_p) | |||
792 | #endif | 720 | #endif |
793 | ARCH_SETUP | 721 | ARCH_SETUP |
794 | 722 | ||
795 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | 723 | setup_memory_map(); |
796 | print_memory_map(memory_setup()); | ||
797 | 724 | ||
798 | copy_edd(); | 725 | copy_edd(); |
799 | 726 | ||
@@ -811,12 +738,18 @@ void __init setup_arch(char **cmdline_p) | |||
811 | bss_resource.start = virt_to_phys(&__bss_start); | 738 | bss_resource.start = virt_to_phys(&__bss_start); |
812 | bss_resource.end = virt_to_phys(&__bss_stop)-1; | 739 | bss_resource.end = virt_to_phys(&__bss_stop)-1; |
813 | 740 | ||
741 | parse_setup_data(); | ||
742 | |||
814 | parse_early_param(); | 743 | parse_early_param(); |
815 | 744 | ||
816 | if (user_defined_memmap) { | 745 | finish_e820_parsing(); |
817 | printk(KERN_INFO "user-defined physical RAM map:\n"); | 746 | |
818 | print_memory_map("user"); | 747 | probe_roms(); |
819 | } | 748 | |
749 | /* after parse_early_param, so could debug it */ | ||
750 | insert_resource(&iomem_resource, &code_resource); | ||
751 | insert_resource(&iomem_resource, &data_resource); | ||
752 | insert_resource(&iomem_resource, &bss_resource); | ||
820 | 753 | ||
821 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | 754 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); |
822 | *cmdline_p = command_line; | 755 | *cmdline_p = command_line; |
@@ -824,14 +757,67 @@ void __init setup_arch(char **cmdline_p) | |||
824 | if (efi_enabled) | 757 | if (efi_enabled) |
825 | efi_init(); | 758 | efi_init(); |
826 | 759 | ||
760 | if (ppro_with_ram_bug()) { | ||
761 | e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM, | ||
762 | E820_RESERVED); | ||
763 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | ||
764 | printk(KERN_INFO "fixed physical RAM map:\n"); | ||
765 | e820_print_map("bad_ppro"); | ||
766 | } | ||
767 | |||
768 | e820_register_active_regions(0, 0, -1UL); | ||
769 | /* | ||
770 | * partially used pages are not usable - thus | ||
771 | * we are rounding upwards: | ||
772 | */ | ||
773 | max_pfn = e820_end_of_ram(); | ||
774 | |||
775 | /* preallocate 4k for mptable mpc */ | ||
776 | early_reserve_e820_mpc_new(); | ||
827 | /* update e820 for memory not covered by WB MTRRs */ | 777 | /* update e820 for memory not covered by WB MTRRs */ |
828 | propagate_e820_map(); | ||
829 | mtrr_bp_init(); | 778 | mtrr_bp_init(); |
830 | if (mtrr_trim_uncached_memory(max_pfn)) | 779 | if (mtrr_trim_uncached_memory(max_pfn)) { |
831 | propagate_e820_map(); | 780 | remove_all_active_ranges(); |
781 | e820_register_active_regions(0, 0, -1UL); | ||
782 | max_pfn = e820_end_of_ram(); | ||
783 | } | ||
784 | |||
785 | dmi_scan_machine(); | ||
786 | |||
787 | io_delay_init(); | ||
788 | |||
789 | #ifdef CONFIG_ACPI | ||
790 | /* | ||
791 | * Parse the ACPI tables for possible boot-time SMP configuration. | ||
792 | */ | ||
793 | acpi_boot_table_init(); | ||
794 | #endif | ||
795 | |||
796 | #ifdef CONFIG_ACPI_NUMA | ||
797 | /* | ||
798 | * Parse SRAT to discover nodes. | ||
799 | */ | ||
800 | acpi_numa_init(); | ||
801 | #endif | ||
832 | 802 | ||
833 | max_low_pfn = setup_memory(); | 803 | max_low_pfn = setup_memory(); |
834 | 804 | ||
805 | #ifdef CONFIG_ACPI_SLEEP | ||
806 | /* | ||
807 | * Reserve low memory region for sleep support. | ||
808 | */ | ||
809 | acpi_reserve_bootmem(); | ||
810 | #endif | ||
811 | #ifdef CONFIG_X86_FIND_SMP_CONFIG | ||
812 | /* | ||
813 | * Find and reserve possible boot-time SMP configuration: | ||
814 | */ | ||
815 | find_smp_config(); | ||
816 | #endif | ||
817 | reserve_crashkernel(); | ||
818 | |||
819 | reserve_ibft_region(); | ||
820 | |||
835 | #ifdef CONFIG_KVM_CLOCK | 821 | #ifdef CONFIG_KVM_CLOCK |
836 | kvmclock_init(); | 822 | kvmclock_init(); |
837 | #endif | 823 | #endif |
@@ -855,9 +841,6 @@ void __init setup_arch(char **cmdline_p) | |||
855 | * not to exceed the 8Mb limit. | 841 | * not to exceed the 8Mb limit. |
856 | */ | 842 | */ |
857 | 843 | ||
858 | #ifdef CONFIG_SMP | ||
859 | smp_alloc_memory(); /* AP processor realmode stacks in low memory*/ | ||
860 | #endif | ||
861 | paging_init(); | 844 | paging_init(); |
862 | 845 | ||
863 | /* | 846 | /* |
@@ -869,10 +852,6 @@ void __init setup_arch(char **cmdline_p) | |||
869 | init_ohci1394_dma_on_all_controllers(); | 852 | init_ohci1394_dma_on_all_controllers(); |
870 | #endif | 853 | #endif |
871 | 854 | ||
872 | remapped_pgdat_init(); | ||
873 | sparse_init(); | ||
874 | zone_sizes_init(); | ||
875 | |||
876 | /* | 855 | /* |
877 | * NOTE: at this point the bootmem allocator is fully available. | 856 | * NOTE: at this point the bootmem allocator is fully available. |
878 | */ | 857 | */ |
@@ -881,11 +860,11 @@ void __init setup_arch(char **cmdline_p) | |||
881 | relocate_initrd(); | 860 | relocate_initrd(); |
882 | #endif | 861 | #endif |
883 | 862 | ||
884 | paravirt_post_allocator_init(); | 863 | remapped_pgdat_init(); |
885 | 864 | sparse_init(); | |
886 | dmi_scan_machine(); | 865 | zone_sizes_init(); |
887 | 866 | ||
888 | io_delay_init(); | 867 | paravirt_post_allocator_init(); |
889 | 868 | ||
890 | #ifdef CONFIG_X86_SMP | 869 | #ifdef CONFIG_X86_SMP |
891 | /* | 870 | /* |
@@ -903,32 +882,31 @@ void __init setup_arch(char **cmdline_p) | |||
903 | generic_apic_probe(); | 882 | generic_apic_probe(); |
904 | #endif | 883 | #endif |
905 | 884 | ||
906 | #ifdef CONFIG_ACPI | ||
907 | /* | ||
908 | * Parse the ACPI tables for possible boot-time SMP configuration. | ||
909 | */ | ||
910 | acpi_boot_table_init(); | ||
911 | #endif | ||
912 | |||
913 | early_quirks(); | 885 | early_quirks(); |
914 | 886 | ||
915 | #ifdef CONFIG_ACPI | 887 | #ifdef CONFIG_ACPI |
916 | acpi_boot_init(); | 888 | acpi_boot_init(); |
917 | 889 | #endif | |
890 | #if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS) | ||
891 | if (smp_found_config) | ||
892 | get_smp_config(); | ||
893 | #endif | ||
918 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) | 894 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) |
919 | if (def_to_bigsmp) | 895 | if (def_to_bigsmp) |
920 | printk(KERN_WARNING "More than 8 CPUs detected and " | 896 | printk(KERN_WARNING "More than 8 CPUs detected and " |
921 | "CONFIG_X86_PC cannot handle it.\nUse " | 897 | "CONFIG_X86_PC cannot handle it.\nUse " |
922 | "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n"); | 898 | "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n"); |
923 | #endif | 899 | #endif |
924 | #endif | ||
925 | #ifdef CONFIG_X86_LOCAL_APIC | ||
926 | if (smp_found_config) | ||
927 | get_smp_config(); | ||
928 | #endif | ||
929 | 900 | ||
930 | e820_register_memory(); | 901 | e820_reserve_resources(); |
931 | e820_mark_nosave_regions(); | 902 | e820_mark_nosave_regions(max_low_pfn); |
903 | |||
904 | request_resource(&iomem_resource, &video_ram_resource); | ||
905 | /* request I/O space for devices used on all i[345]86 PCs */ | ||
906 | for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) | ||
907 | request_resource(&ioport_resource, &standard_io_resources[i]); | ||
908 | |||
909 | e820_setup_gap(); | ||
932 | 910 | ||
933 | #ifdef CONFIG_VT | 911 | #ifdef CONFIG_VT |
934 | #if defined(CONFIG_VGA_CONSOLE) | 912 | #if defined(CONFIG_VGA_CONSOLE) |
@@ -940,25 +918,147 @@ void __init setup_arch(char **cmdline_p) | |||
940 | #endif | 918 | #endif |
941 | } | 919 | } |
942 | 920 | ||
943 | /* | 921 | static struct resource system_rom_resource = { |
944 | * Request address space for all standard resources | 922 | .name = "System ROM", |
945 | * | 923 | .start = 0xf0000, |
946 | * This is called just before pcibios_init(), which is also a | 924 | .end = 0xfffff, |
947 | * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). | 925 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
948 | */ | 926 | }; |
949 | static int __init request_standard_resources(void) | 927 | |
928 | static struct resource extension_rom_resource = { | ||
929 | .name = "Extension ROM", | ||
930 | .start = 0xe0000, | ||
931 | .end = 0xeffff, | ||
932 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
933 | }; | ||
934 | |||
935 | static struct resource adapter_rom_resources[] = { { | ||
936 | .name = "Adapter ROM", | ||
937 | .start = 0xc8000, | ||
938 | .end = 0, | ||
939 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
940 | }, { | ||
941 | .name = "Adapter ROM", | ||
942 | .start = 0, | ||
943 | .end = 0, | ||
944 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
945 | }, { | ||
946 | .name = "Adapter ROM", | ||
947 | .start = 0, | ||
948 | .end = 0, | ||
949 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
950 | }, { | ||
951 | .name = "Adapter ROM", | ||
952 | .start = 0, | ||
953 | .end = 0, | ||
954 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
955 | }, { | ||
956 | .name = "Adapter ROM", | ||
957 | .start = 0, | ||
958 | .end = 0, | ||
959 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
960 | }, { | ||
961 | .name = "Adapter ROM", | ||
962 | .start = 0, | ||
963 | .end = 0, | ||
964 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
965 | } }; | ||
966 | |||
967 | static struct resource video_rom_resource = { | ||
968 | .name = "Video ROM", | ||
969 | .start = 0xc0000, | ||
970 | .end = 0xc7fff, | ||
971 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
972 | }; | ||
973 | |||
974 | #define ROMSIGNATURE 0xaa55 | ||
975 | |||
976 | static int __init romsignature(const unsigned char *rom) | ||
950 | { | 977 | { |
978 | const unsigned short * const ptr = (const unsigned short *)rom; | ||
979 | unsigned short sig; | ||
980 | |||
981 | return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE; | ||
982 | } | ||
983 | |||
984 | static int __init romchecksum(const unsigned char *rom, unsigned long length) | ||
985 | { | ||
986 | unsigned char sum, c; | ||
987 | |||
988 | for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--) | ||
989 | sum += c; | ||
990 | return !length && !sum; | ||
991 | } | ||
992 | |||
993 | static void __init probe_roms(void) | ||
994 | { | ||
995 | const unsigned char *rom; | ||
996 | unsigned long start, length, upper; | ||
997 | unsigned char c; | ||
951 | int i; | 998 | int i; |
952 | 999 | ||
953 | printk(KERN_INFO "Setting up standard PCI resources\n"); | 1000 | /* video rom */ |
954 | init_iomem_resources(&code_resource, &data_resource, &bss_resource); | 1001 | upper = adapter_rom_resources[0].start; |
1002 | for (start = video_rom_resource.start; start < upper; start += 2048) { | ||
1003 | rom = isa_bus_to_virt(start); | ||
1004 | if (!romsignature(rom)) | ||
1005 | continue; | ||
955 | 1006 | ||
956 | request_resource(&iomem_resource, &video_ram_resource); | 1007 | video_rom_resource.start = start; |
957 | 1008 | ||
958 | /* request I/O space for devices used on all i[345]86 PCs */ | 1009 | if (probe_kernel_address(rom + 2, c) != 0) |
959 | for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) | 1010 | continue; |
960 | request_resource(&ioport_resource, &standard_io_resources[i]); | 1011 | |
961 | return 0; | 1012 | /* 0 < length <= 0x7f * 512, historically */ |
1013 | length = c * 512; | ||
1014 | |||
1015 | /* if checksum okay, trust length byte */ | ||
1016 | if (length && romchecksum(rom, length)) | ||
1017 | video_rom_resource.end = start + length - 1; | ||
1018 | |||
1019 | request_resource(&iomem_resource, &video_rom_resource); | ||
1020 | break; | ||
1021 | } | ||
1022 | |||
1023 | start = (video_rom_resource.end + 1 + 2047) & ~2047UL; | ||
1024 | if (start < upper) | ||
1025 | start = upper; | ||
1026 | |||
1027 | /* system rom */ | ||
1028 | request_resource(&iomem_resource, &system_rom_resource); | ||
1029 | upper = system_rom_resource.start; | ||
1030 | |||
1031 | /* check for extension rom (ignore length byte!) */ | ||
1032 | rom = isa_bus_to_virt(extension_rom_resource.start); | ||
1033 | if (romsignature(rom)) { | ||
1034 | length = extension_rom_resource.end - extension_rom_resource.start + 1; | ||
1035 | if (romchecksum(rom, length)) { | ||
1036 | request_resource(&iomem_resource, &extension_rom_resource); | ||
1037 | upper = extension_rom_resource.start; | ||
1038 | } | ||
1039 | } | ||
1040 | |||
1041 | /* check for adapter roms on 2k boundaries */ | ||
1042 | for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { | ||
1043 | rom = isa_bus_to_virt(start); | ||
1044 | if (!romsignature(rom)) | ||
1045 | continue; | ||
1046 | |||
1047 | if (probe_kernel_address(rom + 2, c) != 0) | ||
1048 | continue; | ||
1049 | |||
1050 | /* 0 < length <= 0x7f * 512, historically */ | ||
1051 | length = c * 512; | ||
1052 | |||
1053 | /* but accept any length that fits if checksum okay */ | ||
1054 | if (!length || start + length > upper || !romchecksum(rom, length)) | ||
1055 | continue; | ||
1056 | |||
1057 | adapter_rom_resources[i].start = start; | ||
1058 | adapter_rom_resources[i].end = start + length - 1; | ||
1059 | request_resource(&iomem_resource, &adapter_rom_resources[i]); | ||
1060 | |||
1061 | start = adapter_rom_resources[i++].end & ~2047UL; | ||
1062 | } | ||
962 | } | 1063 | } |
963 | 1064 | ||
964 | subsys_initcall(request_standard_resources); | ||
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 545440e471b2..9a87113ba996 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c | |||
@@ -56,6 +56,7 @@ | |||
56 | #include <asm/desc.h> | 56 | #include <asm/desc.h> |
57 | #include <video/edid.h> | 57 | #include <video/edid.h> |
58 | #include <asm/e820.h> | 58 | #include <asm/e820.h> |
59 | #include <asm/mpspec.h> | ||
59 | #include <asm/dma.h> | 60 | #include <asm/dma.h> |
60 | #include <asm/gart.h> | 61 | #include <asm/gart.h> |
61 | #include <asm/mpspec.h> | 62 | #include <asm/mpspec.h> |
@@ -245,7 +246,7 @@ static void __init reserve_crashkernel(void) | |||
245 | return; | 246 | return; |
246 | } | 247 | } |
247 | 248 | ||
248 | if (reserve_bootmem(crash_base, crash_size, | 249 | if (reserve_bootmem_generic(crash_base, crash_size, |
249 | BOOTMEM_EXCLUSIVE) < 0) { | 250 | BOOTMEM_EXCLUSIVE) < 0) { |
250 | printk(KERN_INFO "crashkernel reservation failed - " | 251 | printk(KERN_INFO "crashkernel reservation failed - " |
251 | "memory is in use\n"); | 252 | "memory is in use\n"); |
@@ -267,34 +268,6 @@ static inline void __init reserve_crashkernel(void) | |||
267 | {} | 268 | {} |
268 | #endif | 269 | #endif |
269 | 270 | ||
270 | /* Overridden in paravirt.c if CONFIG_PARAVIRT */ | ||
271 | void __attribute__((weak)) __init memory_setup(void) | ||
272 | { | ||
273 | machine_specific_memory_setup(); | ||
274 | } | ||
275 | |||
276 | static void __init parse_setup_data(void) | ||
277 | { | ||
278 | struct setup_data *data; | ||
279 | unsigned long pa_data; | ||
280 | |||
281 | if (boot_params.hdr.version < 0x0209) | ||
282 | return; | ||
283 | pa_data = boot_params.hdr.setup_data; | ||
284 | while (pa_data) { | ||
285 | data = early_ioremap(pa_data, PAGE_SIZE); | ||
286 | switch (data->type) { | ||
287 | default: | ||
288 | break; | ||
289 | } | ||
290 | #ifndef CONFIG_DEBUG_BOOT_PARAMS | ||
291 | free_early(pa_data, pa_data+sizeof(*data)+data->len); | ||
292 | #endif | ||
293 | pa_data = data->next; | ||
294 | early_iounmap(data, PAGE_SIZE); | ||
295 | } | ||
296 | } | ||
297 | |||
298 | /* | 271 | /* |
299 | * setup_arch - architecture-specific boot-time initializations | 272 | * setup_arch - architecture-specific boot-time initializations |
300 | * | 273 | * |
@@ -319,13 +292,15 @@ void __init setup_arch(char **cmdline_p) | |||
319 | #endif | 292 | #endif |
320 | #ifdef CONFIG_EFI | 293 | #ifdef CONFIG_EFI |
321 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, | 294 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, |
322 | "EL64", 4)) | 295 | "EL64", 4)) { |
323 | efi_enabled = 1; | 296 | efi_enabled = 1; |
297 | efi_reserve_early(); | ||
298 | } | ||
324 | #endif | 299 | #endif |
325 | 300 | ||
326 | ARCH_SETUP | 301 | ARCH_SETUP |
327 | 302 | ||
328 | memory_setup(); | 303 | setup_memory_map(); |
329 | copy_edd(); | 304 | copy_edd(); |
330 | 305 | ||
331 | if (!boot_params.hdr.root_flags) | 306 | if (!boot_params.hdr.root_flags) |
@@ -372,9 +347,13 @@ void __init setup_arch(char **cmdline_p) | |||
372 | * we are rounding upwards: | 347 | * we are rounding upwards: |
373 | */ | 348 | */ |
374 | end_pfn = e820_end_of_ram(); | 349 | end_pfn = e820_end_of_ram(); |
350 | |||
351 | /* pre allocte 4k for mptable mpc */ | ||
352 | early_reserve_e820_mpc_new(); | ||
375 | /* update e820 for memory not covered by WB MTRRs */ | 353 | /* update e820 for memory not covered by WB MTRRs */ |
376 | mtrr_bp_init(); | 354 | mtrr_bp_init(); |
377 | if (mtrr_trim_uncached_memory(end_pfn)) { | 355 | if (mtrr_trim_uncached_memory(end_pfn)) { |
356 | remove_all_active_ranges(); | ||
378 | e820_register_active_regions(0, 0, -1UL); | 357 | e820_register_active_regions(0, 0, -1UL); |
379 | end_pfn = e820_end_of_ram(); | 358 | end_pfn = e820_end_of_ram(); |
380 | } | 359 | } |
@@ -383,7 +362,7 @@ void __init setup_arch(char **cmdline_p) | |||
383 | 362 | ||
384 | check_efer(); | 363 | check_efer(); |
385 | 364 | ||
386 | max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT)); | 365 | max_pfn_mapped = init_memory_mapping(0, (end_pfn << PAGE_SHIFT)); |
387 | if (efi_enabled) | 366 | if (efi_enabled) |
388 | efi_init(); | 367 | efi_init(); |
389 | 368 | ||
@@ -444,13 +423,12 @@ void __init setup_arch(char **cmdline_p) | |||
444 | acpi_reserve_bootmem(); | 423 | acpi_reserve_bootmem(); |
445 | #endif | 424 | #endif |
446 | 425 | ||
447 | if (efi_enabled) | 426 | #ifdef CONFIG_X86_MPPARSE |
448 | efi_reserve_bootmem(); | ||
449 | |||
450 | /* | 427 | /* |
451 | * Find and reserve possible boot-time SMP configuration: | 428 | * Find and reserve possible boot-time SMP configuration: |
452 | */ | 429 | */ |
453 | find_smp_config(); | 430 | find_smp_config(); |
431 | #endif | ||
454 | #ifdef CONFIG_BLK_DEV_INITRD | 432 | #ifdef CONFIG_BLK_DEV_INITRD |
455 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { | 433 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { |
456 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | 434 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; |
@@ -493,11 +471,13 @@ void __init setup_arch(char **cmdline_p) | |||
493 | 471 | ||
494 | init_cpu_to_node(); | 472 | init_cpu_to_node(); |
495 | 473 | ||
474 | #ifdef CONFIG_X86_MPPARSE | ||
496 | /* | 475 | /* |
497 | * get boot-time SMP configuration: | 476 | * get boot-time SMP configuration: |
498 | */ | 477 | */ |
499 | if (smp_found_config) | 478 | if (smp_found_config) |
500 | get_smp_config(); | 479 | get_smp_config(); |
480 | #endif | ||
501 | init_apic_mappings(); | 481 | init_apic_mappings(); |
502 | ioapic_init_mappings(); | 482 | ioapic_init_mappings(); |
503 | 483 | ||
@@ -507,7 +487,7 @@ void __init setup_arch(char **cmdline_p) | |||
507 | * We trust e820 completely. No explicit ROM probing in memory. | 487 | * We trust e820 completely. No explicit ROM probing in memory. |
508 | */ | 488 | */ |
509 | e820_reserve_resources(); | 489 | e820_reserve_resources(); |
510 | e820_mark_nosave_regions(); | 490 | e820_mark_nosave_regions(end_pfn); |
511 | 491 | ||
512 | /* request I/O space for devices used on all i[345]86 PCs */ | 492 | /* request I/O space for devices used on all i[345]86 PCs */ |
513 | for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) | 493 | for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f2b666756299..6be701f3027f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -554,23 +554,6 @@ cpumask_t cpu_coregroup_map(int cpu) | |||
554 | return c->llc_shared_map; | 554 | return c->llc_shared_map; |
555 | } | 555 | } |
556 | 556 | ||
557 | #ifdef CONFIG_X86_32 | ||
558 | /* | ||
559 | * We are called very early to get the low memory for the | ||
560 | * SMP bootup trampoline page. | ||
561 | */ | ||
562 | void __init smp_alloc_memory(void) | ||
563 | { | ||
564 | trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE); | ||
565 | /* | ||
566 | * Has to be in very low memory so we can execute | ||
567 | * real-mode AP code. | ||
568 | */ | ||
569 | if (__pa(trampoline_base) >= 0x9F000) | ||
570 | BUG(); | ||
571 | } | ||
572 | #endif | ||
573 | |||
574 | static void impress_friends(void) | 557 | static void impress_friends(void) |
575 | { | 558 | { |
576 | int cpu; | 559 | int cpu; |
diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/kernel/srat_32.c index 70e4a374b4e8..5978023b799b 100644 --- a/arch/x86/kernel/srat_32.c +++ b/arch/x86/kernel/srat_32.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <asm/srat.h> | 31 | #include <asm/srat.h> |
32 | #include <asm/topology.h> | 32 | #include <asm/topology.h> |
33 | #include <asm/smp.h> | 33 | #include <asm/smp.h> |
34 | #include <asm/e820.h> | ||
34 | 35 | ||
35 | /* | 36 | /* |
36 | * proximity macros and definitions | 37 | * proximity macros and definitions |
@@ -41,7 +42,7 @@ | |||
41 | #define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit))) | 42 | #define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit))) |
42 | /* bitmap length; _PXM is at most 255 */ | 43 | /* bitmap length; _PXM is at most 255 */ |
43 | #define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) | 44 | #define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) |
44 | static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ | 45 | static u8 __initdata pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ |
45 | 46 | ||
46 | #define MAX_CHUNKS_PER_NODE 3 | 47 | #define MAX_CHUNKS_PER_NODE 3 |
47 | #define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES) | 48 | #define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES) |
@@ -52,16 +53,37 @@ struct node_memory_chunk_s { | |||
52 | u8 nid; // which cnode contains this chunk? | 53 | u8 nid; // which cnode contains this chunk? |
53 | u8 bank; // which mem bank on this node | 54 | u8 bank; // which mem bank on this node |
54 | }; | 55 | }; |
55 | static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS]; | 56 | static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS]; |
56 | 57 | ||
57 | static int num_memory_chunks; /* total number of memory chunks */ | 58 | static int __initdata num_memory_chunks; /* total number of memory chunks */ |
58 | static u8 __initdata apicid_to_pxm[MAX_APICID]; | 59 | static u8 __initdata apicid_to_pxm[MAX_APICID]; |
59 | 60 | ||
61 | int numa_off __initdata; | ||
62 | int acpi_numa __initdata; | ||
63 | |||
64 | static __init void bad_srat(void) | ||
65 | { | ||
66 | printk(KERN_ERR "SRAT: SRAT not used.\n"); | ||
67 | acpi_numa = -1; | ||
68 | num_memory_chunks = 0; | ||
69 | } | ||
70 | |||
71 | static __init inline int srat_disabled(void) | ||
72 | { | ||
73 | return numa_off || acpi_numa < 0; | ||
74 | } | ||
75 | |||
60 | /* Identify CPU proximity domains */ | 76 | /* Identify CPU proximity domains */ |
61 | static void __init parse_cpu_affinity_structure(char *p) | 77 | void __init |
78 | acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity) | ||
62 | { | 79 | { |
63 | struct acpi_srat_cpu_affinity *cpu_affinity = | 80 | if (srat_disabled()) |
64 | (struct acpi_srat_cpu_affinity *) p; | 81 | return; |
82 | if (cpu_affinity->header.length != | ||
83 | sizeof(struct acpi_srat_cpu_affinity)) { | ||
84 | bad_srat(); | ||
85 | return; | ||
86 | } | ||
65 | 87 | ||
66 | if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0) | 88 | if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0) |
67 | return; /* empty entry */ | 89 | return; /* empty entry */ |
@@ -79,14 +101,21 @@ static void __init parse_cpu_affinity_structure(char *p) | |||
79 | * Identify memory proximity domains and hot-remove capabilities. | 101 | * Identify memory proximity domains and hot-remove capabilities. |
80 | * Fill node memory chunk list structure. | 102 | * Fill node memory chunk list structure. |
81 | */ | 103 | */ |
82 | static void __init parse_memory_affinity_structure (char *sratp) | 104 | void __init |
105 | acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity) | ||
83 | { | 106 | { |
84 | unsigned long long paddr, size; | 107 | unsigned long long paddr, size; |
85 | unsigned long start_pfn, end_pfn; | 108 | unsigned long start_pfn, end_pfn; |
86 | u8 pxm; | 109 | u8 pxm; |
87 | struct node_memory_chunk_s *p, *q, *pend; | 110 | struct node_memory_chunk_s *p, *q, *pend; |
88 | struct acpi_srat_mem_affinity *memory_affinity = | 111 | |
89 | (struct acpi_srat_mem_affinity *) sratp; | 112 | if (srat_disabled()) |
113 | return; | ||
114 | if (memory_affinity->header.length != | ||
115 | sizeof(struct acpi_srat_mem_affinity)) { | ||
116 | bad_srat(); | ||
117 | return; | ||
118 | } | ||
90 | 119 | ||
91 | if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0) | 120 | if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0) |
92 | return; /* empty entry */ | 121 | return; /* empty entry */ |
@@ -134,6 +163,14 @@ static void __init parse_memory_affinity_structure (char *sratp) | |||
134 | "enabled and removable" : "enabled" ) ); | 163 | "enabled and removable" : "enabled" ) ); |
135 | } | 164 | } |
136 | 165 | ||
166 | /* Callback for SLIT parsing */ | ||
167 | void __init acpi_numa_slit_init(struct acpi_table_slit *slit) | ||
168 | { | ||
169 | } | ||
170 | |||
171 | void acpi_numa_arch_fixup(void) | ||
172 | { | ||
173 | } | ||
137 | /* | 174 | /* |
138 | * The SRAT table always lists ascending addresses, so can always | 175 | * The SRAT table always lists ascending addresses, so can always |
139 | * assume that the first "start" address that you see is the real | 176 | * assume that the first "start" address that you see is the real |
@@ -166,39 +203,13 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c | |||
166 | node_end_pfn[nid] = memory_chunk->end_pfn; | 203 | node_end_pfn[nid] = memory_chunk->end_pfn; |
167 | } | 204 | } |
168 | 205 | ||
169 | /* Parse the ACPI Static Resource Affinity Table */ | 206 | int __init get_memcfg_from_srat(void) |
170 | static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) | ||
171 | { | 207 | { |
172 | u8 *start, *end, *p; | ||
173 | int i, j, nid; | 208 | int i, j, nid; |
174 | 209 | ||
175 | start = (u8 *)(&(sratp->reserved) + 1); /* skip header */ | ||
176 | p = start; | ||
177 | end = (u8 *)sratp + sratp->header.length; | ||
178 | |||
179 | memset(pxm_bitmap, 0, sizeof(pxm_bitmap)); /* init proximity domain bitmap */ | ||
180 | memset(node_memory_chunk, 0, sizeof(node_memory_chunk)); | ||
181 | 210 | ||
182 | num_memory_chunks = 0; | 211 | if (srat_disabled()) |
183 | while (p < end) { | 212 | goto out_fail; |
184 | switch (*p) { | ||
185 | case ACPI_SRAT_TYPE_CPU_AFFINITY: | ||
186 | parse_cpu_affinity_structure(p); | ||
187 | break; | ||
188 | case ACPI_SRAT_TYPE_MEMORY_AFFINITY: | ||
189 | parse_memory_affinity_structure(p); | ||
190 | break; | ||
191 | default: | ||
192 | printk("ACPI 2.0 SRAT: unknown entry skipped: type=0x%02X, len=%d\n", p[0], p[1]); | ||
193 | break; | ||
194 | } | ||
195 | p += p[1]; | ||
196 | if (p[1] == 0) { | ||
197 | printk("acpi20_parse_srat: Entry length value is zero;" | ||
198 | " can't parse any further!\n"); | ||
199 | break; | ||
200 | } | ||
201 | } | ||
202 | 213 | ||
203 | if (num_memory_chunks == 0) { | 214 | if (num_memory_chunks == 0) { |
204 | printk("could not finy any ACPI SRAT memory areas.\n"); | 215 | printk("could not finy any ACPI SRAT memory areas.\n"); |
@@ -244,115 +255,19 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) | |||
244 | printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", | 255 | printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", |
245 | j, chunk->nid, chunk->start_pfn, chunk->end_pfn); | 256 | j, chunk->nid, chunk->start_pfn, chunk->end_pfn); |
246 | node_read_chunk(chunk->nid, chunk); | 257 | node_read_chunk(chunk->nid, chunk); |
247 | add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn); | 258 | e820_register_active_regions(chunk->nid, chunk->start_pfn, |
259 | min(chunk->end_pfn, max_pfn)); | ||
248 | } | 260 | } |
249 | 261 | ||
250 | for_each_online_node(nid) { | 262 | for_each_online_node(nid) { |
251 | unsigned long start = node_start_pfn[nid]; | 263 | unsigned long start = node_start_pfn[nid]; |
252 | unsigned long end = node_end_pfn[nid]; | 264 | unsigned long end = min(node_end_pfn[nid], max_pfn); |
253 | 265 | ||
254 | memory_present(nid, start, end); | 266 | memory_present(nid, start, end); |
255 | node_remap_size[nid] = node_memmap_size_bytes(nid, start, end); | 267 | node_remap_size[nid] = node_memmap_size_bytes(nid, start, end); |
256 | } | 268 | } |
257 | return 1; | 269 | return 1; |
258 | out_fail: | 270 | out_fail: |
259 | return 0; | ||
260 | } | ||
261 | |||
262 | struct acpi_static_rsdt { | ||
263 | struct acpi_table_rsdt table; | ||
264 | u32 padding[7]; /* Allow for 7 more table entries */ | ||
265 | }; | ||
266 | |||
267 | int __init get_memcfg_from_srat(void) | ||
268 | { | ||
269 | struct acpi_table_header *header = NULL; | ||
270 | struct acpi_table_rsdp *rsdp = NULL; | ||
271 | struct acpi_table_rsdt *rsdt = NULL; | ||
272 | acpi_native_uint rsdp_address = 0; | ||
273 | struct acpi_static_rsdt saved_rsdt; | ||
274 | int tables = 0; | ||
275 | int i = 0; | ||
276 | |||
277 | rsdp_address = acpi_os_get_root_pointer(); | ||
278 | if (!rsdp_address) { | ||
279 | printk("%s: System description tables not found\n", | ||
280 | __func__); | ||
281 | goto out_err; | ||
282 | } | ||
283 | |||
284 | printk("%s: assigning address to rsdp\n", __func__); | ||
285 | rsdp = (struct acpi_table_rsdp *)(u32)rsdp_address; | ||
286 | if (!rsdp) { | ||
287 | printk("%s: Didn't find ACPI root!\n", __func__); | ||
288 | goto out_err; | ||
289 | } | ||
290 | |||
291 | printk(KERN_INFO "%.8s v%d [%.6s]\n", rsdp->signature, rsdp->revision, | ||
292 | rsdp->oem_id); | ||
293 | |||
294 | if (strncmp(rsdp->signature, ACPI_SIG_RSDP,strlen(ACPI_SIG_RSDP))) { | ||
295 | printk(KERN_WARNING "%s: RSDP table signature incorrect\n", __func__); | ||
296 | goto out_err; | ||
297 | } | ||
298 | |||
299 | rsdt = (struct acpi_table_rsdt *) | ||
300 | early_ioremap(rsdp->rsdt_physical_address, sizeof(struct acpi_table_rsdt)); | ||
301 | |||
302 | if (!rsdt) { | ||
303 | printk(KERN_WARNING | ||
304 | "%s: ACPI: Invalid root system description tables (RSDT)\n", | ||
305 | __func__); | ||
306 | goto out_err; | ||
307 | } | ||
308 | |||
309 | header = &rsdt->header; | ||
310 | |||
311 | if (strncmp(header->signature, ACPI_SIG_RSDT, strlen(ACPI_SIG_RSDT))) { | ||
312 | printk(KERN_WARNING "ACPI: RSDT signature incorrect\n"); | ||
313 | goto out_err; | ||
314 | } | ||
315 | |||
316 | /* | ||
317 | * The number of tables is computed by taking the | ||
318 | * size of all entries (header size minus total | ||
319 | * size of RSDT) divided by the size of each entry | ||
320 | * (4-byte table pointers). | ||
321 | */ | ||
322 | tables = (header->length - sizeof(struct acpi_table_header)) / 4; | ||
323 | |||
324 | if (!tables) | ||
325 | goto out_err; | ||
326 | |||
327 | memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt)); | ||
328 | |||
329 | if (saved_rsdt.table.header.length > sizeof(saved_rsdt)) { | ||
330 | printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n", | ||
331 | saved_rsdt.table.header.length); | ||
332 | goto out_err; | ||
333 | } | ||
334 | |||
335 | printk("Begin SRAT table scan....\n"); | ||
336 | |||
337 | for (i = 0; i < tables; i++) { | ||
338 | /* Map in header, then map in full table length. */ | ||
339 | header = (struct acpi_table_header *) | ||
340 | early_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header)); | ||
341 | if (!header) | ||
342 | break; | ||
343 | header = (struct acpi_table_header *) | ||
344 | early_ioremap(saved_rsdt.table.table_offset_entry[i], header->length); | ||
345 | if (!header) | ||
346 | break; | ||
347 | |||
348 | if (strncmp((char *) &header->signature, ACPI_SIG_SRAT, 4)) | ||
349 | continue; | ||
350 | |||
351 | /* we've found the srat table. don't need to look at any more tables */ | ||
352 | return acpi20_parse_srat((struct acpi_table_srat *)header); | ||
353 | } | ||
354 | out_err: | ||
355 | remove_all_active_ranges(); | ||
356 | printk("failed to get NUMA memory information from SRAT table\n"); | 271 | printk("failed to get NUMA memory information from SRAT table\n"); |
357 | return 0; | 272 | return 0; |
358 | } | 273 | } |
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index ae751094eba9..d67ce5f044ba 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c | |||
@@ -36,7 +36,9 @@ static struct rio_table_hdr *rio_table_hdr __initdata; | |||
36 | static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; | 36 | static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; |
37 | static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; | 37 | static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; |
38 | 38 | ||
39 | #ifndef CONFIG_X86_NUMAQ | ||
39 | static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata; | 40 | static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata; |
41 | #endif | ||
40 | 42 | ||
41 | static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) | 43 | static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) |
42 | { | 44 | { |
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index abbf199adebb..1106fac6024d 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c | |||
@@ -2,7 +2,7 @@ | |||
2 | 2 | ||
3 | #include <asm/trampoline.h> | 3 | #include <asm/trampoline.h> |
4 | 4 | ||
5 | /* ready for x86_64, no harm for x86, since it will overwrite after alloc */ | 5 | /* ready for x86_64 and x86 */ |
6 | unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); | 6 | unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); |
7 | 7 | ||
8 | /* | 8 | /* |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 5c7e2fd52075..e72cf0793fbe 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -835,7 +835,7 @@ static __init char *lguest_memory_setup(void) | |||
835 | 835 | ||
836 | /* The Linux bootloader header contains an "e820" memory map: the | 836 | /* The Linux bootloader header contains an "e820" memory map: the |
837 | * Launcher populated the first entry with our memory limit. */ | 837 | * Launcher populated the first entry with our memory limit. */ |
838 | add_memory_region(boot_params.e820_map[0].addr, | 838 | e820_add_region(boot_params.e820_map[0].addr, |
839 | boot_params.e820_map[0].size, | 839 | boot_params.e820_map[0].size, |
840 | boot_params.e820_map[0].type); | 840 | boot_params.e820_map[0].type); |
841 | 841 | ||
@@ -1012,6 +1012,7 @@ __init void lguest_init(void) | |||
1012 | * clobbered. The Launcher places our initial pagetables somewhere at | 1012 | * clobbered. The Launcher places our initial pagetables somewhere at |
1013 | * the top of our physical memory, so we don't need extra space: set | 1013 | * the top of our physical memory, so we don't need extra space: set |
1014 | * init_pg_tables_end to the end of the kernel. */ | 1014 | * init_pg_tables_end to the end of the kernel. */ |
1015 | init_pg_tables_start = __pa(pg0); | ||
1015 | init_pg_tables_end = __pa(pg0); | 1016 | init_pg_tables_end = __pa(pg0); |
1016 | 1017 | ||
1017 | /* Load the %fs segment register (the per-cpu segment register) with | 1018 | /* Load the %fs segment register (the per-cpu segment register) with |
@@ -1065,9 +1066,9 @@ __init void lguest_init(void) | |||
1065 | pm_power_off = lguest_power_off; | 1066 | pm_power_off = lguest_power_off; |
1066 | machine_ops.restart = lguest_restart; | 1067 | machine_ops.restart = lguest_restart; |
1067 | 1068 | ||
1068 | /* Now we're set up, call start_kernel() in init/main.c and we proceed | 1069 | /* Now we're set up, call i386_start_kernel() in head32.c and we proceed |
1069 | * to boot as normal. It never returns. */ | 1070 | * to boot as normal. It never returns. */ |
1070 | start_kernel(); | 1071 | i386_start_kernel(); |
1071 | } | 1072 | } |
1072 | /* | 1073 | /* |
1073 | * This marks the end of stage II of our journey, The Guest. | 1074 | * This marks the end of stage II of our journey, The Guest. |
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 0c28a071824c..2f5e277686b8 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c | |||
@@ -142,45 +142,3 @@ static int __init print_ipi_mode(void) | |||
142 | 142 | ||
143 | late_initcall(print_ipi_mode); | 143 | late_initcall(print_ipi_mode); |
144 | 144 | ||
145 | /** | ||
146 | * machine_specific_memory_setup - Hook for machine specific memory setup. | ||
147 | * | ||
148 | * Description: | ||
149 | * This is included late in kernel/setup.c so that it can make | ||
150 | * use of all of the static functions. | ||
151 | **/ | ||
152 | |||
153 | char * __init machine_specific_memory_setup(void) | ||
154 | { | ||
155 | char *who; | ||
156 | |||
157 | |||
158 | who = "BIOS-e820"; | ||
159 | |||
160 | /* | ||
161 | * Try to copy the BIOS-supplied E820-map. | ||
162 | * | ||
163 | * Otherwise fake a memory map; one section from 0k->640k, | ||
164 | * the next section from 1mb->appropriate_mem_k | ||
165 | */ | ||
166 | sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); | ||
167 | if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) | ||
168 | < 0) { | ||
169 | unsigned long mem_size; | ||
170 | |||
171 | /* compare results from other methods and take the greater */ | ||
172 | if (boot_params.alt_mem_k | ||
173 | < boot_params.screen_info.ext_mem_k) { | ||
174 | mem_size = boot_params.screen_info.ext_mem_k; | ||
175 | who = "BIOS-88"; | ||
176 | } else { | ||
177 | mem_size = boot_params.alt_mem_k; | ||
178 | who = "BIOS-e801"; | ||
179 | } | ||
180 | |||
181 | e820.nr_map = 0; | ||
182 | add_memory_region(0, LOWMEMSIZE(), E820_RAM); | ||
183 | add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM); | ||
184 | } | ||
185 | return who; | ||
186 | } | ||
diff --git a/arch/x86/mach-es7000/Makefile b/arch/x86/mach-es7000/Makefile index 69dd4da218dc..3ef8b43b62fc 100644 --- a/arch/x86/mach-es7000/Makefile +++ b/arch/x86/mach-es7000/Makefile | |||
@@ -3,4 +3,3 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_X86_ES7000) := es7000plat.o | 5 | obj-$(CONFIG_X86_ES7000) := es7000plat.o |
6 | obj-$(CONFIG_X86_GENERICARCH) := es7000plat.o | ||
diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/mach-es7000/es7000plat.c index f5d6f7d8b86e..4354ce804889 100644 --- a/arch/x86/mach-es7000/es7000plat.c +++ b/arch/x86/mach-es7000/es7000plat.c | |||
@@ -52,6 +52,8 @@ static struct mip_reg *host_reg; | |||
52 | static int mip_port; | 52 | static int mip_port; |
53 | static unsigned long mip_addr, host_addr; | 53 | static unsigned long mip_addr, host_addr; |
54 | 54 | ||
55 | int es7000_plat; | ||
56 | |||
55 | /* | 57 | /* |
56 | * GSI override for ES7000 platforms. | 58 | * GSI override for ES7000 platforms. |
57 | */ | 59 | */ |
@@ -175,53 +177,6 @@ find_unisys_acpi_oem_table(unsigned long *oem_addr) | |||
175 | } | 177 | } |
176 | #endif | 178 | #endif |
177 | 179 | ||
178 | /* | ||
179 | * This file also gets compiled if CONFIG_X86_GENERICARCH is set. Generic | ||
180 | * arch already has got following function definitions (asm-generic/es7000.c) | ||
181 | * hence no need to define these for that case. | ||
182 | */ | ||
183 | #ifndef CONFIG_X86_GENERICARCH | ||
184 | void es7000_sw_apic(void); | ||
185 | void __init enable_apic_mode(void) | ||
186 | { | ||
187 | es7000_sw_apic(); | ||
188 | return; | ||
189 | } | ||
190 | |||
191 | __init int mps_oem_check(struct mp_config_table *mpc, char *oem, | ||
192 | char *productid) | ||
193 | { | ||
194 | if (mpc->mpc_oemptr) { | ||
195 | struct mp_config_oemtable *oem_table = | ||
196 | (struct mp_config_oemtable *)mpc->mpc_oemptr; | ||
197 | if (!strncmp(oem, "UNISYS", 6)) | ||
198 | return parse_unisys_oem((char *)oem_table); | ||
199 | } | ||
200 | return 0; | ||
201 | } | ||
202 | #ifdef CONFIG_ACPI | ||
203 | /* Hook from generic ACPI tables.c */ | ||
204 | int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
205 | { | ||
206 | unsigned long oem_addr; | ||
207 | if (!find_unisys_acpi_oem_table(&oem_addr)) { | ||
208 | if (es7000_check_dsdt()) | ||
209 | return parse_unisys_oem((char *)oem_addr); | ||
210 | else { | ||
211 | setup_unisys(); | ||
212 | return 1; | ||
213 | } | ||
214 | } | ||
215 | return 0; | ||
216 | } | ||
217 | #else | ||
218 | int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
219 | { | ||
220 | return 0; | ||
221 | } | ||
222 | #endif | ||
223 | #endif /* CONFIG_X86_GENERICARCH */ | ||
224 | |||
225 | static void | 180 | static void |
226 | es7000_spin(int n) | 181 | es7000_spin(int n) |
227 | { | 182 | { |
diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile index 19d6d407737b..0dbd7803a1d5 100644 --- a/arch/x86/mach-generic/Makefile +++ b/arch/x86/mach-generic/Makefile | |||
@@ -2,7 +2,11 @@ | |||
2 | # Makefile for the generic architecture | 2 | # Makefile for the generic architecture |
3 | # | 3 | # |
4 | 4 | ||
5 | EXTRA_CFLAGS := -Iarch/x86/kernel | 5 | EXTRA_CFLAGS := -Iarch/x86/kernel |
6 | 6 | ||
7 | obj-y := probe.o summit.o bigsmp.o es7000.o default.o | 7 | obj-y := probe.o default.o |
8 | obj-y += ../../x86/mach-es7000/ | 8 | obj-$(CONFIG_X86_NUMAQ) += numaq.o |
9 | obj-$(CONFIG_X86_SUMMIT) += summit.o | ||
10 | obj-$(CONFIG_X86_BIGSMP) += bigsmp.o | ||
11 | obj-$(CONFIG_X86_ES7000) += es7000.o | ||
12 | obj-$(CONFIG_X86_ES7000) += ../../x86/mach-es7000/ | ||
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 95fc463056d0..59d771714559 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c | |||
@@ -23,10 +23,8 @@ static int dmi_bigsmp; /* can be set by dmi scanners */ | |||
23 | 23 | ||
24 | static int hp_ht_bigsmp(const struct dmi_system_id *d) | 24 | static int hp_ht_bigsmp(const struct dmi_system_id *d) |
25 | { | 25 | { |
26 | #ifdef CONFIG_X86_GENERICARCH | ||
27 | printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); | 26 | printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); |
28 | dmi_bigsmp = 1; | 27 | dmi_bigsmp = 1; |
29 | #endif | ||
30 | return 0; | 28 | return 0; |
31 | } | 29 | } |
32 | 30 | ||
@@ -48,7 +46,7 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { | |||
48 | static int probe_bigsmp(void) | 46 | static int probe_bigsmp(void) |
49 | { | 47 | { |
50 | if (def_to_bigsmp) | 48 | if (def_to_bigsmp) |
51 | dmi_bigsmp = 1; | 49 | dmi_bigsmp = 1; |
52 | else | 50 | else |
53 | dmi_check_system(bigsmp_dmi_table); | 51 | dmi_check_system(bigsmp_dmi_table); |
54 | return dmi_bigsmp; | 52 | return dmi_bigsmp; |
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c new file mode 100644 index 000000000000..8091e68764c4 --- /dev/null +++ b/arch/x86/mach-generic/numaq.c | |||
@@ -0,0 +1,41 @@ | |||
1 | /* | ||
2 | * APIC driver for the IBM NUMAQ chipset. | ||
3 | */ | ||
4 | #define APIC_DEFINITION 1 | ||
5 | #include <linux/threads.h> | ||
6 | #include <linux/cpumask.h> | ||
7 | #include <linux/smp.h> | ||
8 | #include <asm/mpspec.h> | ||
9 | #include <asm/genapic.h> | ||
10 | #include <asm/fixmap.h> | ||
11 | #include <asm/apicdef.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/string.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <asm/mach-numaq/mach_apic.h> | ||
16 | #include <asm/mach-numaq/mach_apicdef.h> | ||
17 | #include <asm/mach-numaq/mach_ipi.h> | ||
18 | #include <asm/mach-numaq/mach_mpparse.h> | ||
19 | #include <asm/mach-numaq/mach_wakecpu.h> | ||
20 | #include <asm/numaq.h> | ||
21 | |||
22 | static int mps_oem_check(struct mp_config_table *mpc, char *oem, | ||
23 | char *productid) | ||
24 | { | ||
25 | numaq_mps_oem_check(mpc, oem, productid); | ||
26 | return found_numaq; | ||
27 | } | ||
28 | |||
29 | static int probe_numaq(void) | ||
30 | { | ||
31 | /* already know from get_memcfg_numaq() */ | ||
32 | return found_numaq; | ||
33 | } | ||
34 | |||
35 | /* Hook from generic ACPI tables.c */ | ||
36 | static int acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
37 | { | ||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); | ||
diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c index c5ae751b994a..5a7e4619e1c4 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/mach-generic/probe.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <asm/apicdef.h> | 16 | #include <asm/apicdef.h> |
17 | #include <asm/genapic.h> | 17 | #include <asm/genapic.h> |
18 | 18 | ||
19 | extern struct genapic apic_numaq; | ||
19 | extern struct genapic apic_summit; | 20 | extern struct genapic apic_summit; |
20 | extern struct genapic apic_bigsmp; | 21 | extern struct genapic apic_bigsmp; |
21 | extern struct genapic apic_es7000; | 22 | extern struct genapic apic_es7000; |
@@ -24,9 +25,18 @@ extern struct genapic apic_default; | |||
24 | struct genapic *genapic = &apic_default; | 25 | struct genapic *genapic = &apic_default; |
25 | 26 | ||
26 | static struct genapic *apic_probe[] __initdata = { | 27 | static struct genapic *apic_probe[] __initdata = { |
28 | #ifdef CONFIG_X86_NUMAQ | ||
29 | &apic_numaq, | ||
30 | #endif | ||
31 | #ifdef CONFIG_X86_SUMMIT | ||
27 | &apic_summit, | 32 | &apic_summit, |
33 | #endif | ||
34 | #ifdef CONFIG_X86_BIGSMP | ||
28 | &apic_bigsmp, | 35 | &apic_bigsmp, |
36 | #endif | ||
37 | #ifdef CONFIG_X86_ES7000 | ||
29 | &apic_es7000, | 38 | &apic_es7000, |
39 | #endif | ||
30 | &apic_default, /* must be last */ | 40 | &apic_default, /* must be last */ |
31 | NULL, | 41 | NULL, |
32 | }; | 42 | }; |
@@ -54,6 +64,7 @@ early_param("apic", parse_apic); | |||
54 | 64 | ||
55 | void __init generic_bigsmp_probe(void) | 65 | void __init generic_bigsmp_probe(void) |
56 | { | 66 | { |
67 | #ifdef CONFIG_X86_BIGSMP | ||
57 | /* | 68 | /* |
58 | * This routine is used to switch to bigsmp mode when | 69 | * This routine is used to switch to bigsmp mode when |
59 | * - There is no apic= option specified by the user | 70 | * - There is no apic= option specified by the user |
@@ -67,6 +78,7 @@ void __init generic_bigsmp_probe(void) | |||
67 | printk(KERN_INFO "Overriding APIC driver with %s\n", | 78 | printk(KERN_INFO "Overriding APIC driver with %s\n", |
68 | genapic->name); | 79 | genapic->name); |
69 | } | 80 | } |
81 | #endif | ||
70 | } | 82 | } |
71 | 83 | ||
72 | void __init generic_apic_probe(void) | 84 | void __init generic_apic_probe(void) |
@@ -88,7 +100,8 @@ void __init generic_apic_probe(void) | |||
88 | 100 | ||
89 | /* These functions can switch the APIC even after the initial ->probe() */ | 101 | /* These functions can switch the APIC even after the initial ->probe() */ |
90 | 102 | ||
91 | int __init mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid) | 103 | int __init mps_oem_check(struct mp_config_table *mpc, char *oem, |
104 | char *productid) | ||
92 | { | 105 | { |
93 | int i; | 106 | int i; |
94 | for (i = 0; apic_probe[i]; ++i) { | 107 | for (i = 0; apic_probe[i]; ++i) { |
diff --git a/arch/x86/mach-visws/mpparse.c b/arch/x86/mach-visws/mpparse.c index 57484e91ab90..a2fb78c0d154 100644 --- a/arch/x86/mach-visws/mpparse.c +++ b/arch/x86/mach-visws/mpparse.c | |||
@@ -8,11 +8,6 @@ | |||
8 | #include "cobalt.h" | 8 | #include "cobalt.h" |
9 | #include "mach_apic.h" | 9 | #include "mach_apic.h" |
10 | 10 | ||
11 | /* Have we found an MP table */ | ||
12 | int smp_found_config; | ||
13 | |||
14 | int pic_mode; | ||
15 | |||
16 | extern unsigned int __cpuinitdata maxcpus; | 11 | extern unsigned int __cpuinitdata maxcpus; |
17 | 12 | ||
18 | /* | 13 | /* |
@@ -76,7 +71,9 @@ void __init find_smp_config(void) | |||
76 | if (ncpus > maxcpus) | 71 | if (ncpus > maxcpus) |
77 | ncpus = maxcpus; | 72 | ncpus = maxcpus; |
78 | 73 | ||
74 | #ifdef CONFIG_X86_LOCAL_APIC | ||
79 | smp_found_config = 1; | 75 | smp_found_config = 1; |
76 | #endif | ||
80 | while (ncpus--) | 77 | while (ncpus--) |
81 | MP_processor_info(mp++); | 78 | MP_processor_info(mp++); |
82 | 79 | ||
diff --git a/arch/x86/mach-visws/setup.c b/arch/x86/mach-visws/setup.c index de4c9dbd086f..d67868ec9b7f 100644 --- a/arch/x86/mach-visws/setup.c +++ b/arch/x86/mach-visws/setup.c | |||
@@ -175,9 +175,9 @@ char * __init machine_specific_memory_setup(void) | |||
175 | sgivwfb_mem_size &= ~((1 << 20) - 1); | 175 | sgivwfb_mem_size &= ~((1 << 20) - 1); |
176 | sgivwfb_mem_phys = mem_size - gfx_mem_size; | 176 | sgivwfb_mem_phys = mem_size - gfx_mem_size; |
177 | 177 | ||
178 | add_memory_region(0, LOWMEMSIZE(), E820_RAM); | 178 | e820_add_region(0, LOWMEMSIZE(), E820_RAM); |
179 | add_memory_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM); | 179 | e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM); |
180 | add_memory_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED); | 180 | e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED); |
181 | 181 | ||
182 | return "PROM"; | 182 | return "PROM"; |
183 | } | 183 | } |
diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c index 5ae5466b9eb9..6bbdd633864c 100644 --- a/arch/x86/mach-voyager/setup.c +++ b/arch/x86/mach-voyager/setup.c | |||
@@ -62,6 +62,7 @@ void __init time_init_hook(void) | |||
62 | char *__init machine_specific_memory_setup(void) | 62 | char *__init machine_specific_memory_setup(void) |
63 | { | 63 | { |
64 | char *who; | 64 | char *who; |
65 | int new_nr; | ||
65 | 66 | ||
66 | who = "NOT VOYAGER"; | 67 | who = "NOT VOYAGER"; |
67 | 68 | ||
@@ -73,7 +74,7 @@ char *__init machine_specific_memory_setup(void) | |||
73 | 74 | ||
74 | e820.nr_map = 0; | 75 | e820.nr_map = 0; |
75 | for (i = 0; voyager_memory_detect(i, &addr, &length); i++) { | 76 | for (i = 0; voyager_memory_detect(i, &addr, &length); i++) { |
76 | add_memory_region(addr, length, E820_RAM); | 77 | e820_add_region(addr, length, E820_RAM); |
77 | } | 78 | } |
78 | return who; | 79 | return who; |
79 | } else if (voyager_level == 4) { | 80 | } else if (voyager_level == 4) { |
@@ -91,43 +92,17 @@ char *__init machine_specific_memory_setup(void) | |||
91 | tom = (boot_params.screen_info.ext_mem_k) << 10; | 92 | tom = (boot_params.screen_info.ext_mem_k) << 10; |
92 | } | 93 | } |
93 | who = "Voyager-TOM"; | 94 | who = "Voyager-TOM"; |
94 | add_memory_region(0, 0x9f000, E820_RAM); | 95 | e820_add_region(0, 0x9f000, E820_RAM); |
95 | /* map from 1M to top of memory */ | 96 | /* map from 1M to top of memory */ |
96 | add_memory_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024, | 97 | e820_add_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024, |
97 | E820_RAM); | 98 | E820_RAM); |
98 | /* FIXME: Should check the ASICs to see if I need to | 99 | /* FIXME: Should check the ASICs to see if I need to |
99 | * take out the 8M window. Just do it at the moment | 100 | * take out the 8M window. Just do it at the moment |
100 | * */ | 101 | * */ |
101 | add_memory_region(8 * 1024 * 1024, 8 * 1024 * 1024, | 102 | e820_add_region(8 * 1024 * 1024, 8 * 1024 * 1024, |
102 | E820_RESERVED); | 103 | E820_RESERVED); |
103 | return who; | 104 | return who; |
104 | } | 105 | } |
105 | 106 | ||
106 | who = "BIOS-e820"; | 107 | return default_machine_specific_memory_setup(); |
107 | |||
108 | /* | ||
109 | * Try to copy the BIOS-supplied E820-map. | ||
110 | * | ||
111 | * Otherwise fake a memory map; one section from 0k->640k, | ||
112 | * the next section from 1mb->appropriate_mem_k | ||
113 | */ | ||
114 | sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); | ||
115 | if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) | ||
116 | < 0) { | ||
117 | unsigned long mem_size; | ||
118 | |||
119 | /* compare results from other methods and take the greater */ | ||
120 | if (boot_params.alt_mem_k < boot_params.screen_info.ext_mem_k) { | ||
121 | mem_size = boot_params.screen_info.ext_mem_k; | ||
122 | who = "BIOS-88"; | ||
123 | } else { | ||
124 | mem_size = boot_params.alt_mem_k; | ||
125 | who = "BIOS-e801"; | ||
126 | } | ||
127 | |||
128 | e820.nr_map = 0; | ||
129 | add_memory_region(0, LOWMEMSIZE(), E820_RAM); | ||
130 | add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM); | ||
131 | } | ||
132 | return who; | ||
133 | } | 108 | } |
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 8acbf0cdf1a5..8dedd01e909f 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c | |||
@@ -59,11 +59,6 @@ __u32 voyager_quad_processors = 0; | |||
59 | * activity count. Finally exported by i386_ksyms.c */ | 59 | * activity count. Finally exported by i386_ksyms.c */ |
60 | static int voyager_extended_cpus = 1; | 60 | static int voyager_extended_cpus = 1; |
61 | 61 | ||
62 | /* Have we found an SMP box - used by time.c to do the profiling | ||
63 | interrupt for timeslicing; do not set to 1 until the per CPU timer | ||
64 | interrupt is active */ | ||
65 | int smp_found_config = 0; | ||
66 | |||
67 | /* Used for the invalidate map that's also checked in the spinlock */ | 62 | /* Used for the invalidate map that's also checked in the spinlock */ |
68 | static volatile unsigned long smp_invalidate_needed; | 63 | static volatile unsigned long smp_invalidate_needed; |
69 | 64 | ||
@@ -1137,15 +1132,6 @@ void flush_tlb_all(void) | |||
1137 | on_each_cpu(do_flush_tlb_all, 0, 1, 1); | 1132 | on_each_cpu(do_flush_tlb_all, 0, 1, 1); |
1138 | } | 1133 | } |
1139 | 1134 | ||
1140 | /* used to set up the trampoline for other CPUs when the memory manager | ||
1141 | * is sorted out */ | ||
1142 | void __init smp_alloc_memory(void) | ||
1143 | { | ||
1144 | trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE); | ||
1145 | if (__pa(trampoline_base) >= 0x93000) | ||
1146 | BUG(); | ||
1147 | } | ||
1148 | |||
1149 | /* send a reschedule CPI to one CPU by physical CPU number*/ | 1135 | /* send a reschedule CPI to one CPU by physical CPU number*/ |
1150 | static void voyager_smp_send_reschedule(int cpu) | 1136 | static void voyager_smp_send_reschedule(int cpu) |
1151 | { | 1137 | { |
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 8b4eac0ca07d..a2f73ba42b8b 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <asm/setup.h> | 38 | #include <asm/setup.h> |
39 | #include <asm/mmzone.h> | 39 | #include <asm/mmzone.h> |
40 | #include <asm/bios_ebda.h> | 40 | #include <asm/bios_ebda.h> |
41 | #include <asm/proto.h> | ||
41 | 42 | ||
42 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | 43 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; |
43 | EXPORT_SYMBOL(node_data); | 44 | EXPORT_SYMBOL(node_data); |
@@ -59,14 +60,14 @@ unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly; | |||
59 | /* | 60 | /* |
60 | * 4) physnode_map - the mapping between a pfn and owning node | 61 | * 4) physnode_map - the mapping between a pfn and owning node |
61 | * physnode_map keeps track of the physical memory layout of a generic | 62 | * physnode_map keeps track of the physical memory layout of a generic |
62 | * numa node on a 256Mb break (each element of the array will | 63 | * numa node on a 64Mb break (each element of the array will |
63 | * represent 256Mb of memory and will be marked by the node id. so, | 64 | * represent 64Mb of memory and will be marked by the node id. so, |
64 | * if the first gig is on node 0, and the second gig is on node 1 | 65 | * if the first gig is on node 0, and the second gig is on node 1 |
65 | * physnode_map will contain: | 66 | * physnode_map will contain: |
66 | * | 67 | * |
67 | * physnode_map[0-3] = 0; | 68 | * physnode_map[0-15] = 0; |
68 | * physnode_map[4-7] = 1; | 69 | * physnode_map[16-31] = 1; |
69 | * physnode_map[8- ] = -1; | 70 | * physnode_map[32- ] = -1; |
70 | */ | 71 | */ |
71 | s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; | 72 | s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; |
72 | EXPORT_SYMBOL(physnode_map); | 73 | EXPORT_SYMBOL(physnode_map); |
@@ -81,9 +82,9 @@ void memory_present(int nid, unsigned long start, unsigned long end) | |||
81 | printk(KERN_DEBUG " "); | 82 | printk(KERN_DEBUG " "); |
82 | for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) { | 83 | for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) { |
83 | physnode_map[pfn / PAGES_PER_ELEMENT] = nid; | 84 | physnode_map[pfn / PAGES_PER_ELEMENT] = nid; |
84 | printk("%ld ", pfn); | 85 | printk(KERN_CONT "%ld ", pfn); |
85 | } | 86 | } |
86 | printk("\n"); | 87 | printk(KERN_CONT "\n"); |
87 | } | 88 | } |
88 | 89 | ||
89 | unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, | 90 | unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, |
@@ -99,7 +100,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, | |||
99 | #endif | 100 | #endif |
100 | 101 | ||
101 | extern unsigned long find_max_low_pfn(void); | 102 | extern unsigned long find_max_low_pfn(void); |
102 | extern void add_one_highpage_init(struct page *, int, int); | ||
103 | extern unsigned long highend_pfn, highstart_pfn; | 103 | extern unsigned long highend_pfn, highstart_pfn; |
104 | 104 | ||
105 | #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) | 105 | #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) |
@@ -119,11 +119,11 @@ int __init get_memcfg_numa_flat(void) | |||
119 | { | 119 | { |
120 | printk("NUMA - single node, flat memory mode\n"); | 120 | printk("NUMA - single node, flat memory mode\n"); |
121 | 121 | ||
122 | /* Run the memory configuration and find the top of memory. */ | ||
123 | propagate_e820_map(); | ||
124 | node_start_pfn[0] = 0; | 122 | node_start_pfn[0] = 0; |
125 | node_end_pfn[0] = max_pfn; | 123 | node_end_pfn[0] = max_pfn; |
124 | e820_register_active_regions(0, 0, max_pfn); | ||
126 | memory_present(0, 0, max_pfn); | 125 | memory_present(0, 0, max_pfn); |
126 | node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn); | ||
127 | 127 | ||
128 | /* Indicate there is one node available. */ | 128 | /* Indicate there is one node available. */ |
129 | nodes_clear(node_online_map); | 129 | nodes_clear(node_online_map); |
@@ -159,9 +159,17 @@ static void __init allocate_pgdat(int nid) | |||
159 | if (nid && node_has_online_mem(nid) && node_remap_start_vaddr[nid]) | 159 | if (nid && node_has_online_mem(nid) && node_remap_start_vaddr[nid]) |
160 | NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; | 160 | NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; |
161 | else { | 161 | else { |
162 | NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn)); | 162 | unsigned long pgdat_phys; |
163 | min_low_pfn += PFN_UP(sizeof(pg_data_t)); | 163 | pgdat_phys = find_e820_area(min_low_pfn<<PAGE_SHIFT, |
164 | (nid ? max_low_pfn:max_pfn_mapped)<<PAGE_SHIFT, | ||
165 | sizeof(pg_data_t), | ||
166 | PAGE_SIZE); | ||
167 | NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT)); | ||
168 | reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t), | ||
169 | "NODE_DATA"); | ||
164 | } | 170 | } |
171 | printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n", | ||
172 | nid, (unsigned long)NODE_DATA(nid)); | ||
165 | } | 173 | } |
166 | 174 | ||
167 | /* | 175 | /* |
@@ -199,8 +207,12 @@ void __init remap_numa_kva(void) | |||
199 | int node; | 207 | int node; |
200 | 208 | ||
201 | for_each_online_node(node) { | 209 | for_each_online_node(node) { |
210 | printk(KERN_DEBUG "remap_numa_kva: node %d\n", node); | ||
202 | for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { | 211 | for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { |
203 | vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); | 212 | vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); |
213 | printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n", | ||
214 | (unsigned long)vaddr, | ||
215 | node_remap_start_pfn[node] + pfn); | ||
204 | set_pmd_pfn((ulong) vaddr, | 216 | set_pmd_pfn((ulong) vaddr, |
205 | node_remap_start_pfn[node] + pfn, | 217 | node_remap_start_pfn[node] + pfn, |
206 | PAGE_KERNEL_LARGE); | 218 | PAGE_KERNEL_LARGE); |
@@ -212,17 +224,21 @@ static unsigned long calculate_numa_remap_pages(void) | |||
212 | { | 224 | { |
213 | int nid; | 225 | int nid; |
214 | unsigned long size, reserve_pages = 0; | 226 | unsigned long size, reserve_pages = 0; |
215 | unsigned long pfn; | ||
216 | 227 | ||
217 | for_each_online_node(nid) { | 228 | for_each_online_node(nid) { |
218 | unsigned old_end_pfn = node_end_pfn[nid]; | 229 | u64 node_kva_target; |
230 | u64 node_kva_final; | ||
219 | 231 | ||
220 | /* | 232 | /* |
221 | * The acpi/srat node info can show hot-add memory zones | 234 | * The acpi/srat node info can show hot-add memory zones |
222 | * where memory could be added but not currently present. | 234 | * where memory could be added but not currently present. |
223 | */ | 235 | */ |
236 | printk("node %d pfn: [%lx - %lx]\n", | ||
237 | nid, node_start_pfn[nid], node_end_pfn[nid]); | ||
224 | if (node_start_pfn[nid] > max_pfn) | 238 | if (node_start_pfn[nid] > max_pfn) |
225 | continue; | 239 | continue; |
240 | if (!node_end_pfn[nid]) | ||
241 | continue; | ||
226 | if (node_end_pfn[nid] > max_pfn) | 242 | if (node_end_pfn[nid] > max_pfn) |
227 | node_end_pfn[nid] = max_pfn; | 243 | node_end_pfn[nid] = max_pfn; |
228 | 244 | ||
@@ -234,39 +250,45 @@ static unsigned long calculate_numa_remap_pages(void) | |||
234 | /* now the roundup is correct, convert to PAGE_SIZE pages */ | 250 | /* now the roundup is correct, convert to PAGE_SIZE pages */ |
235 | size = size * PTRS_PER_PTE; | 251 | size = size * PTRS_PER_PTE; |
236 | 252 | ||
237 | /* | 253 | node_kva_target = round_down(node_end_pfn[nid] - size, |
238 | * Validate the region we are allocating only contains valid | 254 | PTRS_PER_PTE); |
239 | * pages. | 255 | node_kva_target <<= PAGE_SHIFT; |
240 | */ | 256 | do { |
241 | for (pfn = node_end_pfn[nid] - size; | 257 | node_kva_final = find_e820_area(node_kva_target, |
242 | pfn < node_end_pfn[nid]; pfn++) | 258 | ((u64)node_end_pfn[nid])<<PAGE_SHIFT, |
243 | if (!page_is_ram(pfn)) | 259 | ((u64)size)<<PAGE_SHIFT, |
244 | break; | 260 | LARGE_PAGE_BYTES); |
261 | node_kva_target -= LARGE_PAGE_BYTES; | ||
262 | } while (node_kva_final == -1ULL && | ||
263 | (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid])); | ||
264 | |||
265 | if (node_kva_final == -1ULL) | ||
266 | panic("Can not get kva ram\n"); | ||
245 | 267 | ||
246 | if (pfn != node_end_pfn[nid]) | ||
247 | size = 0; | ||
248 | |||
249 | printk("Reserving %ld pages of KVA for lmem_map of node %d\n", | ||
250 | size, nid); | ||
251 | node_remap_size[nid] = size; | 268 | node_remap_size[nid] = size; |
252 | node_remap_offset[nid] = reserve_pages; | 269 | node_remap_offset[nid] = reserve_pages; |
253 | reserve_pages += size; | 270 | reserve_pages += size; |
254 | printk("Shrinking node %d from %ld pages to %ld pages\n", | 271 | printk("Reserving %ld pages of KVA for lmem_map of node %d at %llx\n", |
255 | nid, node_end_pfn[nid], node_end_pfn[nid] - size); | 272 | size, nid, node_kva_final>>PAGE_SHIFT); |
256 | 273 | ||
257 | if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) { | 274 | /* |
258 | /* | 275 | * prevent kva address below max_low_pfn want it on system |
259 | * Align node_end_pfn[] and node_remap_start_pfn[] to | 276 | * with less memory later. |
260 | * pmd boundary. remap_numa_kva will barf otherwise. | 277 | * layout will be: KVA address , KVA RAM |
261 | */ | 278 | * |
262 | printk("Shrinking node %d further by %ld pages for proper alignment\n", | 279 | * we are supposed to only record the one less than max_low_pfn |
263 | nid, node_end_pfn[nid] & (PTRS_PER_PTE-1)); | 280 | * but we could have some hole in high memory, and it will only |
264 | size += node_end_pfn[nid] & (PTRS_PER_PTE-1); | 281 | * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide |
265 | } | 282 | * to use it as free. |
283 | * So reserve_early here, hope we don't run out of that array | ||
284 | */ | ||
285 | reserve_early(node_kva_final, | ||
286 | node_kva_final+(((u64)size)<<PAGE_SHIFT), | ||
287 | "KVA RAM"); | ||
266 | 288 | ||
267 | node_end_pfn[nid] -= size; | 289 | node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT; |
268 | node_remap_start_pfn[nid] = node_end_pfn[nid]; | 290 | remove_active_range(nid, node_remap_start_pfn[nid], |
269 | shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]); | 291 | node_remap_start_pfn[nid] + size); |
270 | } | 292 | } |
271 | printk("Reserving total of %ld pages for numa KVA remap\n", | 293 | printk("Reserving total of %ld pages for numa KVA remap\n", |
272 | reserve_pages); | 294 | reserve_pages); |
@@ -284,8 +306,7 @@ static void init_remap_allocator(int nid) | |||
284 | 306 | ||
285 | printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, | 307 | printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, |
286 | (ulong) node_remap_start_vaddr[nid], | 308 | (ulong) node_remap_start_vaddr[nid], |
287 | (ulong) pfn_to_kaddr(highstart_pfn | 309 | (ulong) node_remap_end_vaddr[nid]); |
288 | + node_remap_offset[nid] + node_remap_size[nid])); | ||
289 | } | 310 | } |
290 | 311 | ||
291 | extern void setup_bootmem_allocator(void); | 312 | extern void setup_bootmem_allocator(void); |
@@ -293,7 +314,7 @@ unsigned long __init setup_memory(void) | |||
293 | { | 314 | { |
294 | int nid; | 315 | int nid; |
295 | unsigned long system_start_pfn, system_max_low_pfn; | 316 | unsigned long system_start_pfn, system_max_low_pfn; |
296 | unsigned long wasted_pages; | 317 | long kva_target_pfn; |
297 | 318 | ||
298 | /* | 319 | /* |
299 | * When mapping a NUMA machine we allocate the node_mem_map arrays | 320 | * When mapping a NUMA machine we allocate the node_mem_map arrays |
@@ -302,34 +323,38 @@ unsigned long __init setup_memory(void) | |||
302 | * this space and use it to adjust the boundary between ZONE_NORMAL | 323 | * this space and use it to adjust the boundary between ZONE_NORMAL |
303 | * and ZONE_HIGHMEM. | 324 | * and ZONE_HIGHMEM. |
304 | */ | 325 | */ |
326 | |||
327 | /* call find_max_low_pfn at first, it could update max_pfn */ | ||
328 | system_max_low_pfn = max_low_pfn = find_max_low_pfn(); | ||
329 | |||
330 | remove_all_active_ranges(); | ||
305 | get_memcfg_numa(); | 331 | get_memcfg_numa(); |
306 | 332 | ||
307 | kva_pages = calculate_numa_remap_pages(); | 333 | kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE); |
308 | 334 | ||
309 | /* partially used pages are not usable - thus round upwards */ | 335 | /* partially used pages are not usable - thus round upwards */ |
310 | system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); | 336 | system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); |
311 | 337 | ||
312 | kva_start_pfn = find_max_low_pfn() - kva_pages; | 338 | kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE); |
313 | 339 | do { | |
314 | #ifdef CONFIG_BLK_DEV_INITRD | 340 | kva_start_pfn = find_e820_area(kva_target_pfn<<PAGE_SHIFT, |
315 | /* Numa kva area is below the initrd */ | 341 | max_low_pfn<<PAGE_SHIFT, |
316 | if (initrd_start) | 342 | kva_pages<<PAGE_SHIFT, |
317 | kva_start_pfn = PFN_DOWN(initrd_start - PAGE_OFFSET) | 343 | PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT; |
318 | - kva_pages; | 344 | kva_target_pfn -= PTRS_PER_PTE; |
319 | #endif | 345 | } while (kva_start_pfn == -1UL && kva_target_pfn > min_low_pfn); |
320 | 346 | ||
321 | /* | 347 | if (kva_start_pfn == -1UL) |
322 | * We waste pages past at the end of the KVA for no good reason other | 348 | panic("Can not get kva space\n"); |
323 | * than how it is located. This is bad. | ||
324 | */ | ||
325 | wasted_pages = kva_start_pfn & (PTRS_PER_PTE-1); | ||
326 | kva_start_pfn -= wasted_pages; | ||
327 | kva_pages += wasted_pages; | ||
328 | 349 | ||
329 | system_max_low_pfn = max_low_pfn = find_max_low_pfn(); | ||
330 | printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n", | 350 | printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n", |
331 | kva_start_pfn, max_low_pfn); | 351 | kva_start_pfn, max_low_pfn); |
332 | printk("max_pfn = %ld\n", max_pfn); | 352 | printk("max_pfn = %ld\n", max_pfn); |
353 | |||
354 | /* avoid clash with initrd */ | ||
355 | reserve_early(kva_start_pfn<<PAGE_SHIFT, | ||
356 | (kva_start_pfn + kva_pages)<<PAGE_SHIFT, | ||
357 | "KVA PG"); | ||
333 | #ifdef CONFIG_HIGHMEM | 358 | #ifdef CONFIG_HIGHMEM |
334 | highstart_pfn = highend_pfn = max_pfn; | 359 | highstart_pfn = highend_pfn = max_pfn; |
335 | if (max_pfn > system_max_low_pfn) | 360 | if (max_pfn > system_max_low_pfn) |
@@ -365,16 +390,8 @@ unsigned long __init setup_memory(void) | |||
365 | return max_low_pfn; | 390 | return max_low_pfn; |
366 | } | 391 | } |
367 | 392 | ||
368 | void __init numa_kva_reserve(void) | ||
369 | { | ||
370 | if (kva_pages) | ||
371 | reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages), | ||
372 | BOOTMEM_DEFAULT); | ||
373 | } | ||
374 | |||
375 | void __init zone_sizes_init(void) | 393 | void __init zone_sizes_init(void) |
376 | { | 394 | { |
377 | int nid; | ||
378 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | 395 | unsigned long max_zone_pfns[MAX_NR_ZONES]; |
379 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | 396 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); |
380 | max_zone_pfns[ZONE_DMA] = | 397 | max_zone_pfns[ZONE_DMA] = |
@@ -384,27 +401,18 @@ void __init zone_sizes_init(void) | |||
384 | max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; | 401 | max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; |
385 | #endif | 402 | #endif |
386 | 403 | ||
387 | /* If SRAT has not registered memory, register it now */ | ||
388 | if (find_max_pfn_with_active_regions() == 0) { | ||
389 | for_each_online_node(nid) { | ||
390 | if (node_has_online_mem(nid)) | ||
391 | add_active_range(nid, node_start_pfn[nid], | ||
392 | node_end_pfn[nid]); | ||
393 | } | ||
394 | } | ||
395 | |||
396 | free_area_init_nodes(max_zone_pfns); | 404 | free_area_init_nodes(max_zone_pfns); |
397 | return; | 405 | return; |
398 | } | 406 | } |
399 | 407 | ||
400 | void __init set_highmem_pages_init(int bad_ppro) | 408 | void __init set_highmem_pages_init(void) |
401 | { | 409 | { |
402 | #ifdef CONFIG_HIGHMEM | 410 | #ifdef CONFIG_HIGHMEM |
403 | struct zone *zone; | 411 | struct zone *zone; |
404 | struct page *page; | 412 | int nid; |
405 | 413 | ||
406 | for_each_zone(zone) { | 414 | for_each_zone(zone) { |
407 | unsigned long node_pfn, zone_start_pfn, zone_end_pfn; | 415 | unsigned long zone_start_pfn, zone_end_pfn; |
408 | 416 | ||
409 | if (!is_highmem(zone)) | 417 | if (!is_highmem(zone)) |
410 | continue; | 418 | continue; |
@@ -412,16 +420,12 @@ void __init set_highmem_pages_init(int bad_ppro) | |||
412 | zone_start_pfn = zone->zone_start_pfn; | 420 | zone_start_pfn = zone->zone_start_pfn; |
413 | zone_end_pfn = zone_start_pfn + zone->spanned_pages; | 421 | zone_end_pfn = zone_start_pfn + zone->spanned_pages; |
414 | 422 | ||
423 | nid = zone_to_nid(zone); | ||
415 | printk("Initializing %s for node %d (%08lx:%08lx)\n", | 424 | printk("Initializing %s for node %d (%08lx:%08lx)\n", |
416 | zone->name, zone_to_nid(zone), | 425 | zone->name, nid, zone_start_pfn, zone_end_pfn); |
417 | zone_start_pfn, zone_end_pfn); | 426 | |
418 | 427 | add_highpages_with_active_regions(nid, zone_start_pfn, | |
419 | for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) { | 428 | zone_end_pfn); |
420 | if (!pfn_valid(node_pfn)) | ||
421 | continue; | ||
422 | page = pfn_to_page(node_pfn); | ||
423 | add_one_highpage_init(page, node_pfn, bad_ppro); | ||
424 | } | ||
425 | } | 429 | } |
426 | totalram_pages += totalhigh_pages; | 430 | totalram_pages += totalhigh_pages; |
427 | #endif | 431 | #endif |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index d71be0eb0130..65d55056b6e7 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -225,13 +225,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) | |||
225 | update_page_count(PG_LEVEL_4K, pages_4k); | 225 | update_page_count(PG_LEVEL_4K, pages_4k); |
226 | } | 226 | } |
227 | 227 | ||
228 | static inline int page_kills_ppro(unsigned long pagenr) | ||
229 | { | ||
230 | if (pagenr >= 0x70000 && pagenr <= 0x7003F) | ||
231 | return 1; | ||
232 | return 0; | ||
233 | } | ||
234 | |||
235 | /* | 228 | /* |
236 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address | 229 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address |
237 | * is valid. The argument is a physical page number. | 230 | * is valid. The argument is a physical page number. |
@@ -292,29 +285,60 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) | |||
292 | pkmap_page_table = pte; | 285 | pkmap_page_table = pte; |
293 | } | 286 | } |
294 | 287 | ||
295 | void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) | 288 | static void __init add_one_highpage_init(struct page *page, int pfn) |
296 | { | 289 | { |
297 | if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) { | 290 | ClearPageReserved(page); |
298 | ClearPageReserved(page); | 291 | init_page_count(page); |
299 | init_page_count(page); | 292 | __free_page(page); |
300 | __free_page(page); | 293 | totalhigh_pages++; |
301 | totalhigh_pages++; | ||
302 | } else | ||
303 | SetPageReserved(page); | ||
304 | } | 294 | } |
305 | 295 | ||
306 | #ifndef CONFIG_NUMA | 296 | struct add_highpages_data { |
307 | static void __init set_highmem_pages_init(int bad_ppro) | 297 | unsigned long start_pfn; |
298 | unsigned long end_pfn; | ||
299 | }; | ||
300 | |||
301 | static void __init add_highpages_work_fn(unsigned long start_pfn, | ||
302 | unsigned long end_pfn, void *datax) | ||
308 | { | 303 | { |
309 | int pfn; | 304 | int node_pfn; |
305 | struct page *page; | ||
306 | unsigned long final_start_pfn, final_end_pfn; | ||
307 | struct add_highpages_data *data; | ||
310 | 308 | ||
311 | for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) { | 309 | data = (struct add_highpages_data *)datax; |
312 | /* | 310 | |
313 | * Holes under sparsemem might not have no mem_map[]: | 311 | final_start_pfn = max(start_pfn, data->start_pfn); |
314 | */ | 312 | final_end_pfn = min(end_pfn, data->end_pfn); |
315 | if (pfn_valid(pfn)) | 313 | if (final_start_pfn >= final_end_pfn) |
316 | add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); | 314 | return; |
315 | |||
316 | for (node_pfn = final_start_pfn; node_pfn < final_end_pfn; | ||
317 | node_pfn++) { | ||
318 | if (!pfn_valid(node_pfn)) | ||
319 | continue; | ||
320 | page = pfn_to_page(node_pfn); | ||
321 | add_one_highpage_init(page, node_pfn); | ||
317 | } | 322 | } |
323 | |||
324 | } | ||
325 | |||
326 | void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, | ||
327 | unsigned long end_pfn) | ||
328 | { | ||
329 | struct add_highpages_data data; | ||
330 | |||
331 | data.start_pfn = start_pfn; | ||
332 | data.end_pfn = end_pfn; | ||
333 | |||
334 | work_with_active_regions(nid, add_highpages_work_fn, &data); | ||
335 | } | ||
336 | |||
337 | #ifndef CONFIG_NUMA | ||
338 | static void __init set_highmem_pages_init(void) | ||
339 | { | ||
340 | add_highpages_with_active_regions(0, highstart_pfn, highend_pfn); | ||
341 | |||
318 | totalram_pages += totalhigh_pages; | 342 | totalram_pages += totalhigh_pages; |
319 | } | 343 | } |
320 | #endif /* !CONFIG_NUMA */ | 344 | #endif /* !CONFIG_NUMA */ |
@@ -322,7 +346,7 @@ static void __init set_highmem_pages_init(int bad_ppro) | |||
322 | #else | 346 | #else |
323 | # define kmap_init() do { } while (0) | 347 | # define kmap_init() do { } while (0) |
324 | # define permanent_kmaps_init(pgd_base) do { } while (0) | 348 | # define permanent_kmaps_init(pgd_base) do { } while (0) |
325 | # define set_highmem_pages_init(bad_ppro) do { } while (0) | 349 | # define set_highmem_pages_init() do { } while (0) |
326 | #endif /* CONFIG_HIGHMEM */ | 350 | #endif /* CONFIG_HIGHMEM */ |
327 | 351 | ||
328 | pteval_t __PAGE_KERNEL = _PAGE_KERNEL; | 352 | pteval_t __PAGE_KERNEL = _PAGE_KERNEL; |
@@ -569,13 +593,11 @@ static struct kcore_list kcore_mem, kcore_vmalloc; | |||
569 | void __init mem_init(void) | 593 | void __init mem_init(void) |
570 | { | 594 | { |
571 | int codesize, reservedpages, datasize, initsize; | 595 | int codesize, reservedpages, datasize, initsize; |
572 | int tmp, bad_ppro; | 596 | int tmp; |
573 | 597 | ||
574 | #ifdef CONFIG_FLATMEM | 598 | #ifdef CONFIG_FLATMEM |
575 | BUG_ON(!mem_map); | 599 | BUG_ON(!mem_map); |
576 | #endif | 600 | #endif |
577 | bad_ppro = ppro_with_ram_bug(); | ||
578 | |||
579 | /* this will put all low memory onto the freelists */ | 601 | /* this will put all low memory onto the freelists */ |
580 | totalram_pages += free_all_bootmem(); | 602 | totalram_pages += free_all_bootmem(); |
581 | 603 | ||
@@ -587,7 +609,7 @@ void __init mem_init(void) | |||
587 | if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) | 609 | if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) |
588 | reservedpages++; | 610 | reservedpages++; |
589 | 611 | ||
590 | set_highmem_pages_init(bad_ppro); | 612 | set_highmem_pages_init(); |
591 | 613 | ||
592 | codesize = (unsigned long) &_etext - (unsigned long) &_text; | 614 | codesize = (unsigned long) &_etext - (unsigned long) &_text; |
593 | datasize = (unsigned long) &_edata - (unsigned long) &_etext; | 615 | datasize = (unsigned long) &_edata - (unsigned long) &_etext; |
@@ -776,3 +798,9 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
776 | free_init_pages("initrd memory", start, end); | 798 | free_init_pages("initrd memory", start, end); |
777 | } | 799 | } |
778 | #endif | 800 | #endif |
801 | |||
802 | int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, | ||
803 | int flags) | ||
804 | { | ||
805 | return reserve_bootmem(phys, len, flags); | ||
806 | } | ||
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 48623ae628fb..18c6a006e406 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -48,6 +48,18 @@ | |||
48 | #include <asm/numa.h> | 48 | #include <asm/numa.h> |
49 | #include <asm/cacheflush.h> | 49 | #include <asm/cacheflush.h> |
50 | 50 | ||
51 | /* | ||
52 | * PFN of last memory page. | ||
53 | */ | ||
54 | unsigned long end_pfn; | ||
55 | |||
56 | /* | ||
57 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | ||
58 | * The direct mapping extends to max_pfn_mapped, so that we can directly access | ||
59 | * apertures, ACPI and other tables without having to play with fixmaps. | ||
60 | */ | ||
61 | unsigned long max_pfn_mapped; | ||
62 | |||
51 | static unsigned long dma_reserve __initdata; | 63 | static unsigned long dma_reserve __initdata; |
52 | 64 | ||
53 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | 65 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); |
@@ -808,12 +820,14 @@ void free_initrd_mem(unsigned long start, unsigned long end) | |||
808 | } | 820 | } |
809 | #endif | 821 | #endif |
810 | 822 | ||
811 | void __init reserve_bootmem_generic(unsigned long phys, unsigned len) | 823 | int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, |
824 | int flags) | ||
812 | { | 825 | { |
813 | #ifdef CONFIG_NUMA | 826 | #ifdef CONFIG_NUMA |
814 | int nid, next_nid; | 827 | int nid, next_nid; |
815 | #endif | 828 | #endif |
816 | unsigned long pfn = phys >> PAGE_SHIFT; | 829 | unsigned long pfn = phys >> PAGE_SHIFT; |
830 | int ret; | ||
817 | 831 | ||
818 | if (pfn >= end_pfn) { | 832 | if (pfn >= end_pfn) { |
819 | /* | 833 | /* |
@@ -821,11 +835,11 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len) | |||
821 | * firmware tables: | 835 | * firmware tables: |
822 | */ | 836 | */ |
823 | if (pfn < max_pfn_mapped) | 837 | if (pfn < max_pfn_mapped) |
824 | return; | 838 | return -EFAULT; |
825 | 839 | ||
826 | printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n", | 840 | printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n", |
827 | phys, len); | 841 | phys, len); |
828 | return; | 842 | return -EFAULT; |
829 | } | 843 | } |
830 | 844 | ||
831 | /* Should check here against the e820 map to avoid double free */ | 845 | /* Should check here against the e820 map to avoid double free */ |
@@ -833,9 +847,13 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len) | |||
833 | nid = phys_to_nid(phys); | 847 | nid = phys_to_nid(phys); |
834 | next_nid = phys_to_nid(phys + len - 1); | 848 | next_nid = phys_to_nid(phys + len - 1); |
835 | if (nid == next_nid) | 849 | if (nid == next_nid) |
836 | reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT); | 850 | ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags); |
837 | else | 851 | else |
838 | reserve_bootmem(phys, len, BOOTMEM_DEFAULT); | 852 | ret = reserve_bootmem(phys, len, flags); |
853 | |||
854 | if (ret != 0) | ||
855 | return ret; | ||
856 | |||
839 | #else | 857 | #else |
840 | reserve_bootmem(phys, len, BOOTMEM_DEFAULT); | 858 | reserve_bootmem(phys, len, BOOTMEM_DEFAULT); |
841 | #endif | 859 | #endif |
@@ -844,6 +862,8 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len) | |||
844 | dma_reserve += len / PAGE_SIZE; | 862 | dma_reserve += len / PAGE_SIZE; |
845 | set_dma_reserve(dma_reserve); | 863 | set_dma_reserve(dma_reserve); |
846 | } | 864 | } |
865 | |||
866 | return 0; | ||
847 | } | 867 | } |
848 | 868 | ||
849 | int kern_addr_valid(unsigned long addr) | 869 | int kern_addr_valid(unsigned long addr) |
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 0ea66b532c35..317573ec9256 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c | |||
@@ -57,18 +57,22 @@ static __init void early_get_boot_cpu_id(void) | |||
57 | /* | 57 | /* |
58 | * Find possible boot-time SMP configuration: | 58 | * Find possible boot-time SMP configuration: |
59 | */ | 59 | */ |
60 | #ifdef CONFIG_X86_MPPARSE | ||
60 | early_find_smp_config(); | 61 | early_find_smp_config(); |
62 | #endif | ||
61 | #ifdef CONFIG_ACPI | 63 | #ifdef CONFIG_ACPI |
62 | /* | 64 | /* |
63 | * Read APIC information from ACPI tables. | 65 | * Read APIC information from ACPI tables. |
64 | */ | 66 | */ |
65 | early_acpi_boot_init(); | 67 | early_acpi_boot_init(); |
66 | #endif | 68 | #endif |
69 | #ifdef CONFIG_X86_MPPARSE | ||
67 | /* | 70 | /* |
68 | * get boot-time SMP configuration: | 71 | * get boot-time SMP configuration: |
69 | */ | 72 | */ |
70 | if (smp_found_config) | 73 | if (smp_found_config) |
71 | early_get_smp_config(); | 74 | early_get_smp_config(); |
75 | #endif | ||
72 | early_init_lapic_mapping(); | 76 | early_init_lapic_mapping(); |
73 | } | 77 | } |
74 | 78 | ||
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index c5066d519e5d..afb07ffb931d 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -233,7 +233,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
233 | else | 233 | else |
234 | bootmap_start = round_up(start, PAGE_SIZE); | 234 | bootmap_start = round_up(start, PAGE_SIZE); |
235 | /* | 235 | /* |
236 | * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like | 236 | * SMP_CACHE_BYTES could be enough, but init_bootmem_node like |
237 | * to use that to align to PAGE_SIZE | 237 | * to use that to align to PAGE_SIZE |
238 | */ | 238 | */ |
239 | bootmap = early_node_mem(nodeid, bootmap_start, end, | 239 | bootmap = early_node_mem(nodeid, bootmap_start, end, |
diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32 index f647e7e56da4..a34fbf557926 100644 --- a/arch/x86/pci/Makefile_32 +++ b/arch/x86/pci/Makefile_32 | |||
@@ -13,10 +13,11 @@ pci-y := fixup.o | |||
13 | pci-$(CONFIG_ACPI) += acpi.o | 13 | pci-$(CONFIG_ACPI) += acpi.o |
14 | pci-y += legacy.o irq.o | 14 | pci-y += legacy.o irq.o |
15 | 15 | ||
16 | # Careful: VISWS and NUMAQ overrule the pci-y above. The colons are | 16 | # Careful: VISWS overrule the pci-y above. The colons are |
17 | # therefor correct. This needs a proper fix by distangling the code. | 17 | # therefor correct. This needs a proper fix by distangling the code. |
18 | pci-$(CONFIG_X86_VISWS) := visws.o fixup.o | 18 | pci-$(CONFIG_X86_VISWS) := visws.o fixup.o |
19 | pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o | 19 | |
20 | pci-$(CONFIG_X86_NUMAQ) += numa.o | ||
20 | 21 | ||
21 | # Necessary for NUMAQ as well | 22 | # Necessary for NUMAQ as well |
22 | pci-$(CONFIG_NUMA) += mp_bus_to_node.o | 23 | pci-$(CONFIG_NUMA) += mp_bus_to_node.o |
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 15f505d3a78e..d02c598451ec 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c | |||
@@ -388,7 +388,7 @@ static int __init early_fill_mp_bus_info(void) | |||
388 | /* need to take out [0, TOM) for RAM*/ | 388 | /* need to take out [0, TOM) for RAM*/ |
389 | address = MSR_K8_TOP_MEM1; | 389 | address = MSR_K8_TOP_MEM1; |
390 | rdmsrl(address, val); | 390 | rdmsrl(address, val); |
391 | end = (val & 0xffffff8000000ULL); | 391 | end = (val & 0xffffff800000ULL); |
392 | printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); | 392 | printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); |
393 | if (end < (1ULL<<32)) | 393 | if (end < (1ULL<<32)) |
394 | update_range(range, 0, end - 1); | 394 | update_range(range, 0, end - 1); |
@@ -482,7 +482,7 @@ static int __init early_fill_mp_bus_info(void) | |||
482 | /* TOP_MEM2 */ | 482 | /* TOP_MEM2 */ |
483 | address = MSR_K8_TOP_MEM2; | 483 | address = MSR_K8_TOP_MEM2; |
484 | rdmsrl(address, val); | 484 | rdmsrl(address, val); |
485 | end = (val & 0xffffff8000000ULL); | 485 | end = (val & 0xffffff800000ULL); |
486 | printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); | 486 | printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); |
487 | update_range(range, 1ULL<<32, end - 1); | 487 | update_range(range, 1ULL<<32, end - 1); |
488 | } | 488 | } |
diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c index d9afbae5092b..99f1ecd485b5 100644 --- a/arch/x86/pci/numa.c +++ b/arch/x86/pci/numa.c | |||
@@ -6,45 +6,21 @@ | |||
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/nodemask.h> | 7 | #include <linux/nodemask.h> |
8 | #include <mach_apic.h> | 8 | #include <mach_apic.h> |
9 | #include <asm/mpspec.h> | ||
9 | #include "pci.h" | 10 | #include "pci.h" |
10 | 11 | ||
11 | #define XQUAD_PORTIO_BASE 0xfe400000 | 12 | #define XQUAD_PORTIO_BASE 0xfe400000 |
12 | #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ | 13 | #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ |
13 | 14 | ||
14 | int mp_bus_id_to_node[MAX_MP_BUSSES]; | ||
15 | #define BUS2QUAD(global) (mp_bus_id_to_node[global]) | 15 | #define BUS2QUAD(global) (mp_bus_id_to_node[global]) |
16 | 16 | ||
17 | int mp_bus_id_to_local[MAX_MP_BUSSES]; | ||
18 | #define BUS2LOCAL(global) (mp_bus_id_to_local[global]) | 17 | #define BUS2LOCAL(global) (mp_bus_id_to_local[global]) |
19 | 18 | ||
20 | void mpc_oem_bus_info(struct mpc_config_bus *m, char *name, | ||
21 | struct mpc_config_translation *translation) | ||
22 | { | ||
23 | int quad = translation->trans_quad; | ||
24 | int local = translation->trans_local; | ||
25 | |||
26 | mp_bus_id_to_node[m->mpc_busid] = quad; | ||
27 | mp_bus_id_to_local[m->mpc_busid] = local; | ||
28 | printk(KERN_INFO "Bus #%d is %s (node %d)\n", | ||
29 | m->mpc_busid, name, quad); | ||
30 | } | ||
31 | |||
32 | int quad_local_to_mp_bus_id [NR_CPUS/4][4]; | ||
33 | #define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) | 19 | #define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) |
34 | void mpc_oem_pci_bus(struct mpc_config_bus *m, | ||
35 | struct mpc_config_translation *translation) | ||
36 | { | ||
37 | int quad = translation->trans_quad; | ||
38 | int local = translation->trans_local; | ||
39 | |||
40 | quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; | ||
41 | } | ||
42 | 20 | ||
43 | /* Where the IO area was mapped on multiquad, always 0 otherwise */ | 21 | /* Where the IO area was mapped on multiquad, always 0 otherwise */ |
44 | void *xquad_portio; | 22 | void *xquad_portio; |
45 | #ifdef CONFIG_X86_NUMAQ | ||
46 | EXPORT_SYMBOL(xquad_portio); | 23 | EXPORT_SYMBOL(xquad_portio); |
47 | #endif | ||
48 | 24 | ||
49 | #define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) | 25 | #define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) |
50 | 26 | ||
@@ -179,6 +155,9 @@ static int __init pci_numa_init(void) | |||
179 | { | 155 | { |
180 | int quad; | 156 | int quad; |
181 | 157 | ||
158 | if (!found_numaq) | ||
159 | return 0; | ||
160 | |||
182 | raw_pci_ops = &pci_direct_conf1_mq; | 161 | raw_pci_ops = &pci_direct_conf1_mq; |
183 | 162 | ||
184 | if (pcibios_scanned++) | 163 | if (pcibios_scanned++) |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bd74229081c3..fe60aa9fed0a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -1273,6 +1273,7 @@ asmlinkage void __init xen_start_kernel(void) | |||
1273 | 1273 | ||
1274 | pgd = (pgd_t *)xen_start_info->pt_base; | 1274 | pgd = (pgd_t *)xen_start_info->pt_base; |
1275 | 1275 | ||
1276 | init_pg_tables_start = __pa(pgd); | ||
1276 | init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; | 1277 | init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; |
1277 | 1278 | ||
1278 | init_mm.pgd = pgd; /* use the Xen pagetables to start */ | 1279 | init_mm.pgd = pgd; /* use the Xen pagetables to start */ |
@@ -1316,5 +1317,5 @@ asmlinkage void __init xen_start_kernel(void) | |||
1316 | } | 1317 | } |
1317 | 1318 | ||
1318 | /* Start the world */ | 1319 | /* Start the world */ |
1319 | start_kernel(); | 1320 | i386_start_kernel(); |
1320 | } | 1321 | } |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 488447878a9d..a29575803204 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -40,8 +40,8 @@ char * __init xen_memory_setup(void) | |||
40 | max_pfn = min(MAX_DOMAIN_PAGES, max_pfn); | 40 | max_pfn = min(MAX_DOMAIN_PAGES, max_pfn); |
41 | 41 | ||
42 | e820.nr_map = 0; | 42 | e820.nr_map = 0; |
43 | add_memory_region(0, LOWMEMSIZE(), E820_RAM); | 43 | e820_add_region(0, LOWMEMSIZE(), E820_RAM); |
44 | add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); | 44 | e820_add_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); |
45 | 45 | ||
46 | return "Xen"; | 46 | return "Xen"; |
47 | } | 47 | } |