Diffstat (limited to 'arch')
 51 files changed, 3394 insertions(+), 2297 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bf07b6f50fa1..07276ac01c20 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -261,36 +261,6 @@ config X86_VOYAGER
           If you do not specifically know you have a Voyager based machine,
           say N here, otherwise the kernel you build will not be bootable.
 
-config X86_NUMAQ
-        bool "NUMAQ (IBM/Sequent)"
-        depends on SMP && X86_32
-        select NUMA
-        help
-          This option is used for getting Linux to run on a (IBM/Sequent) NUMA
-          multiquad box. This changes the way that processors are bootstrapped,
-          and uses Clustered Logical APIC addressing mode instead of Flat Logical.
-          You will need a new lynxer.elf file to flash your firmware with - send
-          email to <Martin.Bligh@us.ibm.com>.
-
-config X86_SUMMIT
-        bool "Summit/EXA (IBM x440)"
-        depends on X86_32 && SMP
-        help
-          This option is needed for IBM systems that use the Summit/EXA chipset.
-          In particular, it is needed for the x440.
-
-          If you don't have one of these computers, you should say N here.
-          If you want to build a NUMA kernel, you must select ACPI.
-
-config X86_BIGSMP
-        bool "Support for other sub-arch SMP systems with more than 8 CPUs"
-        depends on X86_32 && SMP
-        help
-          This option is needed for the systems that have more than 8 CPUs
-          and if the system is not of any sub-arch type above.
-
-          If you don't have such a system, you should say N here.
-
 config X86_VISWS
         bool "SGI 320/540 (Visual Workstation)"
         depends on X86_32
@@ -304,12 +274,33 @@ config X86_VISWS
           and vice versa. See <file:Documentation/sgi-visws.txt> for details.
 
 config X86_GENERICARCH
-        bool "Generic architecture (Summit, bigsmp, ES7000, default)"
+        bool "Generic architecture"
         depends on X86_32
         help
-          This option compiles in the Summit, bigsmp, ES7000, default subarchitectures.
-          It is intended for a generic binary kernel.
-          If you want a NUMA kernel, select ACPI. We need SRAT for NUMA.
+          This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default
+          subarchitectures. It is intended for a generic binary kernel.
+          If you select them all, the kernel will probe them one by one and
+          will fall back to the default.
+
+if X86_GENERICARCH
+
+config X86_NUMAQ
+        bool "NUMAQ (IBM/Sequent)"
+        depends on SMP && X86_32
+        select NUMA
+        help
+          This option is used for getting Linux to run on a NUMAQ (IBM/Sequent)
+          NUMA multiquad box. This changes the way that processors are
+          bootstrapped, and uses Clustered Logical APIC addressing mode instead
+          of Flat Logical. You will need a new lynxer.elf file to flash your
+          firmware with - send email to <Martin.Bligh@us.ibm.com>.
+
+config X86_SUMMIT
+        bool "Summit/EXA (IBM x440)"
+        depends on X86_32 && SMP
+        help
+          This option is needed for IBM systems that use the Summit/EXA chipset.
+          In particular, it is needed for the x440.
 
 config X86_ES7000
         bool "Support for Unisys ES7000 IA32 series"
@@ -317,8 +308,15 @@ config X86_ES7000
         help
           Support for Unisys ES7000 systems. Say 'Y' here if this kernel is
           supposed to run on an IA32-based Unisys ES7000 system.
-          Only choose this option if you have such a system, otherwise you
-          should say N here.
+
+config X86_BIGSMP
+        bool "Support for big SMP systems with more than 8 CPUs"
+        depends on X86_32 && SMP
+        help
+          This option is needed for the systems that have more than 8 CPUs
+          and if the system is not of any sub-arch type above.
+
+endif
 
 config X86_RDC321X
         bool "RDC R-321x SoC"
@@ -911,9 +909,9 @@ config X86_PAE
 config NUMA
         bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
         depends on SMP
-        depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) && EXPERIMENTAL)
+        depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || X86_SUMMIT && ACPI) && EXPERIMENTAL)
         default n if X86_PC
-        default y if (X86_NUMAQ || X86_SUMMIT)
+        default y if (X86_NUMAQ || X86_SUMMIT || X86_GENERICARCH)
         help
           Enable NUMA (Non Uniform Memory Access) support.
           The kernel will try to allocate memory used by a CPU on the
@@ -1090,6 +1088,40 @@ config MTRR
 
           See <file:Documentation/mtrr.txt> for more information.
 
+config MTRR_SANITIZER
+        def_bool y
+        prompt "MTRR cleanup support"
+        depends on MTRR
+        help
+          Convert the MTRR layout from continuous to discrete, so that X
+          drivers can add WB entries.
+
+          Say N here if you see bootup problems (boot crash, boot hang,
+          spontaneous reboots).
+
+          The cleanup can be disabled with the disable_mtrr_cleanup boot
+          option; mtrr_chunk_size sets the largest MTRR entry size for a
+          continuous block that holds holes (i.e. UC entries).
+
+          If unsure, say Y.
+
+config MTRR_SANITIZER_ENABLE_DEFAULT
+        int "MTRR cleanup enable value (0-1)"
+        range 0 1
+        default "0"
+        depends on MTRR_SANITIZER
+        help
+          Default value of the MTRR cleanup enable flag.
+
+config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
+        int "MTRR cleanup spare reg num (0-7)"
+        range 0 7
+        default "1"
+        depends on MTRR_SANITIZER
+        help
+          Default number of spare MTRR entries left by the cleanup; it can
+          be changed via the mtrr_spare_reg_nr= boot parameter.
+
 config X86_PAT
         bool
         prompt "x86 PAT support"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 18363374d51a..253e7a5706d3 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -131,7 +131,7 @@ config 4KSTACKS
 
 config X86_FIND_SMP_CONFIG
         def_bool y
-        depends on X86_LOCAL_APIC || X86_VOYAGER
+        depends on X86_MPPARSE || X86_VOYAGER || X86_VISWS
         depends on X86_32
 
 config X86_MPPARSE
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 3cff3c894cf3..d6650131659e 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -117,29 +117,11 @@ mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/
 mflags-$(CONFIG_X86_VISWS) := -Iinclude/asm-x86/mach-visws
 mcore-$(CONFIG_X86_VISWS) := arch/x86/mach-visws/
 
-# NUMAQ subarch support
-mflags-$(CONFIG_X86_NUMAQ) := -Iinclude/asm-x86/mach-numaq
-mcore-$(CONFIG_X86_NUMAQ) := arch/x86/mach-default/
-
-# BIGSMP subarch support
-mflags-$(CONFIG_X86_BIGSMP) := -Iinclude/asm-x86/mach-bigsmp
-mcore-$(CONFIG_X86_BIGSMP) := arch/x86/mach-default/
-
-#Summit subarch support
-mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-x86/mach-summit
-mcore-$(CONFIG_X86_SUMMIT) := arch/x86/mach-default/
-
 # generic subarchitecture
 mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic
 fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/
 mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/
 
-
-# ES7000 subarch support
-mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-x86/mach-es7000
-fcore-$(CONFIG_X86_ES7000) := arch/x86/mach-es7000/
-mcore-$(CONFIG_X86_ES7000) := arch/x86/mach-default/
-
 # RDC R-321x subarch support
 mflags-$(CONFIG_X86_RDC321X) := -Iinclude/asm-x86/mach-rdc321x
 mcore-$(CONFIG_X86_RDC321X) := arch/x86/mach-default/
@@ -160,6 +142,7 @@ KBUILD_AFLAGS += $(mflags-y)
 
 head-y := arch/x86/kernel/head_$(BITS).o
 head-y += arch/x86/kernel/head$(BITS).o
+head-y += arch/x86/kernel/head.o
 head-y += arch/x86/kernel/init_task.o
 
 libs-y += arch/x86/lib/
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 90456cee47c3..ba0be6a25ff7 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -221,10 +221,6 @@ static char *vidmem;
 static int vidport;
 static int lines, cols;
 
-#ifdef CONFIG_X86_NUMAQ
-void *xquad_portio;
-#endif
-
 #include "../../../../lib/inflate.c"
 
 static void *malloc(int size)
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index acad32eb4290..53165c97336b 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -13,6 +13,7 @@
  */
 
 #include "boot.h"
+#include <linux/kernel.h>
 
 #define SMAP 0x534d4150 /* ASCII "SMAP" */
 
@@ -53,7 +54,7 @@ static int detect_memory_e820(void)
 
                 count++;
                 desc++;
-        } while (next && count < E820MAX);
+        } while (next && count < ARRAY_SIZE(boot_params.e820_map));
 
         return boot_params.e820_entries = count;
 }
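The switch from the E820MAX constant to ARRAY_SIZE(boot_params.e820_map) ties the loop bound to the array's actual declaration, so the two cannot drift out of sync. ARRAY_SIZE() is the macro from <linux/kernel.h>; a minimal userspace equivalent:

#include <stdio.h>

/* Userspace equivalent of the kernel's ARRAY_SIZE(): the element count
 * of a statically sized array, computed at compile time. */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

struct e820entry { unsigned long long addr, size; unsigned type; };

int main(void)
{
        struct e820entry e820_map[128];	/* stand-in for boot_params */

        printf("loop bound: %zu entries\n", ARRAY_SIZE(e820_map));
        return 0;
}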
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 77807d4769c9..dc3c636d113e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-extra-y                := head_$(BITS).o head$(BITS).o init_task.o vmlinux.lds
+extra-y                := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinux.lds
 
 CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 
@@ -22,7 +22,7 @@ obj-y += setup_$(BITS).o i8259_$(BITS).o setup.o
 obj-$(CONFIG_X86_32)    += sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)    += sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)    += syscall_64.o vsyscall_64.o setup64.o
-obj-y                   += bootflag.o e820_$(BITS).o
+obj-y                   += bootflag.o e820_$(BITS).o e820.o
 obj-y                   += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
 obj-y                   += alternative.o i8253.o pci-nommu.o
 obj-$(CONFIG_X86_64)    += bugs_64.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 33c5216fd3e1..caf4ed7ca069 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -338,8 +338,6 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
 
 #ifdef CONFIG_X86_IO_APIC
 
-struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
-
 static int __init
 acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
 {
@@ -860,6 +858,336 @@ static int __init acpi_parse_madt_lapic_entries(void)
 #endif                          /* CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_IO_APIC
+#define MP_ISA_BUS              0
+
+#ifdef CONFIG_X86_ES7000
+extern int es7000_plat;
+#endif
+
+static struct {
+        int apic_id;
+        int gsi_base;
+        int gsi_end;
+        DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+} mp_ioapic_routing[MAX_IO_APICS];
+
+static int mp_find_ioapic(int gsi)
+{
+        int i = 0;
+
+        /* Find the IOAPIC that manages this GSI. */
+        for (i = 0; i < nr_ioapics; i++) {
+                if ((gsi >= mp_ioapic_routing[i].gsi_base)
+                    && (gsi <= mp_ioapic_routing[i].gsi_end))
+                        return i;
+        }
+
+        printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+        return -1;
+}
+
+static u8 __init uniq_ioapic_id(u8 id)
+{
+#ifdef CONFIG_X86_32
+        if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+            !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+                return io_apic_get_unique_id(nr_ioapics, id);
+        else
+                return id;
+#else
+        int i;
+        DECLARE_BITMAP(used, 256);
+        bitmap_zero(used, 256);
+        for (i = 0; i < nr_ioapics; i++) {
+                struct mp_config_ioapic *ia = &mp_ioapics[i];
+                __set_bit(ia->mp_apicid, used);
+        }
+        if (!test_bit(id, used))
+                return id;
+        return find_first_zero_bit(used, 256);
+#endif
+}
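The 64-bit branch of uniq_ioapic_id() is a first-fit allocator: mark the IDs of already-registered IOAPICs in a bitmap, keep the requested ID if still free, otherwise take the first unused one. A userspace sketch of the same logic, with plain arrays standing in for DECLARE_BITMAP/test_bit/find_first_zero_bit:

#include <stdio.h>

#define NR_IDS 256

static unsigned char used[NR_IDS];

/* Keep the requested APIC ID if it is free; otherwise fall back to the
 * first unused ID, mirroring uniq_ioapic_id()'s 64-bit path. */
static int pick_unique_id(int id)
{
        int i;

        if (!used[id])
                return id;
        for (i = 0; i < NR_IDS; i++)
                if (!used[i])
                        return i;
        return -1;                      /* no free ID left */
}

int main(void)
{
        used[2] = used[3] = 1;          /* IDs taken by earlier IOAPICs */
        printf("request 5 -> %d\n", pick_unique_id(5)); /* free: keep 5 */
        printf("request 2 -> %d\n", pick_unique_id(2)); /* taken: get 0 */
        return 0;
}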
+
+static int bad_ioapic(unsigned long address)
+{
+        if (nr_ioapics >= MAX_IO_APICS) {
+                printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+                       "(found %d)\n", MAX_IO_APICS, nr_ioapics);
+                panic("Recompile kernel with bigger MAX_IO_APICS!\n");
+        }
+        if (!address) {
+                printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
+                       " found in table, skipping!\n");
+                return 1;
+        }
+        return 0;
+}
+
+void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
+{
+        int idx = 0;
+
+        if (bad_ioapic(address))
+                return;
+
+        idx = nr_ioapics;
+
+        mp_ioapics[idx].mp_type = MP_IOAPIC;
+        mp_ioapics[idx].mp_flags = MPC_APIC_USABLE;
+        mp_ioapics[idx].mp_apicaddr = address;
+
+        set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+        mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id);
+#ifdef CONFIG_X86_32
+        mp_ioapics[idx].mp_apicver = io_apic_get_version(idx);
+#else
+        mp_ioapics[idx].mp_apicver = 0;
+#endif
+        /*
+         * Build basic GSI lookup table to facilitate gsi->io_apic lookups
+         * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
+         */
+        mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid;
+        mp_ioapic_routing[idx].gsi_base = gsi_base;
+        mp_ioapic_routing[idx].gsi_end = gsi_base +
+            io_apic_get_redir_entries(idx);
+
+        printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
+               "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid,
+               mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr,
+               mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
+
+        nr_ioapics++;
+}
+
+void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
+{
+        int ioapic = -1;
+        int pin = -1;
+
+        /*
+         * Convert 'gsi' to 'ioapic.pin'.
+         */
+        ioapic = mp_find_ioapic(gsi);
+        if (ioapic < 0)
+                return;
+        pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+        /*
+         * TBD: This check is for faulty timer entries, where the override
+         *      erroneously sets the trigger to level, resulting in a HUGE
+         *      increase of timer interrupts!
+         */
+        if ((bus_irq == 0) && (trigger == 3))
+                trigger = 1;
+
+        mp_irqs[mp_irq_entries].mp_type = MP_INTSRC;
+        mp_irqs[mp_irq_entries].mp_irqtype = mp_INT;
+        mp_irqs[mp_irq_entries].mp_irqflag = (trigger << 2) | polarity;
+        mp_irqs[mp_irq_entries].mp_srcbus = MP_ISA_BUS;
+        mp_irqs[mp_irq_entries].mp_srcbusirq = bus_irq; /* IRQ */
+        mp_irqs[mp_irq_entries].mp_dstapic =
+            mp_ioapics[ioapic].mp_apicid;       /* APIC ID */
+        mp_irqs[mp_irq_entries].mp_dstirq = pin;        /* INTIN# */
+
+        if (++mp_irq_entries == MAX_IRQ_SOURCES)
+                panic("Max # of irq sources exceeded!!\n");
+
+}
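The mp_irqflag word packed above follows the MP-specification interrupt flag layout: bits 0-1 carry the polarity, bits 2-3 the trigger mode (0 = conforms to bus, 1 = active high / edge, 3 = active low / level), which is why the override path stores (trigger << 2) | polarity. A small userspace decoder sketch:

#include <stdio.h>

/* Decode an MP-table interrupt flag word as packed by
 * mp_override_legacy_irq(): bits 0-1 polarity, bits 2-3 trigger. */
static const char *pol_name[]  = { "conforms", "active-high", "reserved",
                                   "active-low" };
static const char *trig_name[] = { "conforms", "edge", "reserved",
                                   "level" };

static void decode_irqflag(unsigned flag)
{
        printf("flag 0x%02x: polarity=%s trigger=%s\n",
               flag, pol_name[flag & 3], trig_name[(flag >> 2) & 3]);
}

int main(void)
{
        unsigned trigger = 3, polarity = 1;     /* level, active high */

        decode_irqflag((trigger << 2) | polarity);
        decode_irqflag((1 << 2) | 3);           /* edge, active low */
        return 0;
}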
+
+void __init mp_config_acpi_legacy_irqs(void)
+{
+        int i = 0;
+        int ioapic = -1;
+
+#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+        /*
+         * Fabricate the legacy ISA bus (bus #31).
+         */
+        mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
+#endif
+        set_bit(MP_ISA_BUS, mp_bus_not_pci);
+        Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
+
+#ifdef CONFIG_X86_ES7000
+        /*
+         * Older generations of ES7000 have no legacy identity mappings
+         */
+        if (es7000_plat == 1)
+                return;
+#endif
+
+        /*
+         * Locate the IOAPIC that manages the ISA IRQs (0-15).
+         */
+        ioapic = mp_find_ioapic(0);
+        if (ioapic < 0)
+                return;
+
+        /*
+         * Use the default configuration for the IRQs 0-15, unless
+         * overridden by (MADT) interrupt source override entries.
+         */
+        for (i = 0; i < 16; i++) {
+                int idx;
+
+                mp_irqs[mp_irq_entries].mp_type = MP_INTSRC;
+                mp_irqs[mp_irq_entries].mp_irqflag = 0; /* Conforming */
+                mp_irqs[mp_irq_entries].mp_srcbus = MP_ISA_BUS;
+                mp_irqs[mp_irq_entries].mp_dstapic = mp_ioapics[ioapic].mp_apicid;
+
+                for (idx = 0; idx < mp_irq_entries; idx++) {
+                        struct mp_config_intsrc *irq = mp_irqs + idx;
+
+                        /* Do we already have a mapping for this ISA IRQ? */
+                        if (irq->mp_srcbus == MP_ISA_BUS
+                            && irq->mp_srcbusirq == i)
+                                break;
+
+                        /* Do we already have a mapping for this IOAPIC pin? */
+                        if ((irq->mp_dstapic ==
+                             mp_irqs[mp_irq_entries].mp_dstapic) &&
+                            (irq->mp_dstirq == i))
+                                break;
+                }
+
+                if (idx != mp_irq_entries) {
+                        printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
+                        continue;       /* IRQ already used */
+                }
+
+                mp_irqs[mp_irq_entries].mp_irqtype = mp_INT;
+                mp_irqs[mp_irq_entries].mp_srcbusirq = i; /* Identity mapped */
+                mp_irqs[mp_irq_entries].mp_dstirq = i;
+
+                if (++mp_irq_entries == MAX_IRQ_SOURCES)
+                        panic("Max # of irq sources exceeded!!\n");
+        }
+}
+
+int mp_register_gsi(u32 gsi, int triggering, int polarity)
+{
+        int ioapic;
+        int ioapic_pin;
+#ifdef CONFIG_X86_32
+#define MAX_GSI_NUM     4096
+#define IRQ_COMPRESSION_START   64
+
+        static int pci_irq = IRQ_COMPRESSION_START;
+        /*
+         * Mapping between Global System Interrupts, which
+         * represent all possible interrupts, and IRQs
+         * assigned to actual devices.
+         */
+        static int gsi_to_irq[MAX_GSI_NUM];
+#else
+
+        if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
+                return gsi;
+#endif
+
+        /* Don't set up the ACPI SCI because it's already set up */
+        if (acpi_gbl_FADT.sci_interrupt == gsi)
+                return gsi;
+
+        ioapic = mp_find_ioapic(gsi);
+        if (ioapic < 0) {
+                printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
+                return gsi;
+        }
+
+        ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+#ifdef CONFIG_X86_32
+        if (ioapic_renumber_irq)
+                gsi = ioapic_renumber_irq(ioapic, gsi);
+#endif
+
+        /*
+         * Avoid pin reprogramming.  PRTs typically include entries
+         * with redundant pin->gsi mappings (but unique PCI devices);
+         * we only program the IOAPIC on the first.
+         */
+        if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
+                printk(KERN_ERR "Invalid reference to IOAPIC pin "
+                       "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
+                       ioapic_pin);
+                return gsi;
+        }
+        if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
+                Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
+                        mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+#ifdef CONFIG_X86_32
+                return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
+#else
+                return gsi;
+#endif
+        }
+
+        set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
+#ifdef CONFIG_X86_32
+        /*
+         * For GSI >= 64, use IRQ compression
+         */
+        if ((gsi >= IRQ_COMPRESSION_START)
+            && (triggering == ACPI_LEVEL_SENSITIVE)) {
+                /*
+                 * For PCI devices assign IRQs in order, avoiding gaps
+                 * due to unused I/O APIC pins.
+                 */
+                int irq = gsi;
+                if (gsi < MAX_GSI_NUM) {
+                        /*
+                         * Retain the VIA chipset work-around (gsi > 15), but
+                         * avoid a problem where the 8254 timer (IRQ0) is setup
+                         * via an override (so it's not on pin 0 of the ioapic),
+                         * and at the same time, the pin 0 interrupt is a PCI
+                         * type.  The gsi > 15 test could cause these two pins
+                         * to be shared as IRQ0, and they are not shareable.
+                         * So test for this condition, and if necessary, avoid
+                         * the pin collision.
+                         */
+                        gsi = pci_irq++;
+                        /*
+                         * Don't assign IRQ used by ACPI SCI
+                         */
+                        if (gsi == acpi_gbl_FADT.sci_interrupt)
+                                gsi = pci_irq++;
+                        gsi_to_irq[irq] = gsi;
+                } else {
+                        printk(KERN_ERR "GSI %u is too high\n", gsi);
+                        return gsi;
+                }
+        }
+#endif
+        io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
+                                triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
+                                polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+        return gsi;
+}
+
+int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
+                        u32 gsi, int triggering, int polarity)
+{
+        struct mpc_config_intsrc intsrc;
+        int ioapic;
+
+        /* the printed entry should match the corresponding mptable entry */
+        intsrc.mpc_type = MP_INTSRC;
+        intsrc.mpc_irqtype = mp_INT;
+        intsrc.mpc_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
+                                (polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
+        intsrc.mpc_srcbus = number;
+        intsrc.mpc_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
+        ioapic = mp_find_ioapic(gsi);
+        intsrc.mpc_dstapic = mp_ioapic_routing[ioapic].apic_id;
+        intsrc.mpc_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+        MP_intsrc_info(&intsrc);
+
+        return 0;
+}
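For PCI source buses, mpc_srcbusirq encodes the device number in bits 2-6 and the zero-based interrupt pin (INTA# = 0 ... INTD# = 3) in bits 0-1, which is what the (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3) expression above builds. A sketch of the round trip with made-up device values:

#include <stdio.h>

/* Round-trip the PCI srcbusirq encoding used by mp_config_acpi_gsi():
 * bits 2-6 hold the PCI device number (from devfn), bits 0-1 hold the
 * zero-based interrupt pin. */
static unsigned encode(unsigned devfn, unsigned pin)
{
        return (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
}

int main(void)
{
        unsigned devfn = (4 << 3) | 2;          /* device 4, function 2 */
        unsigned srcbusirq = encode(devfn, 1);  /* INTA# */

        printf("srcbusirq = 0x%02x (dev %u, pin INT%c#)\n",
               srcbusirq, srcbusirq >> 2, 'A' + (srcbusirq & 3));
        return 0;
}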
+
 /*
  * Parse IOAPIC related entries in MADT
  * returns 0 on success, < 0 on error
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 4b99b1bdeb6c..954d67931a50 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -76,6 +76,11 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
  */
 int apic_verbosity;
 
+int pic_mode;
+
+/* Have we found an MP table */
+int smp_found_config;
+
 static unsigned int calibration_result;
 
 static int lapic_next_event(unsigned long delta,
@@ -1202,7 +1207,7 @@ void __init init_apic_mappings(void)
 
         for (i = 0; i < nr_ioapics; i++) {
                 if (smp_found_config) {
-                        ioapic_phys = mp_ioapics[i].mpc_apicaddr;
+                        ioapic_phys = mp_ioapics[i].mp_apicaddr;
                         if (!ioapic_phys) {
                                 printk(KERN_ERR
                                        "WARNING: bogus zero IO-APIC "
@@ -1513,6 +1518,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
          */
         cpu = 0;
 
+        if (apicid > max_physical_apicid)
+                max_physical_apicid = apicid;
+
         /*
          * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
          * but we need to work other dependencies like SMP_SUSPEND etc
@@ -1520,7 +1528,7 @@ void __cpuinit generic_processor_info(int apicid, int version)
          * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
          *       - Ashok Raj <ashok.raj@intel.com>
          */
-        if (num_processors > 8) {
+        if (max_physical_apicid >= 8) {
                 switch (boot_cpu_data.x86_vendor) {
                 case X86_VENDOR_INTEL:
                         if (!APIC_XAPIC(version)) {
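The old test counted processors; the new one tracks the highest APIC ID seen, because flat logical APIC addressing can only target IDs 0-7: what matters is the largest ID in use, not how many CPUs exist. A sketch of the distinction, using a hypothetical sparse ID set:

#include <stdio.h>

/* A box with only four CPUs whose APIC IDs are sparse still needs to
 * leave flat logical mode, which num_processors > 8 would miss. */
int main(void)
{
        int apic_ids[] = { 0, 1, 16, 17 };      /* 4 CPUs, sparse IDs */
        int n = sizeof(apic_ids) / sizeof(apic_ids[0]);
        int i, max_physical_apicid = 0;

        for (i = 0; i < n; i++)
                if (apic_ids[i] > max_physical_apicid)
                        max_physical_apicid = apic_ids[i];

        printf("num_processors > 8?       %s\n", n > 8 ? "yes" : "no");
        printf("max_physical_apicid >= 8? %s\n",
               max_physical_apicid >= 8 ? "yes" : "no");
        return 0;
}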
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 0633cfd0dc29..a4bd8fbb78a9 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -56,6 +56,9 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
  */
 int apic_verbosity;
 
+/* Have we found an MP table */
+int smp_found_config;
+
 static struct resource lapic_resource = {
         .name = "Local APIC",
         .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
@@ -1090,6 +1093,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
                  */
                 cpu = 0;
         }
+        if (apicid > max_physical_apicid)
+                max_physical_apicid = apicid;
+
         /* are we being called early in kernel startup? */
         if (x86_cpu_to_apicid_early_ptr) {
                 u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 5d241ce94a44..509bd3d9eacd 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -37,7 +37,7 @@ static struct fixed_range_block fixed_range_blocks[] = {
 static unsigned long smp_changes_mask;
 static struct mtrr_state mtrr_state = {};
 static int mtrr_state_set;
-static u64 tom2;
+u64 mtrr_tom2;
 
 #undef MODULE_PARAM_PREFIX
 #define MODULE_PARAM_PREFIX "mtrr."
@@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
                 }
         }
 
-        if (tom2) {
-                if (start >= (1ULL<<32) && (end < tom2))
+        if (mtrr_tom2) {
+                if (start >= (1ULL<<32) && (end < mtrr_tom2))
                         return MTRR_TYPE_WRBACK;
         }
 
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
         rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
 }
 
+/* fill the MSR pair relating to a var range */
+void fill_mtrr_var_range(unsigned int index,
+                u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+        struct mtrr_var_range *vr;
+
+        vr = mtrr_state.var_ranges;
+
+        vr[index].base_lo = base_lo;
+        vr[index].base_hi = base_hi;
+        vr[index].mask_lo = mask_lo;
+        vr[index].mask_hi = mask_hi;
+}
+
 static void
 get_fixed_ranges(mtrr_type * frs)
 {
@@ -213,13 +227,13 @@ void __init get_mtrr_state(void)
         mtrr_state.enabled = (lo & 0xc00) >> 10;
 
         if (amd_special_default_mtrr()) {
-                unsigned lo, hi;
+                unsigned low, high;
                 /* TOP_MEM2 */
-                rdmsr(MSR_K8_TOP_MEM2, lo, hi);
-                tom2 = hi;
-                tom2 <<= 32;
-                tom2 |= lo;
-                tom2 &= 0xffffff8000000ULL;
+                rdmsr(MSR_K8_TOP_MEM2, low, high);
+                mtrr_tom2 = high;
+                mtrr_tom2 <<= 32;
+                mtrr_tom2 |= low;
+                mtrr_tom2 &= 0xffffff800000ULL;
         }
         if (mtrr_show) {
                 int high_width;
@@ -251,9 +265,9 @@ void __init get_mtrr_state(void)
                         else
                                 printk(KERN_INFO "MTRR %u disabled\n", i);
                 }
-                if (tom2) {
+                if (mtrr_tom2) {
                         printk(KERN_INFO "TOM2: %016llx aka %lldM\n",
-                                tom2, tom2>>20);
+                                mtrr_tom2, mtrr_tom2>>20);
                 }
         }
         mtrr_state_set = 1;
@@ -328,7 +342,7 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
 
         if (lo != msrwords[0] || hi != msrwords[1]) {
                 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-                    boot_cpu_data.x86 == 15 &&
+                    (boot_cpu_data.x86 >= 0x0f && boot_cpu_data.x86 <= 0x11) &&
                     ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK))
                         k8_enable_fixed_iorrs();
                 mtrr_wrmsr(msr, msrwords[0], msrwords[1]);
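The get_mtrr_state() hunk above composes the 64-bit TOM2 (AMD top-of-memory-2) value from the two 32-bit MSR halves and then masks it down; note the hunk also corrects the mask from 0xffffff8000000 to 0xffffff800000. A standalone sketch of the composition, with made-up MSR halves:

#include <stdio.h>

/* Compose TOM2 the way get_mtrr_state() now does: high half shifted
 * up, low half OR'd in, then masked.  Example rdmsr values below give
 * a TOM2 of 5G. */
int main(void)
{
        unsigned low = 0x40000000, high = 0x00000001;   /* example rdmsr */
        unsigned long long tom2;

        tom2 = high;
        tom2 <<= 32;
        tom2 |= low;
        tom2 &= 0xffffff800000ULL;      /* corrected mask from the patch */

        printf("TOM2 = %016llx (%lluM)\n", tom2, tom2 >> 20);
        return 0;
}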
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 6a1e278d9323..0642201784e0 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
 #include <linux/smp.h>
 #include <linux/cpu.h>
 #include <linux/mutex.h>
+#include <linux/sort.h>
 
 #include <asm/e820.h>
 #include <asm/mtrr.h>
@@ -609,6 +610,787 @@ static struct sysdev_driver mtrr_sysdev_driver = {
         .resume         = mtrr_restore,
 };
 
+/* should be related to the number of MTRR_VAR_RANGES */
+#define RANGE_NUM 256
+
+struct res_range {
+        unsigned long start;
+        unsigned long end;
+};
+
+static int __init
+add_range(struct res_range *range, int nr_range, unsigned long start,
+          unsigned long end)
+{
+        /* out of slots */
+        if (nr_range >= RANGE_NUM)
+                return nr_range;
+
+        range[nr_range].start = start;
+        range[nr_range].end = end;
+
+        nr_range++;
+
+        return nr_range;
+}
+
+static int __init
+add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
+                     unsigned long end)
+{
+        int i;
+
+        /* try to merge it with an old one */
+        for (i = 0; i < nr_range; i++) {
+                unsigned long final_start, final_end;
+                unsigned long common_start, common_end;
+
+                if (!range[i].end)
+                        continue;
+
+                common_start = max(range[i].start, start);
+                common_end = min(range[i].end, end);
+                if (common_start > common_end + 1)
+                        continue;
+
+                final_start = min(range[i].start, start);
+                final_end = max(range[i].end, end);
+
+                range[i].start = final_start;
+                range[i].end = final_end;
+                return nr_range;
+        }
+
+        /* need to add it */
+        return add_range(range, nr_range, start, end);
+}
+
+static void __init
+subtract_range(struct res_range *range, unsigned long start, unsigned long end)
+{
+        int i, j;
+
+        for (j = 0; j < RANGE_NUM; j++) {
+                if (!range[j].end)
+                        continue;
+
+                if (start <= range[j].start && end >= range[j].end) {
+                        range[j].start = 0;
+                        range[j].end = 0;
+                        continue;
+                }
+
+                if (start <= range[j].start && end < range[j].end &&
+                    range[j].start < end + 1) {
+                        range[j].start = end + 1;
+                        continue;
+                }
+
+
+                if (start > range[j].start && end >= range[j].end &&
+                    range[j].end > start - 1) {
+                        range[j].end = start - 1;
+                        continue;
+                }
+
+                if (start > range[j].start && end < range[j].end) {
+                        /* find a new spare slot */
+                        for (i = 0; i < RANGE_NUM; i++) {
+                                if (range[i].end == 0)
+                                        break;
+                        }
+                        if (i < RANGE_NUM) {
+                                range[i].end = range[j].end;
+                                range[i].start = end + 1;
+                        } else {
+                                printk(KERN_ERR "out of slots in ranges\n");
+                        }
+                        range[j].end = start - 1;
+                        continue;
+                }
+        }
+}
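subtract_range() above handles four overlap cases: the subtracted span swallows a range entirely, clips its head, clips its tail, or splits it in two (consuming a spare slot). A compact userspace sketch of the same case analysis on a single inclusive [start, end] range, assuming the spans do overlap:

#include <stdio.h>

struct res_range { unsigned long start, end; };

/* Apply one subtraction [s, e] to a single inclusive range.  The split
 * case, which the kernel handles by grabbing a spare array slot,
 * returns the tail piece through *tail. */
static void subtract_one(struct res_range *r, unsigned long s,
                         unsigned long e, struct res_range *tail)
{
        tail->start = tail->end = 0;
        if (s <= r->start && e >= r->end) {
                r->start = r->end = 0;                  /* swallowed  */
        } else if (s <= r->start && e < r->end) {
                r->start = e + 1;                       /* head clip  */
        } else if (s > r->start && e >= r->end) {
                r->end = s - 1;                         /* tail clip  */
        } else {
                tail->start = e + 1;                    /* split: new */
                tail->end = r->end;                     /* tail piece */
                r->end = s - 1;
        }
}

int main(void)
{
        struct res_range r = { 0x100, 0x1ff }, tail;

        subtract_one(&r, 0x140, 0x17f, &tail);          /* split case */
        printf("left  [%03lx, %03lx]\n", r.start, r.end);
        printf("right [%03lx, %03lx]\n", tail.start, tail.end);
        return 0;
}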
+
+static int __init cmp_range(const void *x1, const void *x2)
+{
+        const struct res_range *r1 = x1;
+        const struct res_range *r2 = x2;
+        long start1, start2;
+
+        start1 = r1->start;
+        start2 = r2->start;
+
+        return start1 - start2;
+}
+
+struct var_mtrr_range_state {
+        unsigned long base_pfn;
+        unsigned long size_pfn;
+        mtrr_type type;
+};
+
+struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
+static int __initdata debug_print;
+
+static int __init
+x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
+                       unsigned long extra_remove_base,
+                       unsigned long extra_remove_size)
+{
+        unsigned long i, base, size;
+        mtrr_type type;
+
+        for (i = 0; i < num_var_ranges; i++) {
+                type = range_state[i].type;
+                if (type != MTRR_TYPE_WRBACK)
+                        continue;
+                base = range_state[i].base_pfn;
+                size = range_state[i].size_pfn;
+                nr_range = add_range_with_merge(range, nr_range, base,
+                                                base + size - 1);
+        }
+        if (debug_print) {
+                printk(KERN_DEBUG "After WB checking\n");
+                for (i = 0; i < nr_range; i++)
+                        printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+                                range[i].start, range[i].end + 1);
+        }
+
+        /* take out UC ranges */
+        for (i = 0; i < num_var_ranges; i++) {
+                type = range_state[i].type;
+                if (type != MTRR_TYPE_UNCACHABLE)
+                        continue;
+                size = range_state[i].size_pfn;
+                if (!size)
+                        continue;
+                base = range_state[i].base_pfn;
+                subtract_range(range, base, base + size - 1);
+        }
+        if (extra_remove_size)
+                subtract_range(range, extra_remove_base,
+                                extra_remove_base + extra_remove_size - 1);
+
+        /* get the new range count */
+        nr_range = 0;
+        for (i = 0; i < RANGE_NUM; i++) {
+                if (!range[i].end)
+                        continue;
+                nr_range++;
+        }
+        if (debug_print) {
+                printk(KERN_DEBUG "After UC checking\n");
+                for (i = 0; i < nr_range; i++)
+                        printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+                                range[i].start, range[i].end + 1);
+        }
+
+        /* sort the ranges */
+        sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+        if (debug_print) {
+                printk(KERN_DEBUG "After sorting\n");
+                for (i = 0; i < nr_range; i++)
+                        printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+                                range[i].start, range[i].end + 1);
+        }
+
+        /* clear the entries that are not used */
+        for (i = nr_range; i < RANGE_NUM; i++)
+                memset(&range[i], 0, sizeof(range[i]));
+
+        return nr_range;
+}
+
+static struct res_range __initdata range[RANGE_NUM];
+
+#ifdef CONFIG_MTRR_SANITIZER
+
+static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
+{
+        unsigned long sum;
+        int i;
+
+        sum = 0;
+        for (i = 0; i < nr_range; i++)
+                sum += range[i].end + 1 - range[i].start;
+
+        return sum;
+}
+
+static int enable_mtrr_cleanup __initdata =
+        CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
+
+static int __init disable_mtrr_cleanup_setup(char *str)
+{
+        if (enable_mtrr_cleanup != -1)
+                enable_mtrr_cleanup = 0;
+        return 0;
+}
+early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
+
+static int __init enable_mtrr_cleanup_setup(char *str)
+{
+        if (enable_mtrr_cleanup != -1)
+                enable_mtrr_cleanup = 1;
+        return 0;
+}
+early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
+
+struct var_mtrr_state {
+        unsigned long   range_startk;
+        unsigned long   range_sizek;
+        unsigned long   chunk_sizek;
+        unsigned long   gran_sizek;
+        unsigned int    reg;
+};
+
+static void __init
+set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
+                unsigned char type, unsigned int address_bits)
+{
+        u32 base_lo, base_hi, mask_lo, mask_hi;
+        u64 base, mask;
+
+        if (!sizek) {
+                fill_mtrr_var_range(reg, 0, 0, 0, 0);
+                return;
+        }
+
+        mask = (1ULL << address_bits) - 1;
+        mask &= ~((((u64)sizek) << 10) - 1);
+
+        base = ((u64)basek) << 10;
+
+        base |= type;
+        mask |= 0x800;
+
+        base_lo = base & ((1ULL<<32) - 1);
+        base_hi = base >> 32;
+
+        mask_lo = mask & ((1ULL<<32) - 1);
+        mask_hi = mask >> 32;
+
+        fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
+}
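set_var_mtrr() above derives the PHYSBASE/PHYSMASK MSR pair from a base and size given in KiB: the mask keeps the address bits above the (power-of-two) size, bit 11 (0x800) is the mask-valid bit, and the memory type sits in the low bits of the base. A worked userspace example, assuming a hypothetical WB range of 1G at 2G with 36 physical address bits:

#include <stdio.h>

/* Recompute the MSR images the way set_var_mtrr() does.
 * Type 6 = MTRR_TYPE_WRBACK. */
int main(void)
{
        unsigned long basek = 2UL << 20;        /* 2G in KiB */
        unsigned long sizek = 1UL << 20;        /* 1G in KiB */
        unsigned address_bits = 36;
        unsigned char type = 6;
        unsigned long long base, mask;

        mask = (1ULL << address_bits) - 1;
        mask &= ~(((unsigned long long)sizek << 10) - 1);
        base = ((unsigned long long)basek << 10) | type;
        mask |= 0x800;

        printf("PHYSBASE = %016llx\n", base);   /* 0000000080000006 */
        printf("PHYSMASK = %016llx\n", mask);   /* 0000000fc0000800 */
        return 0;
}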
+
+static void __init
+save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
+              unsigned char type)
+{
+        range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
+        range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
+        range_state[reg].type = type;
+}
+
+static void __init
+set_var_mtrr_all(unsigned int address_bits)
+{
+        unsigned long basek, sizek;
+        unsigned char type;
+        unsigned int reg;
+
+        for (reg = 0; reg < num_var_ranges; reg++) {
+                basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
+                sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
+                type = range_state[reg].type;
+
+                set_var_mtrr(reg, basek, sizek, type, address_bits);
+        }
+}
+
+static unsigned int __init
+range_to_mtrr(unsigned int reg, unsigned long range_startk,
+              unsigned long range_sizek, unsigned char type)
+{
+        if (!range_sizek || (reg >= num_var_ranges))
+                return reg;
+
+        while (range_sizek) {
+                unsigned long max_align, align;
+                unsigned long sizek;
+
+                /* Compute the maximum size I can make a range */
+                if (range_startk)
+                        max_align = ffs(range_startk) - 1;
+                else
+                        max_align = 32;
+                align = fls(range_sizek) - 1;
+                if (align > max_align)
+                        align = max_align;
+
+                sizek = 1 << align;
+                if (debug_print)
+                        printk(KERN_DEBUG "Setting variable MTRR %d, "
+                                "base: %ldMB, range: %ldMB, type %s\n",
+                                reg, range_startk >> 10, sizek >> 10,
+                                (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
+                                   ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")
+                                );
+                save_var_mtrr(reg++, range_startk, sizek, type);
+                range_startk += sizek;
+                range_sizek -= sizek;
+                if (reg >= num_var_ranges)
+                        break;
+        }
+        return reg;
+}
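range_to_mtrr() above carves an arbitrary range into power-of-two MTRR chunks: ffs() on the current start bounds the alignment, fls() on the remaining size caps the chunk, and the smaller of the two wins. A standalone sketch using GCC builtins as stand-ins for the kernel's ffs()/fls(), with sizes in KiB like the kernel code:

#include <stdio.h>

static int fls32(unsigned long x)
{
        return x ? 32 - __builtin_clz(x) : 0;
}

/* Carve [startk, startk+sizek) into power-of-two chunks, each as large
 * as both the start alignment and the remaining size allow. */
int main(void)
{
        unsigned long startk = 0;
        unsigned long sizek = (3UL << 20) + (512UL << 10); /* 3.5G in KiB */

        while (sizek) {
                unsigned long max_align, align, chunk;

                max_align = startk ? __builtin_ffs(startk) - 1 : 32;
                align = fls32(sizek) - 1;
                if (align > max_align)
                        align = max_align;
                chunk = 1UL << align;

                printf("MTRR at %4luM, size %4luM\n",
                       startk >> 10, chunk >> 10);
                startk += chunk;
                sizek -= chunk;
        }
        return 0;
}

For 3.5G starting at 0 this yields three registers (2G, 1G, 512M), matching the "discrete" layout sketched earlier.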
937 | |||
938 | static unsigned __init | ||
939 | range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, | ||
940 | unsigned long sizek) | ||
941 | { | ||
942 | unsigned long hole_basek, hole_sizek; | ||
943 | unsigned long second_basek, second_sizek; | ||
944 | unsigned long range0_basek, range0_sizek; | ||
945 | unsigned long range_basek, range_sizek; | ||
946 | unsigned long chunk_sizek; | ||
947 | unsigned long gran_sizek; | ||
948 | |||
949 | hole_basek = 0; | ||
950 | hole_sizek = 0; | ||
951 | second_basek = 0; | ||
952 | second_sizek = 0; | ||
953 | chunk_sizek = state->chunk_sizek; | ||
954 | gran_sizek = state->gran_sizek; | ||
955 | |||
956 | /* align with gran size, prevent small block used up MTRRs */ | ||
957 | range_basek = ALIGN(state->range_startk, gran_sizek); | ||
958 | if ((range_basek > basek) && basek) | ||
959 | return second_sizek; | ||
960 | state->range_sizek -= (range_basek - state->range_startk); | ||
961 | range_sizek = ALIGN(state->range_sizek, gran_sizek); | ||
962 | |||
963 | while (range_sizek > state->range_sizek) { | ||
964 | range_sizek -= gran_sizek; | ||
965 | if (!range_sizek) | ||
966 | return 0; | ||
967 | } | ||
968 | state->range_sizek = range_sizek; | ||
969 | |||
970 | /* try to append some small hole */ | ||
971 | range0_basek = state->range_startk; | ||
972 | range0_sizek = ALIGN(state->range_sizek, chunk_sizek); | ||
973 | if (range0_sizek == state->range_sizek) { | ||
974 | if (debug_print) | ||
975 | printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", | ||
976 | range0_basek<<10, | ||
977 | (range0_basek + state->range_sizek)<<10); | ||
978 | state->reg = range_to_mtrr(state->reg, range0_basek, | ||
979 | state->range_sizek, MTRR_TYPE_WRBACK); | ||
980 | return 0; | ||
981 | } | ||
982 | |||
983 | range0_sizek -= chunk_sizek; | ||
984 | if (range0_sizek && sizek) { | ||
985 | while (range0_basek + range0_sizek > (basek + sizek)) { | ||
986 | range0_sizek -= chunk_sizek; | ||
987 | if (!range0_sizek) | ||
988 | break; | ||
989 | } | ||
990 | } | ||
991 | |||
992 | if (range0_sizek) { | ||
993 | if (debug_print) | ||
994 | printk(KERN_DEBUG "range0: %016lx - %016lx\n", | ||
995 | range0_basek<<10, | ||
996 | (range0_basek + range0_sizek)<<10); | ||
997 | state->reg = range_to_mtrr(state->reg, range0_basek, | ||
998 | range0_sizek, MTRR_TYPE_WRBACK); | ||
999 | |||
1000 | } | ||
1001 | |||
1002 | range_basek = range0_basek + range0_sizek; | ||
1003 | range_sizek = chunk_sizek; | ||
1004 | |||
1005 | if (range_basek + range_sizek > basek && | ||
1006 | range_basek + range_sizek <= (basek + sizek)) { | ||
1007 | /* one hole */ | ||
1008 | second_basek = basek; | ||
1009 | second_sizek = range_basek + range_sizek - basek; | ||
1010 | } | ||
1011 | |||
1012 | /* if last piece, only could one hole near end */ | ||
1013 | if ((second_basek || !basek) && | ||
1014 | range_sizek - (state->range_sizek - range0_sizek) - second_sizek < | ||
1015 | (chunk_sizek >> 1)) { | ||
1016 | /* | ||
1017 | * one hole in middle (second_sizek is 0) or at end | ||
1018 | * (second_sizek is 0 ) | ||
1019 | */ | ||
1020 | hole_sizek = range_sizek - (state->range_sizek - range0_sizek) | ||
1021 | - second_sizek; | ||
1022 | hole_basek = range_basek + range_sizek - hole_sizek | ||
1023 | - second_sizek; | ||
1024 | } else { | ||
1025 | /* fallback for big hole, or several holes */ | ||
1026 | range_sizek = state->range_sizek - range0_sizek; | ||
1027 | second_basek = 0; | ||
1028 | second_sizek = 0; | ||
1029 | } | ||
1030 | |||
1031 | if (debug_print) | ||
1032 | printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10, | ||
1033 | (range_basek + range_sizek)<<10); | ||
1034 | state->reg = range_to_mtrr(state->reg, range_basek, range_sizek, | ||
1035 | MTRR_TYPE_WRBACK); | ||
1036 | if (hole_sizek) { | ||
1037 | if (debug_print) | ||
1038 | printk(KERN_DEBUG "hole: %016lx - %016lx\n", | ||
1039 | hole_basek<<10, (hole_basek + hole_sizek)<<10); | ||
1040 | state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek, | ||
1041 | MTRR_TYPE_UNCACHABLE); | ||
1042 | |||
1043 | } | ||
1044 | |||
1045 | return second_sizek; | ||
1046 | } | ||
1047 | |||
1048 | static void __init | ||
1049 | set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, | ||
1050 | unsigned long size_pfn) | ||
1051 | { | ||
1052 | unsigned long basek, sizek; | ||
1053 | unsigned long second_sizek = 0; | ||
1054 | |||
1055 | if (state->reg >= num_var_ranges) | ||
1056 | return; | ||
1057 | |||
1058 | basek = base_pfn << (PAGE_SHIFT - 10); | ||
1059 | sizek = size_pfn << (PAGE_SHIFT - 10); | ||
1060 | |||
1061 | /* See if I can merge with the last range */ | ||
1062 | if ((basek <= 1024) || | ||
1063 | (state->range_startk + state->range_sizek == basek)) { | ||
1064 | unsigned long endk = basek + sizek; | ||
1065 | state->range_sizek = endk - state->range_startk; | ||
1066 | return; | ||
1067 | } | ||
1068 | /* Write the range mtrrs */ | ||
1069 | if (state->range_sizek != 0) | ||
1070 | second_sizek = range_to_mtrr_with_hole(state, basek, sizek); | ||
1071 | |||
1072 | /* Allocate an msr */ | ||
1073 | state->range_startk = basek + second_sizek; | ||
1074 | state->range_sizek = sizek - second_sizek; | ||
1075 | } | ||
1076 | |||
1077 | /* mininum size of mtrr block that can take hole */ | ||
1078 | static u64 mtrr_chunk_size __initdata = (256ULL<<20); | ||
1079 | |||
1080 | static int __init parse_mtrr_chunk_size_opt(char *p) | ||
1081 | { | ||
1082 | if (!p) | ||
1083 | return -EINVAL; | ||
1084 | mtrr_chunk_size = memparse(p, &p); | ||
1085 | return 0; | ||
1086 | } | ||
1087 | early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); | ||
1088 | |||
1089 | /* granity of mtrr of block */ | ||
1090 | static u64 mtrr_gran_size __initdata; | ||
1091 | |||
1092 | static int __init parse_mtrr_gran_size_opt(char *p) | ||
1093 | { | ||
1094 | if (!p) | ||
1095 | return -EINVAL; | ||
1096 | mtrr_gran_size = memparse(p, &p); | ||
1097 | return 0; | ||
1098 | } | ||
1099 | early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); | ||
1100 | |||
1101 | static int nr_mtrr_spare_reg __initdata = | ||
1102 | CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; | ||
1103 | |||
1104 | static int __init parse_mtrr_spare_reg(char *arg) | ||
1105 | { | ||
1106 | if (arg) | ||
1107 | nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); | ||
1108 | return 0; | ||
1109 | } | ||
1110 | |||
1111 | early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); | ||
1112 | |||
1113 | static int __init | ||
1114 | x86_setup_var_mtrrs(struct res_range *range, int nr_range, | ||
1115 | u64 chunk_size, u64 gran_size) | ||
1116 | { | ||
1117 | struct var_mtrr_state var_state; | ||
1118 | int i; | ||
1119 | int num_reg; | ||
1120 | |||
1121 | var_state.range_startk = 0; | ||
1122 | var_state.range_sizek = 0; | ||
1123 | var_state.reg = 0; | ||
1124 | var_state.chunk_sizek = chunk_size >> 10; | ||
1125 | var_state.gran_sizek = gran_size >> 10; | ||
1126 | |||
1127 | memset(range_state, 0, sizeof(range_state)); | ||
1128 | |||
1129 | /* Write the range etc */ | ||
1130 | for (i = 0; i < nr_range; i++) | ||
1131 | set_var_mtrr_range(&var_state, range[i].start, | ||
1132 | range[i].end - range[i].start + 1); | ||
1133 | |||
1134 | /* Write the last range */ | ||
1135 | if (var_state.range_sizek != 0) | ||
1136 | range_to_mtrr_with_hole(&var_state, 0, 0); | ||
1137 | |||
1138 | num_reg = var_state.reg; | ||
1139 | /* Clear out the extra MTRR's */ | ||
1140 | while (var_state.reg < num_var_ranges) { | ||
1141 | save_var_mtrr(var_state.reg, 0, 0, 0); | ||
1142 | var_state.reg++; | ||
1143 | } | ||
1144 | |||
1145 | return num_reg; | ||
1146 | } | ||
1147 | |||
1148 | struct mtrr_cleanup_result { | ||
1149 | unsigned long gran_sizek; | ||
1150 | unsigned long chunk_sizek; | ||
1151 | unsigned long lose_cover_sizek; | ||
1152 | unsigned int num_reg; | ||
1153 | int bad; | ||
1154 | }; | ||
1155 | |||
1156 | /* | ||
1157 | * gran_size: 1M, 2M, ..., 2G | ||
1158 | * chunk size: gran_size, ..., 4G | ||
1159 | * so we need (2+13)*6 | ||
1160 | */ | ||
1161 | #define NUM_RESULT 90 | ||
1162 | #define PSHIFT (PAGE_SHIFT - 10) | ||
1163 | |||
1164 | static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; | ||
1165 | static struct res_range __initdata range_new[RANGE_NUM]; | ||
1166 | static unsigned long __initdata min_loss_pfn[RANGE_NUM]; | ||
1167 | |||
1168 | static int __init mtrr_cleanup(unsigned address_bits) | ||
1169 | { | ||
1170 | unsigned long extra_remove_base, extra_remove_size; | ||
1171 | unsigned long i, base, size, def, dummy; | ||
1172 | mtrr_type type; | ||
1173 | int nr_range, nr_range_new; | ||
1174 | u64 chunk_size, gran_size; | ||
1175 | unsigned long range_sums, range_sums_new; | ||
1176 | int index_good; | ||
1177 | int num_reg_good; | ||
1178 | |||
1179 | /* extra one for all 0 */ | ||
1180 | int num[MTRR_NUM_TYPES + 1]; | ||
1181 | |||
1182 | if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) | ||
1183 | return 0; | ||
1184 | rdmsr(MTRRdefType_MSR, def, dummy); | ||
1185 | def &= 0xff; | ||
1186 | if (def != MTRR_TYPE_UNCACHABLE) | ||
1187 | return 0; | ||
1188 | |||
1189 | /* get it and store it aside */ | ||
1190 | memset(range_state, 0, sizeof(range_state)); | ||
1191 | for (i = 0; i < num_var_ranges; i++) { | ||
1192 | mtrr_if->get(i, &base, &size, &type); | ||
1193 | range_state[i].base_pfn = base; | ||
1194 | range_state[i].size_pfn = size; | ||
1195 | range_state[i].type = type; | ||
1196 | } | ||
1197 | |||
1198 | /* check entries number */ | ||
1199 | memset(num, 0, sizeof(num)); | ||
1200 | for (i = 0; i < num_var_ranges; i++) { | ||
1201 | type = range_state[i].type; | ||
1202 | size = range_state[i].size_pfn; | ||
1203 | if (type >= MTRR_NUM_TYPES) | ||
1204 | continue; | ||
1205 | if (!size) | ||
1206 | type = MTRR_NUM_TYPES; | ||
1207 | num[type]++; | ||
1208 | } | ||
1209 | |||
1210 | /* check if we got UC entries */ | ||
1211 | if (!num[MTRR_TYPE_UNCACHABLE]) | ||
1212 | return 0; | ||
1213 | |||
1214 | /* check if we only had WB and UC */ | ||
1215 | if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != | ||
1216 | num_var_ranges - num[MTRR_NUM_TYPES]) | ||
1217 | return 0; | ||
1218 | |||
1219 | memset(range, 0, sizeof(range)); | ||
1220 | extra_remove_size = 0; | ||
1221 | if (mtrr_tom2) { | ||
1222 | extra_remove_base = 1 << (32 - PAGE_SHIFT); | ||
1223 | extra_remove_size = | ||
1224 | (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; | ||
1225 | } | ||
1226 | nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, | ||
1227 | extra_remove_size); | ||
1228 | range_sums = sum_ranges(range, nr_range); | ||
1229 | printk(KERN_INFO "total RAM coverred: %ldM\n", | ||
1230 | range_sums >> (20 - PAGE_SHIFT)); | ||
1231 | |||
1232 | if (mtrr_chunk_size && mtrr_gran_size) { | ||
1233 | int num_reg; | ||
1234 | |||
1235 | debug_print = 1; | ||
1236 | /* convert ranges to var ranges state */ | ||
1237 | num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, | ||
1238 | mtrr_gran_size); | ||
1239 | |||
1240 | /* we got new setting in range_state, check it */ | ||
1241 | memset(range_new, 0, sizeof(range_new)); | ||
1242 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, | ||
1243 | extra_remove_base, | ||
1244 | extra_remove_size); | ||
1245 | range_sums_new = sum_ranges(range_new, nr_range_new); | ||
1246 | |||
1247 | i = 0; | ||
1248 | result[i].chunk_sizek = mtrr_chunk_size >> 10; | ||
1249 | result[i].gran_sizek = mtrr_gran_size >> 10; | ||
1250 | result[i].num_reg = num_reg; | ||
1251 | if (range_sums < range_sums_new) { | ||
1252 | result[i].lose_cover_sizek = | ||
1253 | (range_sums_new - range_sums) << PSHIFT; | ||
1254 | result[i].bad = 1; | ||
1255 | } else | ||
1256 | result[i].lose_cover_sizek = | ||
1257 | (range_sums - range_sums_new) << PSHIFT; | ||
1258 | |||
1259 | printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", | ||
1260 | result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10, | ||
1261 | result[i].chunk_sizek >> 10); | ||
1262 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", | ||
1263 | result[i].num_reg, result[i].bad?"-":"", | ||
1264 | result[i].lose_cover_sizek >> 10); | ||
1265 | if (!result[i].bad) { | ||
1266 | set_var_mtrr_all(address_bits); | ||
1267 | return 1; | ||
1268 | } | ||
1269 | printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " | ||
1270 | "will find optimal one\n"); | ||
1271 | debug_print = 0; | ||
1272 | memset(result, 0, sizeof(result[0])); | ||
1273 | } | ||
1274 | |||
1275 | i = 0; | ||
1276 | memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); | ||
1277 | memset(result, 0, sizeof(result)); | ||
1278 | for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { | ||
1279 | for (chunk_size = gran_size; chunk_size < (1ULL<<33); | ||
1280 | chunk_size <<= 1) { | ||
1281 | int num_reg; | ||
1282 | |||
1283 | if (debug_print) | ||
1284 | printk(KERN_INFO | ||
1285 | "\ngran_size: %lldM chunk_size: %lldM\n", | ||
1286 | gran_size >> 20, chunk_size >> 20); | ||
1287 | if (i >= NUM_RESULT) | ||
1288 | continue; | ||
1289 | |||
1290 | /* convert ranges to var ranges state */ | ||
1291 | num_reg = x86_setup_var_mtrrs(range, nr_range, | ||
1292 | chunk_size, gran_size); | ||
1293 | |||
1294 | /* we got new setting in range_state, check it */ | ||
1295 | memset(range_new, 0, sizeof(range_new)); | ||
1296 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, | ||
1297 | extra_remove_base, extra_remove_size); | ||
1298 | range_sums_new = sum_ranges(range_new, nr_range_new); | ||
1299 | |||
1300 | result[i].chunk_sizek = chunk_size >> 10; | ||
1301 | result[i].gran_sizek = gran_size >> 10; | ||
1302 | result[i].num_reg = num_reg; | ||
1303 | if (range_sums < range_sums_new) { | ||
1304 | result[i].lose_cover_sizek = | ||
1305 | (range_sums_new - range_sums) << PSHIFT; | ||
1306 | result[i].bad = 1; | ||
1307 | } else | ||
1308 | result[i].lose_cover_sizek = | ||
1309 | (range_sums - range_sums_new) << PSHIFT; | ||
1310 | |||
1311 | /* double check it */ | ||
1312 | if (!result[i].bad && !result[i].lose_cover_sizek) { | ||
1313 | if (nr_range_new != nr_range || | ||
1314 | memcmp(range, range_new, sizeof(range))) | ||
1315 | result[i].bad = 1; | ||
1316 | } | ||
1317 | |||
1318 | if (!result[i].bad && (range_sums - range_sums_new < | ||
1319 | min_loss_pfn[num_reg])) { | ||
1320 | min_loss_pfn[num_reg] = | ||
1321 | range_sums - range_sums_new; | ||
1322 | } | ||
1323 | i++; | ||
1324 | } | ||
1325 | } | ||
1326 | |||
1327 | /* print out all */ | ||
1328 | for (i = 0; i < NUM_RESULT; i++) { | ||
1329 | printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", | ||
1330 | result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10, | ||
1331 | result[i].chunk_sizek >> 10); | ||
1332 | printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n", | ||
1333 | result[i].num_reg, result[i].bad?"-":"", | ||
1334 | result[i].lose_cover_sizek >> 10); | ||
1335 | } | ||
1336 | |||
1337 | /* try to find the optimal index */ | ||
1338 | if (nr_mtrr_spare_reg >= num_var_ranges) | ||
1339 | nr_mtrr_spare_reg = num_var_ranges - 1; | ||
1340 | num_reg_good = -1; | ||
1341 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { | ||
1342 | if (!min_loss_pfn[i]) { | ||
1343 | num_reg_good = i; | ||
1344 | break; | ||
1345 | } | ||
1346 | } | ||
1347 | |||
1348 | index_good = -1; | ||
1349 | if (num_reg_good != -1) { | ||
1350 | for (i = 0; i < NUM_RESULT; i++) { | ||
1351 | if (!result[i].bad && | ||
1352 | result[i].num_reg == num_reg_good && | ||
1353 | !result[i].lose_cover_sizek) { | ||
1354 | index_good = i; | ||
1355 | break; | ||
1356 | } | ||
1357 | } | ||
1358 | } | ||
1359 | |||
1360 | if (index_good != -1) { | ||
1361 | printk(KERN_INFO "Found optimal setting for mtrr cleanup\n"); | ||
1362 | i = index_good; | ||
1363 | printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t", | ||
1364 | result[i].gran_sizek >> 10, | ||
1365 | result[i].chunk_sizek >> 10); | ||
1366 | printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n", | ||
1367 | result[i].num_reg, | ||
1368 | result[i].lose_cover_sizek >> 10); | ||
1369 | /* convert ranges to var ranges state */ | ||
1370 | chunk_size = result[i].chunk_sizek; | ||
1371 | chunk_size <<= 10; | ||
1372 | gran_size = result[i].gran_sizek; | ||
1373 | gran_size <<= 10; | ||
1374 | debug_print = 1; | ||
1375 | x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); | ||
1376 | set_var_mtrr_all(address_bits); | ||
1377 | return 1; | ||
1378 | } | ||
1379 | |||
1380 | printk(KERN_INFO "mtrr_cleanup: cannot find optimal value\n"); | ||
1381 | printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n"); | ||
1382 | |||
1383 | return 0; | ||
1384 | } | ||
1385 | #else | ||
1386 | static int __init mtrr_cleanup(unsigned address_bits) | ||
1387 | { | ||
1388 | return 0; | ||
1389 | } | ||
1390 | #endif | ||
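Usage note (hedged): assuming the command-line knobs from this series carry the names of the variables they set (enable_mtrr_cleanup, mtrr_gran_size=, mtrr_chunk_size=, and mtrr_spare_reg_nr= for nr_mtrr_spare_reg), a user who sees only *BAD* rows in the printed table can pin a specific candidate, e.g. booting with:

    enable_mtrr_cleanup mtrr_gran_size=64M mtrr_chunk_size=256M mtrr_spare_reg_nr=1

If that pair loses no coverage, mtrr_cleanup() applies it directly via set_var_mtrr_all() and skips the exhaustive search; otherwise it warns and falls back to the search loop above.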
1391 | |||
1392 | static int __initdata changed_by_mtrr_cleanup; | ||
1393 | |||
612 | static int disable_mtrr_trim; | 1394 | static int disable_mtrr_trim; |
613 | 1395 | ||
614 | static int __init disable_mtrr_trim_setup(char *str) | 1396 | static int __init disable_mtrr_trim_setup(char *str) |
@@ -648,6 +1430,19 @@ int __init amd_special_default_mtrr(void) | |||
648 | return 0; | 1430 | return 0; |
649 | } | 1431 | } |
650 | 1432 | ||
1433 | static u64 __init real_trim_memory(unsigned long start_pfn, | ||
1434 | unsigned long limit_pfn) | ||
1435 | { | ||
1436 | u64 trim_start, trim_size; | ||
1437 | trim_start = start_pfn; | ||
1438 | trim_start <<= PAGE_SHIFT; | ||
1439 | trim_size = limit_pfn; | ||
1440 | trim_size <<= PAGE_SHIFT; | ||
1441 | trim_size -= trim_start; | ||
1442 | |||
1443 | return update_memory_range(trim_start, trim_size, E820_RAM, | ||
1444 | E820_RESERVED); | ||
1445 | } | ||
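A quick worked example of the pfn-to-byte conversion above, assuming 4K pages (PAGE_SHIFT = 12): real_trim_memory(0x100000, 0x140000) computes trim_start = 0x100000 << 12 = 4G and trim_size = 5G - 4G = 1G, so it retypes the stretch between 4G and 5G from E820_RAM to E820_RESERVED and returns the number of bytes actually converted.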
651 | /** | 1446 | /** |
652 | * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs | 1447 | * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs |
653 | * @end_pfn: ending page frame number | 1448 | * @end_pfn: ending page frame number |
@@ -663,8 +1458,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
663 | { | 1458 | { |
664 | unsigned long i, base, size, highest_pfn = 0, def, dummy; | 1459 | unsigned long i, base, size, highest_pfn = 0, def, dummy; |
665 | mtrr_type type; | 1460 | mtrr_type type; |
666 | u64 trim_start, trim_size; | 1461 | int nr_range; |
1462 | u64 total_trim_size; | ||
667 | 1463 | ||
1464 | /* one extra slot counts zero-sized (unused) entries */ | ||
1465 | int num[MTRR_NUM_TYPES + 1]; | ||
668 | /* | 1466 | /* |
669 | * Make sure we only trim uncachable memory on machines that | 1467 | * Make sure we only trim uncachable memory on machines that |
670 | * support the Intel MTRR architecture: | 1468 | * support the Intel MTRR architecture: |
@@ -676,14 +1474,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
676 | if (def != MTRR_TYPE_UNCACHABLE) | 1474 | if (def != MTRR_TYPE_UNCACHABLE) |
677 | return 0; | 1475 | return 0; |
678 | 1476 | ||
679 | if (amd_special_default_mtrr()) | 1477 | /* get it and store it aside */ |
680 | return 0; | 1478 | memset(range_state, 0, sizeof(range_state)); |
1479 | for (i = 0; i < num_var_ranges; i++) { | ||
1480 | mtrr_if->get(i, &base, &size, &type); | ||
1481 | range_state[i].base_pfn = base; | ||
1482 | range_state[i].size_pfn = size; | ||
1483 | range_state[i].type = type; | ||
1484 | } | ||
681 | 1485 | ||
682 | /* Find highest cached pfn */ | 1486 | /* Find highest cached pfn */ |
683 | for (i = 0; i < num_var_ranges; i++) { | 1487 | for (i = 0; i < num_var_ranges; i++) { |
684 | mtrr_if->get(i, &base, &size, &type); | 1488 | type = range_state[i].type; |
685 | if (type != MTRR_TYPE_WRBACK) | 1489 | if (type != MTRR_TYPE_WRBACK) |
686 | continue; | 1490 | continue; |
1491 | base = range_state[i].base_pfn; | ||
1492 | size = range_state[i].size_pfn; | ||
687 | if (highest_pfn < base + size) | 1493 | if (highest_pfn < base + size) |
688 | highest_pfn = base + size; | 1494 | highest_pfn = base + size; |
689 | } | 1495 | } |
@@ -698,22 +1504,65 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
698 | return 0; | 1504 | return 0; |
699 | } | 1505 | } |
700 | 1506 | ||
701 | if (highest_pfn < end_pfn) { | 1507 | /* count the entries of each type */ | ||
1508 | memset(num, 0, sizeof(num)); | ||
1509 | for (i = 0; i < num_var_ranges; i++) { | ||
1510 | type = range_state[i].type; | ||
1511 | if (type >= MTRR_NUM_TYPES) | ||
1512 | continue; | ||
1513 | size = range_state[i].size_pfn; | ||
1514 | if (!size) | ||
1515 | type = MTRR_NUM_TYPES; | ||
1516 | num[type]++; | ||
1517 | } | ||
1518 | |||
1519 | /* no entry for WB? */ | ||
1520 | if (!num[MTRR_TYPE_WRBACK]) | ||
1521 | return 0; | ||
1522 | |||
1523 | /* check if we only had WB and UC */ | ||
1524 | if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != | ||
1525 | num_var_ranges - num[MTRR_NUM_TYPES]) | ||
1526 | return 0; | ||
1527 | |||
1528 | memset(range, 0, sizeof(range)); | ||
1529 | nr_range = 0; | ||
1530 | if (mtrr_tom2) { | ||
1531 | range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); | ||
1532 | range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; | ||
1533 | if (highest_pfn < range[nr_range].end + 1) | ||
1534 | highest_pfn = range[nr_range].end + 1; | ||
1535 | nr_range++; | ||
1536 | } | ||
1537 | nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); | ||
1538 | |||
1539 | total_trim_size = 0; | ||
1540 | /* check the head */ | ||
1541 | if (range[0].start) | ||
1542 | total_trim_size += real_trim_memory(0, range[0].start); | ||
1543 | /* check the holes */ | ||
1544 | for (i = 0; i < nr_range - 1; i++) { | ||
1545 | if (range[i].end + 1 < range[i+1].start) | ||
1546 | total_trim_size += real_trim_memory(range[i].end + 1, | ||
1547 | range[i+1].start); | ||
1548 | } | ||
1549 | /* check the top */ | ||
1550 | i = nr_range - 1; | ||
1551 | if (range[i].end + 1 < end_pfn) | ||
1552 | total_trim_size += real_trim_memory(range[i].end + 1, | ||
1553 | end_pfn); | ||
1554 | |||
1555 | if (total_trim_size) { | ||
702 | printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" | 1556 | printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" |
703 | " all of memory, losing %luMB of RAM.\n", | 1557 | " all of memory, losing %lluMB of RAM.\n", |
704 | (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT)); | 1558 | total_trim_size >> 20); |
705 | 1559 | ||
706 | WARN_ON(1); | 1560 | if (!changed_by_mtrr_cleanup) |
1561 | WARN_ON(1); | ||
707 | 1562 | ||
708 | printk(KERN_INFO "update e820 for mtrr\n"); | 1563 | printk(KERN_INFO "update e820 for mtrr\n"); |
709 | trim_start = highest_pfn; | ||
710 | trim_start <<= PAGE_SHIFT; | ||
711 | trim_size = end_pfn; | ||
712 | trim_size <<= PAGE_SHIFT; | ||
713 | trim_size -= trim_start; | ||
714 | update_memory_range(trim_start, trim_size, E820_RAM, | ||
715 | E820_RESERVED); | ||
716 | update_e820(); | 1564 | update_e820(); |
1565 | |||
717 | return 1; | 1566 | return 1; |
718 | } | 1567 | } |
719 | 1568 | ||
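To make the head/hole/top walk above concrete (numbers invented, 4K pages): if the WB-covered ranges come back as pfns [0, 0x80000) and [0x100000, 0x180000) and end_pfn is 0x200000, the head check finds nothing (range[0].start is 0), the hole check trims pfns 0x80000..0xfffff (2G-4G), and the top check trims pfns 0x180000..0x1fffff (6G-8G). total_trim_size ends up at 4G and the BIOS-bug warning below fires.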
@@ -729,18 +1578,21 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
729 | */ | 1578 | */ |
730 | void __init mtrr_bp_init(void) | 1579 | void __init mtrr_bp_init(void) |
731 | { | 1580 | { |
1581 | u32 phys_addr; | ||
732 | init_ifs(); | 1582 | init_ifs(); |
733 | 1583 | ||
1584 | phys_addr = 32; | ||
1585 | |||
734 | if (cpu_has_mtrr) { | 1586 | if (cpu_has_mtrr) { |
735 | mtrr_if = &generic_mtrr_ops; | 1587 | mtrr_if = &generic_mtrr_ops; |
736 | size_or_mask = 0xff000000; /* 36 bits */ | 1588 | size_or_mask = 0xff000000; /* 36 bits */ |
737 | size_and_mask = 0x00f00000; | 1589 | size_and_mask = 0x00f00000; |
1590 | phys_addr = 36; | ||
738 | 1591 | ||
739 | /* This is an AMD specific MSR, but we assume(hope?) that | 1592 | /* This is an AMD specific MSR, but we assume(hope?) that |
740 | Intel will implement it to when they extend the address | 1593 | Intel will implement it to when they extend the address |
741 | bus of the Xeon. */ | 1594 | bus of the Xeon. */ |
742 | if (cpuid_eax(0x80000000) >= 0x80000008) { | 1595 | if (cpuid_eax(0x80000000) >= 0x80000008) { |
743 | u32 phys_addr; | ||
744 | phys_addr = cpuid_eax(0x80000008) & 0xff; | 1596 | phys_addr = cpuid_eax(0x80000008) & 0xff; |
745 | /* CPUID workaround for Intel 0F33/0F34 CPU */ | 1597 | /* CPUID workaround for Intel 0F33/0F34 CPU */ |
746 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && | 1598 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && |
@@ -758,6 +1610,7 @@ void __init mtrr_bp_init(void) | |||
758 | don't support PAE */ | 1610 | don't support PAE */ |
759 | size_or_mask = 0xfff00000; /* 32 bits */ | 1611 | size_or_mask = 0xfff00000; /* 32 bits */ |
760 | size_and_mask = 0; | 1612 | size_and_mask = 0; |
1613 | phys_addr = 32; | ||
761 | } | 1614 | } |
762 | } else { | 1615 | } else { |
763 | switch (boot_cpu_data.x86_vendor) { | 1616 | switch (boot_cpu_data.x86_vendor) { |
@@ -791,8 +1644,15 @@ void __init mtrr_bp_init(void) | |||
791 | if (mtrr_if) { | 1644 | if (mtrr_if) { |
792 | set_num_var_ranges(); | 1645 | set_num_var_ranges(); |
793 | init_table(); | 1646 | init_table(); |
794 | if (use_intel()) | 1647 | if (use_intel()) { |
795 | get_mtrr_state(); | 1648 | get_mtrr_state(); |
1649 | |||
1650 | if (mtrr_cleanup(phys_addr)) { | ||
1651 | changed_by_mtrr_cleanup = 1; | ||
1652 | mtrr_if->set_all(); | ||
1653 | } | ||
1654 | |||
1655 | } | ||
796 | } | 1656 | } |
797 | } | 1657 | } |
798 | 1658 | ||
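phys_addr here is simply the address width later handed to mtrr_cleanup() as address_bits: it starts at a conservative 32, becomes 36 for any generic-MTRR CPU, is replaced by the CPUID leaf 0x80000008 value when that leaf exists (a hypothetical leaf reporting 0x24 in bits 7:0 would give 36 bits), and drops back to 32 in the non-PAE branch.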
@@ -829,9 +1689,10 @@ static int __init mtrr_init_finialize(void) | |||
829 | { | 1689 | { |
830 | if (!mtrr_if) | 1690 | if (!mtrr_if) |
831 | return 0; | 1691 | return 0; |
832 | if (use_intel()) | 1692 | if (use_intel()) { |
833 | mtrr_state_warn(); | 1693 | if (!changed_by_mtrr_cleanup) |
834 | else { | 1694 | mtrr_state_warn(); |
1695 | } else { | ||
835 | /* The CPUs haven't MTRR and seem to not support SMP. They have | 1696 | /* The CPUs haven't MTRR and seem to not support SMP. They have |
836 | * specific drivers, we use a tricky method to support | 1697 | * specific drivers, we use a tricky method to support |
837 | * suspend/resume for them. | 1698 | * suspend/resume for them. |
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 2cc77eb6fea3..2dc4ec656b23 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h | |||
@@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_context *ctxt); | |||
81 | void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); | 81 | void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); |
82 | void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); | 82 | void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); |
83 | 83 | ||
84 | void fill_mtrr_var_range(unsigned int index, | ||
85 | u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); | ||
84 | void get_mtrr_state(void); | 86 | void get_mtrr_state(void); |
85 | 87 | ||
86 | extern void set_mtrr_ops(struct mtrr_ops * ops); | 88 | extern void set_mtrr_ops(struct mtrr_ops * ops); |
@@ -92,6 +94,7 @@ extern struct mtrr_ops * mtrr_if; | |||
92 | #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) | 94 | #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) |
93 | 95 | ||
94 | extern unsigned int num_var_ranges; | 96 | extern unsigned int num_var_ranges; |
97 | extern u64 mtrr_tom2; | ||
95 | 98 | ||
96 | void mtrr_state_warn(void); | 99 | void mtrr_state_warn(void); |
97 | const char *mtrr_attrib_to_str(int x); | 100 | const char *mtrr_attrib_to_str(int x); |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c new file mode 100644 index 000000000000..a706e9057ba5 --- /dev/null +++ b/arch/x86/kernel/e820.c | |||
@@ -0,0 +1,896 @@ | |||
1 | /* | ||
2 | * Handle the memory map. | ||
3 | * The functions here do the job until bootmem takes over. | ||
4 | * | ||
5 | * Getting sanitize_e820_map() in sync with i386 version by applying change: | ||
6 | * - Provisions for empty E820 memory regions (reported by certain BIOSes). | ||
7 | * Alex Achenbach <xela@slit.de>, December 2002. | ||
8 | * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> | ||
9 | * | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/types.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <linux/bootmem.h> | ||
15 | #include <linux/ioport.h> | ||
16 | #include <linux/string.h> | ||
17 | #include <linux/kexec.h> | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/mm.h> | ||
20 | #include <linux/pfn.h> | ||
21 | #include <linux/suspend.h> | ||
22 | |||
23 | #include <asm/pgtable.h> | ||
24 | #include <asm/page.h> | ||
25 | #include <asm/e820.h> | ||
26 | #include <asm/proto.h> | ||
27 | #include <asm/setup.h> | ||
28 | #include <asm/trampoline.h> | ||
29 | |||
30 | struct e820map e820; | ||
31 | |||
32 | /* For PCI or other memory-mapped resources */ | ||
33 | unsigned long pci_mem_start = 0xaeedbabe; | ||
34 | #ifdef CONFIG_PCI | ||
35 | EXPORT_SYMBOL(pci_mem_start); | ||
36 | #endif | ||
37 | |||
38 | /* | ||
39 | * This function checks if any part of the range <start,end> is mapped | ||
40 | * with type. | ||
41 | */ | ||
42 | int | ||
43 | e820_any_mapped(u64 start, u64 end, unsigned type) | ||
44 | { | ||
45 | int i; | ||
46 | |||
47 | for (i = 0; i < e820.nr_map; i++) { | ||
48 | struct e820entry *ei = &e820.map[i]; | ||
49 | |||
50 | if (type && ei->type != type) | ||
51 | continue; | ||
52 | if (ei->addr >= end || ei->addr + ei->size <= start) | ||
53 | continue; | ||
54 | return 1; | ||
55 | } | ||
56 | return 0; | ||
57 | } | ||
58 | EXPORT_SYMBOL_GPL(e820_any_mapped); | ||
59 | |||
60 | /* | ||
61 | * This function checks if the entire range <start,end> is mapped with type. | ||
62 | * | ||
63 | * Note: this function only works correctly if the e820 table is sorted | ||
64 | * and non-overlapping, which is the case here | ||
65 | */ | ||
66 | int __init e820_all_mapped(u64 start, u64 end, unsigned type) | ||
67 | { | ||
68 | int i; | ||
69 | |||
70 | for (i = 0; i < e820.nr_map; i++) { | ||
71 | struct e820entry *ei = &e820.map[i]; | ||
72 | |||
73 | if (type && ei->type != type) | ||
74 | continue; | ||
75 | /* is the region (part) in overlap with the current region ?*/ | ||
76 | if (ei->addr >= end || ei->addr + ei->size <= start) | ||
77 | continue; | ||
78 | |||
79 | /* if the region is at the beginning of <start,end> we move | ||
80 | * start to the end of the region since it's ok until there | ||
81 | */ | ||
82 | if (ei->addr <= start) | ||
83 | start = ei->addr + ei->size; | ||
84 | /* | ||
85 | * if start is now at or beyond end, we're done, full | ||
86 | * coverage | ||
87 | */ | ||
88 | if (start >= end) | ||
89 | return 1; | ||
90 | } | ||
91 | return 0; | ||
92 | } | ||
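A hypothetical caller, just to illustrate the difference between the two predicates; the MMIO window and error codes are made up for the sketch:

	/* Sketch: validate a made-up MMIO window before using it. */
	static int __init check_window(void)
	{
		u64 start = 0xd0000000ULL, end = 0xd4000000ULL;

		/* e820_any_mapped(): nonzero if ANY byte has the type */
		if (e820_any_mapped(start, end, E820_RAM))
			return -EBUSY;	/* overlaps usable RAM somewhere */

		/* e820_all_mapped(): nonzero only if the WHOLE range does */
		if (!e820_all_mapped(start, end, E820_RESERVED))
			return -ENODEV;	/* BIOS did not reserve all of it */

		return 0;
	}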
93 | |||
94 | /* | ||
95 | * Add a memory region to the kernel e820 map. | ||
96 | */ | ||
97 | void __init add_memory_region(u64 start, u64 size, int type) | ||
98 | { | ||
99 | int x = e820.nr_map; | ||
100 | |||
101 | if (x == ARRAY_SIZE(e820.map)) { | ||
102 | printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); | ||
103 | return; | ||
104 | } | ||
105 | |||
106 | e820.map[x].addr = start; | ||
107 | e820.map[x].size = size; | ||
108 | e820.map[x].type = type; | ||
109 | e820.nr_map++; | ||
110 | } | ||
111 | |||
112 | void __init e820_print_map(char *who) | ||
113 | { | ||
114 | int i; | ||
115 | |||
116 | for (i = 0; i < e820.nr_map; i++) { | ||
117 | printk(KERN_INFO " %s: %016Lx - %016Lx ", who, | ||
118 | (unsigned long long) e820.map[i].addr, | ||
119 | (unsigned long long) | ||
120 | (e820.map[i].addr + e820.map[i].size)); | ||
121 | switch (e820.map[i].type) { | ||
122 | case E820_RAM: | ||
123 | printk(KERN_CONT "(usable)\n"); | ||
124 | break; | ||
125 | case E820_RESERVED: | ||
126 | printk(KERN_CONT "(reserved)\n"); | ||
127 | break; | ||
128 | case E820_ACPI: | ||
129 | printk(KERN_CONT "(ACPI data)\n"); | ||
130 | break; | ||
131 | case E820_NVS: | ||
132 | printk(KERN_CONT "(ACPI NVS)\n"); | ||
133 | break; | ||
134 | default: | ||
135 | printk(KERN_CONT "type %u\n", e820.map[i].type); | ||
136 | break; | ||
137 | } | ||
138 | } | ||
139 | } | ||
140 | |||
141 | /* | ||
142 | * Sanitize the BIOS e820 map. | ||
143 | * | ||
144 | * Some e820 responses include overlapping entries. The following | ||
145 | * replaces the original e820 map with a new one, removing overlaps, | ||
146 | * and resolving conflicting memory types in favor of highest | ||
147 | * numbered type. | ||
148 | * | ||
149 | * The input parameter biosmap points to an array of 'struct | ||
150 | * e820entry' which on entry has elements in the range [0, *pnr_map) | ||
151 | * valid, and which has space for up to max_nr_map entries. | ||
152 | * On return, the resulting sanitized e820 map entries will be | ||
153 | * overwritten in the same location, starting at biosmap. | ||
154 | * | ||
155 | * The integer pointed to by pnr_map must be valid on entry (the | ||
156 | * current number of valid entries located at biosmap) and will | ||
157 | * be updated on return, with the new number of valid entries | ||
158 | * (at most max_nr_map). | ||
159 | * | ||
160 | * The return value from sanitize_e820_map() is zero if it | ||
161 | * successfully 'sanitized' the map entries passed in, and is -1 | ||
162 | * if it did nothing, which can happen if either of (1) it was | ||
163 | * only passed one map entry, or (2) any of the input map entries | ||
164 | * were invalid (start + size < start, meaning that the size was | ||
165 | * so big the described memory range wrapped around through zero.) | ||
166 | * | ||
167 | * Visually we're performing the following | ||
168 | * (1,2,3,4 = memory types)... | ||
169 | * | ||
170 | * Sample memory map (w/overlaps): | ||
171 | * ____22__________________ | ||
172 | * ______________________4_ | ||
173 | * ____1111________________ | ||
174 | * _44_____________________ | ||
175 | * 11111111________________ | ||
176 | * ____________________33__ | ||
177 | * ___________44___________ | ||
178 | * __________33333_________ | ||
179 | * ______________22________ | ||
180 | * ___________________2222_ | ||
181 | * _________111111111______ | ||
182 | * _____________________11_ | ||
183 | * _________________4______ | ||
184 | * | ||
185 | * Sanitized equivalent (no overlap): | ||
186 | * 1_______________________ | ||
187 | * _44_____________________ | ||
188 | * ___1____________________ | ||
189 | * ____22__________________ | ||
190 | * ______11________________ | ||
191 | * _________1______________ | ||
192 | * __________3_____________ | ||
193 | * ___________44___________ | ||
194 | * _____________33_________ | ||
195 | * _______________2________ | ||
196 | * ________________1_______ | ||
197 | * _________________4______ | ||
198 | * ___________________2____ | ||
199 | * ____________________33__ | ||
200 | * ______________________4_ | ||
201 | */ | ||
202 | |||
203 | int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, | ||
204 | int *pnr_map) | ||
205 | { | ||
206 | struct change_member { | ||
207 | struct e820entry *pbios; /* pointer to original bios entry */ | ||
208 | unsigned long long addr; /* address for this change point */ | ||
209 | }; | ||
210 | static struct change_member change_point_list[2*E820_X_MAX] __initdata; | ||
211 | static struct change_member *change_point[2*E820_X_MAX] __initdata; | ||
212 | static struct e820entry *overlap_list[E820_X_MAX] __initdata; | ||
213 | static struct e820entry new_bios[E820_X_MAX] __initdata; | ||
214 | struct change_member *change_tmp; | ||
215 | unsigned long current_type, last_type; | ||
216 | unsigned long long last_addr; | ||
217 | int chgidx, still_changing; | ||
218 | int overlap_entries; | ||
219 | int new_bios_entry; | ||
220 | int old_nr, new_nr, chg_nr; | ||
221 | int i; | ||
222 | |||
223 | /* if there's only one memory region, don't bother */ | ||
224 | if (*pnr_map < 2) | ||
225 | return -1; | ||
226 | |||
227 | old_nr = *pnr_map; | ||
228 | BUG_ON(old_nr > max_nr_map); | ||
229 | |||
230 | /* bail out if we find any unreasonable addresses in bios map */ | ||
231 | for (i = 0; i < old_nr; i++) | ||
232 | if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) | ||
233 | return -1; | ||
234 | |||
235 | /* create pointers for initial change-point information (for sorting) */ | ||
236 | for (i = 0; i < 2 * old_nr; i++) | ||
237 | change_point[i] = &change_point_list[i]; | ||
238 | |||
239 | /* record all known change-points (starting and ending addresses), | ||
240 | omitting those that are for empty memory regions */ | ||
241 | chgidx = 0; | ||
242 | for (i = 0; i < old_nr; i++) { | ||
243 | if (biosmap[i].size != 0) { | ||
244 | change_point[chgidx]->addr = biosmap[i].addr; | ||
245 | change_point[chgidx++]->pbios = &biosmap[i]; | ||
246 | change_point[chgidx]->addr = biosmap[i].addr + | ||
247 | biosmap[i].size; | ||
248 | change_point[chgidx++]->pbios = &biosmap[i]; | ||
249 | } | ||
250 | } | ||
251 | chg_nr = chgidx; | ||
252 | |||
253 | /* sort change-point list by memory addresses (low -> high) */ | ||
254 | still_changing = 1; | ||
255 | while (still_changing) { | ||
256 | still_changing = 0; | ||
257 | for (i = 1; i < chg_nr; i++) { | ||
258 | unsigned long long curaddr, lastaddr; | ||
259 | unsigned long long curpbaddr, lastpbaddr; | ||
260 | |||
261 | curaddr = change_point[i]->addr; | ||
262 | lastaddr = change_point[i - 1]->addr; | ||
263 | curpbaddr = change_point[i]->pbios->addr; | ||
264 | lastpbaddr = change_point[i - 1]->pbios->addr; | ||
265 | |||
266 | /* | ||
267 | * swap entries, when: | ||
268 | * | ||
269 | * curaddr > lastaddr or | ||
270 | * curaddr == lastaddr and curaddr == curpbaddr and | ||
271 | * lastaddr != lastpbaddr | ||
272 | */ | ||
273 | if (curaddr < lastaddr || | ||
274 | (curaddr == lastaddr && curaddr == curpbaddr && | ||
275 | lastaddr != lastpbaddr)) { | ||
276 | change_tmp = change_point[i]; | ||
277 | change_point[i] = change_point[i-1]; | ||
278 | change_point[i-1] = change_tmp; | ||
279 | still_changing = 1; | ||
280 | } | ||
281 | } | ||
282 | } | ||
283 | |||
284 | /* create a new bios memory map, removing overlaps */ | ||
285 | overlap_entries = 0; /* number of entries in the overlap table */ | ||
286 | new_bios_entry = 0; /* index for creating new bios map entries */ | ||
287 | last_type = 0; /* start with undefined memory type */ | ||
288 | last_addr = 0; /* start with 0 as last starting address */ | ||
289 | |||
290 | /* loop through change-points, determining the effect on the new bios map */ | ||
291 | for (chgidx = 0; chgidx < chg_nr; chgidx++) { | ||
292 | /* keep track of all overlapping bios entries */ | ||
293 | if (change_point[chgidx]->addr == | ||
294 | change_point[chgidx]->pbios->addr) { | ||
295 | /* | ||
296 | * add map entry to overlap list (> 1 entry | ||
297 | * implies an overlap) | ||
298 | */ | ||
299 | overlap_list[overlap_entries++] = | ||
300 | change_point[chgidx]->pbios; | ||
301 | } else { | ||
302 | /* | ||
303 | * remove entry from list (order independent, | ||
304 | * so swap with last) | ||
305 | */ | ||
306 | for (i = 0; i < overlap_entries; i++) { | ||
307 | if (overlap_list[i] == | ||
308 | change_point[chgidx]->pbios) | ||
309 | overlap_list[i] = | ||
310 | overlap_list[overlap_entries-1]; | ||
311 | } | ||
312 | overlap_entries--; | ||
313 | } | ||
314 | /* | ||
315 | * if there are overlapping entries, decide which | ||
316 | * "type" to use (larger value takes precedence -- | ||
317 | * 1=usable, 2,3,4,4+=unusable) | ||
318 | */ | ||
319 | current_type = 0; | ||
320 | for (i = 0; i < overlap_entries; i++) | ||
321 | if (overlap_list[i]->type > current_type) | ||
322 | current_type = overlap_list[i]->type; | ||
323 | /* | ||
324 | * continue building up new bios map based on this | ||
325 | * information | ||
326 | */ | ||
327 | if (current_type != last_type) { | ||
328 | if (last_type != 0) { | ||
329 | new_bios[new_bios_entry].size = | ||
330 | change_point[chgidx]->addr - last_addr; | ||
331 | /* | ||
332 | * move forward only if the new size | ||
333 | * was non-zero | ||
334 | */ | ||
335 | if (new_bios[new_bios_entry].size != 0) | ||
336 | /* | ||
337 | * no more space left for new | ||
338 | * bios entries ? | ||
339 | */ | ||
340 | if (++new_bios_entry >= max_nr_map) | ||
341 | break; | ||
342 | } | ||
343 | if (current_type != 0) { | ||
344 | new_bios[new_bios_entry].addr = | ||
345 | change_point[chgidx]->addr; | ||
346 | new_bios[new_bios_entry].type = current_type; | ||
347 | last_addr = change_point[chgidx]->addr; | ||
348 | } | ||
349 | last_type = current_type; | ||
350 | } | ||
351 | } | ||
352 | /* retain count for new bios entries */ | ||
353 | new_nr = new_bios_entry; | ||
354 | |||
355 | /* copy new bios mapping into original location */ | ||
356 | memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); | ||
357 | *pnr_map = new_nr; | ||
358 | |||
359 | return 0; | ||
360 | } | ||
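As a worked example of the precedence rule, here is a hypothetical __init-time harness (entries invented); the expected result follows from "larger type value wins":

	static void __init sanitize_demo(void)
	{
		struct e820entry map[8] = {
			{ .addr = 0x000000, .size = 0x800000, .type = E820_RAM },      /* RAM 0-8M */
			{ .addr = 0x400000, .size = 0x200000, .type = E820_RESERVED }, /* 4-6M */
		};
		int nr = 2;

		sanitize_e820_map(map, 8, &nr);
		/*
		 * E820_RESERVED (2) outranks E820_RAM (1) in the overlap,
		 * so nr is now 3: RAM 0-4M, RESERVED 4-6M, RAM 6-8M.
		 */
	}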
361 | |||
362 | /* | ||
363 | * Copy the BIOS e820 map into a safe place. | ||
364 | * | ||
365 | * Sanity-check it while we're at it.. | ||
366 | * | ||
367 | * If we're lucky and live on a modern system, the setup code | ||
368 | * will have given us a memory map that we can use to properly | ||
369 | * set up memory. If we aren't, we'll fake a memory map. | ||
370 | */ | ||
371 | int __init copy_e820_map(struct e820entry *biosmap, int nr_map) | ||
372 | { | ||
373 | /* Only one memory region (or negative)? Ignore it */ | ||
374 | if (nr_map < 2) | ||
375 | return -1; | ||
376 | |||
377 | do { | ||
378 | u64 start = biosmap->addr; | ||
379 | u64 size = biosmap->size; | ||
380 | u64 end = start + size; | ||
381 | u32 type = biosmap->type; | ||
382 | |||
383 | /* Overflow in 64 bits? Ignore the memory map. */ | ||
384 | if (start > end) | ||
385 | return -1; | ||
386 | |||
387 | add_memory_region(start, size, type); | ||
388 | } while (biosmap++, --nr_map); | ||
389 | return 0; | ||
390 | } | ||
391 | |||
392 | u64 __init update_memory_range(u64 start, u64 size, unsigned old_type, | ||
393 | unsigned new_type) | ||
394 | { | ||
395 | int i; | ||
396 | u64 real_updated_size = 0; | ||
397 | |||
398 | BUG_ON(old_type == new_type); | ||
399 | |||
400 | for (i = 0; i < e820.nr_map; i++) { | ||
401 | struct e820entry *ei = &e820.map[i]; | ||
402 | u64 final_start, final_end; | ||
403 | if (ei->type != old_type) | ||
404 | continue; | ||
405 | /* totally covered? */ | ||
406 | if (ei->addr >= start && | ||
407 | (ei->addr + ei->size) <= (start + size)) { | ||
408 | ei->type = new_type; | ||
409 | real_updated_size += ei->size; | ||
410 | continue; | ||
411 | } | ||
412 | /* partially covered */ | ||
413 | final_start = max(start, ei->addr); | ||
414 | final_end = min(start + size, ei->addr + ei->size); | ||
415 | if (final_start >= final_end) | ||
416 | continue; | ||
417 | add_memory_region(final_start, final_end - final_start, | ||
418 | new_type); | ||
419 | real_updated_size += final_end - final_start; | ||
420 | } | ||
421 | return real_updated_size; | ||
422 | } | ||
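Note the partial-coverage case above does not shrink the original entry: it appends a new, overlapping entry of new_type for the covered slice and only reports its size. The map stays consistent because callers follow up with update_e820(), whose sanitize pass resolves the overlap by type precedence; for the RAM-to-RESERVED conversions used here that always favors the new entry, since E820_RESERVED > E820_RAM.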
423 | |||
424 | void __init update_e820(void) | ||
425 | { | ||
426 | int nr_map; | ||
427 | |||
428 | nr_map = e820.nr_map; | ||
429 | if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) | ||
430 | return; | ||
431 | e820.nr_map = nr_map; | ||
432 | printk(KERN_INFO "modified physical RAM map:\n"); | ||
433 | e820_print_map("modified"); | ||
434 | } | ||
435 | |||
436 | /* | ||
437 | * Search for the biggest gap in the low 32 bits of the e820 | ||
438 | * memory space. We pass this space to PCI to assign MMIO resources | ||
439 | * for hotplug or unconfigured devices. | ||
440 | * Hopefully the BIOS left enough space for this. | ||
441 | */ | ||
442 | __init void e820_setup_gap(void) | ||
443 | { | ||
444 | unsigned long gapstart, gapsize, round; | ||
445 | unsigned long long last; | ||
446 | int i; | ||
447 | int found = 0; | ||
448 | |||
449 | last = 0x100000000ull; | ||
450 | gapstart = 0x10000000; | ||
451 | gapsize = 0x400000; | ||
452 | i = e820.nr_map; | ||
453 | while (--i >= 0) { | ||
454 | unsigned long long start = e820.map[i].addr; | ||
455 | unsigned long long end = start + e820.map[i].size; | ||
456 | |||
457 | /* | ||
458 | * Since "last" is at most 4GB, we know we'll | ||
459 | * fit in 32 bits if this condition is true | ||
460 | */ | ||
461 | if (last > end) { | ||
462 | unsigned long gap = last - end; | ||
463 | |||
464 | if (gap > gapsize) { | ||
465 | gapsize = gap; | ||
466 | gapstart = end; | ||
467 | found = 1; | ||
468 | } | ||
469 | } | ||
470 | if (start < last) | ||
471 | last = start; | ||
472 | } | ||
473 | |||
474 | #ifdef CONFIG_X86_64 | ||
475 | if (!found) { | ||
476 | gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; | ||
477 | printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " | ||
478 | "address range\n" | ||
479 | KERN_ERR "PCI: Unassigned devices with 32bit resource " | ||
480 | "registers may break!\n"); | ||
481 | } | ||
482 | #endif | ||
483 | |||
484 | /* | ||
485 | * See how much we want to round up: start off with | ||
486 | * rounding to the next 1MB area. | ||
487 | */ | ||
488 | round = 0x100000; | ||
489 | while ((gapsize >> 4) > round) | ||
490 | round += round; | ||
491 | /* Fun with two's complement */ | ||
492 | pci_mem_start = (gapstart + round) & -round; | ||
493 | |||
494 | printk(KERN_INFO | ||
495 | "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", | ||
496 | pci_mem_start, gapstart, gapsize); | ||
497 | } | ||
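Worked example of the rounding: with a 256MB gap, gapsize >> 4 is 16MB, so round doubles 1M -> 2M -> ... -> 16M and then stops (16M > 16M is false). pci_mem_start = (gapstart + 16M) & -16M then lands on the next 16MB boundary above gapstart, keeping the alignment slack inside the gap.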
498 | |||
499 | #if defined(CONFIG_X86_64) || \ | ||
500 | (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) | ||
501 | /** | ||
502 | * Find the ranges of physical addresses that do not correspond to | ||
503 | * e820 RAM areas and mark the corresponding pages as nosave for | ||
504 | * hibernation (32 bit) or software suspend and suspend to RAM (64 bit). | ||
505 | * | ||
506 | * This function requires the e820 map to be sorted and without any | ||
507 | * overlapping entries and assumes the first e820 area to be RAM. | ||
508 | */ | ||
509 | void __init e820_mark_nosave_regions(unsigned long limit_pfn) | ||
510 | { | ||
511 | int i; | ||
512 | unsigned long pfn; | ||
513 | |||
514 | pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); | ||
515 | for (i = 1; i < e820.nr_map; i++) { | ||
516 | struct e820entry *ei = &e820.map[i]; | ||
517 | |||
518 | if (pfn < PFN_UP(ei->addr)) | ||
519 | register_nosave_region(pfn, PFN_UP(ei->addr)); | ||
520 | |||
521 | pfn = PFN_DOWN(ei->addr + ei->size); | ||
522 | if (ei->type != E820_RAM) | ||
523 | register_nosave_region(PFN_UP(ei->addr), pfn); | ||
524 | |||
525 | if (pfn >= limit_pfn) | ||
526 | break; | ||
527 | } | ||
528 | } | ||
529 | #endif | ||
530 | |||
531 | /* | ||
532 | * Early reserved memory areas. | ||
533 | */ | ||
534 | #define MAX_EARLY_RES 20 | ||
535 | |||
536 | struct early_res { | ||
537 | u64 start, end; | ||
538 | char name[16]; | ||
539 | }; | ||
540 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { | ||
541 | { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ | ||
542 | #if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE) | ||
543 | { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, | ||
544 | #endif | ||
545 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) | ||
546 | /* | ||
547 | * But first pinch a few for the stack/trampoline stuff | ||
548 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
549 | * trampoline before removing it. (see the GDT stuff) | ||
550 | */ | ||
551 | { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" }, | ||
552 | /* | ||
553 | * Has to be in very low memory so we can execute | ||
554 | * real-mode AP code. | ||
555 | */ | ||
556 | { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" }, | ||
557 | #endif | ||
558 | {} | ||
559 | }; | ||
560 | |||
561 | static int __init find_overlapped_early(u64 start, u64 end) | ||
562 | { | ||
563 | int i; | ||
564 | struct early_res *r; | ||
565 | |||
566 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
567 | r = &early_res[i]; | ||
568 | if (end > r->start && start < r->end) | ||
569 | break; | ||
570 | } | ||
571 | |||
572 | return i; | ||
573 | } | ||
574 | |||
575 | void __init reserve_early(u64 start, u64 end, char *name) | ||
576 | { | ||
577 | int i; | ||
578 | struct early_res *r; | ||
579 | |||
580 | i = find_overlapped_early(start, end); | ||
581 | if (i >= MAX_EARLY_RES) | ||
582 | panic("Too many early reservations"); | ||
583 | r = &early_res[i]; | ||
584 | if (r->end) | ||
585 | panic("Overlapping early reservations " | ||
586 | "%llx-%llx %s to %llx-%llx %s\n", | ||
587 | start, end - 1, name?name:"", r->start, | ||
588 | r->end - 1, r->name); | ||
589 | r->start = start; | ||
590 | r->end = end; | ||
591 | if (name) | ||
592 | strncpy(r->name, name, sizeof(r->name) - 1); | ||
593 | } | ||
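Usage sketch; the range and label below are invented, the interval is [start, end), and free_early() further down insists on an exact match:

	/* keep early allocators away from a blob at 16K..24K */
	reserve_early(0x4000, 0x6000, "demo blob");

	/* ... and once it is no longer needed: */
	free_early(0x4000, 0x6000);	/* must match the reservation exactly, or panic */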
594 | |||
595 | void __init free_early(u64 start, u64 end) | ||
596 | { | ||
597 | struct early_res *r; | ||
598 | int i, j; | ||
599 | |||
600 | i = find_overlapped_early(start, end); | ||
601 | r = &early_res[i]; | ||
602 | if (i >= MAX_EARLY_RES || r->end != end || r->start != start) | ||
603 | panic("free_early on not reserved area: %llx-%llx!", | ||
604 | start, end - 1); | ||
605 | |||
606 | for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) | ||
607 | ; | ||
608 | |||
609 | memmove(&early_res[i], &early_res[i + 1], | ||
610 | (j - 1 - i) * sizeof(struct early_res)); | ||
611 | |||
612 | early_res[j - 1].end = 0; | ||
613 | } | ||
614 | |||
615 | int __init page_is_reserved_early(unsigned long pagenr) | ||
616 | { | ||
617 | u64 start = (u64)pagenr << PAGE_SHIFT; | ||
618 | int i; | ||
619 | struct early_res *r; | ||
620 | |||
621 | i = find_overlapped_early(start, start + PAGE_SIZE); | ||
622 | r = &early_res[i]; | ||
623 | return (i < MAX_EARLY_RES && r->end); | ||
624 | } | ||
625 | |||
626 | void __init early_res_to_bootmem(u64 start, u64 end) | ||
627 | { | ||
628 | int i; | ||
629 | u64 final_start, final_end; | ||
630 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
631 | struct early_res *r = &early_res[i]; | ||
632 | final_start = max(start, r->start); | ||
633 | final_end = min(end, r->end); | ||
634 | if (final_start >= final_end) | ||
635 | continue; | ||
636 | printk(KERN_INFO " early res: %d [%llx-%llx] %s\n", i, | ||
637 | final_start, final_end - 1, r->name); | ||
638 | #ifdef CONFIG_X86_64 | ||
639 | reserve_bootmem_generic(final_start, final_end - final_start); | ||
640 | #else | ||
641 | reserve_bootmem(final_start, final_end - final_start, | ||
642 | BOOTMEM_DEFAULT); | ||
643 | #endif | ||
644 | } | ||
645 | } | ||
646 | |||
647 | /* Check for already reserved areas */ | ||
648 | static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) | ||
649 | { | ||
650 | int i; | ||
651 | u64 addr = *addrp; | ||
652 | int changed = 0; | ||
653 | struct early_res *r; | ||
654 | again: | ||
655 | i = find_overlapped_early(addr, addr + size); | ||
656 | r = &early_res[i]; | ||
657 | if (i < MAX_EARLY_RES && r->end) { | ||
658 | *addrp = addr = round_up(r->end, align); | ||
659 | changed = 1; | ||
660 | goto again; | ||
661 | } | ||
662 | return changed; | ||
663 | } | ||
664 | |||
665 | /* Check for already reserved areas */ | ||
666 | static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) | ||
667 | { | ||
668 | int i; | ||
669 | u64 addr = *addrp, last; | ||
670 | u64 size = *sizep; | ||
671 | int changed = 0; | ||
672 | again: | ||
673 | last = addr + size; | ||
674 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
675 | struct early_res *r = &early_res[i]; | ||
676 | if (last > r->start && addr < r->start) { | ||
677 | size = r->start - addr; | ||
678 | changed = 1; | ||
679 | goto again; | ||
680 | } | ||
681 | if (last > r->end && addr < r->end) { | ||
682 | addr = round_up(r->end, align); | ||
683 | size = last - addr; | ||
684 | changed = 1; | ||
685 | goto again; | ||
686 | } | ||
687 | if (last <= r->end && addr >= r->start) { | ||
688 | (*sizep)++; | ||
689 | return 0; | ||
690 | } | ||
691 | } | ||
692 | if (changed) { | ||
693 | *addrp = addr; | ||
694 | *sizep = size; | ||
695 | } | ||
696 | return changed; | ||
697 | } | ||
698 | |||
699 | /* | ||
700 | * Find a free area with specified alignment in a specific range. | ||
701 | */ | ||
702 | u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) | ||
703 | { | ||
704 | int i; | ||
705 | |||
706 | for (i = 0; i < e820.nr_map; i++) { | ||
707 | struct e820entry *ei = &e820.map[i]; | ||
708 | u64 addr, last; | ||
709 | u64 ei_last; | ||
710 | |||
711 | if (ei->type != E820_RAM) | ||
712 | continue; | ||
713 | addr = round_up(ei->addr, align); | ||
714 | ei_last = ei->addr + ei->size; | ||
715 | if (addr < start) | ||
716 | addr = round_up(start, align); | ||
717 | if (addr >= ei_last) | ||
718 | continue; | ||
719 | while (bad_addr(&addr, size, align) && addr+size <= ei_last) | ||
720 | ; | ||
721 | last = addr + size; | ||
722 | if (last > ei_last) | ||
723 | continue; | ||
724 | if (last > end) | ||
725 | continue; | ||
726 | return addr; | ||
727 | } | ||
728 | return -1ULL; | ||
729 | } | ||
730 | |||
731 | /* | ||
732 | * Find next free range after *start | ||
733 | */ | ||
734 | u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) | ||
735 | { | ||
736 | int i; | ||
737 | |||
738 | for (i = 0; i < e820.nr_map; i++) { | ||
739 | struct e820entry *ei = &e820.map[i]; | ||
740 | u64 addr, last; | ||
741 | u64 ei_last; | ||
742 | |||
743 | if (ei->type != E820_RAM) | ||
744 | continue; | ||
745 | addr = round_up(ei->addr, align); | ||
746 | ei_last = ei->addr + ei->size; | ||
747 | if (addr < start) | ||
748 | addr = round_up(start, align); | ||
749 | if (addr >= ei_last) | ||
750 | continue; | ||
751 | *sizep = ei_last - addr; | ||
752 | while (bad_addr_size(&addr, sizep, align) && | ||
753 | addr + *sizep <= ei_last) | ||
754 | ; | ||
755 | last = addr + *sizep; | ||
756 | if (last > ei_last) | ||
757 | continue; | ||
758 | return addr; | ||
759 | } | ||
760 | return -1UL; | ||
761 | |||
762 | } | ||
763 | |||
764 | /* | ||
765 | * pre-allocate 4K and reserve it in e820 | ||
766 | */ | ||
767 | u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) | ||
768 | { | ||
769 | u64 size = 0; | ||
770 | u64 addr; | ||
771 | u64 start; | ||
772 | |||
773 | start = startt; | ||
774 | while (size < sizet) | ||
775 | start = find_e820_area_size(start, &size, align); | ||
776 | |||
777 | if (size < sizet) | ||
778 | return 0; | ||
779 | |||
780 | addr = round_down(start + size - sizet, align); | ||
781 | update_memory_range(addr, sizet, E820_RAM, E820_RESERVED); | ||
782 | printk(KERN_INFO "update e820 for early_reserve_e820\n"); | ||
783 | update_e820(); | ||
784 | |||
785 | return addr; | ||
786 | } | ||
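Unlike reserve_early(), which only records the range in the side table, early_reserve_e820() takes the memory out of the e820 map itself (E820_RAM -> E820_RESERVED) and re-sanitizes it, so the reservation survives into everything later derived from e820. The round_down() means the block is carved from the top of the free area that was found, at the requested alignment.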
787 | |||
788 | #ifdef CONFIG_X86_32 | ||
789 | # ifdef CONFIG_X86_PAE | ||
790 | # define MAX_ARCH_PFN (1ULL<<(36-PAGE_SHIFT)) | ||
791 | # else | ||
792 | # define MAX_ARCH_PFN (1ULL<<(32-PAGE_SHIFT)) | ||
793 | # endif | ||
794 | #else /* CONFIG_X86_32 */ | ||
795 | # define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT | ||
796 | #endif | ||
797 | |||
798 | /* | ||
799 | * Last pfn which the user wants to use. | ||
800 | */ | ||
801 | unsigned long __initdata end_user_pfn = MAX_ARCH_PFN; | ||
802 | |||
803 | /* | ||
804 | * Find the highest page frame number we have available | ||
805 | */ | ||
806 | unsigned long __init e820_end_of_ram(void) | ||
807 | { | ||
808 | unsigned long last_pfn; | ||
809 | unsigned long max_arch_pfn = MAX_ARCH_PFN; | ||
810 | |||
811 | last_pfn = find_max_pfn_with_active_regions(); | ||
812 | |||
813 | if (last_pfn > max_arch_pfn) | ||
814 | last_pfn = max_arch_pfn; | ||
815 | if (last_pfn > end_user_pfn) | ||
816 | last_pfn = end_user_pfn; | ||
817 | |||
818 | printk(KERN_INFO "last_pfn = %lu max_arch_pfn = %lu\n", | ||
819 | last_pfn, max_arch_pfn); | ||
820 | return last_pfn; | ||
821 | } | ||
822 | |||
823 | /* | ||
824 | * Finds an active region in the address range from start_pfn to last_pfn and | ||
825 | * returns its range in ei_startpfn and ei_endpfn for the e820 entry. | ||
826 | */ | ||
827 | int __init e820_find_active_region(const struct e820entry *ei, | ||
828 | unsigned long start_pfn, | ||
829 | unsigned long last_pfn, | ||
830 | unsigned long *ei_startpfn, | ||
831 | unsigned long *ei_endpfn) | ||
832 | { | ||
833 | u64 align = PAGE_SIZE; | ||
834 | |||
835 | *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT; | ||
836 | *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT; | ||
837 | |||
838 | /* Skip map entries smaller than a page */ | ||
839 | if (*ei_startpfn >= *ei_endpfn) | ||
840 | return 0; | ||
841 | |||
842 | /* Skip if map is outside the node */ | ||
843 | if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || | ||
844 | *ei_startpfn >= last_pfn) | ||
845 | return 0; | ||
846 | |||
847 | /* Check for overlaps */ | ||
848 | if (*ei_startpfn < start_pfn) | ||
849 | *ei_startpfn = start_pfn; | ||
850 | if (*ei_endpfn > last_pfn) | ||
851 | *ei_endpfn = last_pfn; | ||
852 | |||
853 | /* Obey end_user_pfn to save on memmap */ | ||
854 | if (*ei_startpfn >= end_user_pfn) | ||
855 | return 0; | ||
856 | if (*ei_endpfn > end_user_pfn) | ||
857 | *ei_endpfn = end_user_pfn; | ||
858 | |||
859 | return 1; | ||
860 | } | ||
861 | |||
862 | /* Walk the e820 map and register active regions within a node */ | ||
863 | void __init e820_register_active_regions(int nid, unsigned long start_pfn, | ||
864 | unsigned long last_pfn) | ||
865 | { | ||
866 | unsigned long ei_startpfn; | ||
867 | unsigned long ei_endpfn; | ||
868 | int i; | ||
869 | |||
870 | for (i = 0; i < e820.nr_map; i++) | ||
871 | if (e820_find_active_region(&e820.map[i], | ||
872 | start_pfn, last_pfn, | ||
873 | &ei_startpfn, &ei_endpfn)) | ||
874 | add_active_range(nid, ei_startpfn, ei_endpfn); | ||
875 | } | ||
876 | |||
877 | /* | ||
878 | * Find the hole size (in bytes) in the memory range. | ||
879 | * @start: starting address of the memory range to scan | ||
880 | * @end: ending address of the memory range to scan | ||
881 | */ | ||
882 | u64 __init e820_hole_size(u64 start, u64 end) | ||
883 | { | ||
884 | unsigned long start_pfn = start >> PAGE_SHIFT; | ||
885 | unsigned long last_pfn = end >> PAGE_SHIFT; | ||
886 | unsigned long ei_startpfn, ei_endpfn, ram = 0; | ||
887 | int i; | ||
888 | |||
889 | for (i = 0; i < e820.nr_map; i++) { | ||
890 | if (e820_find_active_region(&e820.map[i], | ||
891 | start_pfn, last_pfn, | ||
892 | &ei_startpfn, &ei_endpfn)) | ||
893 | ram += ei_endpfn - ei_startpfn; | ||
894 | } | ||
895 | return end - start - ((u64)ram << PAGE_SHIFT); | ||
896 | } | ||
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index ed733e7cf4e6..e8a3b968c9fa 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c | |||
@@ -9,29 +9,12 @@ | |||
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/pfn.h> | 10 | #include <linux/pfn.h> |
11 | #include <linux/uaccess.h> | 11 | #include <linux/uaccess.h> |
12 | #include <linux/suspend.h> | ||
13 | 12 | ||
14 | #include <asm/pgtable.h> | 13 | #include <asm/pgtable.h> |
15 | #include <asm/page.h> | 14 | #include <asm/page.h> |
16 | #include <asm/e820.h> | 15 | #include <asm/e820.h> |
17 | #include <asm/setup.h> | 16 | #include <asm/setup.h> |
18 | 17 | ||
19 | struct e820map e820; | ||
20 | struct change_member { | ||
21 | struct e820entry *pbios; /* pointer to original bios entry */ | ||
22 | unsigned long long addr; /* address for this change point */ | ||
23 | }; | ||
24 | static struct change_member change_point_list[2*E820MAX] __initdata; | ||
25 | static struct change_member *change_point[2*E820MAX] __initdata; | ||
26 | static struct e820entry *overlap_list[E820MAX] __initdata; | ||
27 | static struct e820entry new_bios[E820MAX] __initdata; | ||
28 | /* For PCI or other memory-mapped resources */ | ||
29 | unsigned long pci_mem_start = 0x10000000; | ||
30 | #ifdef CONFIG_PCI | ||
31 | EXPORT_SYMBOL(pci_mem_start); | ||
32 | #endif | ||
33 | extern int user_defined_memmap; | ||
34 | |||
35 | static struct resource system_rom_resource = { | 18 | static struct resource system_rom_resource = { |
36 | .name = "System ROM", | 19 | .name = "System ROM", |
37 | .start = 0xf0000, | 20 | .start = 0xf0000, |
@@ -224,398 +207,12 @@ void __init init_iomem_resources(struct resource *code_resource, | |||
224 | } | 207 | } |
225 | } | 208 | } |
226 | 209 | ||
227 | #if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) | ||
228 | /** | ||
229 | * e820_mark_nosave_regions - Find the ranges of physical addresses that do not | ||
230 | * correspond to e820 RAM areas and mark the corresponding pages as nosave for | ||
231 | * hibernation. | ||
232 | * | ||
233 | * This function requires the e820 map to be sorted and without any | ||
234 | * overlapping entries and assumes the first e820 area to be RAM. | ||
235 | */ | ||
236 | void __init e820_mark_nosave_regions(void) | ||
237 | { | ||
238 | int i; | ||
239 | unsigned long pfn; | ||
240 | |||
241 | pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); | ||
242 | for (i = 1; i < e820.nr_map; i++) { | ||
243 | struct e820entry *ei = &e820.map[i]; | ||
244 | |||
245 | if (pfn < PFN_UP(ei->addr)) | ||
246 | register_nosave_region(pfn, PFN_UP(ei->addr)); | ||
247 | |||
248 | pfn = PFN_DOWN(ei->addr + ei->size); | ||
249 | if (ei->type != E820_RAM) | ||
250 | register_nosave_region(PFN_UP(ei->addr), pfn); | ||
251 | |||
252 | if (pfn >= max_low_pfn) | ||
253 | break; | ||
254 | } | ||
255 | } | ||
256 | #endif | ||
257 | |||
258 | void __init add_memory_region(unsigned long long start, | ||
259 | unsigned long long size, int type) | ||
260 | { | ||
261 | int x; | ||
262 | |||
263 | x = e820.nr_map; | ||
264 | |||
265 | if (x == E820MAX) { | ||
266 | printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); | ||
267 | return; | ||
268 | } | ||
269 | |||
270 | e820.map[x].addr = start; | ||
271 | e820.map[x].size = size; | ||
272 | e820.map[x].type = type; | ||
273 | e820.nr_map++; | ||
274 | } /* add_memory_region */ | ||
275 | |||
276 | /* | ||
277 | * Sanitize the BIOS e820 map. | ||
278 | * | ||
279 | * Some e820 responses include overlapping entries. The following | ||
280 | * replaces the original e820 map with a new one, removing overlaps. | ||
281 | * | ||
282 | */ | ||
283 | int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | ||
284 | { | ||
285 | struct change_member *change_tmp; | ||
286 | unsigned long current_type, last_type; | ||
287 | unsigned long long last_addr; | ||
288 | int chgidx, still_changing; | ||
289 | int overlap_entries; | ||
290 | int new_bios_entry; | ||
291 | int old_nr, new_nr, chg_nr; | ||
292 | int i; | ||
293 | |||
294 | /* | ||
295 | Visually we're performing the following (1,2,3,4 = memory types)... | ||
296 | |||
297 | Sample memory map (w/overlaps): | ||
298 | ____22__________________ | ||
299 | ______________________4_ | ||
300 | ____1111________________ | ||
301 | _44_____________________ | ||
302 | 11111111________________ | ||
303 | ____________________33__ | ||
304 | ___________44___________ | ||
305 | __________33333_________ | ||
306 | ______________22________ | ||
307 | ___________________2222_ | ||
308 | _________111111111______ | ||
309 | _____________________11_ | ||
310 | _________________4______ | ||
311 | |||
312 | Sanitized equivalent (no overlap): | ||
313 | 1_______________________ | ||
314 | _44_____________________ | ||
315 | ___1____________________ | ||
316 | ____22__________________ | ||
317 | ______11________________ | ||
318 | _________1______________ | ||
319 | __________3_____________ | ||
320 | ___________44___________ | ||
321 | _____________33_________ | ||
322 | _______________2________ | ||
323 | ________________1_______ | ||
324 | _________________4______ | ||
325 | ___________________2____ | ||
326 | ____________________33__ | ||
327 | ______________________4_ | ||
328 | */ | ||
329 | /* if there's only one memory region, don't bother */ | ||
330 | if (*pnr_map < 2) { | ||
331 | return -1; | ||
332 | } | ||
333 | |||
334 | old_nr = *pnr_map; | ||
335 | |||
336 | /* bail out if we find any unreasonable addresses in bios map */ | ||
337 | for (i=0; i<old_nr; i++) | ||
338 | if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) { | ||
339 | return -1; | ||
340 | } | ||
341 | |||
342 | /* create pointers for initial change-point information (for sorting) */ | ||
343 | for (i=0; i < 2*old_nr; i++) | ||
344 | change_point[i] = &change_point_list[i]; | ||
345 | |||
346 | /* record all known change-points (starting and ending addresses), | ||
347 | omitting those that are for empty memory regions */ | ||
348 | chgidx = 0; | ||
349 | for (i=0; i < old_nr; i++) { | ||
350 | if (biosmap[i].size != 0) { | ||
351 | change_point[chgidx]->addr = biosmap[i].addr; | ||
352 | change_point[chgidx++]->pbios = &biosmap[i]; | ||
353 | change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; | ||
354 | change_point[chgidx++]->pbios = &biosmap[i]; | ||
355 | } | ||
356 | } | ||
357 | chg_nr = chgidx; /* true number of change-points */ | ||
358 | |||
359 | /* sort change-point list by memory addresses (low -> high) */ | ||
360 | still_changing = 1; | ||
361 | while (still_changing) { | ||
362 | still_changing = 0; | ||
363 | for (i=1; i < chg_nr; i++) { | ||
364 | /* if <current_addr> > <last_addr>, swap */ | ||
365 | /* or, if current=<start_addr> & last=<end_addr>, swap */ | ||
366 | if ((change_point[i]->addr < change_point[i-1]->addr) || | ||
367 | ((change_point[i]->addr == change_point[i-1]->addr) && | ||
368 | (change_point[i]->addr == change_point[i]->pbios->addr) && | ||
369 | (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) | ||
370 | ) | ||
371 | { | ||
372 | change_tmp = change_point[i]; | ||
373 | change_point[i] = change_point[i-1]; | ||
374 | change_point[i-1] = change_tmp; | ||
375 | still_changing=1; | ||
376 | } | ||
377 | } | ||
378 | } | ||
379 | |||
380 | /* create a new bios memory map, removing overlaps */ | ||
381 | overlap_entries=0; /* number of entries in the overlap table */ | ||
382 | new_bios_entry=0; /* index for creating new bios map entries */ | ||
383 | last_type = 0; /* start with undefined memory type */ | ||
384 | last_addr = 0; /* start with 0 as last starting address */ | ||
385 | /* loop through change-points, determining affect on the new bios map */ | ||
386 | for (chgidx=0; chgidx < chg_nr; chgidx++) | ||
387 | { | ||
388 | /* keep track of all overlapping bios entries */ | ||
389 | if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) | ||
390 | { | ||
391 | /* add map entry to overlap list (> 1 entry implies an overlap) */ | ||
392 | overlap_list[overlap_entries++]=change_point[chgidx]->pbios; | ||
393 | } | ||
394 | else | ||
395 | { | ||
396 | /* remove entry from list (order independent, so swap with last) */ | ||
397 | for (i=0; i<overlap_entries; i++) | ||
398 | { | ||
399 | if (overlap_list[i] == change_point[chgidx]->pbios) | ||
400 | overlap_list[i] = overlap_list[overlap_entries-1]; | ||
401 | } | ||
402 | overlap_entries--; | ||
403 | } | ||
404 | /* if there are overlapping entries, decide which "type" to use */ | ||
405 | /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ | ||
406 | current_type = 0; | ||
407 | for (i=0; i<overlap_entries; i++) | ||
408 | if (overlap_list[i]->type > current_type) | ||
409 | current_type = overlap_list[i]->type; | ||
410 | /* continue building up new bios map based on this information */ | ||
411 | if (current_type != last_type) { | ||
412 | if (last_type != 0) { | ||
413 | new_bios[new_bios_entry].size = | ||
414 | change_point[chgidx]->addr - last_addr; | ||
415 | /* move forward only if the new size was non-zero */ | ||
416 | if (new_bios[new_bios_entry].size != 0) | ||
417 | if (++new_bios_entry >= E820MAX) | ||
418 | break; /* no more space left for new bios entries */ | ||
419 | } | ||
420 | if (current_type != 0) { | ||
421 | new_bios[new_bios_entry].addr = change_point[chgidx]->addr; | ||
422 | new_bios[new_bios_entry].type = current_type; | ||
423 | last_addr=change_point[chgidx]->addr; | ||
424 | } | ||
425 | last_type = current_type; | ||
426 | } | ||
427 | } | ||
428 | new_nr = new_bios_entry; /* retain count for new bios entries */ | ||
429 | |||
430 | /* copy new bios mapping into original location */ | ||
431 | memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); | ||
432 | *pnr_map = new_nr; | ||
433 | |||
434 | return 0; | ||
435 | } | ||
436 | |||
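To make the sweep above concrete, here is a minimal hypothetical harness (not part of this patch; it assumes the unified three-argument prototype that the calls elsewhere in this series use). Two overlapping entries come out split at their change-points, with the reserved type winning the intersection because the larger type value takes precedence:

	/* Hypothetical demo, not in the tree; signature as used by the
	 * sanitize_e820_map(map, ARRAY_SIZE(map), &nr) callers below. */
	static struct e820entry demo_map[E820MAX] __initdata = {
		{ .addr = 0x0000, .size = 0x2000, .type = E820_RAM      },
		{ .addr = 0x1000, .size = 0x2000, .type = E820_RESERVED },
	};

	static void __init demo_sanitize(void)
	{
		int nr = 2;

		sanitize_e820_map(demo_map, ARRAY_SIZE(demo_map), &nr);
		/* demo_map now holds [0x0000-0x1000) usable and
		 * [0x1000-0x3000) reserved: still two entries, but disjoint. */
	}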
437 | /* | ||
438 | * Copy the BIOS e820 map into a safe place. | ||
439 | * | ||
440 | * Sanity-check it while we're at it. | ||
441 | * | ||
442 | * If we're lucky and live on a modern system, the setup code | ||
443 | * will have given us a memory map that we can use to properly | ||
444 | * set up memory. If we aren't, we'll fake a memory map. | ||
445 | * | ||
446 | * We check to see that the memory map contains at least 2 elements | ||
447 | * before we'll use it, because the detection code in setup.S may | ||
448 | * not be perfect, and almost every PC known to man has two memory | ||
449 | * regions: one from 0 to 640k, and one from 1mb up. (The IBM | ||
450 | * thinkpad 560x, for example, does not cooperate with the memory | ||
451 | * detection code.) | ||
452 | */ | ||
453 | int __init copy_e820_map(struct e820entry *biosmap, int nr_map) | ||
454 | { | ||
455 | /* Only one memory region (or negative)? Ignore it */ | ||
456 | if (nr_map < 2) | ||
457 | return -1; | ||
458 | |||
459 | do { | ||
460 | u64 start = biosmap->addr; | ||
461 | u64 size = biosmap->size; | ||
462 | u64 end = start + size; | ||
463 | u32 type = biosmap->type; | ||
464 | |||
465 | /* Overflow in 64 bits? Ignore the memory map. */ | ||
466 | if (start > end) | ||
467 | return -1; | ||
468 | |||
469 | add_memory_region(start, size, type); | ||
470 | } while (biosmap++, --nr_map); | ||
471 | |||
472 | return 0; | ||
473 | } | ||
474 | |||
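The fallback mentioned above ("we'll fake a memory map") lives in the caller rather than here; its shape is simply two add_memory_region() calls mirroring the two regions almost every PC has. A sketch, with the hypothetical mem_size_k standing in for whatever the legacy probe reported (in kilobytes):

	add_memory_region(0, 0xa0000, E820_RAM);		 /* 0 -> 640k  */
	add_memory_region(0x100000, mem_size_k << 10, E820_RAM); /* 1mb and up */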
475 | /* | ||
476 | * Find the highest page frame number we have available | ||
477 | */ | ||
478 | void __init propagate_e820_map(void) | ||
479 | { | ||
480 | int i; | ||
481 | |||
482 | max_pfn = 0; | ||
483 | |||
484 | for (i = 0; i < e820.nr_map; i++) { | ||
485 | unsigned long start, end; | ||
486 | /* RAM? */ | ||
487 | if (e820.map[i].type != E820_RAM) | ||
488 | continue; | ||
489 | start = PFN_UP(e820.map[i].addr); | ||
490 | end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); | ||
491 | if (start >= end) | ||
492 | continue; | ||
493 | if (end > max_pfn) | ||
494 | max_pfn = end; | ||
495 | memory_present(0, start, end); | ||
496 | } | ||
497 | } | ||
498 | |||
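The PFN_UP/PFN_DOWN pairing above deliberately shrinks each entry to whole page frames. With 4k pages the helpers (from linux/pfn.h) boil down to the following, so an entry spanning [0x1234, 0x5678) contributes frames 2 through 4 only; the partial pages at both ends are dropped, and the start >= end test catches entries smaller than one page:

	#define PFN_UP(x)	(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)	/* round up   */
	#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)			/* round down */
	/* PFN_UP(0x1234) == 2, PFN_DOWN(0x5678) == 5 -> frames 2, 3, 4 */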
499 | /* | ||
500 | * Register fully available low RAM pages with the bootmem allocator. | ||
501 | */ | ||
502 | void __init register_bootmem_low_pages(unsigned long max_low_pfn) | ||
503 | { | ||
504 | int i; | ||
505 | |||
506 | for (i = 0; i < e820.nr_map; i++) { | ||
507 | unsigned long curr_pfn, last_pfn, size; | ||
508 | /* | ||
509 | * Reserve usable low memory | ||
510 | */ | ||
511 | if (e820.map[i].type != E820_RAM) | ||
512 | continue; | ||
513 | /* | ||
514 | * We are rounding up the start address of usable memory: | ||
515 | */ | ||
516 | curr_pfn = PFN_UP(e820.map[i].addr); | ||
517 | if (curr_pfn >= max_low_pfn) | ||
518 | continue; | ||
519 | /* | ||
520 | * ... and at the end of the usable range downwards: | ||
521 | */ | ||
522 | last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); | ||
523 | |||
524 | if (last_pfn > max_low_pfn) | ||
525 | last_pfn = max_low_pfn; | ||
526 | |||
527 | /* | ||
528 | * .. finally, did all the rounding and playing | ||
529 | * around just make the area go away? | ||
530 | */ | ||
531 | if (last_pfn <= curr_pfn) | ||
532 | continue; | ||
533 | |||
534 | size = last_pfn - curr_pfn; | ||
535 | free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); | ||
536 | } | ||
537 | } | ||
538 | |||
539 | void __init e820_register_memory(void) | ||
540 | { | ||
541 | unsigned long gapstart, gapsize, round; | ||
542 | unsigned long long last; | ||
543 | int i; | ||
544 | |||
545 | /* | ||
546 | * Search for the biggest gap in the low 32 bits of the e820 | ||
547 | * memory space. | ||
548 | */ | ||
549 | last = 0x100000000ull; | ||
550 | gapstart = 0x10000000; | ||
551 | gapsize = 0x400000; | ||
552 | i = e820.nr_map; | ||
553 | while (--i >= 0) { | ||
554 | unsigned long long start = e820.map[i].addr; | ||
555 | unsigned long long end = start + e820.map[i].size; | ||
556 | |||
557 | /* | ||
558 | * Since "last" is at most 4GB, we know we'll | ||
559 | * fit in 32 bits if this condition is true | ||
560 | */ | ||
561 | if (last > end) { | ||
562 | unsigned long gap = last - end; | ||
563 | |||
564 | if (gap > gapsize) { | ||
565 | gapsize = gap; | ||
566 | gapstart = end; | ||
567 | } | ||
568 | } | ||
569 | if (start < last) | ||
570 | last = start; | ||
571 | } | ||
572 | |||
573 | /* | ||
574 | * See how much we want to round up: start off with | ||
575 | * rounding to the next 1MB area. | ||
576 | */ | ||
577 | round = 0x100000; | ||
578 | while ((gapsize >> 4) > round) | ||
579 | round += round; | ||
580 | /* Fun with two's complement */ | ||
581 | pci_mem_start = (gapstart + round) & -round; | ||
582 | |||
583 | printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", | ||
584 | pci_mem_start, gapstart, gapsize); | ||
585 | } | ||
586 | |||
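The two's-complement fun: for a power-of-two round, -round is a mask with all bits at and above log2(round) set, so the expression lands on the next round-aligned boundary strictly above gapstart. With illustrative values:

	unsigned long round    = 0x100000;	/* rounding settled at 1MB     */
	unsigned long gapstart = 0xcff00123;	/* unaligned start of the gap  */
	unsigned long start    = (gapstart + round) & -round;
						/* -round == 0xfff00000, so
						 * start  == 0xd0000000      */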
587 | void __init print_memory_map(char *who) | ||
588 | { | ||
589 | int i; | ||
590 | |||
591 | for (i = 0; i < e820.nr_map; i++) { | ||
592 | printk(" %s: %016Lx - %016Lx ", who, | ||
593 | e820.map[i].addr, | ||
594 | e820.map[i].addr + e820.map[i].size); | ||
595 | switch (e820.map[i].type) { | ||
596 | case E820_RAM: printk("(usable)\n"); | ||
597 | break; | ||
598 | case E820_RESERVED: | ||
599 | printk("(reserved)\n"); | ||
600 | break; | ||
601 | case E820_ACPI: | ||
602 | printk("(ACPI data)\n"); | ||
603 | break; | ||
604 | case E820_NVS: | ||
605 | printk("(ACPI NVS)\n"); | ||
606 | break; | ||
607 | default: printk("type %u\n", e820.map[i].type); | ||
608 | break; | ||
609 | } | ||
610 | } | ||
611 | } | ||
612 | |||
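For reference, the loop above is what produces the familiar boot lines of this shape (addresses illustrative, format taken from the printk above):

	 BIOS-e820: 0000000000000000 - 000000000009fc00 (usable)
	 BIOS-e820: 000000000009fc00 - 00000000000a0000 (reserved)
	 BIOS-e820: 0000000000100000 - 000000003fff0000 (usable)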
613 | void __init limit_regions(unsigned long long size) | 210 | void __init limit_regions(unsigned long long size) |
614 | { | 211 | { |
615 | unsigned long long current_addr; | 212 | unsigned long long current_addr; |
616 | int i; | 213 | int i; |
617 | 214 | ||
618 | print_memory_map("limit_regions start"); | 215 | e820_print_map("limit_regions start"); |
619 | for (i = 0; i < e820.nr_map; i++) { | 216 | for (i = 0; i < e820.nr_map; i++) { |
620 | current_addr = e820.map[i].addr + e820.map[i].size; | 217 | current_addr = e820.map[i].addr + e820.map[i].size; |
621 | if (current_addr < size) | 218 | if (current_addr < size) |
@@ -634,63 +231,59 @@ void __init limit_regions(unsigned long long size) | |||
634 | e820.nr_map = i + 1; | 231 | e820.nr_map = i + 1; |
635 | e820.map[i].size -= current_addr - size; | 232 | e820.map[i].size -= current_addr - size; |
636 | } | 233 | } |
637 | print_memory_map("limit_regions endfor"); | 234 | e820_print_map("limit_regions endfor"); |
638 | return; | 235 | return; |
639 | } | 236 | } |
640 | print_memory_map("limit_regions endfunc"); | 237 | e820_print_map("limit_regions endfunc"); |
641 | } | 238 | } |
642 | 239 | ||
643 | /* | 240 | /* Overridden in paravirt.c if CONFIG_PARAVIRT */ |
644 | * This function checks if any part of the range <start,end> is mapped | 241 | char * __init __attribute__((weak)) memory_setup(void) |
645 | * with type. | ||
646 | */ | ||
647 | int | ||
648 | e820_any_mapped(u64 start, u64 end, unsigned type) | ||
649 | { | 242 | { |
650 | int i; | 243 | return machine_specific_memory_setup(); |
651 | for (i = 0; i < e820.nr_map; i++) { | ||
652 | const struct e820entry *ei = &e820.map[i]; | ||
653 | if (type && ei->type != type) | ||
654 | continue; | ||
655 | if (ei->addr >= end || ei->addr + ei->size <= start) | ||
656 | continue; | ||
657 | return 1; | ||
658 | } | ||
659 | return 0; | ||
660 | } | 244 | } |
661 | EXPORT_SYMBOL_GPL(e820_any_mapped); | 245 | |
662 | 246 | void __init setup_memory_map(void) | |
663 | /* | ||
664 | * This function checks if the entire range <start,end> is mapped with type. | ||
665 | * | ||
666 | * Note: this function only works correctly if the e820 table is sorted | ||
667 | * and non-overlapping, which is the case. | ||
668 | */ | ||
669 | int __init | ||
670 | e820_all_mapped(unsigned long s, unsigned long e, unsigned type) | ||
671 | { | 247 | { |
672 | u64 start = s; | 248 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); |
673 | u64 end = e; | 249 | e820_print_map(memory_setup()); |
674 | int i; | 250 | } |
675 | for (i = 0; i < e820.nr_map; i++) { | 251 | |
676 | struct e820entry *ei = &e820.map[i]; | 252 | static int __initdata user_defined_memmap; |
677 | if (type && ei->type != type) | 253 | |
678 | continue; | 254 | /* |
679 | /* does the region overlap (at least partly) the current entry? */ | 255 | * "mem=nopentium" disables the 4MB page tables.
680 | if (ei->addr >= end || ei->addr + ei->size <= start) | 256 | * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM |
681 | continue; | 257 | * to <mem>, overriding the bios size. |
682 | /* if the region is at the beginning of <start,end> we move | 258 | * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from |
683 | * start to the end of the region, since the range is covered that far | 259 | * <start> to <start>+<mem>, overriding the bios size.
260 | * | ||
261 | * HPA tells me bootloaders need to parse mem=, so no new | ||
262 | * option should be mem= [also see Documentation/i386/boot.txt] | ||
263 | */ | ||
264 | static int __init parse_mem(char *arg) | ||
265 | { | ||
266 | if (!arg) | ||
267 | return -EINVAL; | ||
268 | |||
269 | if (strcmp(arg, "nopentium") == 0) { | ||
270 | setup_clear_cpu_cap(X86_FEATURE_PSE); | ||
271 | } else { | ||
272 | /* If the user specifies memory size, we | ||
273 | * limit the BIOS-provided memory map to | ||
274 | * that size. exactmap can be used to specify | ||
275 | * the exact map. mem=number can be used to | ||
276 | * trim the existing memory map. | ||
684 | */ | 277 | */ |
685 | if (ei->addr <= start) | 278 | unsigned long long mem_size; |
686 | start = ei->addr + ei->size; | 279 | |
687 | /* if start is now at or beyond end, we're done, full | 280 | mem_size = memparse(arg, &arg); |
688 | * coverage */ | 281 | limit_regions(mem_size); |
689 | if (start >= end) | 282 | user_defined_memmap = 1; |
690 | return 1; /* we're done */ | ||
691 | } | 283 | } |
692 | return 0; | 284 | return 0; |
693 | } | 285 | } |
286 | early_param("mem", parse_mem); | ||
694 | 287 | ||
695 | static int __init parse_memmap(char *arg) | 288 | static int __init parse_memmap(char *arg) |
696 | { | 289 | { |
@@ -704,8 +297,9 @@ static int __init parse_memmap(char *arg) | |||
704 | * size before original memory map is | 297 | * size before original memory map is |
705 | * reset. | 298 | * reset. |
706 | */ | 299 | */ |
707 | propagate_e820_map(); | 300 | e820_register_active_regions(0, 0, -1UL); |
708 | saved_max_pfn = max_pfn; | 301 | saved_max_pfn = e820_end_of_ram(); |
302 | remove_all_active_ranges(); | ||
709 | #endif | 303 | #endif |
710 | e820.nr_map = 0; | 304 | e820.nr_map = 0; |
711 | user_defined_memmap = 1; | 305 | user_defined_memmap = 1; |
@@ -736,40 +330,12 @@ static int __init parse_memmap(char *arg) | |||
736 | return 0; | 330 | return 0; |
737 | } | 331 | } |
738 | early_param("memmap", parse_memmap); | 332 | early_param("memmap", parse_memmap); |
739 | void __init update_memory_range(u64 start, u64 size, unsigned old_type, | ||
740 | unsigned new_type) | ||
741 | { | ||
742 | int i; | ||
743 | |||
744 | BUG_ON(old_type == new_type); | ||
745 | 333 | ||
746 | for (i = 0; i < e820.nr_map; i++) { | 334 | void __init finish_e820_parsing(void) |
747 | struct e820entry *ei = &e820.map[i]; | 335 | { |
748 | u64 final_start, final_end; | 336 | if (user_defined_memmap) { |
749 | if (ei->type != old_type) | 337 | printk(KERN_INFO "user-defined physical RAM map:\n"); |
750 | continue; | 338 | e820_print_map("user"); |
751 | /* totally covered? */ | ||
752 | if (ei->addr >= start && ei->size <= size) { | ||
753 | ei->type = new_type; | ||
754 | continue; | ||
755 | } | ||
756 | /* partially covered */ | ||
757 | final_start = max(start, ei->addr); | ||
758 | final_end = min(start + size, ei->addr + ei->size); | ||
759 | if (final_start >= final_end) | ||
760 | continue; | ||
761 | add_memory_region(final_start, final_end - final_start, | ||
762 | new_type); | ||
763 | } | 339 | } |
764 | } | 340 | } |
765 | void __init update_e820(void) | ||
766 | { | ||
767 | u8 nr_map; | ||
768 | 341 | ||
769 | nr_map = e820.nr_map; | ||
770 | if (sanitize_e820_map(e820.map, &nr_map)) | ||
771 | return; | ||
772 | e820.nr_map = nr_map; | ||
773 | printk(KERN_INFO "modified physical RAM map:\n"); | ||
774 | print_memory_map("modified"); | ||
775 | } | ||
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 124480c0008d..0afee2ca0bf8 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c | |||
@@ -17,8 +17,8 @@ | |||
17 | #include <linux/kexec.h> | 17 | #include <linux/kexec.h> |
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | #include <linux/mm.h> | 19 | #include <linux/mm.h> |
20 | #include <linux/suspend.h> | ||
21 | #include <linux/pfn.h> | 20 | #include <linux/pfn.h> |
21 | #include <linux/pci.h> | ||
22 | 22 | ||
23 | #include <asm/pgtable.h> | 23 | #include <asm/pgtable.h> |
24 | #include <asm/page.h> | 24 | #include <asm/page.h> |
@@ -29,8 +29,6 @@ | |||
29 | #include <asm/kdebug.h> | 29 | #include <asm/kdebug.h> |
30 | #include <asm/trampoline.h> | 30 | #include <asm/trampoline.h> |
31 | 31 | ||
32 | struct e820map e820; | ||
33 | |||
34 | /* | 32 | /* |
35 | * PFN of last memory page. | 33 | * PFN of last memory page. |
36 | */ | 34 | */ |
@@ -44,285 +42,6 @@ unsigned long end_pfn; | |||
44 | unsigned long max_pfn_mapped; | 42 | unsigned long max_pfn_mapped; |
45 | 43 | ||
46 | /* | 44 | /* |
47 | * Last pfn which the user wants to use. | ||
48 | */ | ||
49 | static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; | ||
50 | |||
51 | /* | ||
52 | * Early reserved memory areas. | ||
53 | */ | ||
54 | #define MAX_EARLY_RES 20 | ||
55 | |||
56 | struct early_res { | ||
57 | unsigned long start, end; | ||
58 | char name[16]; | ||
59 | }; | ||
60 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { | ||
61 | { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ | ||
62 | #ifdef CONFIG_X86_TRAMPOLINE | ||
63 | { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, | ||
64 | #endif | ||
65 | {} | ||
66 | }; | ||
67 | |||
68 | void __init reserve_early(unsigned long start, unsigned long end, char *name) | ||
69 | { | ||
70 | int i; | ||
71 | struct early_res *r; | ||
72 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
73 | r = &early_res[i]; | ||
74 | if (end > r->start && start < r->end) | ||
75 | panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n", | ||
76 | start, end - 1, name?name:"", r->start, r->end - 1, r->name); | ||
77 | } | ||
78 | if (i >= MAX_EARLY_RES) | ||
79 | panic("Too many early reservations"); | ||
80 | r = &early_res[i]; | ||
81 | r->start = start; | ||
82 | r->end = end; | ||
83 | if (name) | ||
84 | strncpy(r->name, name, sizeof(r->name) - 1); | ||
85 | } | ||
86 | |||
87 | void __init free_early(unsigned long start, unsigned long end) | ||
88 | { | ||
89 | struct early_res *r; | ||
90 | int i, j; | ||
91 | |||
92 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
93 | r = &early_res[i]; | ||
94 | if (start == r->start && end == r->end) | ||
95 | break; | ||
96 | } | ||
97 | if (i >= MAX_EARLY_RES || !early_res[i].end) | ||
98 | panic("free_early on not reserved area: %lx-%lx!", start, end); | ||
99 | |||
100 | for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) | ||
101 | ; | ||
102 | |||
103 | memmove(&early_res[i], &early_res[i + 1], | ||
104 | (j - 1 - i) * sizeof(struct early_res)); | ||
105 | |||
106 | early_res[j - 1].end = 0; | ||
107 | } | ||
108 | |||
109 | void __init early_res_to_bootmem(unsigned long start, unsigned long end) | ||
110 | { | ||
111 | int i; | ||
112 | unsigned long final_start, final_end; | ||
113 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
114 | struct early_res *r = &early_res[i]; | ||
115 | final_start = max(start, r->start); | ||
116 | final_end = min(end, r->end); | ||
117 | if (final_start >= final_end) | ||
118 | continue; | ||
119 | printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i, | ||
120 | final_start, final_end - 1, r->name); | ||
121 | reserve_bootmem_generic(final_start, final_end - final_start); | ||
122 | } | ||
123 | } | ||
124 | |||
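The intended lifecycle of these helpers, sketched with real function names but illustrative addresses: a claim is staked long before bootmem exists, and is later either released or handed over to bootmem wholesale.

	/* Very early, before bootmem is initialised: */
	reserve_early(0x8000, 0xa000, "demo");	/* claim [0x8000, 0xa000) */

	/* Either drop the claim once it is no longer needed... */
	free_early(0x8000, 0xa000);

	/* ...or, once bootmem is up, convert every surviving claim: */
	early_res_to_bootmem(0, max_low_pfn << PAGE_SHIFT);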
125 | /* Check for already reserved areas */ | ||
126 | static inline int __init | ||
127 | bad_addr(unsigned long *addrp, unsigned long size, unsigned long align) | ||
128 | { | ||
129 | int i; | ||
130 | unsigned long addr = *addrp, last; | ||
131 | int changed = 0; | ||
132 | again: | ||
133 | last = addr + size; | ||
134 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
135 | struct early_res *r = &early_res[i]; | ||
136 | if (last >= r->start && addr < r->end) { | ||
137 | *addrp = addr = round_up(r->end, align); | ||
138 | changed = 1; | ||
139 | goto again; | ||
140 | } | ||
141 | } | ||
142 | return changed; | ||
143 | } | ||
144 | |||
145 | /* Check for already reserved areas */ | ||
146 | static inline int __init | ||
147 | bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align) | ||
148 | { | ||
149 | int i; | ||
150 | unsigned long addr = *addrp, last; | ||
151 | unsigned long size = *sizep; | ||
152 | int changed = 0; | ||
153 | again: | ||
154 | last = addr + size; | ||
155 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
156 | struct early_res *r = &early_res[i]; | ||
157 | if (last > r->start && addr < r->start) { | ||
158 | size = r->start - addr; | ||
159 | changed = 1; | ||
160 | goto again; | ||
161 | } | ||
162 | if (last > r->end && addr < r->end) { | ||
163 | addr = round_up(r->end, align); | ||
164 | size = last - addr; | ||
165 | changed = 1; | ||
166 | goto again; | ||
167 | } | ||
168 | if (last <= r->end && addr >= r->start) { | ||
169 | (*sizep)++; | ||
170 | return 0; | ||
171 | } | ||
172 | } | ||
173 | if (changed) { | ||
174 | *addrp = addr; | ||
175 | *sizep = size; | ||
176 | } | ||
177 | return changed; | ||
178 | } | ||
179 | /* | ||
180 | * This function checks if any part of the range <start,end> is mapped | ||
181 | * with type. | ||
182 | */ | ||
183 | int | ||
184 | e820_any_mapped(unsigned long start, unsigned long end, unsigned type) | ||
185 | { | ||
186 | int i; | ||
187 | |||
188 | for (i = 0; i < e820.nr_map; i++) { | ||
189 | struct e820entry *ei = &e820.map[i]; | ||
190 | |||
191 | if (type && ei->type != type) | ||
192 | continue; | ||
193 | if (ei->addr >= end || ei->addr + ei->size <= start) | ||
194 | continue; | ||
195 | return 1; | ||
196 | } | ||
197 | return 0; | ||
198 | } | ||
199 | EXPORT_SYMBOL_GPL(e820_any_mapped); | ||
200 | |||
201 | /* | ||
202 | * This function checks if the entire range <start,end> is mapped with type. | ||
203 | * | ||
204 | * Note: this function only works correctly if the e820 table is sorted | ||
205 | * and non-overlapping, which is the case. | ||
206 | */ | ||
207 | int __init e820_all_mapped(unsigned long start, unsigned long end, | ||
208 | unsigned type) | ||
209 | { | ||
210 | int i; | ||
211 | |||
212 | for (i = 0; i < e820.nr_map; i++) { | ||
213 | struct e820entry *ei = &e820.map[i]; | ||
214 | |||
215 | if (type && ei->type != type) | ||
216 | continue; | ||
217 | /* does the region overlap (at least partly) the current entry? */ | ||
218 | if (ei->addr >= end || ei->addr + ei->size <= start) | ||
219 | continue; | ||
220 | |||
221 | /* if the region is at the beginning of <start,end> we move | ||
222 | * start to the end of the region, since the range is covered that far | ||
223 | */ | ||
224 | if (ei->addr <= start) | ||
225 | start = ei->addr + ei->size; | ||
226 | /* | ||
227 | * if start is now at or beyond end, we're done, full | ||
228 | * coverage | ||
229 | */ | ||
230 | if (start >= end) | ||
231 | return 1; | ||
232 | } | ||
233 | return 0; | ||
234 | } | ||
235 | |||
236 | /* | ||
237 | * Find a free area with specified alignment in a specific range. | ||
238 | */ | ||
239 | unsigned long __init find_e820_area(unsigned long start, unsigned long end, | ||
240 | unsigned long size, unsigned long align) | ||
241 | { | ||
242 | int i; | ||
243 | |||
244 | for (i = 0; i < e820.nr_map; i++) { | ||
245 | struct e820entry *ei = &e820.map[i]; | ||
246 | unsigned long addr, last; | ||
247 | unsigned long ei_last; | ||
248 | |||
249 | if (ei->type != E820_RAM) | ||
250 | continue; | ||
251 | addr = round_up(ei->addr, align); | ||
252 | ei_last = ei->addr + ei->size; | ||
253 | if (addr < start) | ||
254 | addr = round_up(start, align); | ||
255 | if (addr >= ei_last) | ||
256 | continue; | ||
257 | while (bad_addr(&addr, size, align) && addr+size <= ei_last) | ||
258 | ; | ||
259 | last = addr + size; | ||
260 | if (last > ei_last) | ||
261 | continue; | ||
262 | if (last > end) | ||
263 | continue; | ||
264 | return addr; | ||
265 | } | ||
266 | return -1UL; | ||
267 | } | ||
268 | |||
269 | /* | ||
270 | * Find next free range after *start | ||
271 | */ | ||
272 | unsigned long __init find_e820_area_size(unsigned long start, | ||
273 | unsigned long *sizep, | ||
274 | unsigned long align) | ||
275 | { | ||
276 | int i; | ||
277 | |||
278 | for (i = 0; i < e820.nr_map; i++) { | ||
279 | struct e820entry *ei = &e820.map[i]; | ||
280 | unsigned long addr, last; | ||
281 | unsigned long ei_last; | ||
282 | |||
283 | if (ei->type != E820_RAM) | ||
284 | continue; | ||
285 | addr = round_up(ei->addr, align); | ||
286 | ei_last = ei->addr + ei->size; | ||
287 | if (addr < start) | ||
288 | addr = round_up(start, align); | ||
289 | if (addr >= ei_last) | ||
290 | continue; | ||
291 | *sizep = ei_last - addr; | ||
292 | while (bad_addr_size(&addr, sizep, align) && | ||
293 | addr + *sizep <= ei_last) | ||
294 | ; | ||
295 | last = addr + *sizep; | ||
296 | if (last > ei_last) | ||
297 | continue; | ||
298 | return addr; | ||
299 | } | ||
300 | return -1UL; | ||
301 | |||
302 | } | ||
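Typical use of the two finders above (hypothetical caller, illustrative range and size): ask for 16k of page-aligned RAM between 1MB and 16MB, then stake an early reservation on the result; -1UL signals that no suitable area exists.

	unsigned long addr = find_e820_area(0x100000, 0x1000000,
					    0x4000, PAGE_SIZE);
	if (addr != -1UL)
		reserve_early(addr, addr + 0x4000, "demo buffer");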
303 | /* | ||
304 | * Find the highest page frame number we have available | ||
305 | */ | ||
306 | unsigned long __init e820_end_of_ram(void) | ||
307 | { | ||
308 | unsigned long end_pfn; | ||
309 | |||
310 | end_pfn = find_max_pfn_with_active_regions(); | ||
311 | |||
312 | if (end_pfn > max_pfn_mapped) | ||
313 | max_pfn_mapped = end_pfn; | ||
314 | if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT) | ||
315 | max_pfn_mapped = MAXMEM>>PAGE_SHIFT; | ||
316 | if (end_pfn > end_user_pfn) | ||
317 | end_pfn = end_user_pfn; | ||
318 | if (end_pfn > max_pfn_mapped) | ||
319 | end_pfn = max_pfn_mapped; | ||
320 | |||
321 | printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped); | ||
322 | return end_pfn; | ||
323 | } | ||
324 | |||
325 | /* | ||
326 | * Mark e820 reserved areas as busy for the resource manager. | 45 | * Mark e820 reserved areas as busy for the resource manager. |
327 | */ | 46 | */ |
328 | void __init e820_reserve_resources(void) | 47 | void __init e820_reserve_resources(void) |
@@ -346,393 +65,6 @@ void __init e820_reserve_resources(void) | |||
346 | } | 65 | } |
347 | } | 66 | } |
348 | 67 | ||
349 | /* | ||
350 | * Find the ranges of physical addresses that do not correspond to | ||
351 | * e820 RAM areas and mark the corresponding pages as nosave for software | ||
352 | * suspend and suspend to RAM. | ||
353 | * | ||
354 | * This function requires the e820 map to be sorted and without any | ||
355 | * overlapping entries and assumes the first e820 area to be RAM. | ||
356 | */ | ||
357 | void __init e820_mark_nosave_regions(void) | ||
358 | { | ||
359 | int i; | ||
360 | unsigned long paddr; | ||
361 | |||
362 | paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE); | ||
363 | for (i = 1; i < e820.nr_map; i++) { | ||
364 | struct e820entry *ei = &e820.map[i]; | ||
365 | |||
366 | if (paddr < ei->addr) | ||
367 | register_nosave_region(PFN_DOWN(paddr), | ||
368 | PFN_UP(ei->addr)); | ||
369 | |||
370 | paddr = round_down(ei->addr + ei->size, PAGE_SIZE); | ||
371 | if (ei->type != E820_RAM) | ||
372 | register_nosave_region(PFN_UP(ei->addr), | ||
373 | PFN_DOWN(paddr)); | ||
374 | |||
375 | if (paddr >= (end_pfn << PAGE_SHIFT)) | ||
376 | break; | ||
377 | } | ||
378 | } | ||
379 | |||
380 | /* | ||
381 | * Finds an active region in the address range from start_pfn to end_pfn and | ||
382 | * returns its range in ei_startpfn and ei_endpfn for the e820 entry. | ||
383 | */ | ||
384 | static int __init e820_find_active_region(const struct e820entry *ei, | ||
385 | unsigned long start_pfn, | ||
386 | unsigned long end_pfn, | ||
387 | unsigned long *ei_startpfn, | ||
388 | unsigned long *ei_endpfn) | ||
389 | { | ||
390 | *ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT; | ||
391 | *ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT; | ||
392 | |||
393 | /* Skip map entries smaller than a page */ | ||
394 | if (*ei_startpfn >= *ei_endpfn) | ||
395 | return 0; | ||
396 | |||
397 | /* Check if max_pfn_mapped should be updated */ | ||
398 | if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped) | ||
399 | max_pfn_mapped = *ei_endpfn; | ||
400 | |||
401 | /* Skip if map is outside the node */ | ||
402 | if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || | ||
403 | *ei_startpfn >= end_pfn) | ||
404 | return 0; | ||
405 | |||
406 | /* Check for overlaps */ | ||
407 | if (*ei_startpfn < start_pfn) | ||
408 | *ei_startpfn = start_pfn; | ||
409 | if (*ei_endpfn > end_pfn) | ||
410 | *ei_endpfn = end_pfn; | ||
411 | |||
412 | /* Obey end_user_pfn to save on memmap */ | ||
413 | if (*ei_startpfn >= end_user_pfn) | ||
414 | return 0; | ||
415 | if (*ei_endpfn > end_user_pfn) | ||
416 | *ei_endpfn = end_user_pfn; | ||
417 | |||
418 | return 1; | ||
419 | } | ||
420 | |||
421 | /* Walk the e820 map and register active regions within a node */ | ||
422 | void __init | ||
423 | e820_register_active_regions(int nid, unsigned long start_pfn, | ||
424 | unsigned long end_pfn) | ||
425 | { | ||
426 | unsigned long ei_startpfn; | ||
427 | unsigned long ei_endpfn; | ||
428 | int i; | ||
429 | |||
430 | for (i = 0; i < e820.nr_map; i++) | ||
431 | if (e820_find_active_region(&e820.map[i], | ||
432 | start_pfn, end_pfn, | ||
433 | &ei_startpfn, &ei_endpfn)) | ||
434 | add_active_range(nid, ei_startpfn, ei_endpfn); | ||
435 | } | ||
436 | |||
437 | /* | ||
438 | * Add a memory region to the kernel e820 map. | ||
439 | */ | ||
440 | void __init add_memory_region(unsigned long start, unsigned long size, int type) | ||
441 | { | ||
442 | int x = e820.nr_map; | ||
443 | |||
444 | if (x == E820MAX) { | ||
445 | printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); | ||
446 | return; | ||
447 | } | ||
448 | |||
449 | e820.map[x].addr = start; | ||
450 | e820.map[x].size = size; | ||
451 | e820.map[x].type = type; | ||
452 | e820.nr_map++; | ||
453 | } | ||
454 | |||
455 | /* | ||
456 | * Find the hole size (in bytes) in the memory range. | ||
457 | * @start: starting address of the memory range to scan | ||
458 | * @end: ending address of the memory range to scan | ||
459 | */ | ||
460 | unsigned long __init e820_hole_size(unsigned long start, unsigned long end) | ||
461 | { | ||
462 | unsigned long start_pfn = start >> PAGE_SHIFT; | ||
463 | unsigned long end_pfn = end >> PAGE_SHIFT; | ||
464 | unsigned long ei_startpfn, ei_endpfn, ram = 0; | ||
465 | int i; | ||
466 | |||
467 | for (i = 0; i < e820.nr_map; i++) { | ||
468 | if (e820_find_active_region(&e820.map[i], | ||
469 | start_pfn, end_pfn, | ||
470 | &ei_startpfn, &ei_endpfn)) | ||
471 | ram += ei_endpfn - ei_startpfn; | ||
472 | } | ||
473 | return end - start - (ram << PAGE_SHIFT); | ||
474 | } | ||
475 | |||
476 | static void __init e820_print_map(char *who) | ||
477 | { | ||
478 | int i; | ||
479 | |||
480 | for (i = 0; i < e820.nr_map; i++) { | ||
481 | printk(KERN_INFO " %s: %016Lx - %016Lx ", who, | ||
482 | (unsigned long long) e820.map[i].addr, | ||
483 | (unsigned long long) | ||
484 | (e820.map[i].addr + e820.map[i].size)); | ||
485 | switch (e820.map[i].type) { | ||
486 | case E820_RAM: | ||
487 | printk(KERN_CONT "(usable)\n"); | ||
488 | break; | ||
489 | case E820_RESERVED: | ||
490 | printk(KERN_CONT "(reserved)\n"); | ||
491 | break; | ||
492 | case E820_ACPI: | ||
493 | printk(KERN_CONT "(ACPI data)\n"); | ||
494 | break; | ||
495 | case E820_NVS: | ||
496 | printk(KERN_CONT "(ACPI NVS)\n"); | ||
497 | break; | ||
498 | default: | ||
499 | printk(KERN_CONT "type %u\n", e820.map[i].type); | ||
500 | break; | ||
501 | } | ||
502 | } | ||
503 | } | ||
504 | |||
505 | /* | ||
506 | * Sanitize the BIOS e820 map. | ||
507 | * | ||
508 | * Some e820 responses include overlapping entries. The following | ||
509 | * replaces the original e820 map with a new one, removing overlaps. | ||
510 | * | ||
511 | */ | ||
512 | static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) | ||
513 | { | ||
514 | struct change_member { | ||
515 | struct e820entry *pbios; /* pointer to original bios entry */ | ||
516 | unsigned long long addr; /* address for this change point */ | ||
517 | }; | ||
518 | static struct change_member change_point_list[2*E820MAX] __initdata; | ||
519 | static struct change_member *change_point[2*E820MAX] __initdata; | ||
520 | static struct e820entry *overlap_list[E820MAX] __initdata; | ||
521 | static struct e820entry new_bios[E820MAX] __initdata; | ||
522 | struct change_member *change_tmp; | ||
523 | unsigned long current_type, last_type; | ||
524 | unsigned long long last_addr; | ||
525 | int chgidx, still_changing; | ||
526 | int overlap_entries; | ||
527 | int new_bios_entry; | ||
528 | int old_nr, new_nr, chg_nr; | ||
529 | int i; | ||
530 | |||
531 | /* | ||
532 | Visually we're performing the following | ||
533 | (1,2,3,4 = memory types)... | ||
534 | |||
535 | Sample memory map (w/overlaps): | ||
536 | ____22__________________ | ||
537 | ______________________4_ | ||
538 | ____1111________________ | ||
539 | _44_____________________ | ||
540 | 11111111________________ | ||
541 | ____________________33__ | ||
542 | ___________44___________ | ||
543 | __________33333_________ | ||
544 | ______________22________ | ||
545 | ___________________2222_ | ||
546 | _________111111111______ | ||
547 | _____________________11_ | ||
548 | _________________4______ | ||
549 | |||
550 | Sanitized equivalent (no overlap): | ||
551 | 1_______________________ | ||
552 | _44_____________________ | ||
553 | ___1____________________ | ||
554 | ____22__________________ | ||
555 | ______11________________ | ||
556 | _________1______________ | ||
557 | __________3_____________ | ||
558 | ___________44___________ | ||
559 | _____________33_________ | ||
560 | _______________2________ | ||
561 | ________________1_______ | ||
562 | _________________4______ | ||
563 | ___________________2____ | ||
564 | ____________________33__ | ||
565 | ______________________4_ | ||
566 | */ | ||
567 | |||
568 | /* if there's only one memory region, don't bother */ | ||
569 | if (*pnr_map < 2) | ||
570 | return -1; | ||
571 | |||
572 | old_nr = *pnr_map; | ||
573 | |||
574 | /* bail out if we find any unreasonable addresses in bios map */ | ||
575 | for (i = 0; i < old_nr; i++) | ||
576 | if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) | ||
577 | return -1; | ||
578 | |||
579 | /* create pointers for initial change-point information (for sorting) */ | ||
580 | for (i = 0; i < 2 * old_nr; i++) | ||
581 | change_point[i] = &change_point_list[i]; | ||
582 | |||
583 | /* record all known change-points (starting and ending addresses), | ||
584 | omitting those that are for empty memory regions */ | ||
585 | chgidx = 0; | ||
586 | for (i = 0; i < old_nr; i++) { | ||
587 | if (biosmap[i].size != 0) { | ||
588 | change_point[chgidx]->addr = biosmap[i].addr; | ||
589 | change_point[chgidx++]->pbios = &biosmap[i]; | ||
590 | change_point[chgidx]->addr = biosmap[i].addr + | ||
591 | biosmap[i].size; | ||
592 | change_point[chgidx++]->pbios = &biosmap[i]; | ||
593 | } | ||
594 | } | ||
595 | chg_nr = chgidx; | ||
596 | |||
597 | /* sort change-point list by memory addresses (low -> high) */ | ||
598 | still_changing = 1; | ||
599 | while (still_changing) { | ||
600 | still_changing = 0; | ||
601 | for (i = 1; i < chg_nr; i++) { | ||
602 | unsigned long long curaddr, lastaddr; | ||
603 | unsigned long long curpbaddr, lastpbaddr; | ||
604 | |||
605 | curaddr = change_point[i]->addr; | ||
606 | lastaddr = change_point[i - 1]->addr; | ||
607 | curpbaddr = change_point[i]->pbios->addr; | ||
608 | lastpbaddr = change_point[i - 1]->pbios->addr; | ||
609 | |||
610 | /* | ||
611 | * swap entries, when: | ||
612 | * | ||
613 | * curaddr > lastaddr or | ||
614 | * curaddr == lastaddr and curaddr == curpbaddr and | ||
615 | * lastaddr != lastpbaddr | ||
616 | */ | ||
617 | if (curaddr < lastaddr || | ||
618 | (curaddr == lastaddr && curaddr == curpbaddr && | ||
619 | lastaddr != lastpbaddr)) { | ||
620 | change_tmp = change_point[i]; | ||
621 | change_point[i] = change_point[i-1]; | ||
622 | change_point[i-1] = change_tmp; | ||
623 | still_changing = 1; | ||
624 | } | ||
625 | } | ||
626 | } | ||
627 | |||
628 | /* create a new bios memory map, removing overlaps */ | ||
629 | overlap_entries = 0; /* number of entries in the overlap table */ | ||
630 | new_bios_entry = 0; /* index for creating new bios map entries */ | ||
631 | last_type = 0; /* start with undefined memory type */ | ||
632 | last_addr = 0; /* start with 0 as last starting address */ | ||
633 | |||
634 | /* loop through change-points, determining the effect on the new bios map */ | ||
635 | for (chgidx = 0; chgidx < chg_nr; chgidx++) { | ||
636 | /* keep track of all overlapping bios entries */ | ||
637 | if (change_point[chgidx]->addr == | ||
638 | change_point[chgidx]->pbios->addr) { | ||
639 | /* | ||
640 | * add map entry to overlap list (> 1 entry | ||
641 | * implies an overlap) | ||
642 | */ | ||
643 | overlap_list[overlap_entries++] = | ||
644 | change_point[chgidx]->pbios; | ||
645 | } else { | ||
646 | /* | ||
647 | * remove entry from list (order independent, | ||
648 | * so swap with last) | ||
649 | */ | ||
650 | for (i = 0; i < overlap_entries; i++) { | ||
651 | if (overlap_list[i] == | ||
652 | change_point[chgidx]->pbios) | ||
653 | overlap_list[i] = | ||
654 | overlap_list[overlap_entries-1]; | ||
655 | } | ||
656 | overlap_entries--; | ||
657 | } | ||
658 | /* | ||
659 | * if there are overlapping entries, decide which | ||
660 | * "type" to use (larger value takes precedence -- | ||
661 | * 1=usable, 2,3,4,4+=unusable) | ||
662 | */ | ||
663 | current_type = 0; | ||
664 | for (i = 0; i < overlap_entries; i++) | ||
665 | if (overlap_list[i]->type > current_type) | ||
666 | current_type = overlap_list[i]->type; | ||
667 | /* | ||
668 | * continue building up new bios map based on this | ||
669 | * information | ||
670 | */ | ||
671 | if (current_type != last_type) { | ||
672 | if (last_type != 0) { | ||
673 | new_bios[new_bios_entry].size = | ||
674 | change_point[chgidx]->addr - last_addr; | ||
675 | /* | ||
676 | * move forward only if the new size | ||
677 | * was non-zero | ||
678 | */ | ||
679 | if (new_bios[new_bios_entry].size != 0) | ||
680 | /* | ||
681 | * no more space left for new | ||
682 | * bios entries ? | ||
683 | */ | ||
684 | if (++new_bios_entry >= E820MAX) | ||
685 | break; | ||
686 | } | ||
687 | if (current_type != 0) { | ||
688 | new_bios[new_bios_entry].addr = | ||
689 | change_point[chgidx]->addr; | ||
690 | new_bios[new_bios_entry].type = current_type; | ||
691 | last_addr = change_point[chgidx]->addr; | ||
692 | } | ||
693 | last_type = current_type; | ||
694 | } | ||
695 | } | ||
696 | /* retain count for new bios entries */ | ||
697 | new_nr = new_bios_entry; | ||
698 | |||
699 | /* copy new bios mapping into original location */ | ||
700 | memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); | ||
701 | *pnr_map = new_nr; | ||
702 | |||
703 | return 0; | ||
704 | } | ||
705 | |||
706 | /* | ||
707 | * Copy the BIOS e820 map into a safe place. | ||
708 | * | ||
709 | * Sanity-check it while we're at it. | ||
710 | * | ||
711 | * If we're lucky and live on a modern system, the setup code | ||
712 | * will have given us a memory map that we can use to properly | ||
713 | * set up memory. If we aren't, we'll fake a memory map. | ||
714 | */ | ||
715 | static int __init copy_e820_map(struct e820entry *biosmap, int nr_map) | ||
716 | { | ||
717 | /* Only one memory region (or negative)? Ignore it */ | ||
718 | if (nr_map < 2) | ||
719 | return -1; | ||
720 | |||
721 | do { | ||
722 | u64 start = biosmap->addr; | ||
723 | u64 size = biosmap->size; | ||
724 | u64 end = start + size; | ||
725 | u32 type = biosmap->type; | ||
726 | |||
727 | /* Overflow in 64 bits? Ignore the memory map. */ | ||
728 | if (start > end) | ||
729 | return -1; | ||
730 | |||
731 | add_memory_region(start, size, type); | ||
732 | } while (biosmap++, --nr_map); | ||
733 | return 0; | ||
734 | } | ||
735 | |||
736 | static void early_panic(char *msg) | 68 | static void early_panic(char *msg) |
737 | { | 69 | { |
738 | early_printk(msg); | 70 | early_printk(msg); |
@@ -740,16 +72,21 @@ static void early_panic(char *msg) | |||
740 | } | 72 | } |
741 | 73 | ||
742 | /* We're not void only for x86 32-bit compat */ | 74 | /* We're not void only for x86 32-bit compat */ |
743 | char * __init machine_specific_memory_setup(void) | 75 | char *__init machine_specific_memory_setup(void) |
744 | { | 76 | { |
745 | char *who = "BIOS-e820"; | 77 | char *who = "BIOS-e820"; |
78 | int new_nr; | ||
746 | /* | 79 | /* |
747 | * Try to copy the BIOS-supplied E820-map. | 80 | * Try to copy the BIOS-supplied E820-map. |
748 | * | 81 | * |
749 | * Otherwise fake a memory map; one section from 0k->640k, | 82 | * Otherwise fake a memory map; one section from 0k->640k, |
750 | * the next section from 1mb->appropriate_mem_k | 83 | * the next section from 1mb->appropriate_mem_k |
751 | */ | 84 | */ |
752 | sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); | 85 | new_nr = boot_params.e820_entries; |
86 | sanitize_e820_map(boot_params.e820_map, | ||
87 | ARRAY_SIZE(boot_params.e820_map), | ||
88 | &new_nr); | ||
89 | boot_params.e820_entries = new_nr; | ||
753 | if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) | 90 | if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) |
754 | early_panic("Cannot find a valid memory map"); | 91 | early_panic("Cannot find a valid memory map"); |
755 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | 92 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); |
@@ -787,7 +124,6 @@ static int __init parse_memmap_opt(char *p) | |||
787 | saved_max_pfn = e820_end_of_ram(); | 124 | saved_max_pfn = e820_end_of_ram(); |
788 | remove_all_active_ranges(); | 125 | remove_all_active_ranges(); |
789 | #endif | 126 | #endif |
790 | max_pfn_mapped = 0; | ||
791 | e820.nr_map = 0; | 127 | e820.nr_map = 0; |
792 | userdef = 1; | 128 | userdef = 1; |
793 | return 0; | 129 | return 0; |
@@ -818,9 +154,9 @@ early_param("memmap", parse_memmap_opt); | |||
818 | void __init finish_e820_parsing(void) | 154 | void __init finish_e820_parsing(void) |
819 | { | 155 | { |
820 | if (userdef) { | 156 | if (userdef) { |
821 | char nr = e820.nr_map; | 157 | int nr = e820.nr_map; |
822 | 158 | ||
823 | if (sanitize_e820_map(e820.map, &nr) < 0) | 159 | if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) |
824 | early_panic("Invalid user supplied memory map"); | 160 | early_panic("Invalid user supplied memory map"); |
825 | e820.nr_map = nr; | 161 | e820.nr_map = nr; |
826 | 162 | ||
@@ -829,109 +165,6 @@ void __init finish_e820_parsing(void) | |||
829 | } | 165 | } |
830 | } | 166 | } |
831 | 167 | ||
832 | void __init update_memory_range(u64 start, u64 size, unsigned old_type, | ||
833 | unsigned new_type) | ||
834 | { | ||
835 | int i; | ||
836 | |||
837 | BUG_ON(old_type == new_type); | ||
838 | |||
839 | for (i = 0; i < e820.nr_map; i++) { | ||
840 | struct e820entry *ei = &e820.map[i]; | ||
841 | u64 final_start, final_end; | ||
842 | if (ei->type != old_type) | ||
843 | continue; | ||
844 | /* totally covered? */ | ||
845 | if (ei->addr >= start && ei->size <= size) { | ||
846 | ei->type = new_type; | ||
847 | continue; | ||
848 | } | ||
849 | /* partially covered */ | ||
850 | final_start = max(start, ei->addr); | ||
851 | final_end = min(start + size, ei->addr + ei->size); | ||
852 | if (final_start >= final_end) | ||
853 | continue; | ||
854 | add_memory_region(final_start, final_end - final_start, | ||
855 | new_type); | ||
856 | } | ||
857 | } | ||
858 | |||
859 | void __init update_e820(void) | ||
860 | { | ||
861 | u8 nr_map; | ||
862 | |||
863 | nr_map = e820.nr_map; | ||
864 | if (sanitize_e820_map(e820.map, &nr_map)) | ||
865 | return; | ||
866 | e820.nr_map = nr_map; | ||
867 | printk(KERN_INFO "modified physical RAM map:\n"); | ||
868 | e820_print_map("modified"); | ||
869 | } | ||
870 | |||
871 | unsigned long pci_mem_start = 0xaeedbabe; | ||
872 | EXPORT_SYMBOL(pci_mem_start); | ||
873 | |||
874 | /* | ||
875 | * Search for the biggest gap in the low 32 bits of the e820 | ||
876 | * memory space. We pass this space to PCI to assign MMIO resources | ||
877 | * for hotplug or unconfigured devices in. | ||
878 | * Hopefully the BIOS left enough space for this. | ||
879 | */ | ||
880 | __init void e820_setup_gap(void) | ||
881 | { | ||
882 | unsigned long gapstart, gapsize, round; | ||
883 | unsigned long last; | ||
884 | int i; | ||
885 | int found = 0; | ||
886 | |||
887 | last = 0x100000000ull; | ||
888 | gapstart = 0x10000000; | ||
889 | gapsize = 0x400000; | ||
890 | i = e820.nr_map; | ||
891 | while (--i >= 0) { | ||
892 | unsigned long long start = e820.map[i].addr; | ||
893 | unsigned long long end = start + e820.map[i].size; | ||
894 | |||
895 | /* | ||
896 | * Since "last" is at most 4GB, we know we'll | ||
897 | * fit in 32 bits if this condition is true | ||
898 | */ | ||
899 | if (last > end) { | ||
900 | unsigned long gap = last - end; | ||
901 | |||
902 | if (gap > gapsize) { | ||
903 | gapsize = gap; | ||
904 | gapstart = end; | ||
905 | found = 1; | ||
906 | } | ||
907 | } | ||
908 | if (start < last) | ||
909 | last = start; | ||
910 | } | ||
911 | |||
912 | if (!found) { | ||
913 | gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; | ||
914 | printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " | ||
915 | "address range\n" | ||
916 | KERN_ERR "PCI: Unassigned devices with 32bit resource " | ||
917 | "registers may break!\n"); | ||
918 | } | ||
919 | |||
920 | /* | ||
921 | * See how much we want to round up: start off with | ||
922 | * rounding to the next 1MB area. | ||
923 | */ | ||
924 | round = 0x100000; | ||
925 | while ((gapsize >> 4) > round) | ||
926 | round += round; | ||
927 | /* Fun with two's complement */ | ||
928 | pci_mem_start = (gapstart + round) & -round; | ||
929 | |||
930 | printk(KERN_INFO | ||
931 | "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", | ||
932 | pci_mem_start, gapstart, gapsize); | ||
933 | } | ||
934 | |||
935 | int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) | 168 | int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) |
936 | { | 169 | { |
937 | int i; | 170 | int i; |
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 77d424cf68b3..d5c7fcdd1861 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c | |||
@@ -213,6 +213,48 @@ unsigned long efi_get_time(void) | |||
213 | eft.minute, eft.second); | 213 | eft.minute, eft.second); |
214 | } | 214 | } |
215 | 215 | ||
216 | /* | ||
217 | * Tell the kernel about the EFI memory map. This might include | ||
218 | * more than the max 128 entries that can fit in the e820 legacy | ||
219 | * (zeropage) memory map. | ||
220 | */ | ||
221 | |||
222 | static void __init add_efi_memmap(void) | ||
223 | { | ||
224 | void *p; | ||
225 | |||
226 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
227 | efi_memory_desc_t *md = p; | ||
228 | unsigned long long start = md->phys_addr; | ||
229 | unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; | ||
230 | int e820_type; | ||
231 | |||
232 | if (md->attribute & EFI_MEMORY_WB) | ||
233 | e820_type = E820_RAM; | ||
234 | else | ||
235 | e820_type = E820_RESERVED; | ||
236 | add_memory_region(start, size, e820_type); | ||
237 | } | ||
238 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | ||
239 | } | ||
240 | |||
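One subtlety worth spelling out: EFI descriptors count 4KB pages regardless of the kernel's PAGE_SIZE, which is why the conversion shifts num_pages by EFI_PAGE_SHIFT (12) rather than PAGE_SHIFT. A descriptor with num_pages == 0x100, for instance, covers exactly 1MB.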
241 | void __init efi_reserve_early(void) | ||
242 | { | ||
243 | unsigned long pmap; | ||
244 | |||
245 | pmap = boot_params.efi_info.efi_memmap; | ||
246 | #ifdef CONFIG_X86_64 | ||
247 | pmap += (__u64)boot_params.efi_info.efi_memmap_hi << 32; | ||
248 | #endif | ||
249 | memmap.phys_map = (void *)pmap; | ||
250 | memmap.nr_map = boot_params.efi_info.efi_memmap_size / | ||
251 | boot_params.efi_info.efi_memdesc_size; | ||
252 | memmap.desc_version = boot_params.efi_info.efi_memdesc_version; | ||
253 | memmap.desc_size = boot_params.efi_info.efi_memdesc_size; | ||
254 | reserve_early(pmap, pmap + memmap.nr_map * memmap.desc_size, | ||
255 | "EFI memmap"); | ||
256 | } | ||
257 | |||
216 | #if EFI_DEBUG | 258 | #if EFI_DEBUG |
217 | static void __init print_efi_memmap(void) | 259 | static void __init print_efi_memmap(void) |
218 | { | 260 | { |
@@ -242,21 +284,11 @@ void __init efi_init(void) | |||
242 | int i = 0; | 284 | int i = 0; |
243 | void *tmp; | 285 | void *tmp; |
244 | 286 | ||
245 | #ifdef CONFIG_X86_32 | ||
246 | efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; | 287 | efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; |
247 | memmap.phys_map = (void *)boot_params.efi_info.efi_memmap; | 288 | #ifdef CONFIG_X86_64 |
248 | #else | 289 | efi_phys.systab = (void *)efi_phys.systab + |
249 | efi_phys.systab = (efi_system_table_t *) | 290 | ((__u64)boot_params.efi_info.efi_systab_hi<<32); |
250 | (boot_params.efi_info.efi_systab | | ||
251 | ((__u64)boot_params.efi_info.efi_systab_hi<<32)); | ||
252 | memmap.phys_map = (void *) | ||
253 | (boot_params.efi_info.efi_memmap | | ||
254 | ((__u64)boot_params.efi_info.efi_memmap_hi<<32)); | ||
255 | #endif | 291 | #endif |
256 | memmap.nr_map = boot_params.efi_info.efi_memmap_size / | ||
257 | boot_params.efi_info.efi_memdesc_size; | ||
258 | memmap.desc_version = boot_params.efi_info.efi_memdesc_version; | ||
259 | memmap.desc_size = boot_params.efi_info.efi_memdesc_size; | ||
260 | 292 | ||
261 | efi.systab = early_ioremap((unsigned long)efi_phys.systab, | 293 | efi.systab = early_ioremap((unsigned long)efi_phys.systab, |
262 | sizeof(efi_system_table_t)); | 294 | sizeof(efi_system_table_t)); |
@@ -370,6 +402,7 @@ void __init efi_init(void) | |||
370 | if (memmap.desc_size != sizeof(efi_memory_desc_t)) | 402 | if (memmap.desc_size != sizeof(efi_memory_desc_t)) |
371 | printk(KERN_WARNING "Kernel-defined memdesc" | 403 | printk(KERN_WARNING "Kernel-defined memdesc" |
372 | "doesn't match the one from EFI!\n"); | 404 | "doesn't match the one from EFI!\n"); |
405 | add_efi_memmap(); | ||
373 | 406 | ||
374 | /* Setup for EFI runtime service */ | 407 | /* Setup for EFI runtime service */ |
375 | reboot_type = BOOT_EFI; | 408 | reboot_type = BOOT_EFI; |
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index d0060fdcccac..652c5287215f 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c | |||
@@ -97,13 +97,7 @@ void __init efi_call_phys_epilog(void) | |||
97 | early_runtime_code_mapping_set_exec(0); | 97 | early_runtime_code_mapping_set_exec(0); |
98 | } | 98 | } |
99 | 99 | ||
100 | void __init efi_reserve_bootmem(void) | 100 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size) |
101 | { | ||
102 | reserve_bootmem_generic((unsigned long)memmap.phys_map, | ||
103 | memmap.nr_map * memmap.desc_size); | ||
104 | } | ||
105 | |||
106 | void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size) | ||
107 | { | 101 | { |
108 | static unsigned pages_mapped __initdata; | 102 | static unsigned pages_mapped __initdata; |
109 | unsigned i, pages; | 103 | unsigned i, pages; |
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index cbaaf69bedb2..1fa8be5bd217 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c | |||
@@ -51,7 +51,7 @@ void __init setup_apic_routing(void) | |||
51 | else | 51 | else |
52 | #endif | 52 | #endif |
53 | 53 | ||
54 | if (num_possible_cpus() <= 8) | 54 | if (max_physical_apicid < 8) |
55 | genapic = &apic_flat; | 55 | genapic = &apic_flat; |
56 | else | 56 | else |
57 | genapic = &apic_physflat; | 57 | genapic = &apic_physflat; |
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c new file mode 100644 index 000000000000..a727c0b9819c --- /dev/null +++ b/arch/x86/kernel/head.c | |||
@@ -0,0 +1,73 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/init.h> | ||
3 | |||
4 | #include <asm/setup.h> | ||
5 | #include <asm/bios_ebda.h> | ||
6 | |||
7 | #define BIOS_LOWMEM_KILOBYTES 0x413 | ||
8 | |||
9 | /* | ||
10 | * The BIOS places the EBDA/XBDA at the top of conventional | ||
11 | * memory, and usually decreases the reported amount of | ||
12 | * conventional memory (int 0x12) too. This also contains a | ||
13 | * workaround for Dell systems that neglect to reserve EBDA. | ||
14 | * The same workaround also avoids a problem with the AMD768MPX | ||
15 | * chipset: reserve a page before VGA to prevent PCI prefetch | ||
16 | * into it (errata #56). Usually the page is reserved anyways, | ||
17 | * unless you have no PS/2 mouse plugged in. | ||
18 | */ | ||
19 | void __init reserve_ebda_region(void) | ||
20 | { | ||
21 | unsigned int lowmem, ebda_addr; | ||
22 | |||
23 | /* To determine the position of the EBDA and the */ | ||
24 | /* end of conventional memory, we need to look at */ | ||
25 | /* the BIOS data area. In a paravirtual environment */ | ||
26 | /* that area is absent. We'll just have to assume */ | ||
27 | /* that the paravirt case can handle memory setup */ | ||
28 | /* correctly, without our help. */ | ||
29 | if (paravirt_enabled()) | ||
30 | return; | ||
31 | |||
32 | /* end of low (conventional) memory */ | ||
33 | lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); | ||
34 | lowmem <<= 10; | ||
35 | |||
36 | /* start of EBDA area */ | ||
37 | ebda_addr = get_bios_ebda(); | ||
38 | |||
39 | /* Fixup: bios puts an EBDA in the top 64K segment */ | ||
40 | /* of conventional memory, but does not adjust lowmem. */ | ||
41 | if ((lowmem - ebda_addr) <= 0x10000) | ||
42 | lowmem = ebda_addr; | ||
43 | |||
44 | /* Fixup: bios does not report an EBDA at all. */ | ||
45 | /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ | ||
46 | if ((ebda_addr == 0) && (lowmem >= 0x9f000)) | ||
47 | lowmem = 0x9f000; | ||
48 | |||
49 | /* Paranoia: should never happen, but... */ | ||
50 | if ((lowmem == 0) || (lowmem >= 0x100000)) | ||
51 | lowmem = 0x9f000; | ||
52 | |||
53 | /* reserve all memory between lowmem and the 1MB mark */ | ||
54 | reserve_early(lowmem, 0x100000, "BIOS reserved"); | ||
55 | } | ||
56 | |||
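Worked through with typical (illustrative) firmware values: the word at 0x413 reports 639 kilobytes, so lowmem = 639 << 10 = 0x9fc00, and get_bios_ebda() returns the same 0x9fc00. Then lowmem - ebda_addr == 0 <= 0x10000, so lowmem is pulled down to the EBDA base, and the single reserve_early(0x9fc00, 0x100000, "BIOS reserved") covers the EBDA, the VGA hole and the BIOS ROMs in one range.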
57 | void __init reserve_setup_data(void) | ||
58 | { | ||
59 | struct setup_data *data; | ||
60 | u64 pa_data; | ||
61 | char buf[32]; | ||
62 | |||
63 | if (boot_params.hdr.version < 0x0209) | ||
64 | return; | ||
65 | pa_data = boot_params.hdr.setup_data; | ||
66 | while (pa_data) { | ||
67 | data = early_ioremap(pa_data, sizeof(*data)); | ||
68 | sprintf(buf, "setup data %x", data->type); | ||
69 | reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); | ||
70 | pa_data = data->next; | ||
71 | early_iounmap(data, sizeof(*data)); | ||
72 | } | ||
73 | } | ||
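The list being walked is the boot protocol's setup_data chain: each node sits at a physical address (hence the early_ioremap()) and chains to the next via a physical pointer. Its layout, as declared in asm/bootparam.h:

	struct setup_data {
		__u64 next;	/* physical address of the next node; 0 ends the list */
		__u32 type;
		__u32 len;	/* length of data[] in bytes */
		__u8  data[0];
	};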
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 3db059058927..fa1d25dd83e3 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -8,7 +8,34 @@ | |||
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/start_kernel.h> | 9 | #include <linux/start_kernel.h> |
10 | 10 | ||
11 | #include <asm/setup.h> | ||
12 | #include <asm/sections.h> | ||
13 | #include <asm/e820.h> | ||
14 | #include <asm/bios_ebda.h> | ||
15 | |||
11 | void __init i386_start_kernel(void) | 16 | void __init i386_start_kernel(void) |
12 | { | 17 | { |
18 | reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); | ||
19 | |||
20 | #ifdef CONFIG_BLK_DEV_INITRD | ||
21 | /* Reserve INITRD */ | ||
22 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { | ||
23 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | ||
24 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | ||
25 | u64 ramdisk_end = ramdisk_image + ramdisk_size; | ||
26 | reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); | ||
27 | } | ||
28 | #endif | ||
29 | reserve_early(init_pg_tables_start, init_pg_tables_end, | ||
30 | "INIT_PG_TABLE"); | ||
31 | |||
32 | reserve_ebda_region(); | ||
33 | |||
34 | /* | ||
35 | * At this point everything still needed from the boot loader | ||
36 | * or BIOS or kernel text should be early reserved or marked not | ||
37 | * RAM in e820. All other memory is free game. | ||
38 | */ | ||
39 | |||
13 | start_kernel(); | 40 | start_kernel(); |
14 | } | 41 | } |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index e25c57b8aa84..5fbed459ff3b 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -51,74 +51,6 @@ static void __init copy_bootdata(char *real_mode_data) | |||
51 | } | 51 | } |
52 | } | 52 | } |
53 | 53 | ||
54 | #define BIOS_LOWMEM_KILOBYTES 0x413 | ||
55 | |||
56 | /* | ||
57 | * The BIOS places the EBDA/XBDA at the top of conventional | ||
58 | * memory, and usually decreases the reported amount of | ||
59 | * conventional memory (int 0x12) too. This also contains a | ||
60 | * workaround for Dell systems that neglect to reserve EBDA. | ||
61 | * The same workaround also avoids a problem with the AMD768MPX | ||
62 | * chipset: reserve a page before VGA to prevent PCI prefetch | ||
63 | * into it (errata #56). Usually the page is reserved anyways, | ||
64 | * unless you have no PS/2 mouse plugged in. | ||
65 | */ | ||
66 | static void __init reserve_ebda_region(void) | ||
67 | { | ||
68 | unsigned int lowmem, ebda_addr; | ||
69 | |||
70 | /* To determine the position of the EBDA and the */ | ||
71 | /* end of conventional memory, we need to look at */ | ||
72 | /* the BIOS data area. In a paravirtual environment */ | ||
73 | /* that area is absent. We'll just have to assume */ | ||
74 | /* that the paravirt case can handle memory setup */ | ||
75 | /* correctly, without our help. */ | ||
76 | if (paravirt_enabled()) | ||
77 | return; | ||
78 | |||
79 | /* end of low (conventional) memory */ | ||
80 | lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); | ||
81 | lowmem <<= 10; | ||
82 | |||
83 | /* start of EBDA area */ | ||
84 | ebda_addr = get_bios_ebda(); | ||
85 | |||
86 | /* Fixup: bios puts an EBDA in the top 64K segment */ | ||
87 | /* of conventional memory, but does not adjust lowmem. */ | ||
88 | if ((lowmem - ebda_addr) <= 0x10000) | ||
89 | lowmem = ebda_addr; | ||
90 | |||
91 | /* Fixup: bios does not report an EBDA at all. */ | ||
92 | /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ | ||
93 | if ((ebda_addr == 0) && (lowmem >= 0x9f000)) | ||
94 | lowmem = 0x9f000; | ||
95 | |||
96 | /* Paranoia: should never happen, but... */ | ||
97 | if ((lowmem == 0) || (lowmem >= 0x100000)) | ||
98 | lowmem = 0x9f000; | ||
99 | |||
100 | /* reserve all memory between lowmem and the 1MB mark */ | ||
101 | reserve_early(lowmem, 0x100000, "BIOS reserved"); | ||
102 | } | ||
103 | |||
104 | static void __init reserve_setup_data(void) | ||
105 | { | ||
106 | struct setup_data *data; | ||
107 | unsigned long pa_data; | ||
108 | char buf[32]; | ||
109 | |||
110 | if (boot_params.hdr.version < 0x0209) | ||
111 | return; | ||
112 | pa_data = boot_params.hdr.setup_data; | ||
113 | while (pa_data) { | ||
114 | data = early_ioremap(pa_data, sizeof(*data)); | ||
115 | sprintf(buf, "setup data %x", data->type); | ||
116 | reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); | ||
117 | pa_data = data->next; | ||
118 | early_iounmap(data, sizeof(*data)); | ||
119 | } | ||
120 | } | ||
121 | |||
122 | void __init x86_64_start_kernel(char * real_mode_data) | 54 | void __init x86_64_start_kernel(char * real_mode_data) |
123 | { | 55 | { |
124 | int i; | 56 | int i; |
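[Editor's note] The removed reserve_ebda_region() survives elsewhere in the series; its arithmetic is worth spelling out. A worked example with typical BIOS values (illustrative numbers, not taken from any specific machine):

    /* The BDA word at 0x413 holds base memory in KiB; 639 is typical. */
    unsigned int lowmem    = 639 << 10;   /* 0x9FC00: end of conventional memory */
    unsigned int ebda_addr = 0x9FC00;     /* EBDA sitting at the top of lowmem */
    /* (lowmem - ebda_addr) <= 0x10000, so lowmem is pulled down to ebda_addr,
     * and reserve_early(0x9FC00, 0x100000, "BIOS reserved") protects the EBDA
     * plus the legacy video/ROM hole all the way up to the 1 MB mark. */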
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index f7357cc0162c..b98b338aae1a 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -194,6 +194,7 @@ default_entry: | |||
194 | xorl %ebx,%ebx /* %ebx is kept at zero */ | 194 | xorl %ebx,%ebx /* %ebx is kept at zero */ |
195 | 195 | ||
196 | movl $pa(pg0), %edi | 196 | movl $pa(pg0), %edi |
197 | movl %edi, pa(init_pg_tables_start) | ||
197 | movl $pa(swapper_pg_pmd), %edx | 198 | movl $pa(swapper_pg_pmd), %edx |
198 | movl $PTE_ATTR, %eax | 199 | movl $PTE_ATTR, %eax |
199 | 10: | 200 | 10: |
@@ -219,6 +220,8 @@ default_entry: | |||
219 | jb 10b | 220 | jb 10b |
220 | 1: | 221 | 1: |
221 | movl %edi,pa(init_pg_tables_end) | 222 | movl %edi,pa(init_pg_tables_end) |
223 | shrl $12, %eax | ||
224 | movl %eax, pa(max_pfn_mapped) | ||
222 | 225 | ||
223 | /* Do early initialization of the fixmap area */ | 226 | /* Do early initialization of the fixmap area */ |
224 | movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax | 227 | movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax |
@@ -228,6 +231,7 @@ default_entry: | |||
228 | page_pde_offset = (__PAGE_OFFSET >> 20); | 231 | page_pde_offset = (__PAGE_OFFSET >> 20); |
229 | 232 | ||
230 | movl $pa(pg0), %edi | 233 | movl $pa(pg0), %edi |
234 | movl %edi, pa(init_pg_tables_start) | ||
231 | movl $pa(swapper_pg_dir), %edx | 235 | movl $pa(swapper_pg_dir), %edx |
232 | movl $PTE_ATTR, %eax | 236 | movl $PTE_ATTR, %eax |
233 | 10: | 237 | 10: |
@@ -249,6 +253,8 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
249 | cmpl %ebp,%eax | 253 | cmpl %ebp,%eax |
250 | jb 10b | 254 | jb 10b |
251 | movl %edi,pa(init_pg_tables_end) | 255 | movl %edi,pa(init_pg_tables_end) |
256 | shrl $12, %eax | ||
257 | movl %eax, pa(max_pfn_mapped) | ||
252 | 258 | ||
253 | /* Do early initialization of the fixmap area */ | 259 | /* Do early initialization of the fixmap area */ |
254 | movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax | 260 | movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax |
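[Editor's note] The two new shrl instructions convert the byte address of the last mapped page, left in %eax by the mapping loop, into a page frame number. In C terms, roughly (end_of_mapping is an illustrative name; PAGE_SHIFT is 12 on x86):

    /* %eax holds the physical address just past the last byte mapped;
     * shifting by PAGE_SHIFT turns bytes into 4 KiB page frames. */
    max_pfn_mapped = end_of_mapping >> PAGE_SHIFT;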
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 4dc8600d9d20..0662817d61bf 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c | |||
@@ -72,15 +72,21 @@ int sis_apic_bug = -1; | |||
72 | int nr_ioapic_registers[MAX_IO_APICS]; | 72 | int nr_ioapic_registers[MAX_IO_APICS]; |
73 | 73 | ||
74 | /* I/O APIC entries */ | 74 | /* I/O APIC entries */ |
75 | struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; | 75 | struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; |
76 | int nr_ioapics; | 76 | int nr_ioapics; |
77 | 77 | ||
78 | /* MP IRQ source entries */ | 78 | /* MP IRQ source entries */ |
79 | struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; | 79 | struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; |
80 | 80 | ||
81 | /* # of MP IRQ source entries */ | 81 | /* # of MP IRQ source entries */ |
82 | int mp_irq_entries; | 82 | int mp_irq_entries; |
83 | 83 | ||
84 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) | ||
85 | int mp_bus_id_to_type[MAX_MP_BUSSES]; | ||
86 | #endif | ||
87 | |||
88 | DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); | ||
89 | |||
84 | static int disable_timer_pin_1 __initdata; | 90 | static int disable_timer_pin_1 __initdata; |
85 | 91 | ||
86 | /* | 92 | /* |
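[Editor's note] mp_bus_not_pci moves here from mpparse.c (see that file's diff below). DECLARE_BITMAP is the standard kernel idiom; roughly, from <linux/types.h>:

    #define DECLARE_BITMAP(name, bits) \
            unsigned long name[BITS_TO_LONGS(bits)]

    /* so the declaration above amounts to:
     *   unsigned long mp_bus_not_pci[BITS_TO_LONGS(MAX_MP_BUSSES)];
     * queried with test_bit(busid, mp_bus_not_pci), populated with set_bit()
     * and clear_bit() as buses are parsed. */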
@@ -110,7 +116,7 @@ struct io_apic { | |||
110 | static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) | 116 | static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) |
111 | { | 117 | { |
112 | return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) | 118 | return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) |
113 | + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); | 119 | + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); |
114 | } | 120 | } |
115 | 121 | ||
116 | static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) | 122 | static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) |
@@ -801,10 +807,10 @@ static int find_irq_entry(int apic, int pin, int type) | |||
801 | int i; | 807 | int i; |
802 | 808 | ||
803 | for (i = 0; i < mp_irq_entries; i++) | 809 | for (i = 0; i < mp_irq_entries; i++) |
804 | if (mp_irqs[i].mpc_irqtype == type && | 810 | if (mp_irqs[i].mp_irqtype == type && |
805 | (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || | 811 | (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || |
806 | mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && | 812 | mp_irqs[i].mp_dstapic == MP_APIC_ALL) && |
807 | mp_irqs[i].mpc_dstirq == pin) | 813 | mp_irqs[i].mp_dstirq == pin) |
808 | return i; | 814 | return i; |
809 | 815 | ||
810 | return -1; | 816 | return -1; |
@@ -818,13 +824,13 @@ static int __init find_isa_irq_pin(int irq, int type) | |||
818 | int i; | 824 | int i; |
819 | 825 | ||
820 | for (i = 0; i < mp_irq_entries; i++) { | 826 | for (i = 0; i < mp_irq_entries; i++) { |
821 | int lbus = mp_irqs[i].mpc_srcbus; | 827 | int lbus = mp_irqs[i].mp_srcbus; |
822 | 828 | ||
823 | if (test_bit(lbus, mp_bus_not_pci) && | 829 | if (test_bit(lbus, mp_bus_not_pci) && |
824 | (mp_irqs[i].mpc_irqtype == type) && | 830 | (mp_irqs[i].mp_irqtype == type) && |
825 | (mp_irqs[i].mpc_srcbusirq == irq)) | 831 | (mp_irqs[i].mp_srcbusirq == irq)) |
826 | 832 | ||
827 | return mp_irqs[i].mpc_dstirq; | 833 | return mp_irqs[i].mp_dstirq; |
828 | } | 834 | } |
829 | return -1; | 835 | return -1; |
830 | } | 836 | } |
@@ -834,17 +840,17 @@ static int __init find_isa_irq_apic(int irq, int type) | |||
834 | int i; | 840 | int i; |
835 | 841 | ||
836 | for (i = 0; i < mp_irq_entries; i++) { | 842 | for (i = 0; i < mp_irq_entries; i++) { |
837 | int lbus = mp_irqs[i].mpc_srcbus; | 843 | int lbus = mp_irqs[i].mp_srcbus; |
838 | 844 | ||
839 | if (test_bit(lbus, mp_bus_not_pci) && | 845 | if (test_bit(lbus, mp_bus_not_pci) && |
840 | (mp_irqs[i].mpc_irqtype == type) && | 846 | (mp_irqs[i].mp_irqtype == type) && |
841 | (mp_irqs[i].mpc_srcbusirq == irq)) | 847 | (mp_irqs[i].mp_srcbusirq == irq)) |
842 | break; | 848 | break; |
843 | } | 849 | } |
844 | if (i < mp_irq_entries) { | 850 | if (i < mp_irq_entries) { |
845 | int apic; | 851 | int apic; |
846 | for(apic = 0; apic < nr_ioapics; apic++) { | 852 | for(apic = 0; apic < nr_ioapics; apic++) { |
847 | if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) | 853 | if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) |
848 | return apic; | 854 | return apic; |
849 | } | 855 | } |
850 | } | 856 | } |
@@ -864,28 +870,28 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) | |||
864 | 870 | ||
865 | apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " | 871 | apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " |
866 | "slot:%d, pin:%d.\n", bus, slot, pin); | 872 | "slot:%d, pin:%d.\n", bus, slot, pin); |
867 | if (mp_bus_id_to_pci_bus[bus] == -1) { | 873 | if (test_bit(bus, mp_bus_not_pci)) { |
868 | printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); | 874 | printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); |
869 | return -1; | 875 | return -1; |
870 | } | 876 | } |
871 | for (i = 0; i < mp_irq_entries; i++) { | 877 | for (i = 0; i < mp_irq_entries; i++) { |
872 | int lbus = mp_irqs[i].mpc_srcbus; | 878 | int lbus = mp_irqs[i].mp_srcbus; |
873 | 879 | ||
874 | for (apic = 0; apic < nr_ioapics; apic++) | 880 | for (apic = 0; apic < nr_ioapics; apic++) |
875 | if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || | 881 | if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || |
876 | mp_irqs[i].mpc_dstapic == MP_APIC_ALL) | 882 | mp_irqs[i].mp_dstapic == MP_APIC_ALL) |
877 | break; | 883 | break; |
878 | 884 | ||
879 | if (!test_bit(lbus, mp_bus_not_pci) && | 885 | if (!test_bit(lbus, mp_bus_not_pci) && |
880 | !mp_irqs[i].mpc_irqtype && | 886 | !mp_irqs[i].mp_irqtype && |
881 | (bus == lbus) && | 887 | (bus == lbus) && |
882 | (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { | 888 | (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { |
883 | int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); | 889 | int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); |
884 | 890 | ||
885 | if (!(apic || IO_APIC_IRQ(irq))) | 891 | if (!(apic || IO_APIC_IRQ(irq))) |
886 | continue; | 892 | continue; |
887 | 893 | ||
888 | if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) | 894 | if (pin == (mp_irqs[i].mp_srcbusirq & 3)) |
889 | return irq; | 895 | return irq; |
890 | /* | 896 | /* |
891 | * Use the first all-but-pin matching entry as a | 897 | * Use the first all-but-pin matching entry as a |
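[Editor's note] The slot/pin unpacking in this hunk follows the MP specification's encoding of the PCI source-bus IRQ field; a sketch of the decode:

    /* MPS encoding of mp_srcbusirq for PCI buses:
     *   bits 0-1: interrupt pin (0 = INTA# ... 3 = INTD#)
     *   bits 2-6: PCI device (slot) number
     */
    int slot = (mp_irqs[i].mp_srcbusirq >> 2) & 0x1f;
    int pin  =  mp_irqs[i].mp_srcbusirq & 3;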
@@ -952,7 +958,7 @@ static int EISA_ELCR(unsigned int irq) | |||
952 | * EISA conforming in the MP table, that means its trigger type must | 958 | * EISA conforming in the MP table, that means its trigger type must |
953 | * be read in from the ELCR */ | 959 | * be read in from the ELCR */ |
954 | 960 | ||
955 | #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) | 961 | #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) |
956 | #define default_EISA_polarity(idx) default_ISA_polarity(idx) | 962 | #define default_EISA_polarity(idx) default_ISA_polarity(idx) |
957 | 963 | ||
958 | /* PCI interrupts are always polarity one level triggered, | 964 | /* PCI interrupts are always polarity one level triggered, |
@@ -969,13 +975,13 @@ static int EISA_ELCR(unsigned int irq) | |||
969 | 975 | ||
970 | static int MPBIOS_polarity(int idx) | 976 | static int MPBIOS_polarity(int idx) |
971 | { | 977 | { |
972 | int bus = mp_irqs[idx].mpc_srcbus; | 978 | int bus = mp_irqs[idx].mp_srcbus; |
973 | int polarity; | 979 | int polarity; |
974 | 980 | ||
975 | /* | 981 | /* |
976 | * Determine IRQ line polarity (high active or low active): | 982 | * Determine IRQ line polarity (high active or low active): |
977 | */ | 983 | */ |
978 | switch (mp_irqs[idx].mpc_irqflag & 3) | 984 | switch (mp_irqs[idx].mp_irqflag & 3) |
979 | { | 985 | { |
980 | case 0: /* conforms, ie. bus-type dependent polarity */ | 986 | case 0: /* conforms, ie. bus-type dependent polarity */ |
981 | { | 987 | { |
@@ -1012,13 +1018,13 @@ static int MPBIOS_polarity(int idx) | |||
1012 | 1018 | ||
1013 | static int MPBIOS_trigger(int idx) | 1019 | static int MPBIOS_trigger(int idx) |
1014 | { | 1020 | { |
1015 | int bus = mp_irqs[idx].mpc_srcbus; | 1021 | int bus = mp_irqs[idx].mp_srcbus; |
1016 | int trigger; | 1022 | int trigger; |
1017 | 1023 | ||
1018 | /* | 1024 | /* |
1019 | * Determine IRQ trigger mode (edge or level sensitive): | 1025 | * Determine IRQ trigger mode (edge or level sensitive): |
1020 | */ | 1026 | */ |
1021 | switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) | 1027 | switch ((mp_irqs[idx].mp_irqflag>>2) & 3) |
1022 | { | 1028 | { |
1023 | case 0: /* conforms, ie. bus-type dependent */ | 1029 | case 0: /* conforms, ie. bus-type dependent */ |
1024 | { | 1030 | { |
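[Editor's note] MPBIOS_polarity() and MPBIOS_trigger() slice the same flags word; the layout, per the MP specification:

    /* mp_irqflag bit layout:
     *   bits 0-1: polarity  00 conforms to bus, 01 active high, 11 active low
     *   bits 2-3: trigger   00 conforms to bus, 01 edge,        11 level
     */
    polarity = MPBIOS_polarity(idx);   /* switches on  mp_irqflag       & 3 */
    trigger  = MPBIOS_trigger(idx);    /* switches on (mp_irqflag >> 2) & 3 */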
@@ -1097,16 +1103,16 @@ static inline int irq_trigger(int idx) | |||
1097 | static int pin_2_irq(int idx, int apic, int pin) | 1103 | static int pin_2_irq(int idx, int apic, int pin) |
1098 | { | 1104 | { |
1099 | int irq, i; | 1105 | int irq, i; |
1100 | int bus = mp_irqs[idx].mpc_srcbus; | 1106 | int bus = mp_irqs[idx].mp_srcbus; |
1101 | 1107 | ||
1102 | /* | 1108 | /* |
1103 | * Debugging check, we are in big trouble if this message pops up! | 1109 | * Debugging check, we are in big trouble if this message pops up! |
1104 | */ | 1110 | */ |
1105 | if (mp_irqs[idx].mpc_dstirq != pin) | 1111 | if (mp_irqs[idx].mp_dstirq != pin) |
1106 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); | 1112 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); |
1107 | 1113 | ||
1108 | if (test_bit(bus, mp_bus_not_pci)) | 1114 | if (test_bit(bus, mp_bus_not_pci)) |
1109 | irq = mp_irqs[idx].mpc_srcbusirq; | 1115 | irq = mp_irqs[idx].mp_srcbusirq; |
1110 | else { | 1116 | else { |
1111 | /* | 1117 | /* |
1112 | * PCI IRQs are mapped in order | 1118 | * PCI IRQs are mapped in order |
@@ -1250,12 +1256,12 @@ static void __init setup_IO_APIC_irqs(void) | |||
1250 | if (first_notcon) { | 1256 | if (first_notcon) { |
1251 | apic_printk(APIC_VERBOSE, KERN_DEBUG | 1257 | apic_printk(APIC_VERBOSE, KERN_DEBUG |
1252 | " IO-APIC (apicid-pin) %d-%d", | 1258 | " IO-APIC (apicid-pin) %d-%d", |
1253 | mp_ioapics[apic].mpc_apicid, | 1259 | mp_ioapics[apic].mp_apicid, |
1254 | pin); | 1260 | pin); |
1255 | first_notcon = 0; | 1261 | first_notcon = 0; |
1256 | } else | 1262 | } else |
1257 | apic_printk(APIC_VERBOSE, ", %d-%d", | 1263 | apic_printk(APIC_VERBOSE, ", %d-%d", |
1258 | mp_ioapics[apic].mpc_apicid, pin); | 1264 | mp_ioapics[apic].mp_apicid, pin); |
1259 | continue; | 1265 | continue; |
1260 | } | 1266 | } |
1261 | 1267 | ||
@@ -1357,7 +1363,7 @@ void __init print_IO_APIC(void) | |||
1357 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); | 1363 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); |
1358 | for (i = 0; i < nr_ioapics; i++) | 1364 | for (i = 0; i < nr_ioapics; i++) |
1359 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", | 1365 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", |
1360 | mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); | 1366 | mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); |
1361 | 1367 | ||
1362 | /* | 1368 | /* |
1363 | * We are a bit conservative about what we expect. We have to | 1369 | * We are a bit conservative about what we expect. We have to |
@@ -1376,7 +1382,7 @@ void __init print_IO_APIC(void) | |||
1376 | reg_03.raw = io_apic_read(apic, 3); | 1382 | reg_03.raw = io_apic_read(apic, 3); |
1377 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1383 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1378 | 1384 | ||
1379 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); | 1385 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); |
1380 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); | 1386 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); |
1381 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); | 1387 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); |
1382 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); | 1388 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); |
@@ -1716,7 +1722,6 @@ void disable_IO_APIC(void) | |||
1716 | * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 | 1722 | * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 |
1717 | */ | 1723 | */ |
1718 | 1724 | ||
1719 | #ifndef CONFIG_X86_NUMAQ | ||
1720 | static void __init setup_ioapic_ids_from_mpc(void) | 1725 | static void __init setup_ioapic_ids_from_mpc(void) |
1721 | { | 1726 | { |
1722 | union IO_APIC_reg_00 reg_00; | 1727 | union IO_APIC_reg_00 reg_00; |
@@ -1726,6 +1731,11 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1726 | unsigned char old_id; | 1731 | unsigned char old_id; |
1727 | unsigned long flags; | 1732 | unsigned long flags; |
1728 | 1733 | ||
1734 | #ifdef CONFIG_X86_NUMAQ | ||
1735 | if (found_numaq) | ||
1736 | return; | ||
1737 | #endif | ||
1738 | |||
1729 | /* | 1739 | /* |
1730 | * Don't check I/O APIC IDs for xAPIC systems. They have | 1740 | * Don't check I/O APIC IDs for xAPIC systems. They have |
1731 | * no meaning without the serial APIC bus. | 1741 | * no meaning without the serial APIC bus. |
@@ -1749,14 +1759,14 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1749 | reg_00.raw = io_apic_read(apic, 0); | 1759 | reg_00.raw = io_apic_read(apic, 0); |
1750 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1760 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1751 | 1761 | ||
1752 | old_id = mp_ioapics[apic].mpc_apicid; | 1762 | old_id = mp_ioapics[apic].mp_apicid; |
1753 | 1763 | ||
1754 | if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) { | 1764 | if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { |
1755 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", | 1765 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", |
1756 | apic, mp_ioapics[apic].mpc_apicid); | 1766 | apic, mp_ioapics[apic].mp_apicid); |
1757 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", | 1767 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", |
1758 | reg_00.bits.ID); | 1768 | reg_00.bits.ID); |
1759 | mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; | 1769 | mp_ioapics[apic].mp_apicid = reg_00.bits.ID; |
1760 | } | 1770 | } |
1761 | 1771 | ||
1762 | /* | 1772 | /* |
@@ -1765,9 +1775,9 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1765 | * 'stuck on smp_invalidate_needed IPI wait' messages. | 1775 | * 'stuck on smp_invalidate_needed IPI wait' messages. |
1766 | */ | 1776 | */ |
1767 | if (check_apicid_used(phys_id_present_map, | 1777 | if (check_apicid_used(phys_id_present_map, |
1768 | mp_ioapics[apic].mpc_apicid)) { | 1778 | mp_ioapics[apic].mp_apicid)) { |
1769 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", | 1779 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", |
1770 | apic, mp_ioapics[apic].mpc_apicid); | 1780 | apic, mp_ioapics[apic].mp_apicid); |
1771 | for (i = 0; i < get_physical_broadcast(); i++) | 1781 | for (i = 0; i < get_physical_broadcast(); i++) |
1772 | if (!physid_isset(i, phys_id_present_map)) | 1782 | if (!physid_isset(i, phys_id_present_map)) |
1773 | break; | 1783 | break; |
@@ -1776,13 +1786,13 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1776 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", | 1786 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", |
1777 | i); | 1787 | i); |
1778 | physid_set(i, phys_id_present_map); | 1788 | physid_set(i, phys_id_present_map); |
1779 | mp_ioapics[apic].mpc_apicid = i; | 1789 | mp_ioapics[apic].mp_apicid = i; |
1780 | } else { | 1790 | } else { |
1781 | physid_mask_t tmp; | 1791 | physid_mask_t tmp; |
1782 | tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid); | 1792 | tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); |
1783 | apic_printk(APIC_VERBOSE, "Setting %d in the " | 1793 | apic_printk(APIC_VERBOSE, "Setting %d in the " |
1784 | "phys_id_present_map\n", | 1794 | "phys_id_present_map\n", |
1785 | mp_ioapics[apic].mpc_apicid); | 1795 | mp_ioapics[apic].mp_apicid); |
1786 | physids_or(phys_id_present_map, phys_id_present_map, tmp); | 1796 | physids_or(phys_id_present_map, phys_id_present_map, tmp); |
1787 | } | 1797 | } |
1788 | 1798 | ||
@@ -1791,11 +1801,11 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1791 | * We need to adjust the IRQ routing table | 1801 | * We need to adjust the IRQ routing table |
1792 | * if the ID changed. | 1802 | * if the ID changed. |
1793 | */ | 1803 | */ |
1794 | if (old_id != mp_ioapics[apic].mpc_apicid) | 1804 | if (old_id != mp_ioapics[apic].mp_apicid) |
1795 | for (i = 0; i < mp_irq_entries; i++) | 1805 | for (i = 0; i < mp_irq_entries; i++) |
1796 | if (mp_irqs[i].mpc_dstapic == old_id) | 1806 | if (mp_irqs[i].mp_dstapic == old_id) |
1797 | mp_irqs[i].mpc_dstapic | 1807 | mp_irqs[i].mp_dstapic |
1798 | = mp_ioapics[apic].mpc_apicid; | 1808 | = mp_ioapics[apic].mp_apicid; |
1799 | 1809 | ||
1800 | /* | 1810 | /* |
1801 | * Read the right value from the MPC table and | 1811 | * Read the right value from the MPC table and |
@@ -1803,9 +1813,9 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1803 | */ | 1813 | */ |
1804 | apic_printk(APIC_VERBOSE, KERN_INFO | 1814 | apic_printk(APIC_VERBOSE, KERN_INFO |
1805 | "...changing IO-APIC physical APIC ID to %d ...", | 1815 | "...changing IO-APIC physical APIC ID to %d ...", |
1806 | mp_ioapics[apic].mpc_apicid); | 1816 | mp_ioapics[apic].mp_apicid); |
1807 | 1817 | ||
1808 | reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; | 1818 | reg_00.bits.ID = mp_ioapics[apic].mp_apicid; |
1809 | spin_lock_irqsave(&ioapic_lock, flags); | 1819 | spin_lock_irqsave(&ioapic_lock, flags); |
1810 | io_apic_write(apic, 0, reg_00.raw); | 1820 | io_apic_write(apic, 0, reg_00.raw); |
1811 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1821 | spin_unlock_irqrestore(&ioapic_lock, flags); |
@@ -1816,15 +1826,12 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1816 | spin_lock_irqsave(&ioapic_lock, flags); | 1826 | spin_lock_irqsave(&ioapic_lock, flags); |
1817 | reg_00.raw = io_apic_read(apic, 0); | 1827 | reg_00.raw = io_apic_read(apic, 0); |
1818 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1828 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1819 | if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) | 1829 | if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) |
1820 | printk("could not set ID!\n"); | 1830 | printk("could not set ID!\n"); |
1821 | else | 1831 | else |
1822 | apic_printk(APIC_VERBOSE, " ok.\n"); | 1832 | apic_printk(APIC_VERBOSE, " ok.\n"); |
1823 | } | 1833 | } |
1824 | } | 1834 | } |
1825 | #else | ||
1826 | static void __init setup_ioapic_ids_from_mpc(void) { } | ||
1827 | #endif | ||
1828 | 1835 | ||
1829 | int no_timer_check __initdata; | 1836 | int no_timer_check __initdata; |
1830 | 1837 | ||
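[Editor's note] These two hunks are the heart of the io_apic_32.c change: with one generic image able to boot both NUMAQ and ordinary boxes, the old #ifndef that compiled the whole function away becomes a runtime bail-out. Schematically:

    /* before: resolved at build time */
    #ifndef CONFIG_X86_NUMAQ
    static void __init setup_ioapic_ids_from_mpc(void) { /* real work */ }
    #else
    static void __init setup_ioapic_ids_from_mpc(void) { }
    #endif

    /* after: resolved at boot time */
    static void __init setup_ioapic_ids_from_mpc(void)
    {
    #ifdef CONFIG_X86_NUMAQ
            if (found_numaq)        /* set while parsing the MP table */
                    return;
    #endif
            /* real work */
    }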
@@ -2347,8 +2354,8 @@ static int ioapic_resume(struct sys_device *dev) | |||
2347 | 2354 | ||
2348 | spin_lock_irqsave(&ioapic_lock, flags); | 2355 | spin_lock_irqsave(&ioapic_lock, flags); |
2349 | reg_00.raw = io_apic_read(dev->id, 0); | 2356 | reg_00.raw = io_apic_read(dev->id, 0); |
2350 | if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { | 2357 | if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { |
2351 | reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; | 2358 | reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; |
2352 | io_apic_write(dev->id, 0, reg_00.raw); | 2359 | io_apic_write(dev->id, 0, reg_00.raw); |
2353 | } | 2360 | } |
2354 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2361 | spin_unlock_irqrestore(&ioapic_lock, flags); |
@@ -2781,7 +2788,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a | |||
2781 | 2788 | ||
2782 | apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " | 2789 | apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " |
2783 | "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, | 2790 | "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, |
2784 | mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, | 2791 | mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq, |
2785 | edge_level, active_high_low); | 2792 | edge_level, active_high_low); |
2786 | 2793 | ||
2787 | ioapic_register_intr(irq, entry.vector, edge_level); | 2794 | ioapic_register_intr(irq, entry.vector, edge_level); |
@@ -2802,8 +2809,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) | |||
2802 | return -1; | 2809 | return -1; |
2803 | 2810 | ||
2804 | for (i = 0; i < mp_irq_entries; i++) | 2811 | for (i = 0; i < mp_irq_entries; i++) |
2805 | if (mp_irqs[i].mpc_irqtype == mp_INT && | 2812 | if (mp_irqs[i].mp_irqtype == mp_INT && |
2806 | mp_irqs[i].mpc_srcbusirq == bus_irq) | 2813 | mp_irqs[i].mp_srcbusirq == bus_irq) |
2807 | break; | 2814 | break; |
2808 | if (i >= mp_irq_entries) | 2815 | if (i >= mp_irq_entries) |
2809 | return -1; | 2816 | return -1; |
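[Editor's note] Two patterns repeat through both io_apic files: the raw mpc_config_* / mpc_* MPC-table fields give way to firmware-neutral mp_config_* / mp_* copies (filled in by mpparse.c below, so ACPI can populate them without a real MPC table), and the mp_bus_id_to_pci_bus[] array is gone, turning the "is this a PCI bus" question into a bitmap test:

    /* old test, against the removed array: */
    if (mp_bus_id_to_pci_bus[bus] == -1)
            return -1;              /* bus unknown to PCI */

    /* new test, against the shared bitmap: */
    if (test_bit(bus, mp_bus_not_pci))
            return -1;              /* bit set means "not a PCI bus" */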
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index ef1a8dfcc529..339cf6f926dc 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c | |||
@@ -104,15 +104,17 @@ DEFINE_SPINLOCK(vector_lock); | |||
104 | int nr_ioapic_registers[MAX_IO_APICS]; | 104 | int nr_ioapic_registers[MAX_IO_APICS]; |
105 | 105 | ||
106 | /* I/O APIC entries */ | 106 | /* I/O APIC entries */ |
107 | struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; | 107 | struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; |
108 | int nr_ioapics; | 108 | int nr_ioapics; |
109 | 109 | ||
110 | /* MP IRQ source entries */ | 110 | /* MP IRQ source entries */ |
111 | struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; | 111 | struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; |
112 | 112 | ||
113 | /* # of MP IRQ source entries */ | 113 | /* # of MP IRQ source entries */ |
114 | int mp_irq_entries; | 114 | int mp_irq_entries; |
115 | 115 | ||
116 | DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); | ||
117 | |||
116 | /* | 118 | /* |
117 | * Rough estimation of how many shared IRQs there are, can | 119 | * Rough estimation of how many shared IRQs there are, can |
118 | * be changed anytime. | 120 | * be changed anytime. |
@@ -140,7 +142,7 @@ struct io_apic { | |||
140 | static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) | 142 | static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) |
141 | { | 143 | { |
142 | return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) | 144 | return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) |
143 | + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); | 145 | + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); |
144 | } | 146 | } |
145 | 147 | ||
146 | static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) | 148 | static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) |
@@ -453,10 +455,10 @@ static int find_irq_entry(int apic, int pin, int type) | |||
453 | int i; | 455 | int i; |
454 | 456 | ||
455 | for (i = 0; i < mp_irq_entries; i++) | 457 | for (i = 0; i < mp_irq_entries; i++) |
456 | if (mp_irqs[i].mpc_irqtype == type && | 458 | if (mp_irqs[i].mp_irqtype == type && |
457 | (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || | 459 | (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || |
458 | mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && | 460 | mp_irqs[i].mp_dstapic == MP_APIC_ALL) && |
459 | mp_irqs[i].mpc_dstirq == pin) | 461 | mp_irqs[i].mp_dstirq == pin) |
460 | return i; | 462 | return i; |
461 | 463 | ||
462 | return -1; | 464 | return -1; |
@@ -470,13 +472,13 @@ static int __init find_isa_irq_pin(int irq, int type) | |||
470 | int i; | 472 | int i; |
471 | 473 | ||
472 | for (i = 0; i < mp_irq_entries; i++) { | 474 | for (i = 0; i < mp_irq_entries; i++) { |
473 | int lbus = mp_irqs[i].mpc_srcbus; | 475 | int lbus = mp_irqs[i].mp_srcbus; |
474 | 476 | ||
475 | if (test_bit(lbus, mp_bus_not_pci) && | 477 | if (test_bit(lbus, mp_bus_not_pci) && |
476 | (mp_irqs[i].mpc_irqtype == type) && | 478 | (mp_irqs[i].mp_irqtype == type) && |
477 | (mp_irqs[i].mpc_srcbusirq == irq)) | 479 | (mp_irqs[i].mp_srcbusirq == irq)) |
478 | 480 | ||
479 | return mp_irqs[i].mpc_dstirq; | 481 | return mp_irqs[i].mp_dstirq; |
480 | } | 482 | } |
481 | return -1; | 483 | return -1; |
482 | } | 484 | } |
@@ -486,17 +488,17 @@ static int __init find_isa_irq_apic(int irq, int type) | |||
486 | int i; | 488 | int i; |
487 | 489 | ||
488 | for (i = 0; i < mp_irq_entries; i++) { | 490 | for (i = 0; i < mp_irq_entries; i++) { |
489 | int lbus = mp_irqs[i].mpc_srcbus; | 491 | int lbus = mp_irqs[i].mp_srcbus; |
490 | 492 | ||
491 | if (test_bit(lbus, mp_bus_not_pci) && | 493 | if (test_bit(lbus, mp_bus_not_pci) && |
492 | (mp_irqs[i].mpc_irqtype == type) && | 494 | (mp_irqs[i].mp_irqtype == type) && |
493 | (mp_irqs[i].mpc_srcbusirq == irq)) | 495 | (mp_irqs[i].mp_srcbusirq == irq)) |
494 | break; | 496 | break; |
495 | } | 497 | } |
496 | if (i < mp_irq_entries) { | 498 | if (i < mp_irq_entries) { |
497 | int apic; | 499 | int apic; |
498 | for(apic = 0; apic < nr_ioapics; apic++) { | 500 | for(apic = 0; apic < nr_ioapics; apic++) { |
499 | if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) | 501 | if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) |
500 | return apic; | 502 | return apic; |
501 | } | 503 | } |
502 | } | 504 | } |
@@ -516,28 +518,28 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) | |||
516 | 518 | ||
517 | apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", | 519 | apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", |
518 | bus, slot, pin); | 520 | bus, slot, pin); |
519 | if (mp_bus_id_to_pci_bus[bus] == -1) { | 521 | if (test_bit(bus, mp_bus_not_pci)) { |
520 | apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); | 522 | apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); |
521 | return -1; | 523 | return -1; |
522 | } | 524 | } |
523 | for (i = 0; i < mp_irq_entries; i++) { | 525 | for (i = 0; i < mp_irq_entries; i++) { |
524 | int lbus = mp_irqs[i].mpc_srcbus; | 526 | int lbus = mp_irqs[i].mp_srcbus; |
525 | 527 | ||
526 | for (apic = 0; apic < nr_ioapics; apic++) | 528 | for (apic = 0; apic < nr_ioapics; apic++) |
527 | if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || | 529 | if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || |
528 | mp_irqs[i].mpc_dstapic == MP_APIC_ALL) | 530 | mp_irqs[i].mp_dstapic == MP_APIC_ALL) |
529 | break; | 531 | break; |
530 | 532 | ||
531 | if (!test_bit(lbus, mp_bus_not_pci) && | 533 | if (!test_bit(lbus, mp_bus_not_pci) && |
532 | !mp_irqs[i].mpc_irqtype && | 534 | !mp_irqs[i].mp_irqtype && |
533 | (bus == lbus) && | 535 | (bus == lbus) && |
534 | (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { | 536 | (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { |
535 | int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); | 537 | int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); |
536 | 538 | ||
537 | if (!(apic || IO_APIC_IRQ(irq))) | 539 | if (!(apic || IO_APIC_IRQ(irq))) |
538 | continue; | 540 | continue; |
539 | 541 | ||
540 | if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) | 542 | if (pin == (mp_irqs[i].mp_srcbusirq & 3)) |
541 | return irq; | 543 | return irq; |
542 | /* | 544 | /* |
543 | * Use the first all-but-pin matching entry as a | 545 | * Use the first all-but-pin matching entry as a |
@@ -565,13 +567,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) | |||
565 | 567 | ||
566 | static int MPBIOS_polarity(int idx) | 568 | static int MPBIOS_polarity(int idx) |
567 | { | 569 | { |
568 | int bus = mp_irqs[idx].mpc_srcbus; | 570 | int bus = mp_irqs[idx].mp_srcbus; |
569 | int polarity; | 571 | int polarity; |
570 | 572 | ||
571 | /* | 573 | /* |
572 | * Determine IRQ line polarity (high active or low active): | 574 | * Determine IRQ line polarity (high active or low active): |
573 | */ | 575 | */ |
574 | switch (mp_irqs[idx].mpc_irqflag & 3) | 576 | switch (mp_irqs[idx].mp_irqflag & 3) |
575 | { | 577 | { |
576 | case 0: /* conforms, ie. bus-type dependent polarity */ | 578 | case 0: /* conforms, ie. bus-type dependent polarity */ |
577 | if (test_bit(bus, mp_bus_not_pci)) | 579 | if (test_bit(bus, mp_bus_not_pci)) |
@@ -607,13 +609,13 @@ static int MPBIOS_polarity(int idx) | |||
607 | 609 | ||
608 | static int MPBIOS_trigger(int idx) | 610 | static int MPBIOS_trigger(int idx) |
609 | { | 611 | { |
610 | int bus = mp_irqs[idx].mpc_srcbus; | 612 | int bus = mp_irqs[idx].mp_srcbus; |
611 | int trigger; | 613 | int trigger; |
612 | 614 | ||
613 | /* | 615 | /* |
614 | * Determine IRQ trigger mode (edge or level sensitive): | 616 | * Determine IRQ trigger mode (edge or level sensitive): |
615 | */ | 617 | */ |
616 | switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) | 618 | switch ((mp_irqs[idx].mp_irqflag>>2) & 3) |
617 | { | 619 | { |
618 | case 0: /* conforms, ie. bus-type dependent */ | 620 | case 0: /* conforms, ie. bus-type dependent */ |
619 | if (test_bit(bus, mp_bus_not_pci)) | 621 | if (test_bit(bus, mp_bus_not_pci)) |
@@ -660,16 +662,16 @@ static inline int irq_trigger(int idx) | |||
660 | static int pin_2_irq(int idx, int apic, int pin) | 662 | static int pin_2_irq(int idx, int apic, int pin) |
661 | { | 663 | { |
662 | int irq, i; | 664 | int irq, i; |
663 | int bus = mp_irqs[idx].mpc_srcbus; | 665 | int bus = mp_irqs[idx].mp_srcbus; |
664 | 666 | ||
665 | /* | 667 | /* |
666 | * Debugging check, we are in big trouble if this message pops up! | 668 | * Debugging check, we are in big trouble if this message pops up! |
667 | */ | 669 | */ |
668 | if (mp_irqs[idx].mpc_dstirq != pin) | 670 | if (mp_irqs[idx].mp_dstirq != pin) |
669 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); | 671 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); |
670 | 672 | ||
671 | if (test_bit(bus, mp_bus_not_pci)) { | 673 | if (test_bit(bus, mp_bus_not_pci)) { |
672 | irq = mp_irqs[idx].mpc_srcbusirq; | 674 | irq = mp_irqs[idx].mp_srcbusirq; |
673 | } else { | 675 | } else { |
674 | /* | 676 | /* |
675 | * PCI IRQs are mapped in order | 677 | * PCI IRQs are mapped in order |
@@ -846,7 +848,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, | |||
846 | apic_printk(APIC_VERBOSE,KERN_DEBUG | 848 | apic_printk(APIC_VERBOSE,KERN_DEBUG |
847 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " | 849 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " |
848 | "IRQ %d Mode:%i Active:%i)\n", | 850 | "IRQ %d Mode:%i Active:%i)\n", |
849 | apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector, | 851 | apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, |
850 | irq, trigger, polarity); | 852 | irq, trigger, polarity); |
851 | 853 | ||
852 | /* | 854 | /* |
@@ -887,10 +889,10 @@ static void __init setup_IO_APIC_irqs(void) | |||
887 | idx = find_irq_entry(apic,pin,mp_INT); | 889 | idx = find_irq_entry(apic,pin,mp_INT); |
888 | if (idx == -1) { | 890 | if (idx == -1) { |
889 | if (first_notcon) { | 891 | if (first_notcon) { |
890 | apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin); | 892 | apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin); |
891 | first_notcon = 0; | 893 | first_notcon = 0; |
892 | } else | 894 | } else |
893 | apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin); | 895 | apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin); |
894 | continue; | 896 | continue; |
895 | } | 897 | } |
896 | if (!first_notcon) { | 898 | if (!first_notcon) { |
@@ -965,7 +967,7 @@ void __apicdebuginit print_IO_APIC(void) | |||
965 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); | 967 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); |
966 | for (i = 0; i < nr_ioapics; i++) | 968 | for (i = 0; i < nr_ioapics; i++) |
967 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", | 969 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", |
968 | mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); | 970 | mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); |
969 | 971 | ||
970 | /* | 972 | /* |
971 | * We are a bit conservative about what we expect. We have to | 973 | * We are a bit conservative about what we expect. We have to |
@@ -983,7 +985,7 @@ void __apicdebuginit print_IO_APIC(void) | |||
983 | spin_unlock_irqrestore(&ioapic_lock, flags); | 985 | spin_unlock_irqrestore(&ioapic_lock, flags); |
984 | 986 | ||
985 | printk("\n"); | 987 | printk("\n"); |
986 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); | 988 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); |
987 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); | 989 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); |
988 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); | 990 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); |
989 | 991 | ||
@@ -1841,8 +1843,8 @@ static int ioapic_resume(struct sys_device *dev) | |||
1841 | 1843 | ||
1842 | spin_lock_irqsave(&ioapic_lock, flags); | 1844 | spin_lock_irqsave(&ioapic_lock, flags); |
1843 | reg_00.raw = io_apic_read(dev->id, 0); | 1845 | reg_00.raw = io_apic_read(dev->id, 0); |
1844 | if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { | 1846 | if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { |
1845 | reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; | 1847 | reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; |
1846 | io_apic_write(dev->id, 0, reg_00.raw); | 1848 | io_apic_write(dev->id, 0, reg_00.raw); |
1847 | } | 1849 | } |
1848 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1850 | spin_unlock_irqrestore(&ioapic_lock, flags); |
@@ -2242,8 +2244,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) | |||
2242 | return -1; | 2244 | return -1; |
2243 | 2245 | ||
2244 | for (i = 0; i < mp_irq_entries; i++) | 2246 | for (i = 0; i < mp_irq_entries; i++) |
2245 | if (mp_irqs[i].mpc_irqtype == mp_INT && | 2247 | if (mp_irqs[i].mp_irqtype == mp_INT && |
2246 | mp_irqs[i].mpc_srcbusirq == bus_irq) | 2248 | mp_irqs[i].mp_srcbusirq == bus_irq) |
2247 | break; | 2249 | break; |
2248 | if (i >= mp_irq_entries) | 2250 | if (i >= mp_irq_entries) |
2249 | return -1; | 2251 | return -1; |
@@ -2336,7 +2338,7 @@ void __init ioapic_init_mappings(void) | |||
2336 | ioapic_res = ioapic_setup_resources(); | 2338 | ioapic_res = ioapic_setup_resources(); |
2337 | for (i = 0; i < nr_ioapics; i++) { | 2339 | for (i = 0; i < nr_ioapics; i++) { |
2338 | if (smp_found_config) { | 2340 | if (smp_found_config) { |
2339 | ioapic_phys = mp_ioapics[i].mpc_apicaddr; | 2341 | ioapic_phys = mp_ioapics[i].mp_apicaddr; |
2340 | } else { | 2342 | } else { |
2341 | ioapic_phys = (unsigned long) | 2343 | ioapic_phys = (unsigned long) |
2342 | alloc_bootmem_pages(PAGE_SIZE); | 2344 | alloc_bootmem_pages(PAGE_SIZE); |
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 404683b94e79..1cc7a4b8643f 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -25,6 +25,8 @@ | |||
25 | #include <asm/proto.h> | 25 | #include <asm/proto.h> |
26 | #include <asm/acpi.h> | 26 | #include <asm/acpi.h> |
27 | #include <asm/bios_ebda.h> | 27 | #include <asm/bios_ebda.h> |
28 | #include <asm/e820.h> | ||
29 | #include <asm/trampoline.h> | ||
28 | 30 | ||
29 | #include <mach_apic.h> | 31 | #include <mach_apic.h> |
30 | #ifdef CONFIG_X86_32 | 32 | #ifdef CONFIG_X86_32 |
@@ -32,28 +34,6 @@ | |||
32 | #include <mach_mpparse.h> | 34 | #include <mach_mpparse.h> |
33 | #endif | 35 | #endif |
34 | 36 | ||
35 | /* Have we found an MP table */ | ||
36 | int smp_found_config; | ||
37 | |||
38 | /* | ||
39 | * Various Linux-internal data structures created from the | ||
40 | * MP-table. | ||
41 | */ | ||
42 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) | ||
43 | int mp_bus_id_to_type[MAX_MP_BUSSES]; | ||
44 | #endif | ||
45 | |||
46 | DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); | ||
47 | int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 }; | ||
48 | |||
49 | static int mp_current_pci_id; | ||
50 | |||
51 | int pic_mode; | ||
52 | |||
53 | /* | ||
54 | * Intel MP BIOS table parsing routines: | ||
55 | */ | ||
56 | |||
57 | /* | 37 | /* |
58 | * Checksum an MP configuration block. | 38 | * Checksum an MP configuration block. |
59 | */ | 39 | */ |
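[Editor's note] For reference, the checksum named in the hunk header below sums the block bytes modulo 256; a valid MP structure sums to zero. The helper, reconstructed as a sketch of the same-era source (it is only context here, not modified by this patch):

    static int __init mpf_checksum(unsigned char *mp, int len)
    {
            int sum = 0;

            while (len--)
                    sum += *mp++;

            return sum & 0xFF;      /* 0 for a well-formed table */
    }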
@@ -69,15 +49,73 @@ static int __init mpf_checksum(unsigned char *mp, int len) | |||
69 | } | 49 | } |
70 | 50 | ||
71 | #ifdef CONFIG_X86_NUMAQ | 51 | #ifdef CONFIG_X86_NUMAQ |
52 | int found_numaq; | ||
72 | /* | 53 | /* |
73 | * Have to match translation table entries to main table entries by counter | 54 | * Have to match translation table entries to main table entries by counter |
74 | * hence the mpc_record variable .... can't see a less disgusting way of | 55 | * hence the mpc_record variable .... can't see a less disgusting way of |
75 | * doing this .... | 56 | * doing this .... |
76 | */ | 57 | */ |
58 | struct mpc_config_translation { | ||
59 | unsigned char mpc_type; | ||
60 | unsigned char trans_len; | ||
61 | unsigned char trans_type; | ||
62 | unsigned char trans_quad; | ||
63 | unsigned char trans_global; | ||
64 | unsigned char trans_local; | ||
65 | unsigned short trans_reserved; | ||
66 | }; | ||
67 | |||
77 | 68 | ||
78 | static int mpc_record; | 69 | static int mpc_record; |
79 | static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] | 70 | static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] |
80 | __cpuinitdata; | 71 | __cpuinitdata; |
72 | |||
73 | static inline int generate_logical_apicid(int quad, int phys_apicid) | ||
74 | { | ||
75 | return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); | ||
76 | } | ||
77 | |||
78 | |||
79 | static inline int mpc_apic_id(struct mpc_config_processor *m, | ||
80 | struct mpc_config_translation *translation_record) | ||
81 | { | ||
82 | int quad = translation_record->trans_quad; | ||
83 | int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid); | ||
84 | |||
85 | printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", | ||
86 | m->mpc_apicid, | ||
87 | (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, | ||
88 | (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, | ||
89 | m->mpc_apicver, quad, logical_apicid); | ||
90 | return logical_apicid; | ||
91 | } | ||
92 | |||
93 | int mp_bus_id_to_node[MAX_MP_BUSSES]; | ||
94 | |||
95 | int mp_bus_id_to_local[MAX_MP_BUSSES]; | ||
96 | |||
97 | static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name, | ||
98 | struct mpc_config_translation *translation) | ||
99 | { | ||
100 | int quad = translation->trans_quad; | ||
101 | int local = translation->trans_local; | ||
102 | |||
103 | mp_bus_id_to_node[m->mpc_busid] = quad; | ||
104 | mp_bus_id_to_local[m->mpc_busid] = local; | ||
105 | printk(KERN_INFO "Bus #%d is %s (node %d)\n", | ||
106 | m->mpc_busid, name, quad); | ||
107 | } | ||
108 | |||
109 | int quad_local_to_mp_bus_id[NR_CPUS/4][4]; | ||
110 | static void mpc_oem_pci_bus(struct mpc_config_bus *m, | ||
111 | struct mpc_config_translation *translation) | ||
112 | { | ||
113 | int quad = translation->trans_quad; | ||
114 | int local = translation->trans_local; | ||
115 | |||
116 | quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; | ||
117 | } | ||
118 | |||
81 | #endif | 119 | #endif |
82 | 120 | ||
83 | static void __cpuinit MP_processor_info(struct mpc_config_processor *m) | 121 | static void __cpuinit MP_processor_info(struct mpc_config_processor *m) |
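[Editor's note] generate_logical_apicid() packs the NUMAQ quad number into the high nibble of the logical APIC ID; worked examples, straight from the formula above:

    int id = generate_logical_apicid(2, 3);  /* (2 << 4) + (3 << 1) = 0x26 */
    /* generate_logical_apicid(1, 0) == 0x11: physical apicid 0 takes the
     * low bit instead of being shifted, so it stays distinguishable. */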
@@ -90,7 +128,10 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) | |||
90 | return; | 128 | return; |
91 | } | 129 | } |
92 | #ifdef CONFIG_X86_NUMAQ | 130 | #ifdef CONFIG_X86_NUMAQ |
93 | apicid = mpc_apic_id(m, translation_table[mpc_record]); | 131 | if (found_numaq) |
132 | apicid = mpc_apic_id(m, translation_table[mpc_record]); | ||
133 | else | ||
134 | apicid = m->mpc_apicid; | ||
94 | #else | 135 | #else |
95 | apicid = m->mpc_apicid; | 136 | apicid = m->mpc_apicid; |
96 | #endif | 137 | #endif |
@@ -103,17 +144,18 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) | |||
103 | generic_processor_info(apicid, m->mpc_apicver); | 144 | generic_processor_info(apicid, m->mpc_apicver); |
104 | } | 145 | } |
105 | 146 | ||
147 | #ifdef CONFIG_X86_IO_APIC | ||
106 | static void __init MP_bus_info(struct mpc_config_bus *m) | 148 | static void __init MP_bus_info(struct mpc_config_bus *m) |
107 | { | 149 | { |
108 | char str[7]; | 150 | char str[7]; |
109 | |||
110 | memcpy(str, m->mpc_bustype, 6); | 151 | memcpy(str, m->mpc_bustype, 6); |
111 | str[6] = 0; | 152 | str[6] = 0; |
112 | 153 | ||
113 | #ifdef CONFIG_X86_NUMAQ | 154 | #ifdef CONFIG_X86_NUMAQ |
114 | mpc_oem_bus_info(m, str, translation_table[mpc_record]); | 155 | if (found_numaq) |
156 | mpc_oem_bus_info(m, str, translation_table[mpc_record]); | ||
115 | #else | 157 | #else |
116 | Dprintk("Bus #%d is %s\n", m->mpc_busid, str); | 158 | printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); |
117 | #endif | 159 | #endif |
118 | 160 | ||
119 | #if MAX_MP_BUSSES < 256 | 161 | #if MAX_MP_BUSSES < 256 |
@@ -132,11 +174,10 @@ static void __init MP_bus_info(struct mpc_config_bus *m) | |||
132 | #endif | 174 | #endif |
133 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { | 175 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { |
134 | #ifdef CONFIG_X86_NUMAQ | 176 | #ifdef CONFIG_X86_NUMAQ |
135 | mpc_oem_pci_bus(m, translation_table[mpc_record]); | 177 | if (found_numaq) |
178 | mpc_oem_pci_bus(m, translation_table[mpc_record]); | ||
136 | #endif | 179 | #endif |
137 | clear_bit(m->mpc_busid, mp_bus_not_pci); | 180 | clear_bit(m->mpc_busid, mp_bus_not_pci); |
138 | mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; | ||
139 | mp_current_pci_id++; | ||
140 | #if defined(CONFIG_EISA) || defined (CONFIG_MCA) | 181 | #if defined(CONFIG_EISA) || defined (CONFIG_MCA) |
141 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; | 182 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; |
142 | } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { | 183 | } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { |
@@ -147,6 +188,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m) | |||
147 | } else | 188 | } else |
148 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); | 189 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); |
149 | } | 190 | } |
191 | #endif | ||
150 | 192 | ||
151 | #ifdef CONFIG_X86_IO_APIC | 193 | #ifdef CONFIG_X86_IO_APIC |
152 | 194 | ||
@@ -176,18 +218,89 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m) | |||
176 | if (bad_ioapic(m->mpc_apicaddr)) | 218 | if (bad_ioapic(m->mpc_apicaddr)) |
177 | return; | 219 | return; |
178 | 220 | ||
179 | mp_ioapics[nr_ioapics] = *m; | 221 | mp_ioapics[nr_ioapics].mp_apicaddr = m->mpc_apicaddr; |
222 | mp_ioapics[nr_ioapics].mp_apicid = m->mpc_apicid; | ||
223 | mp_ioapics[nr_ioapics].mp_type = m->mpc_type; | ||
224 | mp_ioapics[nr_ioapics].mp_apicver = m->mpc_apicver; | ||
225 | mp_ioapics[nr_ioapics].mp_flags = m->mpc_flags; | ||
180 | nr_ioapics++; | 226 | nr_ioapics++; |
181 | } | 227 | } |
182 | 228 | ||
183 | static void __init MP_intsrc_info(struct mpc_config_intsrc *m) | 229 | static void print_MP_intsrc_info(struct mpc_config_intsrc *m) |
184 | { | 230 | { |
185 | mp_irqs[mp_irq_entries] = *m; | 231 | printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," |
186 | Dprintk("Int: type %d, pol %d, trig %d, bus %d," | ||
187 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", | 232 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", |
188 | m->mpc_irqtype, m->mpc_irqflag & 3, | 233 | m->mpc_irqtype, m->mpc_irqflag & 3, |
189 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, | 234 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, |
190 | m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); | 235 | m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); |
236 | } | ||
237 | |||
238 | static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) | ||
239 | { | ||
240 | printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," | ||
241 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", | ||
242 | mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, | ||
243 | (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, | ||
244 | mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); | ||
245 | } | ||
246 | |||
247 | static void assign_to_mp_irq(struct mpc_config_intsrc *m, | ||
248 | struct mp_config_intsrc *mp_irq) | ||
249 | { | ||
250 | mp_irq->mp_dstapic = m->mpc_dstapic; | ||
251 | mp_irq->mp_type = m->mpc_type; | ||
252 | mp_irq->mp_irqtype = m->mpc_irqtype; | ||
253 | mp_irq->mp_irqflag = m->mpc_irqflag; | ||
254 | mp_irq->mp_srcbus = m->mpc_srcbus; | ||
255 | mp_irq->mp_srcbusirq = m->mpc_srcbusirq; | ||
256 | mp_irq->mp_dstirq = m->mpc_dstirq; | ||
257 | } | ||
258 | |||
259 | static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, | ||
260 | struct mpc_config_intsrc *m) | ||
261 | { | ||
262 | m->mpc_dstapic = mp_irq->mp_dstapic; | ||
263 | m->mpc_type = mp_irq->mp_type; | ||
264 | m->mpc_irqtype = mp_irq->mp_irqtype; | ||
265 | m->mpc_irqflag = mp_irq->mp_irqflag; | ||
266 | m->mpc_srcbus = mp_irq->mp_srcbus; | ||
267 | m->mpc_srcbusirq = mp_irq->mp_srcbusirq; | ||
268 | m->mpc_dstirq = mp_irq->mp_dstirq; | ||
269 | } | ||
270 | |||
271 | static int mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, | ||
272 | struct mpc_config_intsrc *m) | ||
273 | { | ||
274 | if (mp_irq->mp_dstapic != m->mpc_dstapic) | ||
275 | return 1; | ||
276 | if (mp_irq->mp_type != m->mpc_type) | ||
277 | return 2; | ||
278 | if (mp_irq->mp_irqtype != m->mpc_irqtype) | ||
279 | return 3; | ||
280 | if (mp_irq->mp_irqflag != m->mpc_irqflag) | ||
281 | return 4; | ||
282 | if (mp_irq->mp_srcbus != m->mpc_srcbus) | ||
283 | return 5; | ||
284 | if (mp_irq->mp_srcbusirq != m->mpc_srcbusirq) | ||
285 | return 6; | ||
286 | if (mp_irq->mp_dstirq != m->mpc_dstirq) | ||
287 | return 7; | ||
288 | |||
289 | return 0; | ||
290 | } | ||
291 | |||
292 | void MP_intsrc_info(struct mpc_config_intsrc *m) | ||
293 | { | ||
294 | int i; | ||
295 | |||
296 | print_MP_intsrc_info(m); | ||
297 | |||
298 | for (i = 0; i < mp_irq_entries; i++) { | ||
299 | if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m)) | ||
300 | return; | ||
301 | } | ||
302 | |||
303 | assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]); | ||
191 | if (++mp_irq_entries == MAX_IRQ_SOURCES) | 304 | if (++mp_irq_entries == MAX_IRQ_SOURCES) |
192 | panic("Max # of irq sources exceeded!!\n"); | 305 | panic("Max # of irq sources exceeded!!\n"); |
193 | } | 306 | } |
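[Editor's note] MP_intsrc_info() is no longer a blind append: it scans the table for an identical entry first, so feeding it the same interrupt source twice (for example from both the MP-table walk and a later replay) cannot overflow mp_irqs. A sketch of the resulting behaviour, with hypothetical entry values:

    struct mpc_config_intsrc m = {
            .mpc_type      = MP_INTSRC,
            .mpc_irqtype   = mp_INT,
            .mpc_irqflag   = 0,             /* conforming */
            .mpc_srcbus    = 0,
            .mpc_srcbusirq = 4,
            .mpc_dstapic   = 2,
            .mpc_dstirq    = 4,
    };

    MP_intsrc_info(&m);  /* copied into mp_irqs[], mp_irq_entries++ */
    MP_intsrc_info(&m);  /* mp_irq_mpc_intsrc_cmp() returns 0: no-op */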
@@ -196,7 +309,7 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m) | |||
196 | 309 | ||
197 | static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) | 310 | static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) |
198 | { | 311 | { |
199 | Dprintk("Lint: type %d, pol %d, trig %d, bus %d," | 312 | printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x," |
200 | " IRQ %02x, APIC ID %x, APIC LINT %02x\n", | 313 | " IRQ %02x, APIC ID %x, APIC LINT %02x\n", |
201 | m->mpc_irqtype, m->mpc_irqflag & 3, | 314 | m->mpc_irqtype, m->mpc_irqflag & 3, |
202 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, | 315 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, |
@@ -266,11 +379,14 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, | |||
266 | } | 379 | } |
267 | } | 380 | } |
268 | 381 | ||
269 | static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, | 382 | void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, |
270 | char *productid) | 383 | char *productid) |
271 | { | 384 | { |
272 | if (strncmp(oem, "IBM NUMA", 8)) | 385 | if (strncmp(oem, "IBM NUMA", 8)) |
273 | printk("Warning! May not be a NUMA-Q system!\n"); | 386 | printk("Warning! Not a NUMA-Q system!\n"); |
387 | else | ||
388 | found_numaq = 1; | ||
389 | |||
274 | if (mpc->mpc_oemptr) | 390 | if (mpc->mpc_oemptr) |
275 | smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr, | 391 | smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr, |
276 | mpc->mpc_oemsize); | 392 | mpc->mpc_oemsize); |
@@ -281,12 +397,9 @@ static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, | |||
281 | * Read/parse the MPC | 397 | * Read/parse the MPC |
282 | */ | 398 | */ |
283 | 399 | ||
284 | static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | 400 | static int __init smp_check_mpc(struct mp_config_table *mpc, char *oem, |
401 | char *str) | ||
285 | { | 402 | { |
286 | char str[16]; | ||
287 | char oem[10]; | ||
288 | int count = sizeof(*mpc); | ||
289 | unsigned char *mpt = ((unsigned char *)mpc) + count; | ||
290 | 403 | ||
291 | if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) { | 404 | if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) { |
292 | printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", | 405 | printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", |
@@ -309,19 +422,42 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |||
309 | } | 422 | } |
310 | memcpy(oem, mpc->mpc_oem, 8); | 423 | memcpy(oem, mpc->mpc_oem, 8); |
311 | oem[8] = 0; | 424 | oem[8] = 0; |
312 | printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem); | 425 | printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem); |
313 | 426 | ||
314 | memcpy(str, mpc->mpc_productid, 12); | 427 | memcpy(str, mpc->mpc_productid, 12); |
315 | str[12] = 0; | 428 | str[12] = 0; |
316 | printk("Product ID: %s ", str); | ||
317 | 429 | ||
318 | #ifdef CONFIG_X86_32 | 430 | printk(KERN_INFO "MPTABLE: Product ID: %s\n", str); |
319 | mps_oem_check(mpc, oem, str); | ||
320 | #endif | ||
321 | printk(KERN_INFO "MPTABLE: Product ID: %s ", str); | ||
322 | 431 | ||
323 | printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic); | 432 | printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic); |
324 | 433 | ||
434 | return 1; | ||
435 | } | ||
436 | |||
437 | static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | ||
438 | { | ||
439 | char str[16]; | ||
440 | char oem[10]; | ||
441 | |||
442 | int count = sizeof(*mpc); | ||
443 | unsigned char *mpt = ((unsigned char *)mpc) + count; | ||
444 | |||
445 | if (!smp_check_mpc(mpc, oem, str)) | ||
446 | return 0; | ||
447 | |||
448 | #ifdef CONFIG_X86_32 | ||
449 | /* | ||
450 | * need to make sure summit and es7000's mps_oem_check is safe to be | ||
451 | * called early via genericarch 's mps_oem_check | ||
452 | */ | ||
453 | if (early) { | ||
454 | #ifdef CONFIG_X86_NUMAQ | ||
455 | numaq_mps_oem_check(mpc, oem, str); | ||
456 | #endif | ||
457 | } else | ||
458 | mps_oem_check(mpc, oem, str); | ||
459 | #endif | ||
460 | |||
325 | /* save the local APIC address, it might be non-default */ | 461 | /* save the local APIC address, it might be non-default */ |
326 | if (!acpi_lapic) | 462 | if (!acpi_lapic) |
327 | mp_lapic_addr = mpc->mpc_lapic; | 463 | mp_lapic_addr = mpc->mpc_lapic; |
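[Editor's note] smp_read_mpc()'s header validation is now the separate smp_check_mpc(), with the caller owning the string buffers: oem must hold the 8-byte OEM ID plus a NUL, str the 12-byte product ID plus a NUL. Minimal usage, as the rewritten smp_read_mpc() above does it:

    char str[16];
    char oem[10];

    if (!smp_check_mpc(mpc, oem, str))  /* logs signature/checksum errors */
            return 0;                   /* not a usable MP config table */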
@@ -352,7 +488,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |||
352 | { | 488 | { |
353 | struct mpc_config_bus *m = | 489 | struct mpc_config_bus *m = |
354 | (struct mpc_config_bus *)mpt; | 490 | (struct mpc_config_bus *)mpt; |
491 | #ifdef CONFIG_X86_IO_APIC | ||
355 | MP_bus_info(m); | 492 | MP_bus_info(m); |
493 | #endif | ||
356 | mpt += sizeof(*m); | 494 | mpt += sizeof(*m); |
357 | count += sizeof(*m); | 495 | count += sizeof(*m); |
358 | break; | 496 | break; |
@@ -402,6 +540,11 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |||
402 | ++mpc_record; | 540 | ++mpc_record; |
403 | #endif | 541 | #endif |
404 | } | 542 | } |
543 | |||
544 | #ifdef CONFIG_X86_GENERICARCH | ||
545 | generic_bigsmp_probe(); | ||
546 | #endif | ||
547 | |||
405 | setup_apic_routing(); | 548 | setup_apic_routing(); |
406 | if (!num_processors) | 549 | if (!num_processors) |
407 | printk(KERN_ERR "MPTABLE: no processors registered!\n"); | 550 | printk(KERN_ERR "MPTABLE: no processors registered!\n"); |
@@ -427,7 +570,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) | |||
427 | intsrc.mpc_type = MP_INTSRC; | 570 | intsrc.mpc_type = MP_INTSRC; |
428 | intsrc.mpc_irqflag = 0; /* conforming */ | 571 | intsrc.mpc_irqflag = 0; /* conforming */ |
429 | intsrc.mpc_srcbus = 0; | 572 | intsrc.mpc_srcbus = 0; |
430 | intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; | 573 | intsrc.mpc_dstapic = mp_ioapics[0].mp_apicid; |
431 | 574 | ||
432 | intsrc.mpc_irqtype = mp_INT; | 575 | intsrc.mpc_irqtype = mp_INT; |
433 | 576 | ||
@@ -488,40 +631,11 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) | |||
488 | MP_intsrc_info(&intsrc); | 631 | MP_intsrc_info(&intsrc); |
489 | } | 632 | } |
490 | 633 | ||
491 | #endif | ||
492 | 634 | ||
493 | static inline void __init construct_default_ISA_mptable(int mpc_default_type) | 635 | static void construct_ioapic_table(int mpc_default_type) |
494 | { | 636 | { |
495 | struct mpc_config_processor processor; | ||
496 | struct mpc_config_bus bus; | ||
497 | #ifdef CONFIG_X86_IO_APIC | ||
498 | struct mpc_config_ioapic ioapic; | 637 | struct mpc_config_ioapic ioapic; |
499 | #endif | 638 | struct mpc_config_bus bus; |
500 | struct mpc_config_lintsrc lintsrc; | ||
501 | int linttypes[2] = { mp_ExtINT, mp_NMI }; | ||
502 | int i; | ||
503 | |||
504 | /* | ||
505 | * local APIC has default address | ||
506 | */ | ||
507 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | ||
508 | |||
509 | /* | ||
510 | * 2 CPUs, numbered 0 & 1. | ||
511 | */ | ||
512 | processor.mpc_type = MP_PROCESSOR; | ||
513 | /* Either an integrated APIC or a discrete 82489DX. */ | ||
514 | processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; | ||
515 | processor.mpc_cpuflag = CPU_ENABLED; | ||
516 | processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | | ||
517 | (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; | ||
518 | processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; | ||
519 | processor.mpc_reserved[0] = 0; | ||
520 | processor.mpc_reserved[1] = 0; | ||
521 | for (i = 0; i < 2; i++) { | ||
522 | processor.mpc_apicid = i; | ||
523 | MP_processor_info(&processor); | ||
524 | } | ||
525 | 639 | ||
526 | bus.mpc_type = MP_BUS; | 640 | bus.mpc_type = MP_BUS; |
527 | bus.mpc_busid = 0; | 641 | bus.mpc_busid = 0; |
@@ -550,7 +664,6 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) | |||
550 | MP_bus_info(&bus); | 664 | MP_bus_info(&bus); |
551 | } | 665 | } |
552 | 666 | ||
553 | #ifdef CONFIG_X86_IO_APIC | ||
554 | ioapic.mpc_type = MP_IOAPIC; | 667 | ioapic.mpc_type = MP_IOAPIC; |
555 | ioapic.mpc_apicid = 2; | 668 | ioapic.mpc_apicid = 2; |
556 | ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; | 669 | ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; |
@@ -562,7 +675,42 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) | |||
562 | * We set up most of the low 16 IO-APIC pins according to MPS rules. | 675 | * We set up most of the low 16 IO-APIC pins according to MPS rules. |
563 | */ | 676 | */ |
564 | construct_default_ioirq_mptable(mpc_default_type); | 677 | construct_default_ioirq_mptable(mpc_default_type); |
678 | } | ||
679 | #else | ||
680 | static inline void construct_ioapic_table(int mpc_default_type) { } | ||
565 | #endif | 681 | #endif |
682 | |||
683 | static inline void __init construct_default_ISA_mptable(int mpc_default_type) | ||
684 | { | ||
685 | struct mpc_config_processor processor; | ||
686 | struct mpc_config_lintsrc lintsrc; | ||
687 | int linttypes[2] = { mp_ExtINT, mp_NMI }; | ||
688 | int i; | ||
689 | |||
690 | /* | ||
691 | * local APIC has default address | ||
692 | */ | ||
693 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | ||
694 | |||
695 | /* | ||
696 | * 2 CPUs, numbered 0 & 1. | ||
697 | */ | ||
698 | processor.mpc_type = MP_PROCESSOR; | ||
699 | /* Either an integrated APIC or a discrete 82489DX. */ | ||
700 | processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; | ||
701 | processor.mpc_cpuflag = CPU_ENABLED; | ||
702 | processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | | ||
703 | (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; | ||
704 | processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; | ||
705 | processor.mpc_reserved[0] = 0; | ||
706 | processor.mpc_reserved[1] = 0; | ||
707 | for (i = 0; i < 2; i++) { | ||
708 | processor.mpc_apicid = i; | ||
709 | MP_processor_info(&processor); | ||
710 | } | ||
711 | |||
712 | construct_ioapic_table(mpc_default_type); | ||
713 | |||
566 | lintsrc.mpc_type = MP_LINTSRC; | 714 | lintsrc.mpc_type = MP_LINTSRC; |
567 | lintsrc.mpc_irqflag = 0; /* conforming */ | 715 | lintsrc.mpc_irqflag = 0; /* conforming */ |
568 | lintsrc.mpc_srcbusid = 0; | 716 | lintsrc.mpc_srcbusid = 0; |
@@ -600,7 +748,7 @@ static void __init __get_smp_config(unsigned early) | |||
600 | 748 | ||
601 | printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", | 749 | printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", |
602 | mpf->mpf_specification); | 750 | mpf->mpf_specification); |
603 | #ifdef CONFIG_X86_32 | 751 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) |
604 | if (mpf->mpf_feature2 & (1 << 7)) { | 752 | if (mpf->mpf_feature2 & (1 << 7)) { |
605 | printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); | 753 | printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); |
606 | pic_mode = 1; | 754 | pic_mode = 1; |
@@ -632,7 +780,9 @@ static void __init __get_smp_config(unsigned early) | |||
632 | * override the defaults. | 780 | * override the defaults. |
633 | */ | 781 | */ |
634 | if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { | 782 | if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { |
783 | #ifdef CONFIG_X86_LOCAL_APIC | ||
635 | smp_found_config = 0; | 784 | smp_found_config = 0; |
785 | #endif | ||
636 | printk(KERN_ERR | 786 | printk(KERN_ERR |
637 | "BIOS bug, MP table errors detected!...\n"); | 787 | "BIOS bug, MP table errors detected!...\n"); |
638 | printk(KERN_ERR "... disabling SMP support. " | 788 | printk(KERN_ERR "... disabling SMP support. " |
@@ -689,7 +839,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
689 | unsigned int *bp = phys_to_virt(base); | 839 | unsigned int *bp = phys_to_virt(base); |
690 | struct intel_mp_floating *mpf; | 840 | struct intel_mp_floating *mpf; |
691 | 841 | ||
692 | Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length); | 842 | printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length); |
693 | BUILD_BUG_ON(sizeof(*mpf) != 16); | 843 | BUILD_BUG_ON(sizeof(*mpf) != 16); |
694 | 844 | ||
695 | while (length > 0) { | 845 | while (length > 0) { |
@@ -699,8 +849,9 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
699 | !mpf_checksum((unsigned char *)bp, 16) && | 849 | !mpf_checksum((unsigned char *)bp, 16) && |
700 | ((mpf->mpf_specification == 1) | 850 | ((mpf->mpf_specification == 1) |
701 | || (mpf->mpf_specification == 4))) { | 851 | || (mpf->mpf_specification == 4))) { |
702 | 852 | #ifdef CONFIG_X86_LOCAL_APIC | |
703 | smp_found_config = 1; | 853 | smp_found_config = 1; |
854 | #endif | ||
704 | mpf_found = mpf; | 855 | mpf_found = mpf; |
705 | #ifdef CONFIG_X86_32 | 856 | #ifdef CONFIG_X86_32 |
706 | printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", | 857 | printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", |
@@ -790,298 +941,294 @@ void __init find_smp_config(void) | |||
790 | __find_smp_config(1); | 941 | __find_smp_config(1); |
791 | } | 942 | } |
792 | 943 | ||
793 | /* -------------------------------------------------------------------------- | 944 | #ifdef CONFIG_X86_IO_APIC |
794 | ACPI-based MP Configuration | 945 | static u8 __initdata irq_used[MAX_IRQ_SOURCES]; |
795 | -------------------------------------------------------------------------- */ | ||
796 | 946 | ||
797 | /* | 947 | static int __init get_MP_intsrc_index(struct mpc_config_intsrc *m) |
798 | * Keep this outside and initialized to 0, for !CONFIG_ACPI builds: | 948 | { |
799 | */ | 949 | int i; |
800 | int es7000_plat; | ||
801 | 950 | ||
802 | #ifdef CONFIG_ACPI | 951 | if (m->mpc_irqtype != mp_INT) |
952 | return 0; | ||
803 | 953 | ||
804 | #ifdef CONFIG_X86_IO_APIC | 954 | if (m->mpc_irqflag != 0x0f) |
955 | return 0; | ||
805 | 956 | ||
806 | #define MP_ISA_BUS 0 | 957 | /* not legacy */ |
807 | 958 | ||
808 | extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; | 959 | for (i = 0; i < mp_irq_entries; i++) { |
960 | if (mp_irqs[i].mp_irqtype != mp_INT) | ||
961 | continue; | ||
809 | 962 | ||
810 | static int mp_find_ioapic(int gsi) | 963 | if (mp_irqs[i].mp_irqflag != 0x0f) |
811 | { | 964 | continue; |
812 | int i = 0; | ||
813 | 965 | ||
814 | /* Find the IOAPIC that manages this GSI. */ | 966 | if (mp_irqs[i].mp_srcbus != m->mpc_srcbus) |
815 | for (i = 0; i < nr_ioapics; i++) { | 967 | continue; |
816 | if ((gsi >= mp_ioapic_routing[i].gsi_base) | 968 | if (mp_irqs[i].mp_srcbusirq != m->mpc_srcbusirq) |
817 | && (gsi <= mp_ioapic_routing[i].gsi_end)) | 969 | continue; |
818 | return i; | 970 | if (irq_used[i]) { |
971 | /* already claimed */ | ||
972 | return -2; | ||
973 | } | ||
974 | irq_used[i] = 1; | ||
975 | return i; | ||
819 | } | 976 | } |
820 | 977 | ||
821 | printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); | 978 | /* not found */ |
822 | return -1; | 979 | return -1; |
823 | } | 980 | } |
824 | 981 | ||
825 | static u8 __init uniq_ioapic_id(u8 id) | 982 | #define SPARE_SLOT_NUM 20 |
826 | { | 983 | |
827 | #ifdef CONFIG_X86_32 | 984 | static struct mpc_config_intsrc __initdata *m_spare[SPARE_SLOT_NUM]; |
828 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && | ||
829 | !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) | ||
830 | return io_apic_get_unique_id(nr_ioapics, id); | ||
831 | else | ||
832 | return id; | ||
833 | #else | ||
834 | int i; | ||
835 | DECLARE_BITMAP(used, 256); | ||
836 | bitmap_zero(used, 256); | ||
837 | for (i = 0; i < nr_ioapics; i++) { | ||
838 | struct mpc_config_ioapic *ia = &mp_ioapics[i]; | ||
839 | __set_bit(ia->mpc_apicid, used); | ||
840 | } | ||
841 | if (!test_bit(id, used)) | ||
842 | return id; | ||
843 | return find_first_zero_bit(used, 256); | ||
844 | #endif | 985 | #endif |
845 | } | ||
846 | 986 | ||
847 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | 987 | static int __init replace_intsrc_all(struct mp_config_table *mpc, |
988 | unsigned long mpc_new_phys, | ||
989 | unsigned long mpc_new_length) | ||
848 | { | 990 | { |
849 | int idx = 0; | 991 | #ifdef CONFIG_X86_IO_APIC |
850 | 992 | int i; | |
851 | if (bad_ioapic(address)) | 993 | int nr_m_spare = 0; |
852 | return; | 994 | #endif |
853 | 995 | ||
854 | idx = nr_ioapics; | 996 | int count = sizeof(*mpc); |
997 | unsigned char *mpt = ((unsigned char *)mpc) + count; | ||
855 | 998 | ||
856 | mp_ioapics[idx].mpc_type = MP_IOAPIC; | 999 | printk(KERN_INFO "mpc_length %x\n", mpc->mpc_length); |
857 | mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; | 1000 | while (count < mpc->mpc_length) { |
858 | mp_ioapics[idx].mpc_apicaddr = address; | 1001 | switch (*mpt) { |
1002 | case MP_PROCESSOR: | ||
1003 | { | ||
1004 | struct mpc_config_processor *m = | ||
1005 | (struct mpc_config_processor *)mpt; | ||
1006 | mpt += sizeof(*m); | ||
1007 | count += sizeof(*m); | ||
1008 | break; | ||
1009 | } | ||
1010 | case MP_BUS: | ||
1011 | { | ||
1012 | struct mpc_config_bus *m = | ||
1013 | (struct mpc_config_bus *)mpt; | ||
1014 | mpt += sizeof(*m); | ||
1015 | count += sizeof(*m); | ||
1016 | break; | ||
1017 | } | ||
1018 | case MP_IOAPIC: | ||
1019 | { | ||
1020 | mpt += sizeof(struct mpc_config_ioapic); | ||
1021 | count += sizeof(struct mpc_config_ioapic); | ||
1022 | break; | ||
1023 | } | ||
1024 | case MP_INTSRC: | ||
1025 | { | ||
1026 | #ifdef CONFIG_X86_IO_APIC | ||
1027 | struct mpc_config_intsrc *m = | ||
1028 | (struct mpc_config_intsrc *)mpt; | ||
859 | 1029 | ||
860 | set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); | 1030 | printk(KERN_INFO "OLD "); |
861 | mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id); | 1031 | print_MP_intsrc_info(m); |
862 | #ifdef CONFIG_X86_32 | 1032 | i = get_MP_intsrc_index(m); |
863 | mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); | 1033 | if (i > 0) { |
864 | #else | 1034 | assign_to_mpc_intsrc(&mp_irqs[i], m); |
865 | mp_ioapics[idx].mpc_apicver = 0; | 1035 | printk(KERN_INFO "NEW "); |
1036 | print_mp_irq_info(&mp_irqs[i]); | ||
1037 | } else if (!i) { | ||
1038 | /* legacy, do nothing */ | ||
1039 | } else if (nr_m_spare < SPARE_SLOT_NUM) { | ||
1040 | /* | ||
1041 | * not found (-1), or duplicated (-2) | ||
1042 | * are invalid entries, | ||
1043 | * we need to use the slot later | ||
1044 | */ | ||
1045 | m_spare[nr_m_spare] = m; | ||
1046 | nr_m_spare++; | ||
1047 | } | ||
866 | #endif | 1048 | #endif |
867 | /* | 1049 | mpt += sizeof(struct mpc_config_intsrc); |
868 | * Build basic GSI lookup table to facilitate gsi->io_apic lookups | 1050 | count += sizeof(struct mpc_config_intsrc); |
869 | * and to prevent reprogramming of IOAPIC pins (PCI GSIs). | 1051 | break; |
870 | */ | 1052 | } |
871 | mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; | 1053 | case MP_LINTSRC: |
872 | mp_ioapic_routing[idx].gsi_base = gsi_base; | 1054 | { |
873 | mp_ioapic_routing[idx].gsi_end = gsi_base + | 1055 | struct mpc_config_lintsrc *m = |
874 | io_apic_get_redir_entries(idx); | 1056 | (struct mpc_config_lintsrc *)mpt; |
875 | 1057 | mpt += sizeof(*m); | |
876 | printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " | 1058 | count += sizeof(*m); |
877 | "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, | 1059 | break; |
878 | mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, | 1060 | } |
879 | mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); | 1061 | default: |
880 | 1062 | /* wrong mptable */ | |
881 | nr_ioapics++; | 1063 | printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); |
882 | } | 1064 | printk(KERN_ERR "type %x\n", *mpt); |
1065 | print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, | ||
1066 | 1, mpc, mpc->mpc_length, 1); | ||
1067 | goto out; | ||
1068 | } | ||
1069 | } | ||
883 | 1070 | ||
884 | void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) | 1071 | #ifdef CONFIG_X86_IO_APIC |
885 | { | 1072 | for (i = 0; i < mp_irq_entries; i++) { |
886 | struct mpc_config_intsrc intsrc; | 1073 | if (irq_used[i]) |
887 | int ioapic = -1; | 1074 | continue; |
888 | int pin = -1; | ||
889 | 1075 | ||
890 | /* | 1076 | if (mp_irqs[i].mp_irqtype != mp_INT) |
891 | * Convert 'gsi' to 'ioapic.pin'. | 1077 | continue; |
892 | */ | ||
893 | ioapic = mp_find_ioapic(gsi); | ||
894 | if (ioapic < 0) | ||
895 | return; | ||
896 | pin = gsi - mp_ioapic_routing[ioapic].gsi_base; | ||
897 | 1078 | ||
898 | /* | 1079 | if (mp_irqs[i].mp_irqflag != 0x0f) |
899 | * TBD: This check is for faulty timer entries, where the override | 1080 | continue; |
900 | * erroneously sets the trigger to level, resulting in a HUGE | ||
901 | * increase of timer interrupts! | ||
902 | */ | ||
903 | if ((bus_irq == 0) && (trigger == 3)) | ||
904 | trigger = 1; | ||
905 | 1081 | ||
906 | intsrc.mpc_type = MP_INTSRC; | 1082 | if (nr_m_spare > 0) { |
907 | intsrc.mpc_irqtype = mp_INT; | 1083 | printk(KERN_INFO "*NEW* found "); |
908 | intsrc.mpc_irqflag = (trigger << 2) | polarity; | 1084 | nr_m_spare--; |
909 | intsrc.mpc_srcbus = MP_ISA_BUS; | 1085 | assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]); |
910 | intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ | 1086 | m_spare[nr_m_spare] = NULL; |
911 | intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ | 1087 | } else { |
912 | intsrc.mpc_dstirq = pin; /* INTIN# */ | 1088 | struct mpc_config_intsrc *m = |
1089 | (struct mpc_config_intsrc *)mpt; | ||
1090 | count += sizeof(struct mpc_config_intsrc); | ||
1091 | if (!mpc_new_phys) { | ||
1092 | printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count); | ||
1093 | } else { | ||
1094 | if (count <= mpc_new_length) | ||
1095 | printk(KERN_INFO "No spare slots, try to append..., new mpc_length %x\n", count); | ||
1096 | else { | ||
1097 | printk(KERN_ERR "mpc_new_length %lx is too small\n", mpc_new_length); | ||
1098 | goto out; | ||
1099 | } | ||
1100 | } | ||
1101 | assign_to_mpc_intsrc(&mp_irqs[i], m); | ||
1102 | mpc->mpc_length = count; | ||
1103 | mpt += sizeof(struct mpc_config_intsrc); | ||
1104 | } | ||
1105 | print_mp_irq_info(&mp_irqs[i]); | ||
1106 | } | ||
1107 | #endif | ||
1108 | out: | ||
1109 | /* update checksum */ | ||
1110 | mpc->mpc_checksum = 0; | ||
1111 | mpc->mpc_checksum -= mpf_checksum((unsigned char *)mpc, | ||
1112 | mpc->mpc_length); | ||
913 | 1113 | ||
914 | MP_intsrc_info(&intsrc); | 1114 | return 0; |
915 | } | 1115 | } |
916 | 1116 | ||
917 | void __init mp_config_acpi_legacy_irqs(void) | 1117 | int __initdata enable_update_mptable; |
918 | { | ||
919 | struct mpc_config_intsrc intsrc; | ||
920 | int i = 0; | ||
921 | int ioapic = -1; | ||
922 | 1118 | ||
923 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) | 1119 | static int __init update_mptable_setup(char *str) |
924 | /* | 1120 | { |
925 | * Fabricate the legacy ISA bus (bus #31). | 1121 | enable_update_mptable = 1; |
926 | */ | 1122 | return 0; |
927 | mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; | 1123 | } |
928 | #endif | 1124 | early_param("update_mptable", update_mptable_setup); |
929 | set_bit(MP_ISA_BUS, mp_bus_not_pci); | ||
930 | Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); | ||
931 | 1125 | ||
932 | /* | 1126 | static unsigned long __initdata mpc_new_phys; |
933 | * Older generations of ES7000 have no legacy identity mappings | 1127 | static unsigned long mpc_new_length __initdata = 4096; |
934 | */ | ||
935 | if (es7000_plat == 1) | ||
936 | return; | ||
937 | 1128 | ||
938 | /* | 1129 | /* alloc_mptable or alloc_mptable=4k */ |
939 | * Locate the IOAPIC that manages the ISA IRQs (0-15). | 1130 | static int __initdata alloc_mptable; |
940 | */ | 1131 | static int __init parse_alloc_mptable_opt(char *p) |
941 | ioapic = mp_find_ioapic(0); | 1132 | { |
942 | if (ioapic < 0) | 1133 | enable_update_mptable = 1; |
943 | return; | 1134 | alloc_mptable = 1; |
1135 | if (!p) | ||
1136 | return 0; | ||
1137 | mpc_new_length = memparse(p, &p); | ||
1138 | return 0; | ||
1139 | } | ||
1140 | early_param("alloc_mptable", parse_alloc_mptable_opt); | ||
944 | 1141 | ||
945 | intsrc.mpc_type = MP_INTSRC; | 1142 | void __init early_reserve_e820_mpc_new(void) |
946 | intsrc.mpc_irqflag = 0; /* Conforming */ | 1143 | { |
947 | intsrc.mpc_srcbus = MP_ISA_BUS; | 1144 | if (enable_update_mptable && alloc_mptable) { |
948 | #ifdef CONFIG_X86_IO_APIC | 1145 | u64 startt = 0; |
949 | intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; | 1146 | #ifdef CONFIG_X86_TRAMPOLINE |
1147 | startt = TRAMPOLINE_BASE; | ||
950 | #endif | 1148 | #endif |
951 | /* | 1149 | mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4); |
952 | * Use the default configuration for the IRQs 0-15. Unless | ||
953 | * overridden by (MADT) interrupt source override entries. | ||
954 | */ | ||
955 | for (i = 0; i < 16; i++) { | ||
956 | int idx; | ||
957 | |||
958 | for (idx = 0; idx < mp_irq_entries; idx++) { | ||
959 | struct mpc_config_intsrc *irq = mp_irqs + idx; | ||
960 | |||
961 | /* Do we already have a mapping for this ISA IRQ? */ | ||
962 | if (irq->mpc_srcbus == MP_ISA_BUS | ||
963 | && irq->mpc_srcbusirq == i) | ||
964 | break; | ||
965 | |||
966 | /* Do we already have a mapping for this IOAPIC pin */ | ||
967 | if ((irq->mpc_dstapic == intsrc.mpc_dstapic) && | ||
968 | (irq->mpc_dstirq == i)) | ||
969 | break; | ||
970 | } | ||
971 | |||
972 | if (idx != mp_irq_entries) { | ||
973 | printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); | ||
974 | continue; /* IRQ already used */ | ||
975 | } | ||
976 | |||
977 | intsrc.mpc_irqtype = mp_INT; | ||
978 | intsrc.mpc_srcbusirq = i; /* Identity mapped */ | ||
979 | intsrc.mpc_dstirq = i; | ||
980 | |||
981 | MP_intsrc_info(&intsrc); | ||
982 | } | 1150 | } |
983 | } | 1151 | } |
984 | 1152 | ||
985 | int mp_register_gsi(u32 gsi, int triggering, int polarity) | 1153 | static int __init update_mp_table(void) |
986 | { | 1154 | { |
987 | int ioapic; | 1155 | char str[16]; |
988 | int ioapic_pin; | 1156 | char oem[10]; |
989 | #ifdef CONFIG_X86_32 | 1157 | struct intel_mp_floating *mpf; |
990 | #define MAX_GSI_NUM 4096 | 1158 | struct mp_config_table *mpc; |
991 | #define IRQ_COMPRESSION_START 64 | 1159 | struct mp_config_table *mpc_new; |
1160 | |||
1161 | if (!enable_update_mptable) | ||
1162 | return 0; | ||
1163 | |||
1164 | mpf = mpf_found; | ||
1165 | if (!mpf) | ||
1166 | return 0; | ||
992 | 1167 | ||
993 | static int pci_irq = IRQ_COMPRESSION_START; | ||
994 | /* | 1168 | /* |
995 | * Mapping between Global System Interrupts, which | 1169 | * Now see if we need to go further. |
996 | * represent all possible interrupts, and IRQs | ||
997 | * assigned to actual devices. | ||
998 | */ | 1170 | */ |
999 | static int gsi_to_irq[MAX_GSI_NUM]; | 1171 | if (mpf->mpf_feature1 != 0) |
1000 | #else | 1172 | return 0; |
1001 | |||
1002 | if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) | ||
1003 | return gsi; | ||
1004 | #endif | ||
1005 | 1173 | ||
1006 | /* Don't set up the ACPI SCI because it's already set up */ | 1174 | if (!mpf->mpf_physptr) |
1007 | if (acpi_gbl_FADT.sci_interrupt == gsi) | 1175 | return 0; |
1008 | return gsi; | ||
1009 | 1176 | ||
1010 | ioapic = mp_find_ioapic(gsi); | 1177 | mpc = phys_to_virt(mpf->mpf_physptr); |
1011 | if (ioapic < 0) { | ||
1012 | printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); | ||
1013 | return gsi; | ||
1014 | } | ||
1015 | 1178 | ||
1016 | ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; | 1179 | if (!smp_check_mpc(mpc, oem, str)) |
1180 | return 0; | ||
1017 | 1181 | ||
1018 | #ifdef CONFIG_X86_32 | 1182 | printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); |
1019 | if (ioapic_renumber_irq) | 1183 | printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); |
1020 | gsi = ioapic_renumber_irq(ioapic, gsi); | ||
1021 | #endif | ||
1022 | 1184 | ||
1023 | /* | 1185 | if (mpc_new_phys && mpc->mpc_length > mpc_new_length) { |
1024 | * Avoid pin reprogramming. PRTs typically include entries | 1186 | mpc_new_phys = 0; |
1025 | * with redundant pin->gsi mappings (but unique PCI devices); | 1187 | printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n", |
1026 | * we only program the IOAPIC on the first. | 1188 | mpc_new_length); |
1027 | */ | ||
1028 | if (ioapic_pin > MP_MAX_IOAPIC_PIN) { | ||
1029 | printk(KERN_ERR "Invalid reference to IOAPIC pin " | ||
1030 | "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, | ||
1031 | ioapic_pin); | ||
1032 | return gsi; | ||
1033 | } | 1189 | } |
1034 | if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { | 1190 | |
1035 | Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", | 1191 | if (!mpc_new_phys) { |
1036 | mp_ioapic_routing[ioapic].apic_id, ioapic_pin); | 1192 | unsigned char old, new; |
1037 | #ifdef CONFIG_X86_32 | 1193 | /* check if we can change the postion */ |
1038 | return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); | 1194 | mpc->mpc_checksum = 0; |
1039 | #else | 1195 | old = mpf_checksum((unsigned char *)mpc, mpc->mpc_length); |
1040 | return gsi; | 1196 | mpc->mpc_checksum = 0xff; |
1041 | #endif | 1197 | new = mpf_checksum((unsigned char *)mpc, mpc->mpc_length); |
1198 | if (old == new) { | ||
1199 | printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n"); | ||
1200 | return 0; | ||
1201 | } | ||
1202 | printk(KERN_INFO "use in-positon replacing\n"); | ||
1203 | } else { | ||
1204 | mpf->mpf_physptr = mpc_new_phys; | ||
1205 | mpc_new = phys_to_virt(mpc_new_phys); | ||
1206 | memcpy(mpc_new, mpc, mpc->mpc_length); | ||
1207 | mpc = mpc_new; | ||
1208 | /* check if we can modify that */ | ||
1209 | if (mpc_new_phys - mpf->mpf_physptr) { | ||
1210 | struct intel_mp_floating *mpf_new; | ||
1211 | /* steal 16 bytes from [0, 1k) */ | ||
1212 | printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); | ||
1213 | mpf_new = phys_to_virt(0x400 - 16); | ||
1214 | memcpy(mpf_new, mpf, 16); | ||
1215 | mpf = mpf_new; | ||
1216 | mpf->mpf_physptr = mpc_new_phys; | ||
1217 | } | ||
1218 | mpf->mpf_checksum = 0; | ||
1219 | mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); | ||
1220 | printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); | ||
1042 | } | 1221 | } |
1043 | 1222 | ||
1044 | set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed); | ||
1045 | #ifdef CONFIG_X86_32 | ||
1046 | /* | 1223 | /* |
1047 | * For GSI >= 64, use IRQ compression | 1224 | * only replace the one with mp_INT and |
1225 | * MP_IRQ_TRIGGER_LEVEL|MP_IRQ_POLARITY_LOW, | ||
1226 | * already in mp_irqs , stored by ... and mp_config_acpi_gsi, | ||
1227 | * may need pci=routeirq for all coverage | ||
1048 | */ | 1228 | */ |
1049 | if ((gsi >= IRQ_COMPRESSION_START) | 1229 | replace_intsrc_all(mpc, mpc_new_phys, mpc_new_length); |
1050 | && (triggering == ACPI_LEVEL_SENSITIVE)) { | 1230 | |
1051 | /* | 1231 | return 0; |
1052 | * For PCI devices assign IRQs in order, avoiding gaps | ||
1053 | * due to unused I/O APIC pins. | ||
1054 | */ | ||
1055 | int irq = gsi; | ||
1056 | if (gsi < MAX_GSI_NUM) { | ||
1057 | /* | ||
1058 | * Retain the VIA chipset work-around (gsi > 15), but | ||
1059 | * avoid a problem where the 8254 timer (IRQ0) is setup | ||
1060 | * via an override (so it's not on pin 0 of the ioapic), | ||
1061 | * and at the same time, the pin 0 interrupt is a PCI | ||
1062 | * type. The gsi > 15 test could cause these two pins | ||
1063 | * to be shared as IRQ0, and they are not shareable. | ||
1064 | * So test for this condition, and if necessary, avoid | ||
1065 | * the pin collision. | ||
1066 | */ | ||
1067 | gsi = pci_irq++; | ||
1068 | /* | ||
1069 | * Don't assign IRQ used by ACPI SCI | ||
1070 | */ | ||
1071 | if (gsi == acpi_gbl_FADT.sci_interrupt) | ||
1072 | gsi = pci_irq++; | ||
1073 | gsi_to_irq[irq] = gsi; | ||
1074 | } else { | ||
1075 | printk(KERN_ERR "GSI %u is too high\n", gsi); | ||
1076 | return gsi; | ||
1077 | } | ||
1078 | } | ||
1079 | #endif | ||
1080 | io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, | ||
1081 | triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, | ||
1082 | polarity == ACPI_ACTIVE_HIGH ? 0 : 1); | ||
1083 | return gsi; | ||
1084 | } | 1232 | } |
1085 | 1233 | ||
1086 | #endif /* CONFIG_X86_IO_APIC */ | 1234 | late_initcall(update_mp_table); |
1087 | #endif /* CONFIG_ACPI */ | ||
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index e65281b1634b..f0f1de1c4a1d 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c | |||
@@ -31,6 +31,8 @@ | |||
31 | #include <asm/numaq.h> | 31 | #include <asm/numaq.h> |
32 | #include <asm/topology.h> | 32 | #include <asm/topology.h> |
33 | #include <asm/processor.h> | 33 | #include <asm/processor.h> |
34 | #include <asm/mpspec.h> | ||
35 | #include <asm/e820.h> | ||
34 | 36 | ||
35 | #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) | 37 | #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) |
36 | 38 | ||
@@ -58,6 +60,8 @@ static void __init smp_dump_qct(void) | |||
58 | node_end_pfn[node] = MB_TO_PAGES( | 60 | node_end_pfn[node] = MB_TO_PAGES( |
59 | eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); | 61 | eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); |
60 | 62 | ||
63 | e820_register_active_regions(node, node_start_pfn[node], | ||
64 | node_end_pfn[node]); | ||
61 | memory_present(node, | 65 | memory_present(node, |
62 | node_start_pfn[node], node_end_pfn[node]); | 66 | node_start_pfn[node], node_end_pfn[node]); |
63 | node_remap_size[node] = node_memmap_size_bytes(node, | 67 | node_remap_size[node] = node_memmap_size_bytes(node, |
@@ -67,13 +71,24 @@ static void __init smp_dump_qct(void) | |||
67 | } | 71 | } |
68 | } | 72 | } |
69 | 73 | ||
70 | /* | 74 | static __init void early_check_numaq(void) |
71 | * Unlike Summit, we don't really care to let the NUMA-Q | 75 | { |
72 | * fall back to flat mode. Don't compile for NUMA-Q | 76 | /* |
73 | * unless you really need it! | 77 | * Find possible boot-time SMP configuration: |
74 | */ | 78 | */ |
79 | early_find_smp_config(); | ||
80 | /* | ||
81 | * get boot-time SMP configuration: | ||
82 | */ | ||
83 | if (smp_found_config) | ||
84 | early_get_smp_config(); | ||
85 | } | ||
86 | |||
75 | int __init get_memcfg_numaq(void) | 87 | int __init get_memcfg_numaq(void) |
76 | { | 88 | { |
89 | early_check_numaq(); | ||
90 | if (!found_numaq) | ||
91 | return 0; | ||
77 | smp_dump_qct(); | 92 | smp_dump_qct(); |
78 | return 1; | 93 | return 1; |
79 | } | 94 | } |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6f80b852a196..45a5e247d450 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -17,6 +17,7 @@ unsigned int num_processors; | |||
17 | unsigned disabled_cpus __cpuinitdata; | 17 | unsigned disabled_cpus __cpuinitdata; |
18 | /* Processor that is doing the boot up */ | 18 | /* Processor that is doing the boot up */ |
19 | unsigned int boot_cpu_physical_apicid = -1U; | 19 | unsigned int boot_cpu_physical_apicid = -1U; |
20 | unsigned int max_physical_apicid; | ||
20 | EXPORT_SYMBOL(boot_cpu_physical_apicid); | 21 | EXPORT_SYMBOL(boot_cpu_physical_apicid); |
21 | 22 | ||
22 | DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; | 23 | DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; |
@@ -137,3 +138,25 @@ void __init setup_per_cpu_areas(void) | |||
137 | } | 138 | } |
138 | 139 | ||
139 | #endif | 140 | #endif |
141 | |||
142 | void __init parse_setup_data(void) | ||
143 | { | ||
144 | struct setup_data *data; | ||
145 | u64 pa_data; | ||
146 | |||
147 | if (boot_params.hdr.version < 0x0209) | ||
148 | return; | ||
149 | pa_data = boot_params.hdr.setup_data; | ||
150 | while (pa_data) { | ||
151 | data = early_ioremap(pa_data, PAGE_SIZE); | ||
152 | switch (data->type) { | ||
153 | default: | ||
154 | break; | ||
155 | } | ||
156 | #ifndef CONFIG_DEBUG_BOOT_PARAMS | ||
157 | free_early(pa_data, pa_data+sizeof(*data)+data->len); | ||
158 | #endif | ||
159 | pa_data = data->next; | ||
160 | early_iounmap(data, PAGE_SIZE); | ||
161 | } | ||
162 | } | ||
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 5a2f8e063887..1d4be07e15e5 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c | |||
@@ -67,10 +67,12 @@ | |||
67 | #include <asm/bios_ebda.h> | 67 | #include <asm/bios_ebda.h> |
68 | #include <asm/cacheflush.h> | 68 | #include <asm/cacheflush.h> |
69 | #include <asm/processor.h> | 69 | #include <asm/processor.h> |
70 | #include <asm/efi.h> | ||
70 | 71 | ||
71 | /* This value is set up by the early boot code to point to the value | 72 | /* This value is set up by the early boot code to point to the value |
72 | immediately after the boot time page tables. It contains a *physical* | 73 | immediately after the boot time page tables. It contains a *physical* |
73 | address, and must not be in the .bss segment! */ | 74 | address, and must not be in the .bss segment! */ |
75 | unsigned long init_pg_tables_start __initdata = ~0UL; | ||
74 | unsigned long init_pg_tables_end __initdata = ~0UL; | 76 | unsigned long init_pg_tables_end __initdata = ~0UL; |
75 | 77 | ||
76 | /* | 78 | /* |
@@ -237,42 +239,6 @@ static inline void copy_edd(void) | |||
237 | } | 239 | } |
238 | #endif | 240 | #endif |
239 | 241 | ||
240 | int __initdata user_defined_memmap; | ||
241 | |||
242 | /* | ||
243 | * "mem=nopentium" disables the 4MB page tables. | ||
244 | * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM | ||
245 | * to <mem>, overriding the bios size. | ||
246 | * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from | ||
247 | * <start> to <start>+<mem>, overriding the bios size. | ||
248 | * | ||
249 | * HPA tells me bootloaders need to parse mem=, so no new | ||
250 | * option should be mem= [also see Documentation/i386/boot.txt] | ||
251 | */ | ||
252 | static int __init parse_mem(char *arg) | ||
253 | { | ||
254 | if (!arg) | ||
255 | return -EINVAL; | ||
256 | |||
257 | if (strcmp(arg, "nopentium") == 0) { | ||
258 | setup_clear_cpu_cap(X86_FEATURE_PSE); | ||
259 | } else { | ||
260 | /* If the user specifies memory size, we | ||
261 | * limit the BIOS-provided memory map to | ||
262 | * that size. exactmap can be used to specify | ||
263 | * the exact map. mem=number can be used to | ||
264 | * trim the existing memory map. | ||
265 | */ | ||
266 | unsigned long long mem_size; | ||
267 | |||
268 | mem_size = memparse(arg, &arg); | ||
269 | limit_regions(mem_size); | ||
270 | user_defined_memmap = 1; | ||
271 | } | ||
272 | return 0; | ||
273 | } | ||
274 | early_param("mem", parse_mem); | ||
275 | |||
276 | #ifdef CONFIG_PROC_VMCORE | 242 | #ifdef CONFIG_PROC_VMCORE |
277 | /* elfcorehdr= specifies the location of elf core header | 243 | /* elfcorehdr= specifies the location of elf core header |
278 | * stored by the crashed kernel. | 244 | * stored by the crashed kernel. |
@@ -395,56 +361,6 @@ unsigned long __init find_max_low_pfn(void) | |||
395 | return max_low_pfn; | 361 | return max_low_pfn; |
396 | } | 362 | } |
397 | 363 | ||
398 | #define BIOS_LOWMEM_KILOBYTES 0x413 | ||
399 | |||
400 | /* | ||
401 | * The BIOS places the EBDA/XBDA at the top of conventional | ||
402 | * memory, and usually decreases the reported amount of | ||
403 | * conventional memory (int 0x12) too. This also contains a | ||
404 | * workaround for Dell systems that neglect to reserve EBDA. | ||
405 | * The same workaround also avoids a problem with the AMD768MPX | ||
406 | * chipset: reserve a page before VGA to prevent PCI prefetch | ||
407 | * into it (errata #56). Usually the page is reserved anyways, | ||
408 | * unless you have no PS/2 mouse plugged in. | ||
409 | */ | ||
410 | static void __init reserve_ebda_region(void) | ||
411 | { | ||
412 | unsigned int lowmem, ebda_addr; | ||
413 | |||
414 | /* To determine the position of the EBDA and the */ | ||
415 | /* end of conventional memory, we need to look at */ | ||
416 | /* the BIOS data area. In a paravirtual environment */ | ||
417 | /* that area is absent. We'll just have to assume */ | ||
418 | /* that the paravirt case can handle memory setup */ | ||
419 | /* correctly, without our help. */ | ||
420 | if (paravirt_enabled()) | ||
421 | return; | ||
422 | |||
423 | /* end of low (conventional) memory */ | ||
424 | lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); | ||
425 | lowmem <<= 10; | ||
426 | |||
427 | /* start of EBDA area */ | ||
428 | ebda_addr = get_bios_ebda(); | ||
429 | |||
430 | /* Fixup: bios puts an EBDA in the top 64K segment */ | ||
431 | /* of conventional memory, but does not adjust lowmem. */ | ||
432 | if ((lowmem - ebda_addr) <= 0x10000) | ||
433 | lowmem = ebda_addr; | ||
434 | |||
435 | /* Fixup: bios does not report an EBDA at all. */ | ||
436 | /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ | ||
437 | if ((ebda_addr == 0) && (lowmem >= 0x9f000)) | ||
438 | lowmem = 0x9f000; | ||
439 | |||
440 | /* Paranoia: should never happen, but... */ | ||
441 | if ((lowmem == 0) || (lowmem >= 0x100000)) | ||
442 | lowmem = 0x9f000; | ||
443 | |||
444 | /* reserve all memory between lowmem and the 1MB mark */ | ||
445 | reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT); | ||
446 | } | ||
447 | |||
448 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 364 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
449 | static void __init setup_bootmem_allocator(void); | 365 | static void __init setup_bootmem_allocator(void); |
450 | static unsigned long __init setup_memory(void) | 366 | static unsigned long __init setup_memory(void) |
@@ -462,11 +378,13 @@ static unsigned long __init setup_memory(void) | |||
462 | if (max_pfn > max_low_pfn) { | 378 | if (max_pfn > max_low_pfn) { |
463 | highstart_pfn = max_low_pfn; | 379 | highstart_pfn = max_low_pfn; |
464 | } | 380 | } |
381 | memory_present(0, 0, highend_pfn); | ||
465 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", | 382 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", |
466 | pages_to_mb(highend_pfn - highstart_pfn)); | 383 | pages_to_mb(highend_pfn - highstart_pfn)); |
467 | num_physpages = highend_pfn; | 384 | num_physpages = highend_pfn; |
468 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; | 385 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; |
469 | #else | 386 | #else |
387 | memory_present(0, 0, max_low_pfn); | ||
470 | num_physpages = max_low_pfn; | 388 | num_physpages = max_low_pfn; |
471 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; | 389 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; |
472 | #endif | 390 | #endif |
@@ -488,11 +406,12 @@ static void __init zone_sizes_init(void) | |||
488 | max_zone_pfns[ZONE_DMA] = | 406 | max_zone_pfns[ZONE_DMA] = |
489 | virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; | 407 | virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; |
490 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; | 408 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; |
409 | remove_all_active_ranges(); | ||
491 | #ifdef CONFIG_HIGHMEM | 410 | #ifdef CONFIG_HIGHMEM |
492 | max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; | 411 | max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; |
493 | add_active_range(0, 0, highend_pfn); | 412 | e820_register_active_regions(0, 0, highend_pfn); |
494 | #else | 413 | #else |
495 | add_active_range(0, 0, max_low_pfn); | 414 | e820_register_active_regions(0, 0, max_low_pfn); |
496 | #endif | 415 | #endif |
497 | 416 | ||
498 | free_area_init_nodes(max_zone_pfns); | 417 | free_area_init_nodes(max_zone_pfns); |
@@ -558,44 +477,57 @@ static bool do_relocate_initrd = false; | |||
558 | 477 | ||
559 | static void __init reserve_initrd(void) | 478 | static void __init reserve_initrd(void) |
560 | { | 479 | { |
561 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | 480 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; |
562 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | 481 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; |
563 | unsigned long ramdisk_end = ramdisk_image + ramdisk_size; | 482 | u64 ramdisk_end = ramdisk_image + ramdisk_size; |
564 | unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; | 483 | u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT; |
565 | unsigned long ramdisk_here; | 484 | u64 ramdisk_here; |
566 | |||
567 | initrd_start = 0; | ||
568 | 485 | ||
569 | if (!boot_params.hdr.type_of_loader || | 486 | if (!boot_params.hdr.type_of_loader || |
570 | !ramdisk_image || !ramdisk_size) | 487 | !ramdisk_image || !ramdisk_size) |
571 | return; /* No initrd provided by bootloader */ | 488 | return; /* No initrd provided by bootloader */ |
572 | 489 | ||
573 | if (ramdisk_end < ramdisk_image) { | 490 | initrd_start = 0; |
574 | printk(KERN_ERR "initrd wraps around end of memory, " | 491 | |
575 | "disabling initrd\n"); | ||
576 | return; | ||
577 | } | ||
578 | if (ramdisk_size >= end_of_lowmem/2) { | 492 | if (ramdisk_size >= end_of_lowmem/2) { |
493 | free_early(ramdisk_image, ramdisk_end); | ||
579 | printk(KERN_ERR "initrd too large to handle, " | 494 | printk(KERN_ERR "initrd too large to handle, " |
580 | "disabling initrd\n"); | 495 | "disabling initrd\n"); |
581 | return; | 496 | return; |
582 | } | 497 | } |
498 | |||
499 | printk(KERN_INFO "old RAMDISK: %08llx - %08llx\n", ramdisk_image, | ||
500 | ramdisk_end); | ||
501 | |||
502 | |||
583 | if (ramdisk_end <= end_of_lowmem) { | 503 | if (ramdisk_end <= end_of_lowmem) { |
584 | /* All in lowmem, easy case */ | 504 | /* All in lowmem, easy case */ |
585 | reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT); | 505 | /* |
506 | * don't need to reserve again, already reserved early | ||
507 | * in i386_start_kernel | ||
508 | */ | ||
586 | initrd_start = ramdisk_image + PAGE_OFFSET; | 509 | initrd_start = ramdisk_image + PAGE_OFFSET; |
587 | initrd_end = initrd_start+ramdisk_size; | 510 | initrd_end = initrd_start+ramdisk_size; |
588 | return; | 511 | return; |
589 | } | 512 | } |
590 | 513 | ||
591 | /* We need to move the initrd down into lowmem */ | 514 | /* We need to move the initrd down into lowmem */ |
592 | ramdisk_here = (end_of_lowmem - ramdisk_size) & PAGE_MASK; | 515 | ramdisk_here = find_e820_area(min_low_pfn<<PAGE_SHIFT, |
516 | end_of_lowmem, ramdisk_size, | ||
517 | PAGE_SIZE); | ||
518 | |||
519 | if (ramdisk_here == -1ULL) | ||
520 | panic("Cannot find place for new RAMDISK of size %lld\n", | ||
521 | ramdisk_size); | ||
593 | 522 | ||
594 | /* Note: this includes all the lowmem currently occupied by | 523 | /* Note: this includes all the lowmem currently occupied by |
595 | the initrd, we rely on that fact to keep the data intact. */ | 524 | the initrd, we rely on that fact to keep the data intact. */ |
596 | reserve_bootmem(ramdisk_here, ramdisk_size, BOOTMEM_DEFAULT); | 525 | reserve_early(ramdisk_here, ramdisk_here + ramdisk_size, |
526 | "NEW RAMDISK"); | ||
597 | initrd_start = ramdisk_here + PAGE_OFFSET; | 527 | initrd_start = ramdisk_here + PAGE_OFFSET; |
598 | initrd_end = initrd_start + ramdisk_size; | 528 | initrd_end = initrd_start + ramdisk_size; |
529 | printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", | ||
530 | ramdisk_here, ramdisk_here + ramdisk_size); | ||
599 | 531 | ||
600 | do_relocate_initrd = true; | 532 | do_relocate_initrd = true; |
601 | } | 533 | } |
@@ -604,10 +536,10 @@ static void __init reserve_initrd(void) | |||
604 | 536 | ||
605 | static void __init relocate_initrd(void) | 537 | static void __init relocate_initrd(void) |
606 | { | 538 | { |
607 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | 539 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; |
608 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | 540 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; |
609 | unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; | 541 | u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT; |
610 | unsigned long ramdisk_here; | 542 | u64 ramdisk_here; |
611 | unsigned long slop, clen, mapaddr; | 543 | unsigned long slop, clen, mapaddr; |
612 | char *p, *q; | 544 | char *p, *q; |
613 | 545 | ||
@@ -624,6 +556,10 @@ static void __init relocate_initrd(void) | |||
624 | p = (char *)__va(ramdisk_image); | 556 | p = (char *)__va(ramdisk_image); |
625 | memcpy(q, p, clen); | 557 | memcpy(q, p, clen); |
626 | q += clen; | 558 | q += clen; |
559 | /* need to free these low pages...*/ | ||
560 | printk(KERN_INFO "Freeing old partial RAMDISK %08llx-%08llx\n", | ||
561 | ramdisk_image, ramdisk_image + clen - 1); | ||
562 | free_bootmem(ramdisk_image, clen); | ||
627 | ramdisk_image += clen; | 563 | ramdisk_image += clen; |
628 | ramdisk_size -= clen; | 564 | ramdisk_size -= clen; |
629 | } | 565 | } |
@@ -642,47 +578,44 @@ static void __init relocate_initrd(void) | |||
642 | ramdisk_image += clen; | 578 | ramdisk_image += clen; |
643 | ramdisk_size -= clen; | 579 | ramdisk_size -= clen; |
644 | } | 580 | } |
581 | /* high pages is not converted by early_res_to_bootmem */ | ||
582 | ramdisk_image = boot_params.hdr.ramdisk_image; | ||
583 | ramdisk_size = boot_params.hdr.ramdisk_size; | ||
584 | printk(KERN_INFO "Copied RAMDISK from %016llx - %016llx to %08llx - %08llx\n", | ||
585 | ramdisk_image, ramdisk_image + ramdisk_size - 1, | ||
586 | ramdisk_here, ramdisk_here + ramdisk_size - 1); | ||
645 | } | 587 | } |
646 | 588 | ||
647 | #endif /* CONFIG_BLK_DEV_INITRD */ | 589 | #endif /* CONFIG_BLK_DEV_INITRD */ |
648 | 590 | ||
649 | void __init setup_bootmem_allocator(void) | 591 | void __init setup_bootmem_allocator(void) |
650 | { | 592 | { |
651 | unsigned long bootmap_size; | 593 | int i; |
594 | unsigned long bootmap_size, bootmap; | ||
652 | /* | 595 | /* |
653 | * Initialize the boot-time allocator (with low memory only): | 596 | * Initialize the boot-time allocator (with low memory only): |
654 | */ | 597 | */ |
655 | bootmap_size = init_bootmem(min_low_pfn, max_low_pfn); | 598 | bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT; |
656 | 599 | bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT, | |
657 | register_bootmem_low_pages(max_low_pfn); | 600 | max_pfn_mapped<<PAGE_SHIFT, bootmap_size, |
658 | 601 | PAGE_SIZE); | |
659 | /* | 602 | if (bootmap == -1L) |
660 | * Reserve the bootmem bitmap itself as well. We do this in two | 603 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); |
661 | * steps (first step was init_bootmem()) because this catches | 604 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); |
662 | * the (very unlikely) case of us accidentally initializing the | 605 | #ifdef CONFIG_BLK_DEV_INITRD |
663 | * bootmem allocator with an invalid RAM area. | 606 | reserve_initrd(); |
664 | */ | ||
665 | reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) + | ||
666 | bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text), | ||
667 | BOOTMEM_DEFAULT); | ||
668 | |||
669 | /* | ||
670 | * reserve physical page 0 - it's a special BIOS page on many boxes, | ||
671 | * enabling clean reboots, SMP operation, laptop functions. | ||
672 | */ | ||
673 | reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT); | ||
674 | |||
675 | /* reserve EBDA region */ | ||
676 | reserve_ebda_region(); | ||
677 | |||
678 | #ifdef CONFIG_SMP | ||
679 | /* | ||
680 | * But first pinch a few for the stack/trampoline stuff | ||
681 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
682 | * trampoline before removing it. (see the GDT stuff) | ||
683 | */ | ||
684 | reserve_bootmem(PAGE_SIZE, PAGE_SIZE, BOOTMEM_DEFAULT); | ||
685 | #endif | 607 | #endif |
608 | bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn); | ||
609 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", | ||
610 | max_pfn_mapped<<PAGE_SHIFT); | ||
611 | printk(KERN_INFO " low ram: %08lx - %08lx\n", | ||
612 | min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT); | ||
613 | printk(KERN_INFO " bootmap %08lx - %08lx\n", | ||
614 | bootmap, bootmap + bootmap_size); | ||
615 | for_each_online_node(i) | ||
616 | free_bootmem_with_active_regions(i, max_low_pfn); | ||
617 | early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); | ||
618 | |||
686 | #ifdef CONFIG_ACPI_SLEEP | 619 | #ifdef CONFIG_ACPI_SLEEP |
687 | /* | 620 | /* |
688 | * Reserve low memory region for sleep support. | 621 | * Reserve low memory region for sleep support. |
@@ -695,10 +628,6 @@ void __init setup_bootmem_allocator(void) | |||
695 | */ | 628 | */ |
696 | find_smp_config(); | 629 | find_smp_config(); |
697 | #endif | 630 | #endif |
698 | #ifdef CONFIG_BLK_DEV_INITRD | ||
699 | reserve_initrd(); | ||
700 | #endif | ||
701 | numa_kva_reserve(); | ||
702 | reserve_crashkernel(); | 631 | reserve_crashkernel(); |
703 | 632 | ||
704 | reserve_ibft_region(); | 633 | reserve_ibft_region(); |
@@ -731,12 +660,6 @@ static void set_mca_bus(int x) | |||
731 | static void set_mca_bus(int x) { } | 660 | static void set_mca_bus(int x) { } |
732 | #endif | 661 | #endif |
733 | 662 | ||
734 | /* Overridden in paravirt.c if CONFIG_PARAVIRT */ | ||
735 | char * __init __attribute__((weak)) memory_setup(void) | ||
736 | { | ||
737 | return machine_specific_memory_setup(); | ||
738 | } | ||
739 | |||
740 | #ifdef CONFIG_NUMA | 663 | #ifdef CONFIG_NUMA |
741 | /* | 664 | /* |
742 | * In the golden day, when everything among i386 and x86_64 will be | 665 | * In the golden day, when everything among i386 and x86_64 will be |
@@ -764,11 +687,14 @@ void __init setup_arch(char **cmdline_p) | |||
764 | pre_setup_arch_hook(); | 687 | pre_setup_arch_hook(); |
765 | early_cpu_init(); | 688 | early_cpu_init(); |
766 | early_ioremap_init(); | 689 | early_ioremap_init(); |
690 | reserve_setup_data(); | ||
767 | 691 | ||
768 | #ifdef CONFIG_EFI | 692 | #ifdef CONFIG_EFI |
769 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, | 693 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, |
770 | "EL32", 4)) | 694 | "EL32", 4)) { |
771 | efi_enabled = 1; | 695 | efi_enabled = 1; |
696 | efi_reserve_early(); | ||
697 | } | ||
772 | #endif | 698 | #endif |
773 | 699 | ||
774 | ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); | 700 | ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); |
@@ -792,8 +718,7 @@ void __init setup_arch(char **cmdline_p) | |||
792 | #endif | 718 | #endif |
793 | ARCH_SETUP | 719 | ARCH_SETUP |
794 | 720 | ||
795 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | 721 | setup_memory_map(); |
796 | print_memory_map(memory_setup()); | ||
797 | 722 | ||
798 | copy_edd(); | 723 | copy_edd(); |
799 | 724 | ||
@@ -811,12 +736,11 @@ void __init setup_arch(char **cmdline_p) | |||
811 | bss_resource.start = virt_to_phys(&__bss_start); | 736 | bss_resource.start = virt_to_phys(&__bss_start); |
812 | bss_resource.end = virt_to_phys(&__bss_stop)-1; | 737 | bss_resource.end = virt_to_phys(&__bss_stop)-1; |
813 | 738 | ||
739 | parse_setup_data(); | ||
740 | |||
814 | parse_early_param(); | 741 | parse_early_param(); |
815 | 742 | ||
816 | if (user_defined_memmap) { | 743 | finish_e820_parsing(); |
817 | printk(KERN_INFO "user-defined physical RAM map:\n"); | ||
818 | print_memory_map("user"); | ||
819 | } | ||
820 | 744 | ||
821 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | 745 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); |
822 | *cmdline_p = command_line; | 746 | *cmdline_p = command_line; |
@@ -824,11 +748,22 @@ void __init setup_arch(char **cmdline_p) | |||
824 | if (efi_enabled) | 748 | if (efi_enabled) |
825 | efi_init(); | 749 | efi_init(); |
826 | 750 | ||
751 | e820_register_active_regions(0, 0, -1UL); | ||
752 | /* | ||
753 | * partially used pages are not usable - thus | ||
754 | * we are rounding upwards: | ||
755 | */ | ||
756 | max_pfn = e820_end_of_ram(); | ||
757 | |||
758 | /* preallocate 4k for mptable mpc */ | ||
759 | early_reserve_e820_mpc_new(); | ||
827 | /* update e820 for memory not covered by WB MTRRs */ | 760 | /* update e820 for memory not covered by WB MTRRs */ |
828 | propagate_e820_map(); | ||
829 | mtrr_bp_init(); | 761 | mtrr_bp_init(); |
830 | if (mtrr_trim_uncached_memory(max_pfn)) | 762 | if (mtrr_trim_uncached_memory(max_pfn)) { |
831 | propagate_e820_map(); | 763 | remove_all_active_ranges(); |
764 | e820_register_active_regions(0, 0, -1UL); | ||
765 | max_pfn = e820_end_of_ram(); | ||
766 | } | ||
832 | 767 | ||
833 | max_low_pfn = setup_memory(); | 768 | max_low_pfn = setup_memory(); |
834 | 769 | ||
@@ -855,9 +790,6 @@ void __init setup_arch(char **cmdline_p) | |||
855 | * not to exceed the 8Mb limit. | 790 | * not to exceed the 8Mb limit. |
856 | */ | 791 | */ |
857 | 792 | ||
858 | #ifdef CONFIG_SMP | ||
859 | smp_alloc_memory(); /* AP processor realmode stacks in low memory*/ | ||
860 | #endif | ||
861 | paging_init(); | 793 | paging_init(); |
862 | 794 | ||
863 | /* | 795 | /* |
@@ -914,21 +846,20 @@ void __init setup_arch(char **cmdline_p) | |||
914 | 846 | ||
915 | #ifdef CONFIG_ACPI | 847 | #ifdef CONFIG_ACPI |
916 | acpi_boot_init(); | 848 | acpi_boot_init(); |
917 | 849 | #endif | |
850 | #if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS) | ||
851 | if (smp_found_config) | ||
852 | get_smp_config(); | ||
853 | #endif | ||
918 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) | 854 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) |
919 | if (def_to_bigsmp) | 855 | if (def_to_bigsmp) |
920 | printk(KERN_WARNING "More than 8 CPUs detected and " | 856 | printk(KERN_WARNING "More than 8 CPUs detected and " |
921 | "CONFIG_X86_PC cannot handle it.\nUse " | 857 | "CONFIG_X86_PC cannot handle it.\nUse " |
922 | "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n"); | 858 | "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n"); |
923 | #endif | 859 | #endif |
924 | #endif | ||
925 | #ifdef CONFIG_X86_LOCAL_APIC | ||
926 | if (smp_found_config) | ||
927 | get_smp_config(); | ||
928 | #endif | ||
929 | 860 | ||
930 | e820_register_memory(); | 861 | e820_setup_gap(); |
931 | e820_mark_nosave_regions(); | 862 | e820_mark_nosave_regions(max_low_pfn); |
932 | 863 | ||
933 | #ifdef CONFIG_VT | 864 | #ifdef CONFIG_VT |
934 | #if defined(CONFIG_VGA_CONSOLE) | 865 | #if defined(CONFIG_VGA_CONSOLE) |
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 6dff1286ad8a..26d60cc0e370 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c | |||
@@ -56,6 +56,7 @@ | |||
56 | #include <asm/desc.h> | 56 | #include <asm/desc.h> |
57 | #include <video/edid.h> | 57 | #include <video/edid.h> |
58 | #include <asm/e820.h> | 58 | #include <asm/e820.h> |
59 | #include <asm/mpspec.h> | ||
59 | #include <asm/dma.h> | 60 | #include <asm/dma.h> |
60 | #include <asm/gart.h> | 61 | #include <asm/gart.h> |
61 | #include <asm/mpspec.h> | 62 | #include <asm/mpspec.h> |
@@ -271,28 +272,6 @@ void __attribute__((weak)) __init memory_setup(void) | |||
271 | machine_specific_memory_setup(); | 272 | machine_specific_memory_setup(); |
272 | } | 273 | } |
273 | 274 | ||
274 | static void __init parse_setup_data(void) | ||
275 | { | ||
276 | struct setup_data *data; | ||
277 | unsigned long pa_data; | ||
278 | |||
279 | if (boot_params.hdr.version < 0x0209) | ||
280 | return; | ||
281 | pa_data = boot_params.hdr.setup_data; | ||
282 | while (pa_data) { | ||
283 | data = early_ioremap(pa_data, PAGE_SIZE); | ||
284 | switch (data->type) { | ||
285 | default: | ||
286 | break; | ||
287 | } | ||
288 | #ifndef CONFIG_DEBUG_BOOT_PARAMS | ||
289 | free_early(pa_data, pa_data+sizeof(*data)+data->len); | ||
290 | #endif | ||
291 | pa_data = data->next; | ||
292 | early_iounmap(data, PAGE_SIZE); | ||
293 | } | ||
294 | } | ||
295 | |||
296 | #ifdef CONFIG_PCI_MMCONFIG | 275 | #ifdef CONFIG_PCI_MMCONFIG |
297 | extern void __cpuinit fam10h_check_enable_mmcfg(void); | 276 | extern void __cpuinit fam10h_check_enable_mmcfg(void); |
298 | extern void __init check_enable_amd_mmconf_dmi(void); | 277 | extern void __init check_enable_amd_mmconf_dmi(void); |
@@ -329,8 +308,10 @@ void __init setup_arch(char **cmdline_p) | |||
329 | #endif | 308 | #endif |
330 | #ifdef CONFIG_EFI | 309 | #ifdef CONFIG_EFI |
331 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, | 310 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, |
332 | "EL64", 4)) | 311 | "EL64", 4)) { |
333 | efi_enabled = 1; | 312 | efi_enabled = 1; |
313 | efi_reserve_early(); | ||
314 | } | ||
334 | #endif | 315 | #endif |
335 | 316 | ||
336 | ARCH_SETUP | 317 | ARCH_SETUP |
@@ -381,9 +362,13 @@ void __init setup_arch(char **cmdline_p) | |||
381 | * we are rounding upwards: | 362 | * we are rounding upwards: |
382 | */ | 363 | */ |
383 | end_pfn = e820_end_of_ram(); | 364 | end_pfn = e820_end_of_ram(); |
365 | |||
366 | /* pre allocte 4k for mptable mpc */ | ||
367 | early_reserve_e820_mpc_new(); | ||
384 | /* update e820 for memory not covered by WB MTRRs */ | 368 | /* update e820 for memory not covered by WB MTRRs */ |
385 | mtrr_bp_init(); | 369 | mtrr_bp_init(); |
386 | if (mtrr_trim_uncached_memory(end_pfn)) { | 370 | if (mtrr_trim_uncached_memory(end_pfn)) { |
371 | remove_all_active_ranges(); | ||
387 | e820_register_active_regions(0, 0, -1UL); | 372 | e820_register_active_regions(0, 0, -1UL); |
388 | end_pfn = e820_end_of_ram(); | 373 | end_pfn = e820_end_of_ram(); |
389 | } | 374 | } |
@@ -392,7 +377,7 @@ void __init setup_arch(char **cmdline_p) | |||
392 | 377 | ||
393 | check_efer(); | 378 | check_efer(); |
394 | 379 | ||
395 | max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT)); | 380 | max_pfn_mapped = init_memory_mapping(0, (end_pfn << PAGE_SHIFT)); |
396 | if (efi_enabled) | 381 | if (efi_enabled) |
397 | efi_init(); | 382 | efi_init(); |
398 | 383 | ||
@@ -453,13 +438,12 @@ void __init setup_arch(char **cmdline_p) | |||
453 | acpi_reserve_bootmem(); | 438 | acpi_reserve_bootmem(); |
454 | #endif | 439 | #endif |
455 | 440 | ||
456 | if (efi_enabled) | 441 | #ifdef CONFIG_X86_MPPARSE |
457 | efi_reserve_bootmem(); | ||
458 | |||
459 | /* | 442 | /* |
460 | * Find and reserve possible boot-time SMP configuration: | 443 | * Find and reserve possible boot-time SMP configuration: |
461 | */ | 444 | */ |
462 | find_smp_config(); | 445 | find_smp_config(); |
446 | #endif | ||
463 | #ifdef CONFIG_BLK_DEV_INITRD | 447 | #ifdef CONFIG_BLK_DEV_INITRD |
464 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { | 448 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { |
465 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | 449 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; |
@@ -502,11 +486,13 @@ void __init setup_arch(char **cmdline_p) | |||
502 | 486 | ||
503 | init_cpu_to_node(); | 487 | init_cpu_to_node(); |
504 | 488 | ||
489 | #ifdef CONFIG_X86_MPPARSE | ||
505 | /* | 490 | /* |
506 | * get boot-time SMP configuration: | 491 | * get boot-time SMP configuration: |
507 | */ | 492 | */ |
508 | if (smp_found_config) | 493 | if (smp_found_config) |
509 | get_smp_config(); | 494 | get_smp_config(); |
495 | #endif | ||
510 | init_apic_mappings(); | 496 | init_apic_mappings(); |
511 | ioapic_init_mappings(); | 497 | ioapic_init_mappings(); |
512 | 498 | ||
@@ -516,7 +502,7 @@ void __init setup_arch(char **cmdline_p) | |||
516 | * We trust e820 completely. No explicit ROM probing in memory. | 502 | * We trust e820 completely. No explicit ROM probing in memory. |
517 | */ | 503 | */ |
518 | e820_reserve_resources(); | 504 | e820_reserve_resources(); |
519 | e820_mark_nosave_regions(); | 505 | e820_mark_nosave_regions(end_pfn); |
520 | 506 | ||
521 | /* request I/O space for devices used on all i[345]86 PCs */ | 507 | /* request I/O space for devices used on all i[345]86 PCs */ |
522 | for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) | 508 | for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3e1cecedde42..83e62137911b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -555,23 +555,6 @@ cpumask_t cpu_coregroup_map(int cpu) | |||
555 | return c->llc_shared_map; | 555 | return c->llc_shared_map; |
556 | } | 556 | } |
557 | 557 | ||
558 | #ifdef CONFIG_X86_32 | ||
559 | /* | ||
560 | * We are called very early to get the low memory for the | ||
561 | * SMP bootup trampoline page. | ||
562 | */ | ||
563 | void __init smp_alloc_memory(void) | ||
564 | { | ||
565 | trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE); | ||
566 | /* | ||
567 | * Has to be in very low memory so we can execute | ||
568 | * real-mode AP code. | ||
569 | */ | ||
570 | if (__pa(trampoline_base) >= 0x9F000) | ||
571 | BUG(); | ||
572 | } | ||
573 | #endif | ||
574 | |||
575 | static void impress_friends(void) | 558 | static void impress_friends(void) |
576 | { | 559 | { |
577 | int cpu; | 560 | int cpu; |
diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/kernel/srat_32.c index 70e4a374b4e8..e9d91720a40f 100644 --- a/arch/x86/kernel/srat_32.c +++ b/arch/x86/kernel/srat_32.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <asm/srat.h> | 31 | #include <asm/srat.h> |
32 | #include <asm/topology.h> | 32 | #include <asm/topology.h> |
33 | #include <asm/smp.h> | 33 | #include <asm/smp.h> |
34 | #include <asm/e820.h> | ||
34 | 35 | ||
35 | /* | 36 | /* |
36 | * proximity macros and definitions | 37 | * proximity macros and definitions |
@@ -244,12 +245,13 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) | |||
244 | printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", | 245 | printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", |
245 | j, chunk->nid, chunk->start_pfn, chunk->end_pfn); | 246 | j, chunk->nid, chunk->start_pfn, chunk->end_pfn); |
246 | node_read_chunk(chunk->nid, chunk); | 247 | node_read_chunk(chunk->nid, chunk); |
247 | add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn); | 248 | e820_register_active_regions(chunk->nid, chunk->start_pfn, |
249 | min(chunk->end_pfn, max_pfn)); | ||
248 | } | 250 | } |
249 | 251 | ||
250 | for_each_online_node(nid) { | 252 | for_each_online_node(nid) { |
251 | unsigned long start = node_start_pfn[nid]; | 253 | unsigned long start = node_start_pfn[nid]; |
252 | unsigned long end = node_end_pfn[nid]; | 254 | unsigned long end = min(node_end_pfn[nid], max_pfn); |
253 | 255 | ||
254 | memory_present(nid, start, end); | 256 | memory_present(nid, start, end); |
255 | node_remap_size[nid] = node_memmap_size_bytes(nid, start, end); | 257 | node_remap_size[nid] = node_memmap_size_bytes(nid, start, end); |
@@ -261,7 +263,7 @@ out_fail: | |||
261 | 263 | ||
262 | struct acpi_static_rsdt { | 264 | struct acpi_static_rsdt { |
263 | struct acpi_table_rsdt table; | 265 | struct acpi_table_rsdt table; |
264 | u32 padding[7]; /* Allow for 7 more table entries */ | 266 | u32 padding[32]; /* Allow for 32 more table entries */ |
265 | }; | 267 | }; |
266 | 268 | ||
267 | int __init get_memcfg_from_srat(void) | 269 | int __init get_memcfg_from_srat(void) |
@@ -297,7 +299,7 @@ int __init get_memcfg_from_srat(void) | |||
297 | } | 299 | } |
298 | 300 | ||
299 | rsdt = (struct acpi_table_rsdt *) | 301 | rsdt = (struct acpi_table_rsdt *) |
300 | early_ioremap(rsdp->rsdt_physical_address, sizeof(struct acpi_table_rsdt)); | 302 | early_ioremap(rsdp->rsdt_physical_address, sizeof(saved_rsdt)); |
301 | 303 | ||
302 | if (!rsdt) { | 304 | if (!rsdt) { |
303 | printk(KERN_WARNING | 305 | printk(KERN_WARNING |
@@ -310,6 +312,7 @@ int __init get_memcfg_from_srat(void) | |||
310 | 312 | ||
311 | if (strncmp(header->signature, ACPI_SIG_RSDT, strlen(ACPI_SIG_RSDT))) { | 313 | if (strncmp(header->signature, ACPI_SIG_RSDT, strlen(ACPI_SIG_RSDT))) { |
312 | printk(KERN_WARNING "ACPI: RSDT signature incorrect\n"); | 314 | printk(KERN_WARNING "ACPI: RSDT signature incorrect\n"); |
315 | early_iounmap(rsdt, sizeof(saved_rsdt)); | ||
313 | goto out_err; | 316 | goto out_err; |
314 | } | 317 | } |
315 | 318 | ||
@@ -319,37 +322,51 @@ int __init get_memcfg_from_srat(void) | |||
319 | * size of RSDT) divided by the size of each entry | 322 | * size of RSDT) divided by the size of each entry |
320 | * (4-byte table pointers). | 323 | * (4-byte table pointers). |
321 | */ | 324 | */ |
322 | tables = (header->length - sizeof(struct acpi_table_header)) / 4; | 325 | tables = (header->length - sizeof(struct acpi_table_header)) / sizeof(u32); |
323 | 326 | ||
324 | if (!tables) | 327 | if (!tables) |
325 | goto out_err; | 328 | goto out_err; |
326 | 329 | ||
327 | memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt)); | 330 | memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt)); |
328 | 331 | early_iounmap(rsdt, sizeof(saved_rsdt)); | |
329 | if (saved_rsdt.table.header.length > sizeof(saved_rsdt)) { | 332 | if (saved_rsdt.table.header.length > sizeof(saved_rsdt)) { |
330 | printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n", | 333 | printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n", |
331 | saved_rsdt.table.header.length); | 334 | saved_rsdt.table.header.length); |
332 | goto out_err; | 335 | goto out_err; |
333 | } | 336 | } |
334 | 337 | ||
335 | printk("Begin SRAT table scan....\n"); | 338 | printk("Begin SRAT table scan....%d\n", tables); |
336 | 339 | ||
337 | for (i = 0; i < tables; i++) { | 340 | for (i = 0; i < tables; i++) { |
341 | int result; | ||
342 | u32 length; | ||
338 | /* Map in header, then map in full table length. */ | 343 | /* Map in header, then map in full table length. */ |
339 | header = (struct acpi_table_header *) | 344 | header = (struct acpi_table_header *) |
340 | early_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header)); | 345 | early_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header)); |
341 | if (!header) | 346 | if (!header) |
342 | break; | 347 | break; |
348 | |||
349 | printk(KERN_INFO "ACPI: %4.4s %08lX, %04X\n", | ||
350 | header->signature, | ||
351 | (unsigned long)saved_rsdt.table.table_offset_entry[i], | ||
352 | header->length); | ||
353 | |||
354 | if (strncmp((char *) &header->signature, ACPI_SIG_SRAT, 4)) { | ||
355 | early_iounmap(header, sizeof(struct acpi_table_header)); | ||
356 | continue; | ||
357 | } | ||
358 | |||
359 | length = header->length; | ||
360 | early_iounmap(header, sizeof(struct acpi_table_header)); | ||
343 | header = (struct acpi_table_header *) | 361 | header = (struct acpi_table_header *) |
344 | early_ioremap(saved_rsdt.table.table_offset_entry[i], header->length); | 362 | early_ioremap(saved_rsdt.table.table_offset_entry[i], length); |
345 | if (!header) | 363 | if (!header) |
346 | break; | 364 | break; |
347 | 365 | ||
348 | if (strncmp((char *) &header->signature, ACPI_SIG_SRAT, 4)) | ||
349 | continue; | ||
350 | |||
351 | /* we've found the srat table. don't need to look at any more tables */ | 366 | /* we've found the srat table. don't need to look at any more tables */ |
352 | return acpi20_parse_srat((struct acpi_table_srat *)header); | 367 | result = acpi20_parse_srat((struct acpi_table_srat *)header); |
368 | early_iounmap(header, length); | ||
369 | return result; | ||
353 | } | 370 | } |
354 | out_err: | 371 | out_err: |
355 | remove_all_active_ranges(); | 372 | remove_all_active_ranges(); |
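
The srat_32.c rework fixes an early_ioremap() lifetime bug: the old loop read header->length to size the second mapping and never unmapped anything it had mapped. The corrected pattern is to map just the fixed-size header, copy the length out, unmap, then map the full table. A self-contained sketch, where early_map()/early_unmap() are assumed stand-ins for early_ioremap()/early_iounmap():

    #include <stddef.h>

    struct table_header {
        char signature[4];
        unsigned int length;            /* total table size in bytes */
    };

    void *early_map(unsigned long phys, unsigned long size);    /* assumed */
    void early_unmap(void *virt, unsigned long size);           /* assumed */

    static void *map_whole_table(unsigned long phys)
    {
        struct table_header *h = early_map(phys, sizeof(*h));
        unsigned int len;

        if (!h)
            return NULL;
        len = h->length;                /* copy before unmapping */
        early_unmap(h, sizeof(*h));
        return early_map(phys, len);    /* caller unmaps with len */
    }

Every map is paired with an unmap of the same size, which is exactly what the added early_iounmap() calls in the hunk restore on the error and SRAT-found paths.
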
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index ae751094eba9..d67ce5f044ba 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c | |||
@@ -36,7 +36,9 @@ static struct rio_table_hdr *rio_table_hdr __initdata; | |||
36 | static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; | 36 | static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; |
37 | static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; | 37 | static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; |
38 | 38 | ||
39 | #ifndef CONFIG_X86_NUMAQ | ||
39 | static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata; | 40 | static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata; |
41 | #endif | ||
40 | 42 | ||
41 | static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) | 43 | static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) |
42 | { | 44 | { |
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index abbf199adebb..1106fac6024d 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c | |||
@@ -2,7 +2,7 @@ | |||
2 | 2 | ||
3 | #include <asm/trampoline.h> | 3 | #include <asm/trampoline.h> |
4 | 4 | ||
5 | /* ready for x86_64, no harm for x86, since it will overwrite after alloc */ | 5 | /* ready for x86_64 and x86 */ |
6 | unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); | 6 | unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); |
7 | 7 | ||
8 | /* | 8 | /* |
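
With smp_alloc_memory() removed from smpboot.c above (and from voyager_smp.c further down), trampoline_base no longer comes from the bootmem allocator: as the updated trampoline.c comment says, both x86 and x86_64 now use the fixed, early-reserved TRAMPOLINE_BASE. The invariant the deleted BUG() checks enforced still has to hold, since the AP startup trampoline executes in real mode and must live in the first 640K. A sketch of that check, with the limit taken from the removed code:

    #define REAL_MODE_LIMIT 0x9F000UL   /* top of usable low memory */

    /* Hypothetical helper: reject a trampoline placement that real-mode
     * AP startup code could not address. */
    static int trampoline_addr_ok(unsigned long phys)
    {
        return phys < REAL_MODE_LIMIT;
    }
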
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 5c7e2fd52075..5e4772907c6e 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -1012,6 +1012,7 @@ __init void lguest_init(void) | |||
1012 | * clobbered. The Launcher places our initial pagetables somewhere at | 1012 | * clobbered. The Launcher places our initial pagetables somewhere at |
1013 | * the top of our physical memory, so we don't need extra space: set | 1013 | * the top of our physical memory, so we don't need extra space: set |
1014 | * init_pg_tables_end to the end of the kernel. */ | 1014 | * init_pg_tables_end to the end of the kernel. */ |
1015 | init_pg_tables_start = __pa(pg0); | ||
1015 | init_pg_tables_end = __pa(pg0); | 1016 | init_pg_tables_end = __pa(pg0); |
1016 | 1017 | ||
1017 | /* Load the %fs segment register (the per-cpu segment register) with | 1018 | /* Load the %fs segment register (the per-cpu segment register) with |
@@ -1065,9 +1066,9 @@ __init void lguest_init(void) | |||
1065 | pm_power_off = lguest_power_off; | 1066 | pm_power_off = lguest_power_off; |
1066 | machine_ops.restart = lguest_restart; | 1067 | machine_ops.restart = lguest_restart; |
1067 | 1068 | ||
1068 | /* Now we're set up, call start_kernel() in init/main.c and we proceed | 1069 | /* Now we're set up, call i386_start_kernel() in head32.c and we proceed |
1069 | * to boot as normal. It never returns. */ | 1070 | * to boot as normal. It never returns. */ |
1070 | start_kernel(); | 1071 | i386_start_kernel(); |
1071 | } | 1072 | } |
1072 | /* | 1073 | /* |
1073 | * This marks the end of stage II of our journey, The Guest. | 1074 | * This marks the end of stage II of our journey, The Guest. |
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 0c28a071824c..56b4c39cb7fa 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c | |||
@@ -153,6 +153,7 @@ late_initcall(print_ipi_mode); | |||
153 | char * __init machine_specific_memory_setup(void) | 153 | char * __init machine_specific_memory_setup(void) |
154 | { | 154 | { |
155 | char *who; | 155 | char *who; |
156 | int new_nr; | ||
156 | 157 | ||
157 | 158 | ||
158 | who = "BIOS-e820"; | 159 | who = "BIOS-e820"; |
@@ -163,7 +164,11 @@ char * __init machine_specific_memory_setup(void) | |||
163 | * Otherwise fake a memory map; one section from 0k->640k, | 164 | * Otherwise fake a memory map; one section from 0k->640k, |
164 | * the next section from 1mb->appropriate_mem_k | 165 | * the next section from 1mb->appropriate_mem_k |
165 | */ | 166 | */ |
166 | sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); | 167 | new_nr = boot_params.e820_entries; |
168 | sanitize_e820_map(boot_params.e820_map, | ||
169 | ARRAY_SIZE(boot_params.e820_map), | ||
170 | &new_nr); | ||
171 | boot_params.e820_entries = new_nr; | ||
167 | if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) | 172 | if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) |
168 | < 0) { | 173 | < 0) { |
169 | unsigned long mem_size; | 174 | unsigned long mem_size; |
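
sanitize_e820_map() now takes the capacity of the destination array plus a pointer to the live entry count, so it can merge and dedup in place without ever writing past boot_params.e820_map. The calling convention, as a hedged sketch (sanitize_map, MAX_E820 and this struct e820entry are illustrative, not the kernel's exact definitions):

    #define MAX_E820 128

    struct e820entry {
        unsigned long long addr, size;
        unsigned int type;
    };

    /* assumed: merges/dedups entries in place, never growing past max,
     * and writes the resulting count back through nr */
    int sanitize_map(struct e820entry *map, int max, int *nr);

    static void fixup_bios_map(struct e820entry *map, unsigned char *nr_entries)
    {
        int new_nr = *nr_entries;   /* widen to int for the helper */

        sanitize_map(map, MAX_E820, &new_nr);
        *nr_entries = new_nr;       /* narrow back, as the hunk does */
    }

The mach-voyager/setup.c hunk further down makes the identical change.
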
diff --git a/arch/x86/mach-es7000/Makefile b/arch/x86/mach-es7000/Makefile index 69dd4da218dc..3ef8b43b62fc 100644 --- a/arch/x86/mach-es7000/Makefile +++ b/arch/x86/mach-es7000/Makefile | |||
@@ -3,4 +3,3 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_X86_ES7000) := es7000plat.o | 5 | obj-$(CONFIG_X86_ES7000) := es7000plat.o |
6 | obj-$(CONFIG_X86_GENERICARCH) := es7000plat.o | ||
diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/mach-es7000/es7000plat.c index f5d6f7d8b86e..4354ce804889 100644 --- a/arch/x86/mach-es7000/es7000plat.c +++ b/arch/x86/mach-es7000/es7000plat.c | |||
@@ -52,6 +52,8 @@ static struct mip_reg *host_reg; | |||
52 | static int mip_port; | 52 | static int mip_port; |
53 | static unsigned long mip_addr, host_addr; | 53 | static unsigned long mip_addr, host_addr; |
54 | 54 | ||
55 | int es7000_plat; | ||
56 | |||
55 | /* | 57 | /* |
56 | * GSI override for ES7000 platforms. | 58 | * GSI override for ES7000 platforms. |
57 | */ | 59 | */ |
@@ -175,53 +177,6 @@ find_unisys_acpi_oem_table(unsigned long *oem_addr) | |||
175 | } | 177 | } |
176 | #endif | 178 | #endif |
177 | 179 | ||
178 | /* | ||
179 | * This file also gets compiled if CONFIG_X86_GENERICARCH is set. Generic | ||
180 | * arch already has got following function definitions (asm-generic/es7000.c) | ||
181 | * hence no need to define these for that case. | ||
182 | */ | ||
183 | #ifndef CONFIG_X86_GENERICARCH | ||
184 | void es7000_sw_apic(void); | ||
185 | void __init enable_apic_mode(void) | ||
186 | { | ||
187 | es7000_sw_apic(); | ||
188 | return; | ||
189 | } | ||
190 | |||
191 | __init int mps_oem_check(struct mp_config_table *mpc, char *oem, | ||
192 | char *productid) | ||
193 | { | ||
194 | if (mpc->mpc_oemptr) { | ||
195 | struct mp_config_oemtable *oem_table = | ||
196 | (struct mp_config_oemtable *)mpc->mpc_oemptr; | ||
197 | if (!strncmp(oem, "UNISYS", 6)) | ||
198 | return parse_unisys_oem((char *)oem_table); | ||
199 | } | ||
200 | return 0; | ||
201 | } | ||
202 | #ifdef CONFIG_ACPI | ||
203 | /* Hook from generic ACPI tables.c */ | ||
204 | int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
205 | { | ||
206 | unsigned long oem_addr; | ||
207 | if (!find_unisys_acpi_oem_table(&oem_addr)) { | ||
208 | if (es7000_check_dsdt()) | ||
209 | return parse_unisys_oem((char *)oem_addr); | ||
210 | else { | ||
211 | setup_unisys(); | ||
212 | return 1; | ||
213 | } | ||
214 | } | ||
215 | return 0; | ||
216 | } | ||
217 | #else | ||
218 | int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
219 | { | ||
220 | return 0; | ||
221 | } | ||
222 | #endif | ||
223 | #endif /* CONFIG_X86_GENERICARCH */ | ||
224 | |||
225 | static void | 180 | static void |
226 | es7000_spin(int n) | 181 | es7000_spin(int n) |
227 | { | 182 | { |
diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile index 19d6d407737b..0dbd7803a1d5 100644 --- a/arch/x86/mach-generic/Makefile +++ b/arch/x86/mach-generic/Makefile | |||
@@ -2,7 +2,11 @@ | |||
2 | # Makefile for the generic architecture | 2 | # Makefile for the generic architecture |
3 | # | 3 | # |
4 | 4 | ||
5 | EXTRA_CFLAGS := -Iarch/x86/kernel | 5 | EXTRA_CFLAGS := -Iarch/x86/kernel |
6 | 6 | ||
7 | obj-y := probe.o summit.o bigsmp.o es7000.o default.o | 7 | obj-y := probe.o default.o |
8 | obj-y += ../../x86/mach-es7000/ | 8 | obj-$(CONFIG_X86_NUMAQ) += numaq.o |
9 | obj-$(CONFIG_X86_SUMMIT) += summit.o | ||
10 | obj-$(CONFIG_X86_BIGSMP) += bigsmp.o | ||
11 | obj-$(CONFIG_X86_ES7000) += es7000.o | ||
12 | obj-$(CONFIG_X86_ES7000) += ../../x86/mach-es7000/ | ||
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 95fc463056d0..59d771714559 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c | |||
@@ -23,10 +23,8 @@ static int dmi_bigsmp; /* can be set by dmi scanners */ | |||
23 | 23 | ||
24 | static int hp_ht_bigsmp(const struct dmi_system_id *d) | 24 | static int hp_ht_bigsmp(const struct dmi_system_id *d) |
25 | { | 25 | { |
26 | #ifdef CONFIG_X86_GENERICARCH | ||
27 | printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); | 26 | printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); |
28 | dmi_bigsmp = 1; | 27 | dmi_bigsmp = 1; |
29 | #endif | ||
30 | return 0; | 28 | return 0; |
31 | } | 29 | } |
32 | 30 | ||
@@ -48,7 +46,7 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { | |||
48 | static int probe_bigsmp(void) | 46 | static int probe_bigsmp(void) |
49 | { | 47 | { |
50 | if (def_to_bigsmp) | 48 | if (def_to_bigsmp) |
51 | dmi_bigsmp = 1; | 49 | dmi_bigsmp = 1; |
52 | else | 50 | else |
53 | dmi_check_system(bigsmp_dmi_table); | 51 | dmi_check_system(bigsmp_dmi_table); |
54 | return dmi_bigsmp; | 52 | return dmi_bigsmp; |
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c new file mode 100644 index 000000000000..8091e68764c4 --- /dev/null +++ b/arch/x86/mach-generic/numaq.c | |||
@@ -0,0 +1,41 @@ | |||
1 | /* | ||
2 | * APIC driver for the IBM NUMAQ chipset. | ||
3 | */ | ||
4 | #define APIC_DEFINITION 1 | ||
5 | #include <linux/threads.h> | ||
6 | #include <linux/cpumask.h> | ||
7 | #include <linux/smp.h> | ||
8 | #include <asm/mpspec.h> | ||
9 | #include <asm/genapic.h> | ||
10 | #include <asm/fixmap.h> | ||
11 | #include <asm/apicdef.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/string.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <asm/mach-numaq/mach_apic.h> | ||
16 | #include <asm/mach-numaq/mach_apicdef.h> | ||
17 | #include <asm/mach-numaq/mach_ipi.h> | ||
18 | #include <asm/mach-numaq/mach_mpparse.h> | ||
19 | #include <asm/mach-numaq/mach_wakecpu.h> | ||
20 | #include <asm/numaq.h> | ||
21 | |||
22 | static int mps_oem_check(struct mp_config_table *mpc, char *oem, | ||
23 | char *productid) | ||
24 | { | ||
25 | numaq_mps_oem_check(mpc, oem, productid); | ||
26 | return found_numaq; | ||
27 | } | ||
28 | |||
29 | static int probe_numaq(void) | ||
30 | { | ||
31 | /* already know from get_memcfg_numaq() */ | ||
32 | return found_numaq; | ||
33 | } | ||
34 | |||
35 | /* Hook from generic ACPI tables.c */ | ||
36 | static int acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
37 | { | ||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); | ||
diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c index c5ae751b994a..ba18dec48555 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/mach-generic/probe.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <asm/apicdef.h> | 16 | #include <asm/apicdef.h> |
17 | #include <asm/genapic.h> | 17 | #include <asm/genapic.h> |
18 | 18 | ||
19 | extern struct genapic apic_numaq; | ||
19 | extern struct genapic apic_summit; | 20 | extern struct genapic apic_summit; |
20 | extern struct genapic apic_bigsmp; | 21 | extern struct genapic apic_bigsmp; |
21 | extern struct genapic apic_es7000; | 22 | extern struct genapic apic_es7000; |
@@ -24,9 +25,18 @@ extern struct genapic apic_default; | |||
24 | struct genapic *genapic = &apic_default; | 25 | struct genapic *genapic = &apic_default; |
25 | 26 | ||
26 | static struct genapic *apic_probe[] __initdata = { | 27 | static struct genapic *apic_probe[] __initdata = { |
28 | #ifdef CONFIG_X86_NUMAQ | ||
29 | &apic_numaq, | ||
30 | #endif | ||
31 | #ifdef CONFIG_X86_SUMMIT | ||
27 | &apic_summit, | 32 | &apic_summit, |
33 | #endif | ||
34 | #ifdef CONFIG_X86_BIGSMP | ||
28 | &apic_bigsmp, | 35 | &apic_bigsmp, |
36 | #endif | ||
37 | #ifdef CONFIG_X86_ES7000 | ||
29 | &apic_es7000, | 38 | &apic_es7000, |
39 | #endif | ||
30 | &apic_default, /* must be last */ | 40 | &apic_default, /* must be last */ |
31 | NULL, | 41 | NULL, |
32 | }; | 42 | }; |
@@ -54,6 +64,7 @@ early_param("apic", parse_apic); | |||
54 | 64 | ||
55 | void __init generic_bigsmp_probe(void) | 65 | void __init generic_bigsmp_probe(void) |
56 | { | 66 | { |
67 | #ifdef CONFIG_X86_BIGSMP | ||
57 | /* | 68 | /* |
58 | * This routine is used to switch to bigsmp mode when | 69 | * This routine is used to switch to bigsmp mode when |
59 | * - There is no apic= option specified by the user | 70 | * - There is no apic= option specified by the user |
@@ -67,6 +78,7 @@ void __init generic_bigsmp_probe(void) | |||
67 | printk(KERN_INFO "Overriding APIC driver with %s\n", | 78 | printk(KERN_INFO "Overriding APIC driver with %s\n", |
68 | genapic->name); | 79 | genapic->name); |
69 | } | 80 | } |
81 | #endif | ||
70 | } | 82 | } |
71 | 83 | ||
72 | void __init generic_apic_probe(void) | 84 | void __init generic_apic_probe(void) |
@@ -88,7 +100,8 @@ void __init generic_apic_probe(void) | |||
88 | 100 | ||
89 | /* These functions can switch the APIC even after the initial ->probe() */ | 101 | /* These functions can switch the APIC even after the initial ->probe() */ |
90 | 102 | ||
91 | int __init mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid) | 103 | int __init mps_oem_check(struct mp_config_table *mpc, char *oem, |
104 | char *productid) | ||
92 | { | 105 | { |
93 | int i; | 106 | int i; |
94 | for (i = 0; apic_probe[i]; ++i) { | 107 | for (i = 0; apic_probe[i]; ++i) { |
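
apic_probe[] is a first-match driver registry: NULL-terminated, walked in priority order, with &apic_default guaranteed to hit last. Now that each entry is guarded by its own CONFIG_X86_* option, a generic-arch kernel links only the probe code it was configured for. The pattern, reduced to a sketch:

    #include <stddef.h>

    struct apic_driver {
        const char *name;
        int (*probe)(void);     /* nonzero: this driver owns the machine */
    };

    static int always(void) { return 1; }
    static struct apic_driver drv_default = { "default", always };

    static struct apic_driver *drivers[] = {
        /* subarch entries would be #ifdef'ed in here, as above */
        &drv_default,           /* must be last */
        NULL,
    };

    static struct apic_driver *pick_apic_driver(void)
    {
        int i;

        for (i = 0; drivers[i]; i++)
            if (drivers[i]->probe())
                return drivers[i];
        return NULL;            /* unreachable while drv_default is present */
    }
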
diff --git a/arch/x86/mach-visws/mpparse.c b/arch/x86/mach-visws/mpparse.c index 57484e91ab90..a2fb78c0d154 100644 --- a/arch/x86/mach-visws/mpparse.c +++ b/arch/x86/mach-visws/mpparse.c | |||
@@ -8,11 +8,6 @@ | |||
8 | #include "cobalt.h" | 8 | #include "cobalt.h" |
9 | #include "mach_apic.h" | 9 | #include "mach_apic.h" |
10 | 10 | ||
11 | /* Have we found an MP table */ | ||
12 | int smp_found_config; | ||
13 | |||
14 | int pic_mode; | ||
15 | |||
16 | extern unsigned int __cpuinitdata maxcpus; | 11 | extern unsigned int __cpuinitdata maxcpus; |
17 | 12 | ||
18 | /* | 13 | /* |
@@ -76,7 +71,9 @@ void __init find_smp_config(void) | |||
76 | if (ncpus > maxcpus) | 71 | if (ncpus > maxcpus) |
77 | ncpus = maxcpus; | 72 | ncpus = maxcpus; |
78 | 73 | ||
74 | #ifdef CONFIG_X86_LOCAL_APIC | ||
79 | smp_found_config = 1; | 75 | smp_found_config = 1; |
76 | #endif | ||
80 | while (ncpus--) | 77 | while (ncpus--) |
81 | MP_processor_info(mp++); | 78 | MP_processor_info(mp++); |
82 | 79 | ||
diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c index 5ae5466b9eb9..f4aca9fa9546 100644 --- a/arch/x86/mach-voyager/setup.c +++ b/arch/x86/mach-voyager/setup.c | |||
@@ -62,6 +62,7 @@ void __init time_init_hook(void) | |||
62 | char *__init machine_specific_memory_setup(void) | 62 | char *__init machine_specific_memory_setup(void) |
63 | { | 63 | { |
64 | char *who; | 64 | char *who; |
65 | int new_nr; | ||
65 | 66 | ||
66 | who = "NOT VOYAGER"; | 67 | who = "NOT VOYAGER"; |
67 | 68 | ||
@@ -111,7 +112,11 @@ char *__init machine_specific_memory_setup(void) | |||
111 | * Otherwise fake a memory map; one section from 0k->640k, | 112 | * Otherwise fake a memory map; one section from 0k->640k, |
112 | * the next section from 1mb->appropriate_mem_k | 113 | * the next section from 1mb->appropriate_mem_k |
113 | */ | 114 | */ |
114 | sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); | 115 | new_nr = boot_params.e820_entries; |
116 | sanitize_e820_map(boot_params.e820_map, | ||
117 | ARRAY_SIZE(boot_params.e820_map), | ||
118 | &new_nr); | ||
119 | boot_params.e820_entries = new_nr; | ||
115 | if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) | 120 | if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) |
116 | < 0) { | 121 | < 0) { |
117 | unsigned long mem_size; | 122 | unsigned long mem_size; |
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 8acbf0cdf1a5..8dedd01e909f 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c | |||
@@ -59,11 +59,6 @@ __u32 voyager_quad_processors = 0; | |||
59 | * activity count. Finally exported by i386_ksyms.c */ | 59 | * activity count. Finally exported by i386_ksyms.c */ |
60 | static int voyager_extended_cpus = 1; | 60 | static int voyager_extended_cpus = 1; |
61 | 61 | ||
62 | /* Have we found an SMP box - used by time.c to do the profiling | ||
63 | interrupt for timeslicing; do not set to 1 until the per CPU timer | ||
64 | interrupt is active */ | ||
65 | int smp_found_config = 0; | ||
66 | |||
67 | /* Used for the invalidate map that's also checked in the spinlock */ | 62 | /* Used for the invalidate map that's also checked in the spinlock */ |
68 | static volatile unsigned long smp_invalidate_needed; | 63 | static volatile unsigned long smp_invalidate_needed; |
69 | 64 | ||
@@ -1137,15 +1132,6 @@ void flush_tlb_all(void) | |||
1137 | on_each_cpu(do_flush_tlb_all, 0, 1, 1); | 1132 | on_each_cpu(do_flush_tlb_all, 0, 1, 1); |
1138 | } | 1133 | } |
1139 | 1134 | ||
1140 | /* used to set up the trampoline for other CPUs when the memory manager | ||
1141 | * is sorted out */ | ||
1142 | void __init smp_alloc_memory(void) | ||
1143 | { | ||
1144 | trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE); | ||
1145 | if (__pa(trampoline_base) >= 0x93000) | ||
1146 | BUG(); | ||
1147 | } | ||
1148 | |||
1149 | /* send a reschedule CPI to one CPU by physical CPU number*/ | 1135 | /* send a reschedule CPI to one CPU by physical CPU number*/ |
1150 | static void voyager_smp_send_reschedule(int cpu) | 1136 | static void voyager_smp_send_reschedule(int cpu) |
1151 | { | 1137 | { |
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 914ccf983687..accc7c6c57fc 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <asm/setup.h> | 38 | #include <asm/setup.h> |
39 | #include <asm/mmzone.h> | 39 | #include <asm/mmzone.h> |
40 | #include <asm/bios_ebda.h> | 40 | #include <asm/bios_ebda.h> |
41 | #include <asm/proto.h> | ||
41 | 42 | ||
42 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | 43 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; |
43 | EXPORT_SYMBOL(node_data); | 44 | EXPORT_SYMBOL(node_data); |
@@ -59,14 +60,14 @@ unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly; | |||
59 | /* | 60 | /* |
60 | * 4) physnode_map - the mapping between a pfn and owning node | 61 | * 4) physnode_map - the mapping between a pfn and owning node |
61 | * physnode_map keeps track of the physical memory layout of a generic | 62 | * physnode_map keeps track of the physical memory layout of a generic |
62 | * numa node on a 256Mb break (each element of the array will | 63 | * numa node on a 64Mb break (each element of the array will |
63 | * represent 256Mb of memory and will be marked by the node id. so, | 64 | * represent 64Mb of memory and will be marked by the node id. so, |
64 | * if the first gig is on node 0, and the second gig is on node 1 | 65 | * if the first gig is on node 0, and the second gig is on node 1 |
65 | * physnode_map will contain: | 66 | * physnode_map will contain: |
66 | * | 67 | * |
67 | * physnode_map[0-3] = 0; | 68 | * physnode_map[0-15] = 0; |
68 | * physnode_map[4-7] = 1; | 69 | * physnode_map[16-31] = 1; |
69 | * physnode_map[8- ] = -1; | 70 | * physnode_map[32- ] = -1; |
70 | */ | 71 | */ |
71 | s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; | 72 | s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; |
72 | EXPORT_SYMBOL(physnode_map); | 73 | EXPORT_SYMBOL(physnode_map); |
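
The corrected comment reflects physnode_map's 64MB granularity: each s8 element covers 64MB, i.e. 16384 4K pages, so one gigabyte spans 16 entries (hence physnode_map[0-15] = 0 in the example). The pfn-to-node lookup the table exists for is then a single divide. A standalone sketch, with MAX_ELEMENTS sized for 4GB of address space:

    #define SKETCH_PAGE_SHIFT  12
    #define ELEMENT_SHIFT      (26 - SKETCH_PAGE_SHIFT)   /* 64MB per element */
    #define PAGES_PER_ELEMENT  (1UL << ELEMENT_SHIFT)     /* 16384 pages */
    #define MAX_ELEMENTS       64                         /* 4GB / 64MB */

    static signed char physnode_map[MAX_ELEMENTS];        /* -1 = no node */

    static int pfn_to_nid_sketch(unsigned long pfn)
    {
        return physnode_map[pfn / PAGES_PER_ELEMENT];
    }
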
@@ -81,9 +82,9 @@ void memory_present(int nid, unsigned long start, unsigned long end) | |||
81 | printk(KERN_DEBUG " "); | 82 | printk(KERN_DEBUG " "); |
82 | for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) { | 83 | for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) { |
83 | physnode_map[pfn / PAGES_PER_ELEMENT] = nid; | 84 | physnode_map[pfn / PAGES_PER_ELEMENT] = nid; |
84 | printk("%ld ", pfn); | 85 | printk(KERN_CONT "%ld ", pfn); |
85 | } | 86 | } |
86 | printk("\n"); | 87 | printk(KERN_CONT "\n"); |
87 | } | 88 | } |
88 | 89 | ||
89 | unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, | 90 | unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, |
@@ -119,11 +120,11 @@ int __init get_memcfg_numa_flat(void) | |||
119 | { | 120 | { |
120 | printk("NUMA - single node, flat memory mode\n"); | 121 | printk("NUMA - single node, flat memory mode\n"); |
121 | 122 | ||
122 | /* Run the memory configuration and find the top of memory. */ | ||
123 | propagate_e820_map(); | ||
124 | node_start_pfn[0] = 0; | 123 | node_start_pfn[0] = 0; |
125 | node_end_pfn[0] = max_pfn; | 124 | node_end_pfn[0] = max_pfn; |
125 | e820_register_active_regions(0, 0, max_pfn); | ||
126 | memory_present(0, 0, max_pfn); | 126 | memory_present(0, 0, max_pfn); |
127 | node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn); | ||
127 | 128 | ||
128 | /* Indicate there is one node available. */ | 129 | /* Indicate there is one node available. */ |
129 | nodes_clear(node_online_map); | 130 | nodes_clear(node_online_map); |
@@ -159,9 +160,17 @@ static void __init allocate_pgdat(int nid) | |||
159 | if (nid && node_has_online_mem(nid)) | 160 | if (nid && node_has_online_mem(nid)) |
160 | NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; | 161 | NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; |
161 | else { | 162 | else { |
162 | NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn)); | 163 | unsigned long pgdat_phys; |
163 | min_low_pfn += PFN_UP(sizeof(pg_data_t)); | 164 | pgdat_phys = find_e820_area(min_low_pfn<<PAGE_SHIFT, |
165 | (nid ? max_low_pfn:max_pfn_mapped)<<PAGE_SHIFT, | ||
166 | sizeof(pg_data_t), | ||
167 | PAGE_SIZE); | ||
168 | NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT)); | ||
169 | reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t), | ||
170 | "NODE_DATA"); | ||
164 | } | 171 | } |
172 | printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n", | ||
173 | nid, (unsigned long)NODE_DATA(nid)); | ||
165 | } | 174 | } |
166 | 175 | ||
167 | #ifdef CONFIG_DISCONTIGMEM | 176 | #ifdef CONFIG_DISCONTIGMEM |
@@ -202,8 +211,12 @@ void __init remap_numa_kva(void) | |||
202 | int node; | 211 | int node; |
203 | 212 | ||
204 | for_each_online_node(node) { | 213 | for_each_online_node(node) { |
214 | printk(KERN_DEBUG "remap_numa_kva: node %d\n", node); | ||
205 | for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { | 215 | for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { |
206 | vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); | 216 | vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); |
217 | printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n", | ||
218 | (unsigned long)vaddr, | ||
219 | node_remap_start_pfn[node] + pfn); | ||
207 | set_pmd_pfn((ulong) vaddr, | 220 | set_pmd_pfn((ulong) vaddr, |
208 | node_remap_start_pfn[node] + pfn, | 221 | node_remap_start_pfn[node] + pfn, |
209 | PAGE_KERNEL_LARGE); | 222 | PAGE_KERNEL_LARGE); |
@@ -215,17 +228,21 @@ static unsigned long calculate_numa_remap_pages(void) | |||
215 | { | 228 | { |
216 | int nid; | 229 | int nid; |
217 | unsigned long size, reserve_pages = 0; | 230 | unsigned long size, reserve_pages = 0; |
218 | unsigned long pfn; | ||
219 | 231 | ||
220 | for_each_online_node(nid) { | 232 | for_each_online_node(nid) { |
221 | unsigned old_end_pfn = node_end_pfn[nid]; | 233 | u64 node_end_target; |
234 | u64 node_end_final; | ||
222 | 235 | ||
223 | /* | 236 | /* |
224 | * The acpi/srat node info can show hot-add memory zones | 237 | ||
225 | * where memory could be added but not currently present. | 238 | * where memory could be added but not currently present. |
226 | */ | 239 | */ |
240 | printk("node %d pfn: [%lx - %lx]\n", | ||
241 | nid, node_start_pfn[nid], node_end_pfn[nid]); | ||
227 | if (node_start_pfn[nid] > max_pfn) | 242 | if (node_start_pfn[nid] > max_pfn) |
228 | continue; | 243 | continue; |
244 | if (!node_end_pfn[nid]) | ||
245 | continue; | ||
229 | if (node_end_pfn[nid] > max_pfn) | 246 | if (node_end_pfn[nid] > max_pfn) |
230 | node_end_pfn[nid] = max_pfn; | 247 | node_end_pfn[nid] = max_pfn; |
231 | 248 | ||
@@ -237,39 +254,42 @@ static unsigned long calculate_numa_remap_pages(void) | |||
237 | /* now the roundup is correct, convert to PAGE_SIZE pages */ | 254 | /* now the roundup is correct, convert to PAGE_SIZE pages */ |
238 | size = size * PTRS_PER_PTE; | 255 | size = size * PTRS_PER_PTE; |
239 | 256 | ||
240 | /* | 257 | node_end_target = round_down(node_end_pfn[nid] - size, |
241 | * Validate the region we are allocating only contains valid | 258 | PTRS_PER_PTE); |
242 | * pages. | 259 | node_end_target <<= PAGE_SHIFT; |
243 | */ | 260 | do { |
244 | for (pfn = node_end_pfn[nid] - size; | 261 | node_end_final = find_e820_area(node_end_target, |
245 | pfn < node_end_pfn[nid]; pfn++) | 262 | ((u64)node_end_pfn[nid])<<PAGE_SHIFT, |
246 | if (!page_is_ram(pfn)) | 263 | ((u64)size)<<PAGE_SHIFT, |
247 | break; | 264 | LARGE_PAGE_BYTES); |
248 | 265 | node_end_target -= LARGE_PAGE_BYTES; | |
249 | if (pfn != node_end_pfn[nid]) | 266 | } while (node_end_final == -1ULL && |
250 | size = 0; | 267 | (node_end_target>>PAGE_SHIFT) > (node_start_pfn[nid])); |
268 | |||
269 | if (node_end_final == -1ULL) | ||
270 | panic("Can not get kva ram\n"); | ||
251 | 271 | ||
252 | printk("Reserving %ld pages of KVA for lmem_map of node %d\n", | 272 | printk("Reserving %ld pages of KVA for lmem_map of node %d\n", |
253 | size, nid); | 273 | size, nid); |
254 | node_remap_size[nid] = size; | 274 | node_remap_size[nid] = size; |
255 | node_remap_offset[nid] = reserve_pages; | 275 | node_remap_offset[nid] = reserve_pages; |
256 | reserve_pages += size; | 276 | reserve_pages += size; |
257 | printk("Shrinking node %d from %ld pages to %ld pages\n", | 277 | printk("Shrinking node %d from %ld pages to %lld pages\n", |
258 | nid, node_end_pfn[nid], node_end_pfn[nid] - size); | 278 | nid, node_end_pfn[nid], node_end_final>>PAGE_SHIFT); |
259 | |||
260 | if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) { | ||
261 | /* | ||
262 | * Align node_end_pfn[] and node_remap_start_pfn[] to | ||
263 | * pmd boundary. remap_numa_kva will barf otherwise. | ||
264 | */ | ||
265 | printk("Shrinking node %d further by %ld pages for proper alignment\n", | ||
266 | nid, node_end_pfn[nid] & (PTRS_PER_PTE-1)); | ||
267 | size += node_end_pfn[nid] & (PTRS_PER_PTE-1); | ||
268 | } | ||
269 | 279 | ||
270 | node_end_pfn[nid] -= size; | 280 | /* |
281 | * Prevent the kva address from falling below max_low_pfn; we want | ||
282 | * it there on systems with less memory later. | ||
283 | * The layout will be: KVA address, then KVA RAM. | ||
284 | */ | ||
285 | if ((node_end_final>>PAGE_SHIFT) < max_low_pfn) | ||
286 | reserve_early(node_end_final, | ||
287 | node_end_final+(((u64)size)<<PAGE_SHIFT), | ||
288 | "KVA RAM"); | ||
289 | |||
290 | node_end_pfn[nid] = node_end_final>>PAGE_SHIFT; | ||
271 | node_remap_start_pfn[nid] = node_end_pfn[nid]; | 291 | node_remap_start_pfn[nid] = node_end_pfn[nid]; |
272 | shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]); | 292 | shrink_active_range(nid, node_end_pfn[nid]); |
273 | } | 293 | } |
274 | printk("Reserving total of %ld pages for numa KVA remap\n", | 294 | printk("Reserving total of %ld pages for numa KVA remap\n", |
275 | reserve_pages); | 295 | reserve_pages); |
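
calculate_numa_remap_pages() now asks the e820 map for a genuinely free block instead of assuming the tail of each node is usable RAM: it starts at the highest large-page-aligned candidate below node_end_pfn and slides the window down until find_e820_area() succeeds, panicking only if it reaches the node's start. The search loop, reduced to a sketch (find_free_area() stands in for find_e820_area() and is assumed to return -1ULL on failure):

    #define LARGE_PAGE_BYTES (1ULL << 22)   /* 4MB: one PMD on 32-bit non-PAE */

    unsigned long long find_free_area(unsigned long long start,
                                      unsigned long long end,
                                      unsigned long long size,
                                      unsigned long long align);  /* assumed */

    static unsigned long long search_down(unsigned long long floor,
                                          unsigned long long ceil,
                                          unsigned long long size)
    {
        unsigned long long target = (ceil - size) & ~(LARGE_PAGE_BYTES - 1);
        unsigned long long found;

        do {
            found = find_free_area(target, ceil, size, LARGE_PAGE_BYTES);
            target -= LARGE_PAGE_BYTES;     /* slide the window down */
        } while (found == -1ULL && target > floor);

        return found;                       /* -1ULL: nothing fit above floor */
    }

setup_memory() below applies the same slide-down loop to place the KVA remap area, then pins both regions with reserve_early() so later allocations, the initrd included, cannot land on top of them.
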
@@ -287,8 +307,7 @@ static void init_remap_allocator(int nid) | |||
287 | 307 | ||
288 | printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, | 308 | printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, |
289 | (ulong) node_remap_start_vaddr[nid], | 309 | (ulong) node_remap_start_vaddr[nid], |
290 | (ulong) pfn_to_kaddr(highstart_pfn | 310 | (ulong) node_remap_end_vaddr[nid]); |
291 | + node_remap_offset[nid] + node_remap_size[nid])); | ||
292 | } | 311 | } |
293 | #else | 312 | #else |
294 | void *alloc_remap(int nid, unsigned long size) | 313 | void *alloc_remap(int nid, unsigned long size) |
@@ -315,7 +334,7 @@ unsigned long __init setup_memory(void) | |||
315 | { | 334 | { |
316 | int nid; | 335 | int nid; |
317 | unsigned long system_start_pfn, system_max_low_pfn; | 336 | unsigned long system_start_pfn, system_max_low_pfn; |
318 | unsigned long wasted_pages; | 337 | long kva_target_pfn; |
319 | 338 | ||
320 | /* | 339 | /* |
321 | * When mapping a NUMA machine we allocate the node_mem_map arrays | 340 | * When mapping a NUMA machine we allocate the node_mem_map arrays |
@@ -324,34 +343,38 @@ unsigned long __init setup_memory(void) | |||
324 | * this space and use it to adjust the boundary between ZONE_NORMAL | 343 | * this space and use it to adjust the boundary between ZONE_NORMAL |
325 | * and ZONE_HIGHMEM. | 344 | * and ZONE_HIGHMEM. |
326 | */ | 345 | */ |
346 | |||
347 | /* call find_max_low_pfn at first, it could update max_pfn */ | ||
348 | system_max_low_pfn = max_low_pfn = find_max_low_pfn(); | ||
349 | |||
350 | remove_all_active_ranges(); | ||
327 | get_memcfg_numa(); | 351 | get_memcfg_numa(); |
328 | 352 | ||
329 | kva_pages = calculate_numa_remap_pages(); | 353 | kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE); |
330 | 354 | ||
331 | /* partially used pages are not usable - thus round upwards */ | 355 | /* partially used pages are not usable - thus round upwards */ |
332 | system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); | 356 | system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); |
333 | 357 | ||
334 | kva_start_pfn = find_max_low_pfn() - kva_pages; | 358 | kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE); |
359 | do { | ||
360 | kva_start_pfn = find_e820_area(kva_target_pfn<<PAGE_SHIFT, | ||
361 | max_low_pfn<<PAGE_SHIFT, | ||
362 | kva_pages<<PAGE_SHIFT, | ||
363 | PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT; | ||
364 | kva_target_pfn -= PTRS_PER_PTE; | ||
365 | } while (kva_start_pfn == -1UL && kva_target_pfn > min_low_pfn); | ||
335 | 366 | ||
336 | #ifdef CONFIG_BLK_DEV_INITRD | 367 | if (kva_start_pfn == -1UL) |
337 | /* Numa kva area is below the initrd */ | 368 | panic("Can not get kva space\n"); |
338 | if (initrd_start) | ||
339 | kva_start_pfn = PFN_DOWN(initrd_start - PAGE_OFFSET) | ||
340 | - kva_pages; | ||
341 | #endif | ||
342 | 369 | ||
343 | /* | ||
344 | * We waste pages past at the end of the KVA for no good reason other | ||
345 | * than how it is located. This is bad. | ||
346 | */ | ||
347 | wasted_pages = kva_start_pfn & (PTRS_PER_PTE-1); | ||
348 | kva_start_pfn -= wasted_pages; | ||
349 | kva_pages += wasted_pages; | ||
350 | |||
351 | system_max_low_pfn = max_low_pfn = find_max_low_pfn(); | ||
352 | printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n", | 370 | printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n", |
353 | kva_start_pfn, max_low_pfn); | 371 | kva_start_pfn, max_low_pfn); |
354 | printk("max_pfn = %ld\n", max_pfn); | 372 | printk("max_pfn = %ld\n", max_pfn); |
373 | |||
374 | /* avoid clash with initrd */ | ||
375 | reserve_early(kva_start_pfn<<PAGE_SHIFT, | ||
376 | (kva_start_pfn + kva_pages)<<PAGE_SHIFT, | ||
377 | "KVA PG"); | ||
355 | #ifdef CONFIG_HIGHMEM | 378 | #ifdef CONFIG_HIGHMEM |
356 | highstart_pfn = highend_pfn = max_pfn; | 379 | highstart_pfn = highend_pfn = max_pfn; |
357 | if (max_pfn > system_max_low_pfn) | 380 | if (max_pfn > system_max_low_pfn) |
@@ -387,16 +410,8 @@ unsigned long __init setup_memory(void) | |||
387 | return max_low_pfn; | 410 | return max_low_pfn; |
388 | } | 411 | } |
389 | 412 | ||
390 | void __init numa_kva_reserve(void) | ||
391 | { | ||
392 | if (kva_pages) | ||
393 | reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages), | ||
394 | BOOTMEM_DEFAULT); | ||
395 | } | ||
396 | |||
397 | void __init zone_sizes_init(void) | 413 | void __init zone_sizes_init(void) |
398 | { | 414 | { |
399 | int nid; | ||
400 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | 415 | unsigned long max_zone_pfns[MAX_NR_ZONES]; |
401 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | 416 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); |
402 | max_zone_pfns[ZONE_DMA] = | 417 | max_zone_pfns[ZONE_DMA] = |
@@ -406,15 +421,6 @@ void __init zone_sizes_init(void) | |||
406 | max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; | 421 | max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; |
407 | #endif | 422 | #endif |
408 | 423 | ||
409 | /* If SRAT has not registered memory, register it now */ | ||
410 | if (find_max_pfn_with_active_regions() == 0) { | ||
411 | for_each_online_node(nid) { | ||
412 | if (node_has_online_mem(nid)) | ||
413 | add_active_range(nid, node_start_pfn[nid], | ||
414 | node_end_pfn[nid]); | ||
415 | } | ||
416 | } | ||
417 | |||
418 | free_area_init_nodes(max_zone_pfns); | 424 | free_area_init_nodes(max_zone_pfns); |
419 | return; | 425 | return; |
420 | } | 426 | } |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ec30d10154b6..0e7bb5e81670 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -289,7 +289,8 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) | |||
289 | 289 | ||
290 | void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) | 290 | void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) |
291 | { | 291 | { |
292 | if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) { | 292 | if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn)) && |
293 | !page_is_reserved_early(pfn)) { | ||
293 | ClearPageReserved(page); | 294 | ClearPageReserved(page); |
294 | init_page_count(page); | 295 | init_page_count(page); |
295 | __free_page(page); | 296 | __free_page(page); |
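
add_one_highpage_init() gains a third gate: a highmem page reaches the allocator only if no reserve_early() region covers it, which keeps the new "KVA RAM" and NODE_DATA reservations from discontig_32.c out of the free lists. The combined predicate, as a sketch (the three page_* checks are the kernel's, declared here as assumptions):

    int page_is_ram(unsigned long pfn);              /* assumed kernel */
    int page_kills_ppro(unsigned long pfn);          /* predicates */
    int page_is_reserved_early(unsigned long pfn);

    static int highpage_usable(unsigned long pfn, int bad_ppro)
    {
        return page_is_ram(pfn) &&
               !(bad_ppro && page_kills_ppro(pfn)) &&
               !page_is_reserved_early(pfn);
    }
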
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index c5066d519e5d..afb07ffb931d 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -233,7 +233,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
233 | else | 233 | else |
234 | bootmap_start = round_up(start, PAGE_SIZE); | 234 | bootmap_start = round_up(start, PAGE_SIZE); |
235 | /* | 235 | /* |
236 | * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like | 236 | * SMP_CACHE_BYTES could be enough, but init_bootmem_node like |
237 | * to use that to align to PAGE_SIZE | 237 | * to use that to align to PAGE_SIZE |
238 | */ | 238 | */ |
239 | bootmap = early_node_mem(nodeid, bootmap_start, end, | 239 | bootmap = early_node_mem(nodeid, bootmap_start, end, |
diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32 index 89ec35d00efd..962d96c0495a 100644 --- a/arch/x86/pci/Makefile_32 +++ b/arch/x86/pci/Makefile_32 | |||
@@ -13,10 +13,11 @@ pci-y := fixup.o | |||
13 | pci-$(CONFIG_ACPI) += acpi.o | 13 | pci-$(CONFIG_ACPI) += acpi.o |
14 | pci-y += legacy.o irq.o | 14 | pci-y += legacy.o irq.o |
15 | 15 | ||
16 | # Careful: VISWS and NUMAQ overrule the pci-y above. The colons are | 16 | # Careful: VISWS overrules the pci-y above. The colons are |
17 | # therefor correct. This needs a proper fix by distangling the code. | 17 | # therefore correct. This needs a proper fix by disentangling the code. |
18 | pci-$(CONFIG_X86_VISWS) := visws.o fixup.o | 18 | pci-$(CONFIG_X86_VISWS) := visws.o fixup.o |
19 | pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o | 19 | |
20 | pci-$(CONFIG_X86_NUMAQ) += numa.o | ||
20 | 21 | ||
21 | # Necessary for NUMAQ as well | 22 | # Necessary for NUMAQ as well |
22 | pci-$(CONFIG_NUMA) += mp_bus_to_node.o | 23 | pci-$(CONFIG_NUMA) += mp_bus_to_node.o |
diff --git a/arch/x86/pci/k8-bus_64.c b/arch/x86/pci/k8-bus_64.c index 5c2799c20e47..bfefdf0f40d4 100644 --- a/arch/x86/pci/k8-bus_64.c +++ b/arch/x86/pci/k8-bus_64.c | |||
@@ -384,7 +384,7 @@ static int __init early_fill_mp_bus_info(void) | |||
384 | /* need to take out [0, TOM) for RAM*/ | 384 | /* need to take out [0, TOM) for RAM*/ |
385 | address = MSR_K8_TOP_MEM1; | 385 | address = MSR_K8_TOP_MEM1; |
386 | rdmsrl(address, val); | 386 | rdmsrl(address, val); |
387 | end = (val & 0xffffff8000000ULL); | 387 | end = (val & 0xffffff800000ULL); |
388 | printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); | 388 | printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); |
389 | if (end < (1ULL<<32)) | 389 | if (end < (1ULL<<32)) |
390 | update_range(range, 0, end - 1); | 390 | update_range(range, 0, end - 1); |
@@ -478,7 +478,7 @@ static int __init early_fill_mp_bus_info(void) | |||
478 | /* TOP_MEM2 */ | 478 | /* TOP_MEM2 */ |
479 | address = MSR_K8_TOP_MEM2; | 479 | address = MSR_K8_TOP_MEM2; |
480 | rdmsrl(address, val); | 480 | rdmsrl(address, val); |
481 | end = (val & 0xffffff8000000ULL); | 481 | end = (val & 0xffffff800000ULL); |
482 | printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); | 482 | printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); |
483 | update_range(range, 1ULL<<32, end - 1); | 483 | update_range(range, 1ULL<<32, end - 1); |
484 | } | 484 | } |
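
The k8-bus_64.c fix drops a stray zero from the top-of-memory mask: 0xffffff8000000ULL is 0xffffff800000ULL shifted one hex digit up, so the old mask covered bits 51:27 instead of 47:23 and silently cleared address bits 26:23 of the MSR value. A quick standalone check of the two masks (the sample TOM value is illustrative, an 8MB-aligned address just above 16GB):

    #include <stdio.h>

    int main(void)
    {
        unsigned long long good = 0xffffff800000ULL;    /* bits 47:23 */
        unsigned long long bad  = 0xffffff8000000ULL;   /* bits 51:27 */
        unsigned long long tom  = 0x43f800000ULL;       /* sample TOM */

        printf("good: %016llx\n", tom & good);  /* keeps the full address */
        printf("bad:  %016llx\n", tom & bad);   /* drops bits 26:23 */
        return 0;
    }
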
diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c index d9afbae5092b..99f1ecd485b5 100644 --- a/arch/x86/pci/numa.c +++ b/arch/x86/pci/numa.c | |||
@@ -6,45 +6,21 @@ | |||
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/nodemask.h> | 7 | #include <linux/nodemask.h> |
8 | #include <mach_apic.h> | 8 | #include <mach_apic.h> |
9 | #include <asm/mpspec.h> | ||
9 | #include "pci.h" | 10 | #include "pci.h" |
10 | 11 | ||
11 | #define XQUAD_PORTIO_BASE 0xfe400000 | 12 | #define XQUAD_PORTIO_BASE 0xfe400000 |
12 | #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ | 13 | #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ |
13 | 14 | ||
14 | int mp_bus_id_to_node[MAX_MP_BUSSES]; | ||
15 | #define BUS2QUAD(global) (mp_bus_id_to_node[global]) | 15 | #define BUS2QUAD(global) (mp_bus_id_to_node[global]) |
16 | 16 | ||
17 | int mp_bus_id_to_local[MAX_MP_BUSSES]; | ||
18 | #define BUS2LOCAL(global) (mp_bus_id_to_local[global]) | 17 | #define BUS2LOCAL(global) (mp_bus_id_to_local[global]) |
19 | 18 | ||
20 | void mpc_oem_bus_info(struct mpc_config_bus *m, char *name, | ||
21 | struct mpc_config_translation *translation) | ||
22 | { | ||
23 | int quad = translation->trans_quad; | ||
24 | int local = translation->trans_local; | ||
25 | |||
26 | mp_bus_id_to_node[m->mpc_busid] = quad; | ||
27 | mp_bus_id_to_local[m->mpc_busid] = local; | ||
28 | printk(KERN_INFO "Bus #%d is %s (node %d)\n", | ||
29 | m->mpc_busid, name, quad); | ||
30 | } | ||
31 | |||
32 | int quad_local_to_mp_bus_id [NR_CPUS/4][4]; | ||
33 | #define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) | 19 | #define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) |
34 | void mpc_oem_pci_bus(struct mpc_config_bus *m, | ||
35 | struct mpc_config_translation *translation) | ||
36 | { | ||
37 | int quad = translation->trans_quad; | ||
38 | int local = translation->trans_local; | ||
39 | |||
40 | quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; | ||
41 | } | ||
42 | 20 | ||
43 | /* Where the IO area was mapped on multiquad, always 0 otherwise */ | 21 | /* Where the IO area was mapped on multiquad, always 0 otherwise */ |
44 | void *xquad_portio; | 22 | void *xquad_portio; |
45 | #ifdef CONFIG_X86_NUMAQ | ||
46 | EXPORT_SYMBOL(xquad_portio); | 23 | EXPORT_SYMBOL(xquad_portio); |
47 | #endif | ||
48 | 24 | ||
49 | #define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) | 25 | #define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) |
50 | 26 | ||
@@ -179,6 +155,9 @@ static int __init pci_numa_init(void) | |||
179 | { | 155 | { |
180 | int quad; | 156 | int quad; |
181 | 157 | ||
158 | if (!found_numaq) | ||
159 | return 0; | ||
160 | |||
182 | raw_pci_ops = &pci_direct_conf1_mq; | 161 | raw_pci_ops = &pci_direct_conf1_mq; |
183 | 162 | ||
184 | if (pcibios_scanned++) | 163 | if (pcibios_scanned++) |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index f09c1c69c37a..275163f81464 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -1196,6 +1196,7 @@ asmlinkage void __init xen_start_kernel(void) | |||
1196 | 1196 | ||
1197 | pgd = (pgd_t *)xen_start_info->pt_base; | 1197 | pgd = (pgd_t *)xen_start_info->pt_base; |
1198 | 1198 | ||
1199 | init_pg_tables_start = __pa(pgd); | ||
1199 | init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; | 1200 | init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; |
1200 | 1201 | ||
1201 | init_mm.pgd = pgd; /* use the Xen pagetables to start */ | 1202 | init_mm.pgd = pgd; /* use the Xen pagetables to start */ |
@@ -1236,5 +1237,5 @@ asmlinkage void __init xen_start_kernel(void) | |||
1236 | add_preferred_console("hvc", 0, NULL); | 1237 | add_preferred_console("hvc", 0, NULL); |
1237 | 1238 | ||
1238 | /* Start the world */ | 1239 | /* Start the world */ |
1239 | start_kernel(); | 1240 | i386_start_kernel(); |
1240 | } | 1241 | } |
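
The lguest and Xen hunks are two instances of one interface change: a paravirtualized 32-bit guest now records where its boot pagetables start as well as end (init_pg_tables_start/init_pg_tables_end) and enters through i386_start_kernel(), so the early reservation keyed off those globals runs before memory setup. A sketch of what that early code can do once both ends are known (reserve_early() and the two globals are the kernel's; the helper, its label string and the unsigned long types are illustrative):

    unsigned long init_pg_tables_start;     /* set by the guest's boot path */
    unsigned long init_pg_tables_end;

    void reserve_early(unsigned long start, unsigned long end,
                       char *name);         /* assumed */

    static void reserve_boot_pagetables(void)
    {
        /* lguest sets both to __pa(pg0), an empty range; Xen spans its
         * nr_pt_frames of pagetable memory. Either way the range can
         * no longer be clobbered before paging_init(). */
        if (init_pg_tables_end > init_pg_tables_start)
            reserve_early(init_pg_tables_start, init_pg_tables_end,
                          "BOOT PG TABLES");
    }
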