 36 files changed, 1568 insertions(+), 727 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0896008f7509..57ccdcec1469 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -184,6 +184,9 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING
 config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	def_bool y
 
+config HAVE_EARLY_RES
+	def_bool y
+
 config HAVE_INTEL_TXT
 	def_bool y
 	depends on EXPERIMENTAL && DMAR && ACPI
@@ -569,6 +572,18 @@ config PARAVIRT_DEBUG
 	  Enable to debug paravirt_ops internals.  Specifically, BUG if
 	  a paravirt_op is missing when it is called.
 
+config NO_BOOTMEM
+	default y
+	bool "Disable Bootmem code"
+	---help---
+	  Use early_res directly instead of bootmem before slab is ready.
+		- allocator (buddy) [generic]
+		- early allocator (bootmem) [generic]
+		- very early allocator (reserve_early*()) [x86]
+		- very very early allocator (early brk model) [x86]
+	  This removes one layer between the early allocator and the
+	  final allocator.
+
 config MEMTEST
 	bool "Memtest"
 	---help---
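
The help text above is the thesis of the whole series: with NO_BOOTMEM=y the bootmem bitmap layer drops out and boot-time reservations go straight to the buddy allocator. A sketch of the two paths, pieced together from this help text and from later hunks in this diff (the early_res_to_bootmem() calls removed from setup.c and init_64.c, and the free_all_memory_core_early() call added in numa_64.c); treat it as a summary, not code from the patch:

	/*
	 * NO_BOOTMEM=n: reserve_early() ranges are replayed into the
	 *               bootmem bitmap via early_res_to_bootmem(), and
	 *               bootmem later releases free pages to the buddy
	 *               allocator.
	 *
	 * NO_BOOTMEM=y: reservations stay in the early_res table only;
	 *               free_all_memory_core_early() hands everything
	 *               unreserved directly to the buddy allocator.
	 */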
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 761249e396fe..0e22296790d3 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -111,11 +111,8 @@ extern unsigned long end_user_pfn;
 
 extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
 extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
-extern void reserve_early(u64 start, u64 end, char *name);
-extern void reserve_early_overlap_ok(u64 start, u64 end, char *name);
-extern void free_early(u64 start, u64 end);
-extern void early_res_to_bootmem(u64 start, u64 end);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
+#include <linux/early_res.h>
 
 extern unsigned long e820_end_of_ram_pfn(void);
 extern unsigned long e820_end_of_low_ram_pfn(void);
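
The four declarations deleted above move into the new <linux/early_res.h>. That header is among the 36 files changed but does not appear in this excerpt; a sketch of its assumed surface, reconstructed from the declarations removed here and the call sites visible in the rest of the diff (find_early_area() in e820.c, free_early_partial() in setup_percpu.c, free_all_memory_core_early() in numa_64.c):

	/* Assumed shape of <linux/early_res.h>; a reconstruction from the
	 * callers in this diff, not the actual header. */
	extern void reserve_early(u64 start, u64 end, char *name);
	extern void reserve_early_overlap_ok(u64 start, u64 end, char *name);
	extern void free_early(u64 start, u64 end);
	extern void free_early_partial(u64 start, u64 end);
	extern void early_res_to_bootmem(u64 start, u64 end);
	extern u64 find_early_area(u64 ei_start, u64 ei_last, u64 start,
				   u64 end, u64 size, u64 align);
	extern u64 find_early_area_size(u64 ei_start, u64 ei_last, u64 start,
					u64 *sizep, u64 align);
	extern unsigned long free_all_memory_core_early(int nodeid);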
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index ada8c201d513..b4a00dd4eed5 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -124,6 +124,8 @@ extern void pci_iommu_alloc(void);
 #include "pci_64.h"
 #endif
 
+void dma32_reserve_bootmem(void);
+
 /* implement the pci_ DMA API in terms of the generic device dma_ one */
 #include <asm-generic/pci-dma-compat.h>
 
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h
index ae5e40f67daf..fe15cfb21b9b 100644
--- a/arch/x86/include/asm/pci_64.h
+++ b/arch/x86/include/asm/pci_64.h
@@ -22,8 +22,6 @@ extern int (*pci_config_read)(int seg, int bus, int dev, int fn,
 extern int (*pci_config_write)(int seg, int bus, int dev, int fn,
 			       int reg, int len, u32 value);
 
-extern void dma32_reserve_bootmem(void);
-
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_PCI_64_H */
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 4009f6534f52..6f414ed88620 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -23,14 +23,4 @@ extern int reboot_force;
 
 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
 
-/*
- * This looks more complex than it should be. But we need to
- * get the type for the ~ right in round_down (it needs to be
- * as wide as the result!), and we want to evaluate the macro
- * arguments just once each.
- */
-#define __round_mask(x,y) ((__typeof__(x))((y)-1))
-#define round_up(x,y) ((((x)-1) | __round_mask(x,y))+1)
-#define round_down(x,y) ((x) & ~__round_mask(x,y))
-
 #endif /* _ASM_X86_PROTO_H */
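
The round_up()/round_down() macros leave this x86-private header so the generic early_res code can use them too; the receiving header is not part of this excerpt (presumably a shared one such as <linux/kernel.h>). A worked example of what the comment about getting "the type for the ~ right" protects against:

	/* Both macros require y to be a power of two:
	 *
	 *   round_up(0x12345, 0x1000)   == 0x13000
	 *   round_down(0x12345, 0x1000) == 0x12000
	 *
	 * __round_mask() casts the mask to __typeof__(x).  With a u64 x
	 * and an unsigned int y, an uncast ~((y)-1) would zero-extend to
	 * 0x00000000fffff000, so round_down() would also wipe the upper
	 * 32 bits of x; the cast yields the intended 0xfffffffffffff000.
	 */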
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 09b1698e0466..06130b52f012 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -22,10 +22,10 @@
 #include <linux/pci.h>
 #include <linux/smp.h>
 #include <linux/cpu.h>
-#include <linux/sort.h>
 #include <linux/mutex.h>
 #include <linux/uaccess.h>
 #include <linux/kvm_para.h>
+#include <linux/range.h>
 
 #include <asm/processor.h>
 #include <asm/e820.h>
@@ -34,11 +34,6 @@
 
 #include "mtrr.h"
 
-struct res_range {
-	unsigned long	start;
-	unsigned long	end;
-};
-
 struct var_mtrr_range_state {
 	unsigned long	base_pfn;
 	unsigned long	size_pfn;
@@ -56,7 +51,7 @@ struct var_mtrr_state {
 /* Should be related to MTRR_VAR_RANGES nums */
 #define RANGE_NUM				256
 
-static struct res_range __initdata		range[RANGE_NUM];
+static struct range __initdata			range[RANGE_NUM];
 static int __initdata				nr_range;
 
 static struct var_mtrr_range_state __initdata	range_state[RANGE_NUM];
@@ -64,152 +59,11 @@ static struct var_mtrr_range_state __initdata	range_state[RANGE_NUM];
 static int __initdata debug_print;
 #define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
 
-
-static int __init
-add_range(struct res_range *range, int nr_range,
-	  unsigned long start, unsigned long end)
-{
-	/* Out of slots: */
-	if (nr_range >= RANGE_NUM)
-		return nr_range;
-
-	range[nr_range].start = start;
-	range[nr_range].end = end;
-
-	nr_range++;
-
-	return nr_range;
-}
-
-static int __init
-add_range_with_merge(struct res_range *range, int nr_range,
-		     unsigned long start, unsigned long end)
-{
-	int i;
-
-	/* Try to merge it with old one: */
-	for (i = 0; i < nr_range; i++) {
-		unsigned long final_start, final_end;
-		unsigned long common_start, common_end;
-
-		if (!range[i].end)
-			continue;
-
-		common_start = max(range[i].start, start);
-		common_end = min(range[i].end, end);
-		if (common_start > common_end + 1)
-			continue;
-
-		final_start = min(range[i].start, start);
-		final_end = max(range[i].end, end);
-
-		range[i].start = final_start;
-		range[i].end = final_end;
-		return nr_range;
-	}
-
-	/* Need to add it: */
-	return add_range(range, nr_range, start, end);
-}
-
-static void __init
-subtract_range(struct res_range *range, unsigned long start, unsigned long end)
-{
-	int i, j;
-
-	for (j = 0; j < RANGE_NUM; j++) {
-		if (!range[j].end)
-			continue;
-
-		if (start <= range[j].start && end >= range[j].end) {
-			range[j].start = 0;
-			range[j].end = 0;
-			continue;
-		}
-
-		if (start <= range[j].start && end < range[j].end &&
-		    range[j].start < end + 1) {
-			range[j].start = end + 1;
-			continue;
-		}
-
-
-		if (start > range[j].start && end >= range[j].end &&
-		    range[j].end > start - 1) {
-			range[j].end = start - 1;
-			continue;
-		}
-
-		if (start > range[j].start && end < range[j].end) {
-			/* Find the new spare: */
-			for (i = 0; i < RANGE_NUM; i++) {
-				if (range[i].end == 0)
-					break;
-			}
-			if (i < RANGE_NUM) {
-				range[i].end = range[j].end;
-				range[i].start = end + 1;
-			} else {
-				printk(KERN_ERR "run of slot in ranges\n");
-			}
-			range[j].end = start - 1;
-			continue;
-		}
-	}
-}
-
-static int __init cmp_range(const void *x1, const void *x2)
-{
-	const struct res_range *r1 = x1;
-	const struct res_range *r2 = x2;
-	long start1, start2;
-
-	start1 = r1->start;
-	start2 = r2->start;
-
-	return start1 - start2;
-}
-
-static int __init clean_sort_range(struct res_range *range, int az)
-{
-	int i, j, k = az - 1, nr_range = 0;
-
-	for (i = 0; i < k; i++) {
-		if (range[i].end)
-			continue;
-		for (j = k; j > i; j--) {
-			if (range[j].end) {
-				k = j;
-				break;
-			}
-		}
-		if (j == i)
-			break;
-		range[i].start = range[k].start;
-		range[i].end = range[k].end;
-		range[k].start = 0;
-		range[k].end = 0;
-		k--;
-	}
-	/* count it */
-	for (i = 0; i < az; i++) {
-		if (!range[i].end) {
-			nr_range = i;
-			break;
-		}
-	}
-
-	/* sort them */
-	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
-
-	return nr_range;
-}
-
 #define BIOS_BUG_MSG KERN_WARNING \
 	"WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
 
 static int __init
-x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
+x86_get_mtrr_mem_range(struct range *range, int nr_range,
 		       unsigned long extra_remove_base,
 		       unsigned long extra_remove_size)
 {
@@ -223,14 +77,14 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
 			continue;
 		base = range_state[i].base_pfn;
 		size = range_state[i].size_pfn;
-		nr_range = add_range_with_merge(range, nr_range, base,
-						base + size - 1);
+		nr_range = add_range_with_merge(range, RANGE_NUM, nr_range,
+						base, base + size);
 	}
 	if (debug_print) {
 		printk(KERN_DEBUG "After WB checking\n");
 		for (i = 0; i < nr_range; i++)
-			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
-				range[i].start, range[i].end + 1);
+			printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+				range[i].start, range[i].end);
 	}
 
 	/* Take out UC ranges: */
@@ -252,19 +106,19 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
 			size -= (1<<(20-PAGE_SHIFT)) - base;
 			base = 1<<(20-PAGE_SHIFT);
 		}
-		subtract_range(range, base, base + size - 1);
+		subtract_range(range, RANGE_NUM, base, base + size);
 	}
 	if (extra_remove_size)
-		subtract_range(range, extra_remove_base,
-				extra_remove_base + extra_remove_size - 1);
+		subtract_range(range, RANGE_NUM, extra_remove_base,
+				 extra_remove_base + extra_remove_size);
 
 	if (debug_print) {
 		printk(KERN_DEBUG "After UC checking\n");
 		for (i = 0; i < RANGE_NUM; i++) {
 			if (!range[i].end)
 				continue;
-			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
-				range[i].start, range[i].end + 1);
+			printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+				range[i].start, range[i].end);
 		}
 	}
 
@@ -273,26 +127,22 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
 	if (debug_print) {
 		printk(KERN_DEBUG "After sorting\n");
 		for (i = 0; i < nr_range; i++)
-			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
-				range[i].start, range[i].end + 1);
+			printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+				range[i].start, range[i].end);
 	}
 
-	/* clear those is not used */
-	for (i = nr_range; i < RANGE_NUM; i++)
-		memset(&range[i], 0, sizeof(range[i]));
-
 	return nr_range;
 }
 
 #ifdef CONFIG_MTRR_SANITIZER
 
-static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
+static unsigned long __init sum_ranges(struct range *range, int nr_range)
 {
 	unsigned long sum = 0;
 	int i;
 
 	for (i = 0; i < nr_range; i++)
-		sum += range[i].end + 1 - range[i].start;
+		sum += range[i].end - range[i].start;
 
 	return sum;
 }
@@ -621,7 +471,7 @@ static int __init parse_mtrr_spare_reg(char *arg)
 early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
 
 static int __init
-x86_setup_var_mtrrs(struct res_range *range, int nr_range,
+x86_setup_var_mtrrs(struct range *range, int nr_range,
 		    u64 chunk_size, u64 gran_size)
 {
 	struct var_mtrr_state var_state;
@@ -639,7 +489,7 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range,
 	/* Write the range: */
 	for (i = 0; i < nr_range; i++) {
 		set_var_mtrr_range(&var_state, range[i].start,
-				   range[i].end - range[i].start + 1);
+				   range[i].end - range[i].start);
 	}
 
 	/* Write the last range: */
@@ -742,7 +592,7 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
 		      unsigned long x_remove_base,
 		      unsigned long x_remove_size, int i)
 {
-	static struct res_range range_new[RANGE_NUM];
+	static struct range range_new[RANGE_NUM];
 	unsigned long range_sums_new;
 	static int nr_range_new;
 	int num_reg;
@@ -869,10 +719,10 @@ int __init mtrr_cleanup(unsigned address_bits)
 	 * [0, 1M) should always be covered by var mtrr with WB
 	 * and fixed mtrrs should take effect before var mtrr for it:
 	 */
-	nr_range = add_range_with_merge(range, nr_range, 0,
-					(1ULL<<(20 - PAGE_SHIFT)) - 1);
+	nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0,
+					1ULL<<(20 - PAGE_SHIFT));
 	/* Sort the ranges: */
-	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+	sort_range(range, nr_range);
 
 	range_sums = sum_ranges(range, nr_range);
 	printk(KERN_INFO "total RAM covered: %ldM\n",
@@ -1089,9 +939,9 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 	nr_range = 0;
 	if (mtrr_tom2) {
 		range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
-		range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
-		if (highest_pfn < range[nr_range].end + 1)
-			highest_pfn = range[nr_range].end + 1;
+		range[nr_range].end = mtrr_tom2 >> PAGE_SHIFT;
+		if (highest_pfn < range[nr_range].end)
+			highest_pfn = range[nr_range].end;
 		nr_range++;
 	}
 	nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
@@ -1103,15 +953,15 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 
 	/* Check the holes: */
 	for (i = 0; i < nr_range - 1; i++) {
-		if (range[i].end + 1 < range[i+1].start)
-			total_trim_size += real_trim_memory(range[i].end + 1,
+		if (range[i].end < range[i+1].start)
+			total_trim_size += real_trim_memory(range[i].end,
 							    range[i+1].start);
 	}
 
 	/* Check the top: */
 	i = nr_range - 1;
-	if (range[i].end + 1 < end_pfn)
-		total_trim_size += real_trim_memory(range[i].end + 1,
+	if (range[i].end < end_pfn)
+		total_trim_size += real_trim_memory(range[i].end,
 						    end_pfn);
 
 	if (total_trim_size) {
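
A pattern worth noting across every hunk in this file: the private res_range kept an inclusive end, while struct range from <linux/range.h> treats end as exclusive, and the generic helpers take the array size as an explicit parameter rather than assuming RANGE_NUM. That is why all of the `+ 1`/`- 1` arithmetic disappears. Side by side (values illustrative):

	/* Old helpers: inclusive end, implicit bound, size needs the +1. */
	nr_range = add_range_with_merge(range, nr_range, 0, 0xff);
	sum += range[i].end + 1 - range[i].start;

	/* Generic helpers: exclusive end, explicit bound, plain difference. */
	nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0, 0x100);
	sum += range[i].end - range[i].start;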
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index a966b753e496..740b440fbd73 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -12,21 +12,13 @@
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/bootmem.h>
-#include <linux/ioport.h>
-#include <linux/string.h>
-#include <linux/kexec.h>
-#include <linux/module.h>
-#include <linux/mm.h>
 #include <linux/pfn.h>
 #include <linux/suspend.h>
 #include <linux/firmware-map.h>
 
-#include <asm/pgtable.h>
-#include <asm/page.h>
 #include <asm/e820.h>
 #include <asm/proto.h>
 #include <asm/setup.h>
-#include <asm/trampoline.h>
 
 /*
  * The e820 map is the map that gets modified e.g. with command line parameters
@@ -730,319 +722,44 @@ core_initcall(e820_mark_nvs_memory);
 #endif
 
 /*
- * Early reserved memory areas.
- */
-#define MAX_EARLY_RES 32
-
-struct early_res {
-	u64 start, end;
-	char name[16];
-	char overlap_ok;
-};
-static struct early_res early_res[MAX_EARLY_RES] __initdata = {
-	{ 0, PAGE_SIZE, "BIOS data page", 1 },	/* BIOS data page */
-#if defined(CONFIG_X86_32) && defined(CONFIG_X86_TRAMPOLINE)
-	/*
-	 * But first pinch a few for the stack/trampoline stuff
-	 * FIXME: Don't need the extra page at 4K, but need to fix
-	 * trampoline before removing it. (see the GDT stuff)
-	 */
-	{ PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE", 1 },
-#endif
-
-	{}
-};
-
-static int __init find_overlapped_early(u64 start, u64 end)
-{
-	int i;
-	struct early_res *r;
-
-	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
-		r = &early_res[i];
-		if (end > r->start && start < r->end)
-			break;
-	}
-
-	return i;
-}
-
-/*
- * Drop the i-th range from the early reservation map,
- * by copying any higher ranges down one over it, and
- * clearing what had been the last slot.
- */
-static void __init drop_range(int i)
-{
-	int j;
-
-	for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
-		;
-
-	memmove(&early_res[i], &early_res[i + 1],
-		(j - 1 - i) * sizeof(struct early_res));
-
-	early_res[j - 1].end = 0;
-}
-
-/*
- * Split any existing ranges that:
- *  1) are marked 'overlap_ok', and
- *  2) overlap with the stated range [start, end)
- * into whatever portion (if any) of the existing range is entirely
- * below or entirely above the stated range.  Drop the portion
- * of the existing range that overlaps with the stated range,
- * which will allow the caller of this routine to then add that
- * stated range without conflicting with any existing range.
- */
-static void __init drop_overlaps_that_are_ok(u64 start, u64 end)
-{
-	int i;
-	struct early_res *r;
-	u64 lower_start, lower_end;
-	u64 upper_start, upper_end;
-	char name[16];
-
-	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
-		r = &early_res[i];
-
-		/* Continue past non-overlapping ranges */
-		if (end <= r->start || start >= r->end)
-			continue;
-
-		/*
-		 * Leave non-ok overlaps as is; let caller
-		 * panic "Overlapping early reservations"
-		 * when it hits this overlap.
-		 */
-		if (!r->overlap_ok)
-			return;
-
-		/*
-		 * We have an ok overlap.  We will drop it from the early
-		 * reservation map, and add back in any non-overlapping
-		 * portions (lower or upper) as separate, overlap_ok,
-		 * non-overlapping ranges.
-		 */
-
-		/* 1. Note any non-overlapping (lower or upper) ranges. */
-		strncpy(name, r->name, sizeof(name) - 1);
-
-		lower_start = lower_end = 0;
-		upper_start = upper_end = 0;
-		if (r->start < start) {
-			lower_start = r->start;
-			lower_end = start;
-		}
-		if (r->end > end) {
-			upper_start = end;
-			upper_end = r->end;
-		}
-
-		/* 2. Drop the original ok overlapping range */
-		drop_range(i);
-
-		i--;		/* resume for-loop on copied down entry */
-
-		/* 3. Add back in any non-overlapping ranges. */
-		if (lower_end)
-			reserve_early_overlap_ok(lower_start, lower_end, name);
-		if (upper_end)
-			reserve_early_overlap_ok(upper_start, upper_end, name);
-	}
-}
-
-static void __init __reserve_early(u64 start, u64 end, char *name,
-						int overlap_ok)
-{
-	int i;
-	struct early_res *r;
-
-	i = find_overlapped_early(start, end);
-	if (i >= MAX_EARLY_RES)
-		panic("Too many early reservations");
-	r = &early_res[i];
-	if (r->end)
-		panic("Overlapping early reservations "
-		      "%llx-%llx %s to %llx-%llx %s\n",
-		      start, end - 1, name?name:"", r->start,
-		      r->end - 1, r->name);
-	r->start = start;
-	r->end = end;
-	r->overlap_ok = overlap_ok;
-	if (name)
-		strncpy(r->name, name, sizeof(r->name) - 1);
-}
-
-/*
- * A few early reservtations come here.
- *
- * The 'overlap_ok' in the name of this routine does -not- mean it
- * is ok for these reservations to overlap an earlier reservation.
- * Rather it means that it is ok for subsequent reservations to
- * overlap this one.
- *
- * Use this entry point to reserve early ranges when you are doing
- * so out of "Paranoia", reserving perhaps more memory than you need,
- * just in case, and don't mind a subsequent overlapping reservation
- * that is known to be needed.
- *
- * The drop_overlaps_that_are_ok() call here isn't really needed.
- * It would be needed if we had two colliding 'overlap_ok'
- * reservations, so that the second such would not panic on the
- * overlap with the first.  We don't have any such as of this
- * writing, but might as well tolerate such if it happens in
- * the future.
- */
-void __init reserve_early_overlap_ok(u64 start, u64 end, char *name)
-{
-	drop_overlaps_that_are_ok(start, end);
-	__reserve_early(start, end, name, 1);
-}
-
-/*
- * Most early reservations come here.
- *
- * We first have drop_overlaps_that_are_ok() drop any pre-existing
- * 'overlap_ok' ranges, so that we can then reserve this memory
- * range without risk of panic'ing on an overlapping overlap_ok
- * early reservation.
- */
-void __init reserve_early(u64 start, u64 end, char *name)
-{
-	if (start >= end)
-		return;
-
-	drop_overlaps_that_are_ok(start, end);
-	__reserve_early(start, end, name, 0);
-}
-
-void __init free_early(u64 start, u64 end)
-{
-	struct early_res *r;
-	int i;
-
-	i = find_overlapped_early(start, end);
-	r = &early_res[i];
-	if (i >= MAX_EARLY_RES || r->end != end || r->start != start)
-		panic("free_early on not reserved area: %llx-%llx!",
-			 start, end - 1);
-
-	drop_range(i);
-}
-
-void __init early_res_to_bootmem(u64 start, u64 end)
-{
-	int i, count;
-	u64 final_start, final_end;
-
-	count = 0;
-	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++)
-		count++;
-
-	printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n",
-		 count, start, end);
-	for (i = 0; i < count; i++) {
-		struct early_res *r = &early_res[i];
-		printk(KERN_INFO "  #%d [%010llx - %010llx] %16s", i,
-			r->start, r->end, r->name);
-		final_start = max(start, r->start);
-		final_end = min(end, r->end);
-		if (final_start >= final_end) {
-			printk(KERN_CONT "\n");
-			continue;
-		}
-		printk(KERN_CONT " ==> [%010llx - %010llx]\n",
-			final_start, final_end);
-		reserve_bootmem_generic(final_start, final_end - final_start,
-				BOOTMEM_DEFAULT);
-	}
-}
-
-/* Check for already reserved areas */
-static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
-{
-	int i;
-	u64 addr = *addrp;
-	int changed = 0;
-	struct early_res *r;
-again:
-	i = find_overlapped_early(addr, addr + size);
-	r = &early_res[i];
-	if (i < MAX_EARLY_RES && r->end) {
-		*addrp = addr = round_up(r->end, align);
-		changed = 1;
-		goto again;
-	}
-	return changed;
-}
-
-/* Check for already reserved areas */
-static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
-{
-	int i;
-	u64 addr = *addrp, last;
-	u64 size = *sizep;
-	int changed = 0;
-again:
-	last = addr + size;
-	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
-		struct early_res *r = &early_res[i];
-		if (last > r->start && addr < r->start) {
-			size = r->start - addr;
-			changed = 1;
-			goto again;
-		}
-		if (last > r->end && addr < r->end) {
-			addr = round_up(r->end, align);
-			size = last - addr;
-			changed = 1;
-			goto again;
-		}
-		if (last <= r->end && addr >= r->start) {
-			(*sizep)++;
-			return 0;
-		}
-	}
-	if (changed) {
-		*addrp = addr;
-		*sizep = size;
-	}
-	return changed;
-}
-
-/*
- * Find a free area with specified alignment in a specific range.
- */
-u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
-{
-	int i;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
-		u64 addr, last;
-		u64 ei_last;
-
-		if (ei->type != E820_RAM)
-			continue;
-		addr = round_up(ei->addr, align);
-		ei_last = ei->addr + ei->size;
-		if (addr < start)
-			addr = round_up(start, align);
-		if (addr >= ei_last)
-			continue;
-		while (bad_addr(&addr, size, align) && addr+size <= ei_last)
-			;
-		last = addr + size;
-		if (last > ei_last)
-			continue;
-		if (last > end)
-			continue;
-		return addr;
-	}
-	return -1ULL;
-}
-
+ * Find a free area with specified alignment in a specific range.
+ */
+u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
+{
+	int i;
+
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		u64 addr;
+		u64 ei_start, ei_last;
+
+		if (ei->type != E820_RAM)
+			continue;
+
+		ei_last = ei->addr + ei->size;
+		ei_start = ei->addr;
+		addr = find_early_area(ei_start, ei_last, start, end,
+					 size, align);
+
+		if (addr != -1ULL)
+			return addr;
+	}
+	return -1ULL;
+}
+
+u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align)
+{
+	return find_e820_area(start, end, size, align);
+}
+
+u64 __init get_max_mapped(void)
+{
+	u64 end = max_pfn_mapped;
+
+	end <<= PAGE_SHIFT;
+
+	return end;
+}
 /*
  * Find next free range after *start
  */
@@ -1052,25 +769,19 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
 
 	for (i = 0; i < e820.nr_map; i++) {
 		struct e820entry *ei = &e820.map[i];
-		u64 addr, last;
-		u64 ei_last;
+		u64 addr;
+		u64 ei_start, ei_last;
 
 		if (ei->type != E820_RAM)
 			continue;
-		addr = round_up(ei->addr, align);
+
 		ei_last = ei->addr + ei->size;
-		if (addr < start)
-			addr = round_up(start, align);
-		if (addr >= ei_last)
-			continue;
-		*sizep = ei_last - addr;
-		while (bad_addr_size(&addr, sizep, align) &&
-			addr + *sizep <= ei_last)
-			;
-		last = addr + *sizep;
-		if (last > ei_last)
-			continue;
-		return addr;
+		ei_start = ei->addr;
+		addr = find_early_area_size(ei_start, ei_last, start,
+					 sizep, align);
+
+		if (addr != -1ULL)
+			return addr;
 	}
 
 	return -1ULL;
@@ -1429,6 +1140,8 @@ void __init e820_reserve_resources_late(void)
 			end = MAX_RESOURCE_SIZE;
 		if (start >= end)
 			continue;
+		printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ",
+		       start, end);
 		reserve_region_with_split(&iomem_resource, start, end,
 					  "RAM buffer");
 	}
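
find_e820_area() and find_e820_area_size() keep their external contracts (scan E820_RAM entries, return -1ULL on failure) and now delegate the avoid-early-reservations logic that bad_addr()/bad_addr_size() used to implement to find_early_area()/find_early_area_size() in the generic early_res code, which is not shown in this excerpt. A minimal caller sketch under those visible conventions (the size, range and name are made up):

	/* Grab 64 KB of free, page-aligned RAM below 16 MB and pin it. */
	u64 addr = find_e820_area(0, 16<<20, 64<<10, PAGE_SIZE);

	if (addr == -1ULL)
		panic("no free area for example buffer");
	reserve_early(addr, addr + (64<<10), "EXAMPLE BUF");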
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 5051b94c9069..adedeef1dedc 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -29,6 +29,16 @@ static void __init i386_default_early_setup(void)
 
 void __init i386_start_kernel(void)
 {
+#ifdef CONFIG_X86_TRAMPOLINE
+	/*
+	 * But first pinch a few for the stack/trampoline stuff
+	 * FIXME: Don't need the extra page at 4K, but need to fix
+	 * trampoline before removing it. (see the GDT stuff)
+	 */
+	reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE,
+				 "EX TRAMPOLINE");
+#endif
+
 	reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
 
 #ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index 712d15fdc416..71825806cd44 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -7,6 +7,8 @@
 #include <linux/string.h>
 #include <linux/pci.h>
 #include <linux/dmi.h>
+#include <linux/range.h>
+
 #include <asm/pci-direct.h>
 #include <linux/sort.h>
 #include <asm/io.h>
@@ -30,11 +32,6 @@ static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = {
 	{ 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 },
 };
 
-struct range {
-	u64 start;
-	u64 end;
-};
-
 static int __cpuinit cmp_range(const void *x1, const void *x2)
 {
 	const struct range *r1 = x1;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 75e14e21f61a..1aa966c565f9 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -65,7 +65,7 @@ int dma_set_mask(struct device *dev, u64 mask)
 }
 EXPORT_SYMBOL(dma_set_mask);
 
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_64) && !defined(CONFIG_NUMA)
 static __initdata void *dma32_bootmem_ptr;
 static unsigned long dma32_bootmem_size __initdata = (128ULL<<20);
 
@@ -116,14 +116,21 @@ static void __init dma32_free_bootmem(void)
 	dma32_bootmem_ptr = NULL;
 	dma32_bootmem_size = 0;
 }
+#else
+void __init dma32_reserve_bootmem(void)
+{
+}
+static void __init dma32_free_bootmem(void)
+{
+}
+
 #endif
 
 void __init pci_iommu_alloc(void)
 {
-#ifdef CONFIG_X86_64
 	/* free the range so iommu could get some range less than 4G */
 	dma32_free_bootmem();
-#endif
+
 	if (pci_swiotlb_detect())
 		goto out;
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index cb42109a55b4..5d7ba1a449bd 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -969,15 +969,11 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	initmem_init(0, max_pfn, acpi, k8);
+#ifndef CONFIG_NO_BOOTMEM
+	early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
+#endif
 
-#ifdef CONFIG_X86_64
-	/*
-	 * dma32_reserve_bootmem() allocates bootmem which may conflict
-	 * with the crashkernel command line, so do that after
-	 * reserve_crashkernel()
-	 */
 	dma32_reserve_bootmem();
-#endif
 
 	reserve_ibft_region();
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 35abcb8b00e9..ef6370b00e70 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -137,7 +137,13 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
 
 static void __init pcpu_fc_free(void *ptr, size_t size)
 {
+#ifdef CONFIG_NO_BOOTMEM
+	u64 start = __pa(ptr);
+	u64 end = start + size;
+	free_early_partial(start, end);
+#else
 	free_bootmem(__pa(ptr), size);
+#endif
 }
 
 static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
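
The _partial suffix is doing real work here: the free_early() implementation deleted from e820.c above panics unless [start, end) exactly matches an existing reservation, and the percpu first-chunk code may hand back only part of what was reserved. free_early_partial() is assumed to trim or split the overlapping reservation instead; its implementation is not in this excerpt. Illustrative call with made-up addresses:

	/* Boot code reserved [0x1000000, 0x1400000) for the first chunk,
	 * but the last 1 MB turns out to be unused.  free_early() would
	 * panic on the sub-range; free_early_partial() (assumed behaviour)
	 * shrinks the reservation to [0x1000000, 0x1300000). */
	free_early_partial(0x1300000, 0x1400000);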
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 2226f2c70ea3..5cb3f0f54f47 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -750,6 +750,7 @@ static void __init zone_sizes_init(void)
 	free_area_init_nodes(max_zone_pfns);
 }
 
+#ifndef CONFIG_NO_BOOTMEM
 static unsigned long __init setup_node_bootmem(int nodeid,
 				 unsigned long start_pfn,
 				 unsigned long end_pfn,
@@ -766,13 +767,14 @@ static unsigned long __init setup_node_bootmem(int nodeid,
 	printk(KERN_INFO "  node %d bootmap %08lx - %08lx\n",
 		 nodeid, bootmap, bootmap + bootmap_size);
 	free_bootmem_with_active_regions(nodeid, end_pfn);
-	early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
 
 	return bootmap + bootmap_size;
 }
+#endif
 
 void __init setup_bootmem_allocator(void)
 {
+#ifndef CONFIG_NO_BOOTMEM
 	int nodeid;
 	unsigned long bootmap_size, bootmap;
 	/*
@@ -784,11 +786,13 @@ void __init setup_bootmem_allocator(void)
 	if (bootmap == -1L)
 		panic("Cannot find bootmem map of size %ld\n", bootmap_size);
 	reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
+#endif
 
 	printk(KERN_INFO "  mapped low ram: 0 - %08lx\n",
 		 max_pfn_mapped<<PAGE_SHIFT);
 	printk(KERN_INFO "  low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
 
+#ifndef CONFIG_NO_BOOTMEM
 	for_each_online_node(nodeid) {
 		unsigned long start_pfn, end_pfn;
 
@@ -806,6 +810,7 @@ void __init setup_bootmem_allocator(void)
 		bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn,
 					 bootmap);
 	}
+#endif
 
 	after_bootmem = 1;
 }
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 69ddfbd91135..e9b040e1cde5 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -572,6 +572,7 @@ kernel_physical_mapping_init(unsigned long start,
 void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
 			 int acpi, int k8)
 {
+#ifndef CONFIG_NO_BOOTMEM
 	unsigned long bootmap_size, bootmap;
 
 	bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
@@ -579,13 +580,15 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
 				 PAGE_SIZE);
 	if (bootmap == -1L)
 		panic("Cannot find bootmem map of size %ld\n", bootmap_size);
+	reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
 	/* don't touch min_low_pfn */
 	bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
 					 0, end_pfn);
 	e820_register_active_regions(0, start_pfn, end_pfn);
 	free_bootmem_with_active_regions(0, end_pfn);
-	early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
-	reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
+#else
+	e820_register_active_regions(0, start_pfn, end_pfn);
+#endif
 }
 #endif
 
@@ -974,7 +977,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
 		if (pmd_none(*pmd)) {
 			pte_t entry;
 
-			p = vmemmap_alloc_block(PMD_SIZE, node);
+			p = vmemmap_alloc_block_buf(PMD_SIZE, node);
 			if (!p)
 				return -ENOMEM;
 
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index b20760ca7244..809baaaf48b1 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -418,7 +418,10 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
 
 	for_each_online_node(nid) {
 		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+		NODE_DATA(nid)->node_id = nid;
+#ifndef CONFIG_NO_BOOTMEM
 		NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
+#endif
 	}
 
 	setup_bootmem_allocator();
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 3307ea8bd43a..8948f47fde05 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
| @@ -163,30 +163,48 @@ static void * __init early_node_mem(int nodeid, unsigned long start, | |||
| 163 | unsigned long end, unsigned long size, | 163 | unsigned long end, unsigned long size, |
| 164 | unsigned long align) | 164 | unsigned long align) |
| 165 | { | 165 | { |
| 166 | unsigned long mem = find_e820_area(start, end, size, align); | 166 | unsigned long mem; |
| 167 | void *ptr; | ||
| 168 | 167 | ||
| 168 | /* | ||
| 169 | * put it on high as possible | ||
| 170 | * something will go with NODE_DATA | ||
| 171 | */ | ||
| 172 | if (start < (MAX_DMA_PFN<<PAGE_SHIFT)) | ||
| 173 | start = MAX_DMA_PFN<<PAGE_SHIFT; | ||
| 174 | if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) && | ||
| 175 | end > (MAX_DMA32_PFN<<PAGE_SHIFT)) | ||
| 176 | start = MAX_DMA32_PFN<<PAGE_SHIFT; | ||
| 177 | mem = find_e820_area(start, end, size, align); | ||
| 178 | if (mem != -1L) | ||
| 179 | return __va(mem); | ||
| 180 | |||
| 181 | /* extend the search scope */ | ||
| 182 | end = max_pfn_mapped << PAGE_SHIFT; | ||
| 183 | if (end > (MAX_DMA32_PFN<<PAGE_SHIFT)) | ||
| 184 | start = MAX_DMA32_PFN<<PAGE_SHIFT; | ||
| 185 | else | ||
| 186 | start = MAX_DMA_PFN<<PAGE_SHIFT; | ||
| 187 | mem = find_e820_area(start, end, size, align); | ||
| 169 | if (mem != -1L) | 188 | if (mem != -1L) |
| 170 | return __va(mem); | 189 | return __va(mem); |
| 171 | 190 | ||
| 172 | ptr = __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS)); | 191 | printk(KERN_ERR "Cannot find %lu bytes in node %d\n", |
| 173 | if (ptr == NULL) { | ||
| 174 | printk(KERN_ERR "Cannot find %lu bytes in node %d\n", | ||
| 175 | size, nodeid); | 192 | size, nodeid); |
| 176 | return NULL; | 193 | |
| 177 | } | 194 | return NULL; |
| 178 | return ptr; | ||
| 179 | } | 195 | } |
| 180 | 196 | ||
| 181 | /* Initialize bootmem allocator for a node */ | 197 | /* Initialize bootmem allocator for a node */ |
| 182 | void __init | 198 | void __init |
| 183 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | 199 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) |
| 184 | { | 200 | { |
| 185 | unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; | 201 | unsigned long start_pfn, last_pfn, nodedata_phys; |
| 186 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | 202 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); |
| 187 | unsigned long bootmap_start, nodedata_phys; | ||
| 188 | void *bootmap; | ||
| 189 | int nid; | 203 | int nid; |
| 204 | #ifndef CONFIG_NO_BOOTMEM | ||
| 205 | unsigned long bootmap_start, bootmap_pages, bootmap_size; | ||
| 206 | void *bootmap; | ||
| 207 | #endif | ||
| 190 | 208 | ||
| 191 | if (!end) | 209 | if (!end) |
| 192 | return; | 210 | return; |
| @@ -200,7 +218,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
| 200 | 218 | ||
| 201 | start = roundup(start, ZONE_ALIGN); | 219 | start = roundup(start, ZONE_ALIGN); |
| 202 | 220 | ||
| 203 | printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, | 221 | printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid, |
| 204 | start, end); | 222 | start, end); |
| 205 | 223 | ||
| 206 | start_pfn = start >> PAGE_SHIFT; | 224 | start_pfn = start >> PAGE_SHIFT; |
| @@ -211,14 +229,21 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
| 211 | if (node_data[nodeid] == NULL) | 229 | if (node_data[nodeid] == NULL) |
| 212 | return; | 230 | return; |
| 213 | nodedata_phys = __pa(node_data[nodeid]); | 231 | nodedata_phys = __pa(node_data[nodeid]); |
| 232 | reserve_early(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA"); | ||
| 214 | printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, | 233 | printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, |
| 215 | nodedata_phys + pgdat_size - 1); | 234 | nodedata_phys + pgdat_size - 1); |
| 235 | nid = phys_to_nid(nodedata_phys); | ||
| 236 | if (nid != nodeid) | ||
| 237 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); | ||
| 216 | 238 | ||
| 217 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); | 239 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); |
| 218 | NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; | 240 | NODE_DATA(nodeid)->node_id = nodeid; |
| 219 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; | 241 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; |
| 220 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; | 242 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; |
| 221 | 243 | ||
| 244 | #ifndef CONFIG_NO_BOOTMEM | ||
| 245 | NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; | ||
| 246 | |||
| 222 | /* | 247 | /* |
| 223 | * Find a place for the bootmem map | 248 | * Find a place for the bootmem map |
| 224 | * nodedata_phys could be on other nodes by alloc_bootmem, | 249 | * nodedata_phys could be on other nodes by alloc_bootmem, |
| @@ -227,11 +252,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
| 227 | * of alloc_bootmem, that could clash with reserved range | 252 | * of alloc_bootmem, that could clash with reserved range |
| 228 | */ | 253 | */ |
| 229 | bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn); | 254 | bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn); |
| 230 | nid = phys_to_nid(nodedata_phys); | 255 | bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE); |
| 231 | if (nid == nodeid) | ||
| 232 | bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE); | ||
| 233 | else | ||
| 234 | bootmap_start = roundup(start, PAGE_SIZE); | ||
| 235 | /* | 256 | /* |
| 236 | * SMP_CACHE_BYTES could be enough, but init_bootmem_node like | 257 | * SMP_CACHE_BYTES could be enough, but init_bootmem_node like |
| 237 | * to use that to align to PAGE_SIZE | 258 | * to use that to align to PAGE_SIZE |
| @@ -239,18 +260,13 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
| 239 | bootmap = early_node_mem(nodeid, bootmap_start, end, | 260 | bootmap = early_node_mem(nodeid, bootmap_start, end, |
| 240 | bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); | 261 | bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); |
| 241 | if (bootmap == NULL) { | 262 | if (bootmap == NULL) { |
| 242 | if (nodedata_phys < start || nodedata_phys >= end) { | 263 | free_early(nodedata_phys, nodedata_phys + pgdat_size); |
| 243 | /* | ||
| 244 | * only need to free it if it is from other node | ||
| 245 | * bootmem | ||
| 246 | */ | ||
| 247 | if (nid != nodeid) | ||
| 248 | free_bootmem(nodedata_phys, pgdat_size); | ||
| 249 | } | ||
| 250 | node_data[nodeid] = NULL; | 264 | node_data[nodeid] = NULL; |
| 251 | return; | 265 | return; |
| 252 | } | 266 | } |
| 253 | bootmap_start = __pa(bootmap); | 267 | bootmap_start = __pa(bootmap); |
| 268 | reserve_early(bootmap_start, bootmap_start+(bootmap_pages<<PAGE_SHIFT), | ||
| 269 | "BOOTMAP"); | ||
| 254 | 270 | ||
| 255 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), | 271 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), |
| 256 | bootmap_start >> PAGE_SHIFT, | 272 | bootmap_start >> PAGE_SHIFT, |
| @@ -259,31 +275,12 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
| 259 | printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", | 275 | printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", |
| 260 | bootmap_start, bootmap_start + bootmap_size - 1, | 276 | bootmap_start, bootmap_start + bootmap_size - 1, |
| 261 | bootmap_pages); | 277 | bootmap_pages); |
| 262 | |||
| 263 | free_bootmem_with_active_regions(nodeid, end); | ||
| 264 | |||
| 265 | /* | ||
| 266 | * convert early reserve to bootmem reserve earlier | ||
| 267 | * otherwise early_node_mem could use early reserved mem | ||
| 268 | * on previous node | ||
| 269 | */ | ||
| 270 | early_res_to_bootmem(start, end); | ||
| 271 | |||
| 272 | /* | ||
| 273 | * in some case early_node_mem could use alloc_bootmem | ||
| 274 | * to get range on other node, don't reserve that again | ||
| 275 | */ | ||
| 276 | if (nid != nodeid) | ||
| 277 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); | ||
| 278 | else | ||
| 279 | reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, | ||
| 280 | pgdat_size, BOOTMEM_DEFAULT); | ||
| 281 | nid = phys_to_nid(bootmap_start); | 278 | nid = phys_to_nid(bootmap_start); |
| 282 | if (nid != nodeid) | 279 | if (nid != nodeid) |
| 283 | printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); | 280 | printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); |
| 284 | else | 281 | |
| 285 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, | 282 | free_bootmem_with_active_regions(nodeid, end); |
| 286 | bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); | 283 | #endif |
| 287 | 284 | ||
| 288 | node_set_online(nodeid); | 285 | node_set_online(nodeid); |
| 289 | } | 286 | } |
| @@ -709,6 +706,10 @@ unsigned long __init numa_free_all_bootmem(void) | |||
| 709 | for_each_online_node(i) | 706 | for_each_online_node(i) |
| 710 | pages += free_all_bootmem_node(NODE_DATA(i)); | 707 | pages += free_all_bootmem_node(NODE_DATA(i)); |
| 711 | 708 | ||
| 709 | #ifdef CONFIG_NO_BOOTMEM | ||
| 710 | pages += free_all_memory_core_early(MAX_NUMNODES); | ||
| 711 | #endif | ||
| 712 | |||
| 712 | return pages; | 713 | return pages; |
| 713 | } | 714 | } |
| 714 | 715 | ||
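For orientation, after this hunk numa_free_all_bootmem() keeps the per-node bootmem loop and, when CONFIG_NO_BOOTMEM is set, additionally hands everything still tracked by early_res over to the page allocator. A sketch of the resulting function, assembled from the hunk context above (not verbatim source):

	unsigned long __init numa_free_all_bootmem(void)
	{
		unsigned long pages = 0;
		int i;

		for_each_online_node(i)
			pages += free_all_bootmem_node(NODE_DATA(i));

	#ifdef CONFIG_NO_BOOTMEM
		/* release all ranges still held by early_res to the buddy allocator */
		pages += free_all_memory_core_early(MAX_NUMNODES);
	#endif

		return pages;
	}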
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 39fba37f702f..0b7d3e9593e1 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile | |||
| @@ -14,8 +14,7 @@ obj-$(CONFIG_X86_VISWS) += visws.o | |||
| 14 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o | 14 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o |
| 15 | 15 | ||
| 16 | obj-y += common.o early.o | 16 | obj-y += common.o early.o |
| 17 | obj-y += amd_bus.o | 17 | obj-y += amd_bus.o bus_numa.o |
| 18 | obj-$(CONFIG_X86_64) += bus_numa.o | ||
| 19 | 18 | ||
| 20 | ifeq ($(CONFIG_PCI_DEBUG),y) | 19 | ifeq ($(CONFIG_PCI_DEBUG),y) |
| 21 | EXTRA_CFLAGS += -DDEBUG | 20 | EXTRA_CFLAGS += -DDEBUG |
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 95ecbd495955..fc1e8fe07e5c 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c | |||
| @@ -2,11 +2,11 @@ | |||
| 2 | #include <linux/pci.h> | 2 | #include <linux/pci.h> |
| 3 | #include <linux/topology.h> | 3 | #include <linux/topology.h> |
| 4 | #include <linux/cpu.h> | 4 | #include <linux/cpu.h> |
| 5 | #include <linux/range.h> | ||
| 6 | |||
| 5 | #include <asm/pci_x86.h> | 7 | #include <asm/pci_x86.h> |
| 6 | 8 | ||
| 7 | #ifdef CONFIG_X86_64 | ||
| 8 | #include <asm/pci-direct.h> | 9 | #include <asm/pci-direct.h> |
| 9 | #endif | ||
| 10 | 10 | ||
| 11 | #include "bus_numa.h" | 11 | #include "bus_numa.h" |
| 12 | 12 | ||
| @@ -15,60 +15,6 @@ | |||
| 15 | * also get peer root bus resource for io,mmio | 15 | * also get peer root bus resource for io,mmio |
| 16 | */ | 16 | */ |
| 17 | 17 | ||
| 18 | #ifdef CONFIG_X86_64 | ||
| 19 | |||
| 20 | #define RANGE_NUM 16 | ||
| 21 | |||
| 22 | struct res_range { | ||
| 23 | size_t start; | ||
| 24 | size_t end; | ||
| 25 | }; | ||
| 26 | |||
| 27 | static void __init update_range(struct res_range *range, size_t start, | ||
| 28 | size_t end) | ||
| 29 | { | ||
| 30 | int i; | ||
| 31 | int j; | ||
| 32 | |||
| 33 | for (j = 0; j < RANGE_NUM; j++) { | ||
| 34 | if (!range[j].end) | ||
| 35 | continue; | ||
| 36 | |||
| 37 | if (start <= range[j].start && end >= range[j].end) { | ||
| 38 | range[j].start = 0; | ||
| 39 | range[j].end = 0; | ||
| 40 | continue; | ||
| 41 | } | ||
| 42 | |||
| 43 | if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) { | ||
| 44 | range[j].start = end + 1; | ||
| 45 | continue; | ||
| 46 | } | ||
| 47 | |||
| 48 | |||
| 49 | if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) { | ||
| 50 | range[j].end = start - 1; | ||
| 51 | continue; | ||
| 52 | } | ||
| 53 | |||
| 54 | if (start > range[j].start && end < range[j].end) { | ||
| 55 | /* find the new spare */ | ||
| 56 | for (i = 0; i < RANGE_NUM; i++) { | ||
| 57 | if (range[i].end == 0) | ||
| 58 | break; | ||
| 59 | } | ||
| 60 | if (i < RANGE_NUM) { | ||
| 61 | range[i].end = range[j].end; | ||
| 62 | range[i].start = end + 1; | ||
| 63 | } else { | ||
| 64 | printk(KERN_ERR "run of slot in ranges\n"); | ||
| 65 | } | ||
| 66 | range[j].end = start - 1; | ||
| 67 | continue; | ||
| 68 | } | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | struct pci_hostbridge_probe { | 18 | struct pci_hostbridge_probe { |
| 73 | u32 bus; | 19 | u32 bus; |
| 74 | u32 slot; | 20 | u32 slot; |
| @@ -111,6 +57,8 @@ static void __init get_pci_mmcfg_amd_fam10h_range(void) | |||
| 111 | fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1; | 57 | fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1; |
| 112 | } | 58 | } |
| 113 | 59 | ||
| 60 | #define RANGE_NUM 16 | ||
| 61 | |||
| 114 | /** | 62 | /** |
| 115 | * early_fill_mp_bus_to_node() | 63 | * early_fill_mp_bus_to_node() |
| 116 | * called before pcibios_scan_root and pci_scan_bus | 64 | * called before pcibios_scan_root and pci_scan_bus |
| @@ -130,16 +78,17 @@ static int __init early_fill_mp_bus_info(void) | |||
| 130 | struct pci_root_info *info; | 78 | struct pci_root_info *info; |
| 131 | u32 reg; | 79 | u32 reg; |
| 132 | struct resource *res; | 80 | struct resource *res; |
| 133 | size_t start; | 81 | u64 start; |
| 134 | size_t end; | 82 | u64 end; |
| 135 | struct res_range range[RANGE_NUM]; | 83 | struct range range[RANGE_NUM]; |
| 136 | u64 val; | 84 | u64 val; |
| 137 | u32 address; | 85 | u32 address; |
| 86 | bool found; | ||
| 138 | 87 | ||
| 139 | if (!early_pci_allowed()) | 88 | if (!early_pci_allowed()) |
| 140 | return -1; | 89 | return -1; |
| 141 | 90 | ||
| 142 | found_all_numa_early = 0; | 91 | found = false; |
| 143 | for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { | 92 | for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { |
| 144 | u32 id; | 93 | u32 id; |
| 145 | u16 device; | 94 | u16 device; |
| @@ -153,12 +102,12 @@ static int __init early_fill_mp_bus_info(void) | |||
| 153 | device = (id>>16) & 0xffff; | 102 | device = (id>>16) & 0xffff; |
| 154 | if (pci_probes[i].vendor == vendor && | 103 | if (pci_probes[i].vendor == vendor && |
| 155 | pci_probes[i].device == device) { | 104 | pci_probes[i].device == device) { |
| 156 | found_all_numa_early = 1; | 105 | found = true; |
| 157 | break; | 106 | break; |
| 158 | } | 107 | } |
| 159 | } | 108 | } |
| 160 | 109 | ||
| 161 | if (!found_all_numa_early) | 110 | if (!found) |
| 162 | return 0; | 111 | return 0; |
| 163 | 112 | ||
| 164 | pci_root_num = 0; | 113 | pci_root_num = 0; |
| @@ -196,7 +145,7 @@ static int __init early_fill_mp_bus_info(void) | |||
| 196 | def_link = (reg >> 8) & 0x03; | 145 | def_link = (reg >> 8) & 0x03; |
| 197 | 146 | ||
| 198 | memset(range, 0, sizeof(range)); | 147 | memset(range, 0, sizeof(range)); |
| 199 | range[0].end = 0xffff; | 148 | add_range(range, RANGE_NUM, 0, 0, 0xffff + 1); |
| 200 | /* io port resource */ | 149 | /* io port resource */ |
| 201 | for (i = 0; i < 4; i++) { | 150 | for (i = 0; i < 4; i++) { |
| 202 | reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3)); | 151 | reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3)); |
| @@ -220,13 +169,13 @@ static int __init early_fill_mp_bus_info(void) | |||
| 220 | 169 | ||
| 221 | info = &pci_root_info[j]; | 170 | info = &pci_root_info[j]; |
| 222 | printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n", | 171 | printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n", |
| 223 | node, link, (u64)start, (u64)end); | 172 | node, link, start, end); |
| 224 | 173 | ||
| 225 | /* kernel only handle 16 bit only */ | 174 | /* kernel only handle 16 bit only */ |
| 226 | if (end > 0xffff) | 175 | if (end > 0xffff) |
| 227 | end = 0xffff; | 176 | end = 0xffff; |
| 228 | update_res(info, start, end, IORESOURCE_IO, 1); | 177 | update_res(info, start, end, IORESOURCE_IO, 1); |
| 229 | update_range(range, start, end); | 178 | subtract_range(range, RANGE_NUM, start, end + 1); |
| 230 | } | 179 | } |
| 231 | /* add left over io port range to def node/link, [0, 0xffff] */ | 180 | /* add left over io port range to def node/link, [0, 0xffff] */ |
| 232 | /* find the position */ | 181 | /* find the position */ |
| @@ -241,29 +190,32 @@ static int __init early_fill_mp_bus_info(void) | |||
| 241 | if (!range[i].end) | 190 | if (!range[i].end) |
| 242 | continue; | 191 | continue; |
| 243 | 192 | ||
| 244 | update_res(info, range[i].start, range[i].end, | 193 | update_res(info, range[i].start, range[i].end - 1, |
| 245 | IORESOURCE_IO, 1); | 194 | IORESOURCE_IO, 1); |
| 246 | } | 195 | } |
| 247 | } | 196 | } |
| 248 | 197 | ||
| 249 | memset(range, 0, sizeof(range)); | 198 | memset(range, 0, sizeof(range)); |
| 250 | /* 0xfd00000000-0xffffffffff for HT */ | 199 | /* 0xfd00000000-0xffffffffff for HT */ |
| 251 | range[0].end = (0xfdULL<<32) - 1; | 200 | end = cap_resource((0xfdULL<<32) - 1); |
| 201 | end++; | ||
| 202 | add_range(range, RANGE_NUM, 0, 0, end); | ||
| 252 | 203 | ||
| 253 | /* need to take out [0, TOM) for RAM*/ | 204 | /* need to take out [0, TOM) for RAM*/ |
| 254 | address = MSR_K8_TOP_MEM1; | 205 | address = MSR_K8_TOP_MEM1; |
| 255 | rdmsrl(address, val); | 206 | rdmsrl(address, val); |
| 256 | end = (val & 0xffffff800000ULL); | 207 | end = (val & 0xffffff800000ULL); |
| 257 | printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); | 208 | printk(KERN_INFO "TOM: %016llx aka %lldM\n", end, end>>20); |
| 258 | if (end < (1ULL<<32)) | 209 | if (end < (1ULL<<32)) |
| 259 | update_range(range, 0, end - 1); | 210 | subtract_range(range, RANGE_NUM, 0, end); |
| 260 | 211 | ||
| 261 | /* get mmconfig */ | 212 | /* get mmconfig */ |
| 262 | get_pci_mmcfg_amd_fam10h_range(); | 213 | get_pci_mmcfg_amd_fam10h_range(); |
| 263 | /* need to take out mmconf range */ | 214 | /* need to take out mmconf range */ |
| 264 | if (fam10h_mmconf_end) { | 215 | if (fam10h_mmconf_end) { |
| 265 | printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end); | 216 | printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end); |
| 266 | update_range(range, fam10h_mmconf_start, fam10h_mmconf_end); | 217 | subtract_range(range, RANGE_NUM, fam10h_mmconf_start, |
| 218 | fam10h_mmconf_end + 1); | ||
| 267 | } | 219 | } |
| 268 | 220 | ||
| 269 | /* mmio resource */ | 221 | /* mmio resource */ |
| @@ -293,7 +245,7 @@ static int __init early_fill_mp_bus_info(void) | |||
| 293 | info = &pci_root_info[j]; | 245 | info = &pci_root_info[j]; |
| 294 | 246 | ||
| 295 | printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]", | 247 | printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]", |
| 296 | node, link, (u64)start, (u64)end); | 248 | node, link, start, end); |
| 297 | /* | 249 | /* |
| 298 | * some sick allocation would have range overlap with fam10h | 250 | * some sick allocation would have range overlap with fam10h |
| 299 | * mmconf range, so need to update start and end. | 251 | * mmconf range, so need to update start and end. |
| @@ -318,14 +270,15 @@ static int __init early_fill_mp_bus_info(void) | |||
| 318 | /* we got a hole */ | 270 | /* we got a hole */ |
| 319 | endx = fam10h_mmconf_start - 1; | 271 | endx = fam10h_mmconf_start - 1; |
| 320 | update_res(info, start, endx, IORESOURCE_MEM, 0); | 272 | update_res(info, start, endx, IORESOURCE_MEM, 0); |
| 321 | update_range(range, start, endx); | 273 | subtract_range(range, RANGE_NUM, start, |
| 322 | printk(KERN_CONT " ==> [%llx, %llx]", (u64)start, endx); | 274 | endx + 1); |
| 275 | printk(KERN_CONT " ==> [%llx, %llx]", start, endx); | ||
| 323 | start = fam10h_mmconf_end + 1; | 276 | start = fam10h_mmconf_end + 1; |
| 324 | changed = 1; | 277 | changed = 1; |
| 325 | } | 278 | } |
| 326 | if (changed) { | 279 | if (changed) { |
| 327 | if (start <= end) { | 280 | if (start <= end) { |
| 328 | printk(KERN_CONT " %s [%llx, %llx]", endx?"and":"==>", (u64)start, (u64)end); | 281 | printk(KERN_CONT " %s [%llx, %llx]", endx ? "and" : "==>", start, end); |
| 329 | } else { | 282 | } else { |
| 330 | printk(KERN_CONT "%s\n", endx?"":" ==> none"); | 283 | printk(KERN_CONT "%s\n", endx?"":" ==> none"); |
| 331 | continue; | 284 | continue; |
| @@ -333,8 +286,9 @@ static int __init early_fill_mp_bus_info(void) | |||
| 333 | } | 286 | } |
| 334 | } | 287 | } |
| 335 | 288 | ||
| 336 | update_res(info, start, end, IORESOURCE_MEM, 1); | 289 | update_res(info, cap_resource(start), cap_resource(end), |
| 337 | update_range(range, start, end); | 290 | IORESOURCE_MEM, 1); |
| 291 | subtract_range(range, RANGE_NUM, start, end + 1); | ||
| 338 | printk(KERN_CONT "\n"); | 292 | printk(KERN_CONT "\n"); |
| 339 | } | 293 | } |
| 340 | 294 | ||
| @@ -348,8 +302,8 @@ static int __init early_fill_mp_bus_info(void) | |||
| 348 | address = MSR_K8_TOP_MEM2; | 302 | address = MSR_K8_TOP_MEM2; |
| 349 | rdmsrl(address, val); | 303 | rdmsrl(address, val); |
| 350 | end = (val & 0xffffff800000ULL); | 304 | end = (val & 0xffffff800000ULL); |
| 351 | printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); | 305 | printk(KERN_INFO "TOM2: %016llx aka %lldM\n", end, end>>20); |
| 352 | update_range(range, 1ULL<<32, end - 1); | 306 | subtract_range(range, RANGE_NUM, 1ULL<<32, end); |
| 353 | } | 307 | } |
| 354 | 308 | ||
| 355 | /* | 309 | /* |
| @@ -368,7 +322,8 @@ static int __init early_fill_mp_bus_info(void) | |||
| 368 | if (!range[i].end) | 322 | if (!range[i].end) |
| 369 | continue; | 323 | continue; |
| 370 | 324 | ||
| 371 | update_res(info, range[i].start, range[i].end, | 325 | update_res(info, cap_resource(range[i].start), |
| 326 | cap_resource(range[i].end - 1), | ||
| 372 | IORESOURCE_MEM, 1); | 327 | IORESOURCE_MEM, 1); |
| 373 | } | 328 | } |
| 374 | } | 329 | } |
| @@ -384,24 +339,14 @@ static int __init early_fill_mp_bus_info(void) | |||
| 384 | info->bus_min, info->bus_max, info->node, info->link); | 339 | info->bus_min, info->bus_max, info->node, info->link); |
| 385 | for (j = 0; j < res_num; j++) { | 340 | for (j = 0; j < res_num; j++) { |
| 386 | res = &info->res[j]; | 341 | res = &info->res[j]; |
| 387 | printk(KERN_DEBUG "bus: %02x index %x %s: [%llx, %llx]\n", | 342 | printk(KERN_DEBUG "bus: %02x index %x %pR\n", |
| 388 | busnum, j, | 343 | busnum, j, res); |
| 389 | (res->flags & IORESOURCE_IO)?"io port":"mmio", | ||
| 390 | res->start, res->end); | ||
| 391 | } | 344 | } |
| 392 | } | 345 | } |
| 393 | 346 | ||
| 394 | return 0; | 347 | return 0; |
| 395 | } | 348 | } |
| 396 | 349 | ||
| 397 | #else /* !CONFIG_X86_64 */ | ||
| 398 | |||
| 399 | static int __init early_fill_mp_bus_info(void) { return 0; } | ||
| 400 | |||
| 401 | #endif /* !CONFIG_X86_64 */ | ||
| 402 | |||
| 403 | /* common 32/64 bit code */ | ||
| 404 | |||
| 405 | #define ENABLE_CF8_EXT_CFG (1ULL << 46) | 350 | #define ENABLE_CF8_EXT_CFG (1ULL << 46) |
| 406 | 351 | ||
| 407 | static void enable_pci_io_ecs(void *unused) | 352 | static void enable_pci_io_ecs(void *unused) |
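The conversions above are easy to misread: the old update_range() worked on inclusive [start, end] pairs, while the shared add_range()/subtract_range() helpers from <linux/range.h> use half-open [start, end) ranges, hence the "end + 1" at every call site and the "range[i].end - 1" when leftovers are fed back to update_res(). A minimal sketch of the new convention, with invented values:

	static void __init range_convention_sketch(struct pci_root_info *info)
	{
		struct range range[RANGE_NUM];
		int i;

		memset(range, 0, sizeof(range));
		/* inclusive [0, 0xffff] enters as half-open [0, 0x10000) */
		add_range(range, RANGE_NUM, 0, 0, 0xffff + 1);

		/* an inclusive resource [0x1000, 0x1fff] is carved out the same way */
		subtract_range(range, RANGE_NUM, 0x1000, 0x1fff + 1);

		/* leftovers go back to inclusive form for struct resource */
		for (i = 0; i < RANGE_NUM; i++) {
			if (!range[i].end)
				continue;
			update_res(info, range[i].start, range[i].end - 1,
				   IORESOURCE_IO, 1);
		}
	}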
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c index 12d54ff3654d..64a122883896 100644 --- a/arch/x86/pci/bus_numa.c +++ b/arch/x86/pci/bus_numa.c | |||
| @@ -1,11 +1,11 @@ | |||
| 1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
| 2 | #include <linux/pci.h> | 2 | #include <linux/pci.h> |
| 3 | #include <linux/range.h> | ||
| 3 | 4 | ||
| 4 | #include "bus_numa.h" | 5 | #include "bus_numa.h" |
| 5 | 6 | ||
| 6 | int pci_root_num; | 7 | int pci_root_num; |
| 7 | struct pci_root_info pci_root_info[PCI_ROOT_NR]; | 8 | struct pci_root_info pci_root_info[PCI_ROOT_NR]; |
| 8 | int found_all_numa_early; | ||
| 9 | 9 | ||
| 10 | void x86_pci_root_bus_res_quirks(struct pci_bus *b) | 10 | void x86_pci_root_bus_res_quirks(struct pci_bus *b) |
| 11 | { | 11 | { |
| @@ -21,10 +21,6 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) | |||
| 21 | if (!pci_root_num) | 21 | if (!pci_root_num) |
| 22 | return; | 22 | return; |
| 23 | 23 | ||
| 24 | /* for amd, if only one root bus, don't need to do anything */ | ||
| 25 | if (pci_root_num < 2 && found_all_numa_early) | ||
| 26 | return; | ||
| 27 | |||
| 28 | for (i = 0; i < pci_root_num; i++) { | 24 | for (i = 0; i < pci_root_num; i++) { |
| 29 | if (pci_root_info[i].bus_min == b->number) | 25 | if (pci_root_info[i].bus_min == b->number) |
| 30 | break; | 26 | break; |
| @@ -52,8 +48,8 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) | |||
| 52 | } | 48 | } |
| 53 | } | 49 | } |
| 54 | 50 | ||
| 55 | void __devinit update_res(struct pci_root_info *info, size_t start, | 51 | void __devinit update_res(struct pci_root_info *info, resource_size_t start, |
| 56 | size_t end, unsigned long flags, int merge) | 52 | resource_size_t end, unsigned long flags, int merge) |
| 57 | { | 53 | { |
| 58 | int i; | 54 | int i; |
| 59 | struct resource *res; | 55 | struct resource *res; |
| @@ -61,25 +57,28 @@ void __devinit update_res(struct pci_root_info *info, size_t start, | |||
| 61 | if (start > end) | 57 | if (start > end) |
| 62 | return; | 58 | return; |
| 63 | 59 | ||
| 60 | if (start == MAX_RESOURCE) | ||
| 61 | return; | ||
| 62 | |||
| 64 | if (!merge) | 63 | if (!merge) |
| 65 | goto addit; | 64 | goto addit; |
| 66 | 65 | ||
| 67 | /* try to merge it with old one */ | 66 | /* try to merge it with old one */ |
| 68 | for (i = 0; i < info->res_num; i++) { | 67 | for (i = 0; i < info->res_num; i++) { |
| 69 | size_t final_start, final_end; | 68 | resource_size_t final_start, final_end; |
| 70 | size_t common_start, common_end; | 69 | resource_size_t common_start, common_end; |
| 71 | 70 | ||
| 72 | res = &info->res[i]; | 71 | res = &info->res[i]; |
| 73 | if (res->flags != flags) | 72 | if (res->flags != flags) |
| 74 | continue; | 73 | continue; |
| 75 | 74 | ||
| 76 | common_start = max((size_t)res->start, start); | 75 | common_start = max(res->start, start); |
| 77 | common_end = min((size_t)res->end, end); | 76 | common_end = min(res->end, end); |
| 78 | if (common_start > common_end + 1) | 77 | if (common_start > common_end + 1) |
| 79 | continue; | 78 | continue; |
| 80 | 79 | ||
| 81 | final_start = min((size_t)res->start, start); | 80 | final_start = min(res->start, start); |
| 82 | final_end = max((size_t)res->end, end); | 81 | final_end = max(res->end, end); |
| 83 | 82 | ||
| 84 | res->start = final_start; | 83 | res->start = final_start; |
| 85 | res->end = final_end; | 84 | res->end = final_end; |
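One subtlety in the merge path of update_res(): the test "common_start > common_end + 1" means ranges that merely touch are merged too. A worked example with invented values:

	/*
	 * existing res = [0x2000, 0x2fff], incoming = [0x1000, 0x1fff]:
	 *   common_start = max(0x2000, 0x1000) = 0x2000
	 *   common_end   = min(0x2fff, 0x1fff) = 0x1fff
	 * common_start == common_end + 1, so the two ranges are adjacent
	 * and collapse into [min, max] = [0x1000, 0x2fff].
	 */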
diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h index 731b64ee8d84..804a4b40c31a 100644 --- a/arch/x86/pci/bus_numa.h +++ b/arch/x86/pci/bus_numa.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | #ifdef CONFIG_X86_64 | 1 | #ifndef __BUS_NUMA_H |
| 2 | 2 | #define __BUS_NUMA_H | |
| 3 | /* | 3 | /* |
| 4 | * sub bus (transparent) will use entres from 3 to store extra from | 4 | * sub bus (transparent) will use entres from 3 to store extra from |
| 5 | * root, so need to make sure we have enough slot there. | 5 | * root, so need to make sure we have enough slot there. |
| @@ -19,8 +19,7 @@ struct pci_root_info { | |||
| 19 | #define PCI_ROOT_NR 4 | 19 | #define PCI_ROOT_NR 4 |
| 20 | extern int pci_root_num; | 20 | extern int pci_root_num; |
| 21 | extern struct pci_root_info pci_root_info[PCI_ROOT_NR]; | 21 | extern struct pci_root_info pci_root_info[PCI_ROOT_NR]; |
| 22 | extern int found_all_numa_early; | ||
| 23 | 22 | ||
| 24 | extern void update_res(struct pci_root_info *info, size_t start, | 23 | extern void update_res(struct pci_root_info *info, resource_size_t start, |
| 25 | size_t end, unsigned long flags, int merge); | 24 | resource_size_t end, unsigned long flags, int merge); |
| 26 | #endif | 25 | #endif |
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 5a8fbf8d4cac..dece3eb9c906 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
| @@ -255,10 +255,6 @@ void __init pcibios_resource_survey(void) | |||
| 255 | */ | 255 | */ |
| 256 | fs_initcall(pcibios_assign_resources); | 256 | fs_initcall(pcibios_assign_resources); |
| 257 | 257 | ||
| 258 | void __weak x86_pci_root_bus_res_quirks(struct pci_bus *b) | ||
| 259 | { | ||
| 260 | } | ||
| 261 | |||
| 262 | /* | 258 | /* |
| 263 | * If we set up a device for bus mastering, we need to check the latency | 259 | * If we set up a device for bus mastering, we need to check the latency |
| 264 | * timer as certain crappy BIOSes forget to set it properly. | 260 | * timer as certain crappy BIOSes forget to set it properly. |
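(The __weak stub can go because bus_numa.o, which provides the real x86_pci_root_bus_res_quirks(), is now built unconditionally — see the arch/x86/pci/Makefile hunk above.)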
diff --git a/drivers/pci/hotplug/ibmphp_res.c b/drivers/pci/hotplug/ibmphp_res.c index ec73294d1fa6..e2dc289f767c 100644 --- a/drivers/pci/hotplug/ibmphp_res.c +++ b/drivers/pci/hotplug/ibmphp_res.c | |||
| @@ -40,7 +40,7 @@ static void update_resources (struct bus_node *bus_cur, int type, int rangeno); | |||
| 40 | static int once_over (void); | 40 | static int once_over (void); |
| 41 | static int remove_ranges (struct bus_node *, struct bus_node *); | 41 | static int remove_ranges (struct bus_node *, struct bus_node *); |
| 42 | static int update_bridge_ranges (struct bus_node **); | 42 | static int update_bridge_ranges (struct bus_node **); |
| 43 | static int add_range (int type, struct range_node *, struct bus_node *); | 43 | static int add_bus_range (int type, struct range_node *, struct bus_node *); |
| 44 | static void fix_resources (struct bus_node *); | 44 | static void fix_resources (struct bus_node *); |
| 45 | static struct bus_node *find_bus_wprev (u8, struct bus_node **, u8); | 45 | static struct bus_node *find_bus_wprev (u8, struct bus_node **, u8); |
| 46 | 46 | ||
| @@ -133,7 +133,7 @@ static int __init alloc_bus_range (struct bus_node **new_bus, struct range_node | |||
| 133 | newrange->rangeno = 1; | 133 | newrange->rangeno = 1; |
| 134 | else { | 134 | else { |
| 135 | /* need to insert our range */ | 135 | /* need to insert our range */ |
| 136 | add_range (flag, newrange, newbus); | 136 | add_bus_range (flag, newrange, newbus); |
| 137 | debug ("%d resource Primary Bus inserted on bus %x [%x - %x]\n", flag, newbus->busno, newrange->start, newrange->end); | 137 | debug ("%d resource Primary Bus inserted on bus %x [%x - %x]\n", flag, newbus->busno, newrange->start, newrange->end); |
| 138 | } | 138 | } |
| 139 | 139 | ||
| @@ -384,7 +384,7 @@ int __init ibmphp_rsrc_init (void) | |||
| 384 | * Input: type of the resource, range to add, current bus | 384 | * Input: type of the resource, range to add, current bus |
| 385 | * Output: 0 or -1, bus and range ptrs | 385 | * Output: 0 or -1, bus and range ptrs |
| 386 | ********************************************************************************/ | 386 | ********************************************************************************/ |
| 387 | static int add_range (int type, struct range_node *range, struct bus_node *bus_cur) | 387 | static int add_bus_range (int type, struct range_node *range, struct bus_node *bus_cur) |
| 388 | { | 388 | { |
| 389 | struct range_node *range_cur = NULL; | 389 | struct range_node *range_cur = NULL; |
| 390 | struct range_node *range_prev; | 390 | struct range_node *range_prev; |
| @@ -455,7 +455,7 @@ static int add_range (int type, struct range_node *range, struct bus_node *bus_c | |||
| 455 | 455 | ||
| 456 | /******************************************************************************* | 456 | /******************************************************************************* |
| 457 | * This routine goes through the list of resources of type 'type' and updates | 457 | * This routine goes through the list of resources of type 'type' and updates |
| 458 | * the range numbers that they correspond to. It was called from add_range fnc | 458 | * the range numbers that they correspond to. It was called from add_bus_range fnc |
| 459 | * | 459 | * |
| 460 | * Input: bus, type of the resource, the rangeno starting from which to update | 460 | * Input: bus, type of the resource, the rangeno starting from which to update |
| 461 | ******************************************************************************/ | 461 | ******************************************************************************/ |
| @@ -1999,7 +1999,7 @@ static int __init update_bridge_ranges (struct bus_node **bus) | |||
| 1999 | 1999 | ||
| 2000 | if (bus_sec->noIORanges > 0) { | 2000 | if (bus_sec->noIORanges > 0) { |
| 2001 | if (!range_exists_already (range, bus_sec, IO)) { | 2001 | if (!range_exists_already (range, bus_sec, IO)) { |
| 2002 | add_range (IO, range, bus_sec); | 2002 | add_bus_range (IO, range, bus_sec); |
| 2003 | ++bus_sec->noIORanges; | 2003 | ++bus_sec->noIORanges; |
| 2004 | } else { | 2004 | } else { |
| 2005 | kfree (range); | 2005 | kfree (range); |
| @@ -2048,7 +2048,7 @@ static int __init update_bridge_ranges (struct bus_node **bus) | |||
| 2048 | 2048 | ||
| 2049 | if (bus_sec->noMemRanges > 0) { | 2049 | if (bus_sec->noMemRanges > 0) { |
| 2050 | if (!range_exists_already (range, bus_sec, MEM)) { | 2050 | if (!range_exists_already (range, bus_sec, MEM)) { |
| 2051 | add_range (MEM, range, bus_sec); | 2051 | add_bus_range (MEM, range, bus_sec); |
| 2052 | ++bus_sec->noMemRanges; | 2052 | ++bus_sec->noMemRanges; |
| 2053 | } else { | 2053 | } else { |
| 2054 | kfree (range); | 2054 | kfree (range); |
| @@ -2102,7 +2102,7 @@ static int __init update_bridge_ranges (struct bus_node **bus) | |||
| 2102 | 2102 | ||
| 2103 | if (bus_sec->noPFMemRanges > 0) { | 2103 | if (bus_sec->noPFMemRanges > 0) { |
| 2104 | if (!range_exists_already (range, bus_sec, PFMEM)) { | 2104 | if (!range_exists_already (range, bus_sec, PFMEM)) { |
| 2105 | add_range (PFMEM, range, bus_sec); | 2105 | add_bus_range (PFMEM, range, bus_sec); |
| 2106 | ++bus_sec->noPFMemRanges; | 2106 | ++bus_sec->noPFMemRanges; |
| 2107 | } else { | 2107 | } else { |
| 2108 | kfree (range); | 2108 | kfree (range); |
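(The rename from add_range() to add_bus_range() here is purely mechanical: it frees the global name for the generic add_range() introduced in <linux/range.h> below.)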
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index b10ec49ee2dd..266ab9291232 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h | |||
| @@ -23,6 +23,7 @@ extern unsigned long max_pfn; | |||
| 23 | extern unsigned long saved_max_pfn; | 23 | extern unsigned long saved_max_pfn; |
| 24 | #endif | 24 | #endif |
| 25 | 25 | ||
| 26 | #ifndef CONFIG_NO_BOOTMEM | ||
| 26 | /* | 27 | /* |
| 27 | * node_bootmem_map is a map pointer - the bits represent all physical | 28 | * node_bootmem_map is a map pointer - the bits represent all physical |
| 28 | * memory pages (including holes) on the node. | 29 | * memory pages (including holes) on the node. |
| @@ -37,6 +38,7 @@ typedef struct bootmem_data { | |||
| 37 | } bootmem_data_t; | 38 | } bootmem_data_t; |
| 38 | 39 | ||
| 39 | extern bootmem_data_t bootmem_node_data[]; | 40 | extern bootmem_data_t bootmem_node_data[]; |
| 41 | #endif | ||
| 40 | 42 | ||
| 41 | extern unsigned long bootmem_bootmap_pages(unsigned long); | 43 | extern unsigned long bootmem_bootmap_pages(unsigned long); |
| 42 | 44 | ||
| @@ -46,6 +48,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat, | |||
| 46 | unsigned long endpfn); | 48 | unsigned long endpfn); |
| 47 | extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); | 49 | extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); |
| 48 | 50 | ||
| 51 | unsigned long free_all_memory_core_early(int nodeid); | ||
| 49 | extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); | 52 | extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); |
| 50 | extern unsigned long free_all_bootmem(void); | 53 | extern unsigned long free_all_bootmem(void); |
| 51 | 54 | ||
| @@ -84,6 +87,10 @@ extern void *__alloc_bootmem_node(pg_data_t *pgdat, | |||
| 84 | unsigned long size, | 87 | unsigned long size, |
| 85 | unsigned long align, | 88 | unsigned long align, |
| 86 | unsigned long goal); | 89 | unsigned long goal); |
| 90 | void *__alloc_bootmem_node_high(pg_data_t *pgdat, | ||
| 91 | unsigned long size, | ||
| 92 | unsigned long align, | ||
| 93 | unsigned long goal); | ||
| 87 | extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat, | 94 | extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat, |
| 88 | unsigned long size, | 95 | unsigned long size, |
| 89 | unsigned long align, | 96 | unsigned long align, |
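(With CONFIG_NO_BOOTMEM the bootmem_data type and bootmem_node_data[] array disappear entirely, which is why the mmzone.h hunk below also compiles pg_data_t's ->bdata member out.)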
diff --git a/include/linux/early_res.h b/include/linux/early_res.h new file mode 100644 index 000000000000..29c09f57a13c --- /dev/null +++ b/include/linux/early_res.h | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | #ifndef _LINUX_EARLY_RES_H | ||
| 2 | #define _LINUX_EARLY_RES_H | ||
| 3 | #ifdef __KERNEL__ | ||
| 4 | |||
| 5 | extern void reserve_early(u64 start, u64 end, char *name); | ||
| 6 | extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); | ||
| 7 | extern void free_early(u64 start, u64 end); | ||
| 8 | void free_early_partial(u64 start, u64 end); | ||
| 9 | extern void early_res_to_bootmem(u64 start, u64 end); | ||
| 10 | |||
| 11 | void reserve_early_without_check(u64 start, u64 end, char *name); | ||
| 12 | u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, | ||
| 13 | u64 size, u64 align); | ||
| 14 | u64 find_early_area_size(u64 ei_start, u64 ei_last, u64 start, | ||
| 15 | u64 *sizep, u64 align); | ||
| 16 | u64 find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align); | ||
| 17 | u64 get_max_mapped(void); | ||
| 18 | #include <linux/range.h> | ||
| 19 | int get_free_all_memory_range(struct range **rangep, int nodeid); | ||
| 20 | |||
| 21 | #endif /* __KERNEL__ */ | ||
| 22 | |||
| 23 | #endif /* _LINUX_EARLY_RES_H */ | ||
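A minimal usage sketch of the now-generic API (invented addresses; assumes the usual boot ordering where early_res is live before bootmem/slab). Note that free_early() demands an exact match against a reserved entry, while free_early_partial() trims overlaps:

	#include <linux/early_res.h>

	static void __init early_res_usage_sketch(void)
	{
		/* claim [16MB, 20MB) long before any allocator exists */
		reserve_early(16ULL << 20, 20ULL << 20, "FW TABLE");

		/* give back the tail half once it turns out to be unused... */
		free_early_partial(18ULL << 20, 20ULL << 20);

		/* ...and the exact remaining entry when fully consumed */
		free_early(16ULL << 20, 18ULL << 20);
	}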
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 1221d2331a6d..7f0707463360 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h | |||
| @@ -44,6 +44,16 @@ extern const char linux_proc_banner[]; | |||
| 44 | 44 | ||
| 45 | #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) | 45 | #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) |
| 46 | 46 | ||
| 47 | /* | ||
| 48 | * This looks more complex than it should be. But we need to | ||
| 49 | * get the type for the ~ right in round_down (it needs to be | ||
| 50 | * as wide as the result!), and we want to evaluate the macro | ||
| 51 | * arguments just once each. | ||
| 52 | */ | ||
| 53 | #define __round_mask(x, y) ((__typeof__(x))((y)-1)) | ||
| 54 | #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) | ||
| 55 | #define round_down(x, y) ((x) & ~__round_mask(x, y)) | ||
| 56 | |||
| 47 | #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) | 57 | #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) |
| 48 | #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) | 58 | #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) |
| 49 | #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) | 59 | #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) |
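A worked example of the new helpers (y must be a power of two; the cast is the whole point):

	/*
	 *   __round_mask(x, 8) == (__typeof__(x))7
	 *   round_up(9, 8)    == ((9 - 1) | 7) + 1 == 15 + 1 == 16
	 *   round_up(16, 8)   == ((16 - 1) | 7) + 1 == 15 + 1 == 16
	 *   round_down(9, 8)  == 9 & ~7 == 8
	 *
	 * The cast makes the mask as wide as x: without it, an unsigned
	 * 32-bit mask would zero-extend and ~mask would wipe the high 32
	 * bits of a 64-bit x in round_down(). Unlike roundup() just below,
	 * these also evaluate each macro argument only once.
	 */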
diff --git a/include/linux/mm.h b/include/linux/mm.h index 2e724c877ec1..90957f14195c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include <linux/prio_tree.h> | 12 | #include <linux/prio_tree.h> |
| 13 | #include <linux/debug_locks.h> | 13 | #include <linux/debug_locks.h> |
| 14 | #include <linux/mm_types.h> | 14 | #include <linux/mm_types.h> |
| 15 | #include <linux/range.h> | ||
| 15 | 16 | ||
| 16 | struct mempolicy; | 17 | struct mempolicy; |
| 17 | struct anon_vma; | 18 | struct anon_vma; |
| @@ -1049,6 +1050,10 @@ extern void get_pfn_range_for_nid(unsigned int nid, | |||
| 1049 | extern unsigned long find_min_pfn_with_active_regions(void); | 1050 | extern unsigned long find_min_pfn_with_active_regions(void); |
| 1050 | extern void free_bootmem_with_active_regions(int nid, | 1051 | extern void free_bootmem_with_active_regions(int nid, |
| 1051 | unsigned long max_low_pfn); | 1052 | unsigned long max_low_pfn); |
| 1053 | int add_from_early_node_map(struct range *range, int az, | ||
| 1054 | int nr_range, int nid); | ||
| 1055 | void *__alloc_memory_core_early(int nodeid, u64 size, u64 align, | ||
| 1056 | u64 goal, u64 limit); | ||
| 1052 | typedef int (*work_fn_t)(unsigned long, unsigned long, void *); | 1057 | typedef int (*work_fn_t)(unsigned long, unsigned long, void *); |
| 1053 | extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); | 1058 | extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); |
| 1054 | extern void sparse_memory_present_with_active_regions(int nid); | 1059 | extern void sparse_memory_present_with_active_regions(int nid); |
| @@ -1317,12 +1322,19 @@ extern int randomize_va_space; | |||
| 1317 | const char * arch_vma_name(struct vm_area_struct *vma); | 1322 | const char * arch_vma_name(struct vm_area_struct *vma); |
| 1318 | void print_vma_addr(char *prefix, unsigned long rip); | 1323 | void print_vma_addr(char *prefix, unsigned long rip); |
| 1319 | 1324 | ||
| 1325 | void sparse_mem_maps_populate_node(struct page **map_map, | ||
| 1326 | unsigned long pnum_begin, | ||
| 1327 | unsigned long pnum_end, | ||
| 1328 | unsigned long map_count, | ||
| 1329 | int nodeid); | ||
| 1330 | |||
| 1320 | struct page *sparse_mem_map_populate(unsigned long pnum, int nid); | 1331 | struct page *sparse_mem_map_populate(unsigned long pnum, int nid); |
| 1321 | pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); | 1332 | pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); |
| 1322 | pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node); | 1333 | pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node); |
| 1323 | pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); | 1334 | pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); |
| 1324 | pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); | 1335 | pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); |
| 1325 | void *vmemmap_alloc_block(unsigned long size, int node); | 1336 | void *vmemmap_alloc_block(unsigned long size, int node); |
| 1337 | void *vmemmap_alloc_block_buf(unsigned long size, int node); | ||
| 1326 | void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); | 1338 | void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); |
| 1327 | int vmemmap_populate_basepages(struct page *start_page, | 1339 | int vmemmap_populate_basepages(struct page *start_page, |
| 1328 | unsigned long pages, int node); | 1340 | unsigned long pages, int node); |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 41acd4bf7664..a01a103341bd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
| @@ -612,7 +612,9 @@ typedef struct pglist_data { | |||
| 612 | struct page_cgroup *node_page_cgroup; | 612 | struct page_cgroup *node_page_cgroup; |
| 613 | #endif | 613 | #endif |
| 614 | #endif | 614 | #endif |
| 615 | #ifndef CONFIG_NO_BOOTMEM | ||
| 615 | struct bootmem_data *bdata; | 616 | struct bootmem_data *bdata; |
| 617 | #endif | ||
| 616 | #ifdef CONFIG_MEMORY_HOTPLUG | 618 | #ifdef CONFIG_MEMORY_HOTPLUG |
| 617 | /* | 619 | /* |
| 618 | * Must be held any time you expect node_start_pfn, node_present_pages | 620 | * Must be held any time you expect node_start_pfn, node_present_pages |
diff --git a/include/linux/range.h b/include/linux/range.h new file mode 100644 index 000000000000..bd184a5db791 --- /dev/null +++ b/include/linux/range.h | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | #ifndef _LINUX_RANGE_H | ||
| 2 | #define _LINUX_RANGE_H | ||
| 3 | |||
| 4 | struct range { | ||
| 5 | u64 start; | ||
| 6 | u64 end; | ||
| 7 | }; | ||
| 8 | |||
| 9 | int add_range(struct range *range, int az, int nr_range, | ||
| 10 | u64 start, u64 end); | ||
| 11 | |||
| 12 | |||
| 13 | int add_range_with_merge(struct range *range, int az, int nr_range, | ||
| 14 | u64 start, u64 end); | ||
| 15 | |||
| 16 | void subtract_range(struct range *range, int az, u64 start, u64 end); | ||
| 17 | |||
| 18 | int clean_sort_range(struct range *range, int az); | ||
| 19 | |||
| 20 | void sort_range(struct range *range, int nr_range); | ||
| 21 | |||
| 22 | #define MAX_RESOURCE ((resource_size_t)~0) | ||
| 23 | static inline resource_size_t cap_resource(u64 val) | ||
| 24 | { | ||
| 25 | if (val > MAX_RESOURCE) | ||
| 26 | return MAX_RESOURCE; | ||
| 27 | |||
| 28 | return val; | ||
| 29 | } | ||
| 30 | #endif | ||
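A sketch of the intended half-open semantics (invented values; assumes the array has a spare slot for subtract_range() to split an entry into two):

	#include <linux/kernel.h>
	#include <linux/string.h>
	#include <linux/range.h>

	static int __init range_demo(void)
	{
		struct range r[4];
		int nr = 0;

		memset(r, 0, sizeof(r));
		nr = add_range(r, ARRAY_SIZE(r), nr, 0x1000, 0x4000); /* [0x1000, 0x4000) */
		subtract_range(r, ARRAY_SIZE(r), 0x2000, 0x3000);     /* punch a hole */

		/* compact + sort: [0x1000, 0x2000) and [0x3000, 0x4000) remain */
		return clean_sort_range(r, ARRAY_SIZE(r));            /* returns 2 */
	}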
diff --git a/kernel/Makefile b/kernel/Makefile index 6aebdeb2aa34..7b974699f8c2 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
| @@ -10,7 +10,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ | |||
| 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ | 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ |
| 11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ | 11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ |
| 12 | notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ | 12 | notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ |
| 13 | async.o | 13 | async.o range.o |
| 14 | obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o | ||
| 14 | obj-y += groups.o | 15 | obj-y += groups.o |
| 15 | 16 | ||
| 16 | ifdef CONFIG_FUNCTION_TRACER | 17 | ifdef CONFIG_FUNCTION_TRACER |
diff --git a/kernel/early_res.c b/kernel/early_res.c new file mode 100644 index 000000000000..3cb2c661bb78 --- /dev/null +++ b/kernel/early_res.c | |||
| @@ -0,0 +1,578 @@ | |||
| 1 | /* | ||
| 2 | * early_res: can be used to replace bootmem | ||
| 3 | */ | ||
| 4 | #include <linux/kernel.h> | ||
| 5 | #include <linux/types.h> | ||
| 6 | #include <linux/init.h> | ||
| 7 | #include <linux/bootmem.h> | ||
| 8 | #include <linux/mm.h> | ||
| 9 | #include <linux/early_res.h> | ||
| 10 | |||
| 11 | /* | ||
| 12 | * Early reserved memory areas. | ||
| 13 | */ | ||
| 14 | /* | ||
| 15 | * need to make sure this one is big enough before | ||
| 16 | * find_fw_memmap_area can be used | ||
| 17 | */ | ||
| 18 | #define MAX_EARLY_RES_X 32 | ||
| 19 | |||
| 20 | struct early_res { | ||
| 21 | u64 start, end; | ||
| 22 | char name[15]; | ||
| 23 | char overlap_ok; | ||
| 24 | }; | ||
| 25 | static struct early_res early_res_x[MAX_EARLY_RES_X] __initdata; | ||
| 26 | |||
| 27 | static int max_early_res __initdata = MAX_EARLY_RES_X; | ||
| 28 | static struct early_res *early_res __initdata = &early_res_x[0]; | ||
| 29 | static int early_res_count __initdata; | ||
| 30 | |||
| 31 | static int __init find_overlapped_early(u64 start, u64 end) | ||
| 32 | { | ||
| 33 | int i; | ||
| 34 | struct early_res *r; | ||
| 35 | |||
| 36 | for (i = 0; i < max_early_res && early_res[i].end; i++) { | ||
| 37 | r = &early_res[i]; | ||
| 38 | if (end > r->start && start < r->end) | ||
| 39 | break; | ||
| 40 | } | ||
| 41 | |||
| 42 | return i; | ||
| 43 | } | ||
| 44 | |||
| 45 | /* | ||
| 46 | * Drop the i-th range from the early reservation map, | ||
| 47 | * by copying any higher ranges down one over it, and | ||
| 48 | * clearing what had been the last slot. | ||
| 49 | */ | ||
| 50 | static void __init drop_range(int i) | ||
| 51 | { | ||
| 52 | int j; | ||
| 53 | |||
| 54 | for (j = i + 1; j < max_early_res && early_res[j].end; j++) | ||
| 55 | ; | ||
| 56 | |||
| 57 | memmove(&early_res[i], &early_res[i + 1], | ||
| 58 | (j - 1 - i) * sizeof(struct early_res)); | ||
| 59 | |||
| 60 | early_res[j - 1].end = 0; | ||
| 61 | early_res_count--; | ||
| 62 | } | ||
| 63 | |||
| 64 | static void __init drop_range_partial(int i, u64 start, u64 end) | ||
| 65 | { | ||
| 66 | u64 common_start, common_end; | ||
| 67 | u64 old_start, old_end; | ||
| 68 | |||
| 69 | old_start = early_res[i].start; | ||
| 70 | old_end = early_res[i].end; | ||
| 71 | common_start = max(old_start, start); | ||
| 72 | common_end = min(old_end, end); | ||
| 73 | |||
| 74 | /* no overlap ? */ | ||
| 75 | if (common_start >= common_end) | ||
| 76 | return; | ||
| 77 | |||
| 78 | if (old_start < common_start) { | ||
| 79 | /* make head segment */ | ||
| 80 | early_res[i].end = common_start; | ||
| 81 | if (old_end > common_end) { | ||
| 82 | char name[15]; | ||
| 83 | |||
| 84 | /* | ||
| 85 | * Save a local copy of the name, since the | ||
| 86 | * early_res array could get resized inside | ||
| 87 | * reserve_early_without_check() -> | ||
| 88 | * __check_and_double_early_res(), which would | ||
| 89 | * make the current name pointer invalid. | ||
| 90 | */ | ||
| 91 | strncpy(name, early_res[i].name, | ||
| 92 | sizeof(early_res[i].name) - 1); | ||
| 93 | /* add another for left over on tail */ | ||
| 94 | reserve_early_without_check(common_end, old_end, name); | ||
| 95 | } | ||
| 96 | return; | ||
| 97 | } else { | ||
| 98 | if (old_end > common_end) { | ||
| 99 | /* reuse the entry for tail left */ | ||
| 100 | early_res[i].start = common_end; | ||
| 101 | return; | ||
| 102 | } | ||
| 103 | /* all covered */ | ||
| 104 | drop_range(i); | ||
| 105 | } | ||
| 106 | } | ||
| 107 | |||
| 108 | /* | ||
| 109 | * Split any existing ranges that: | ||
| 110 | * 1) are marked 'overlap_ok', and | ||
| 111 | * 2) overlap with the stated range [start, end) | ||
| 112 | * into whatever portion (if any) of the existing range is entirely | ||
| 113 | * below or entirely above the stated range. Drop the portion | ||
| 114 | * of the existing range that overlaps with the stated range, | ||
| 115 | * which will allow the caller of this routine to then add that | ||
| 116 | * stated range without conflicting with any existing range. | ||
| 117 | */ | ||
| 118 | static void __init drop_overlaps_that_are_ok(u64 start, u64 end) | ||
| 119 | { | ||
| 120 | int i; | ||
| 121 | struct early_res *r; | ||
| 122 | u64 lower_start, lower_end; | ||
| 123 | u64 upper_start, upper_end; | ||
| 124 | char name[15]; | ||
| 125 | |||
| 126 | for (i = 0; i < max_early_res && early_res[i].end; i++) { | ||
| 127 | r = &early_res[i]; | ||
| 128 | |||
| 129 | /* Continue past non-overlapping ranges */ | ||
| 130 | if (end <= r->start || start >= r->end) | ||
| 131 | continue; | ||
| 132 | |||
| 133 | /* | ||
| 134 | * Leave non-ok overlaps as is; let caller | ||
| 135 | * panic "Overlapping early reservations" | ||
| 136 | * when it hits this overlap. | ||
| 137 | */ | ||
| 138 | if (!r->overlap_ok) | ||
| 139 | return; | ||
| 140 | |||
| 141 | /* | ||
| 142 | * We have an ok overlap. We will drop it from the early | ||
| 143 | * reservation map, and add back in any non-overlapping | ||
| 144 | * portions (lower or upper) as separate, overlap_ok, | ||
| 145 | * non-overlapping ranges. | ||
| 146 | */ | ||
| 147 | |||
| 148 | /* 1. Note any non-overlapping (lower or upper) ranges. */ | ||
| 149 | strncpy(name, r->name, sizeof(name) - 1); | ||
| 150 | |||
| 151 | lower_start = lower_end = 0; | ||
| 152 | upper_start = upper_end = 0; | ||
| 153 | if (r->start < start) { | ||
| 154 | lower_start = r->start; | ||
| 155 | lower_end = start; | ||
| 156 | } | ||
| 157 | if (r->end > end) { | ||
| 158 | upper_start = end; | ||
| 159 | upper_end = r->end; | ||
| 160 | } | ||
| 161 | |||
| 162 | /* 2. Drop the original ok overlapping range */ | ||
| 163 | drop_range(i); | ||
| 164 | |||
| 165 | i--; /* resume for-loop on copied down entry */ | ||
| 166 | |||
| 167 | /* 3. Add back in any non-overlapping ranges. */ | ||
| 168 | if (lower_end) | ||
| 169 | reserve_early_overlap_ok(lower_start, lower_end, name); | ||
| 170 | if (upper_end) | ||
| 171 | reserve_early_overlap_ok(upper_start, upper_end, name); | ||
| 172 | } | ||
| 173 | } | ||
| 174 | |||
| 175 | static void __init __reserve_early(u64 start, u64 end, char *name, | ||
| 176 | int overlap_ok) | ||
| 177 | { | ||
| 178 | int i; | ||
| 179 | struct early_res *r; | ||
| 180 | |||
| 181 | i = find_overlapped_early(start, end); | ||
| 182 | if (i >= max_early_res) | ||
| 183 | panic("Too many early reservations"); | ||
| 184 | r = &early_res[i]; | ||
| 185 | if (r->end) | ||
| 186 | panic("Overlapping early reservations " | ||
| 187 | "%llx-%llx %s to %llx-%llx %s\n", | ||
| 188 | start, end - 1, name ? name : "", r->start, | ||
| 189 | r->end - 1, r->name); | ||
| 190 | r->start = start; | ||
| 191 | r->end = end; | ||
| 192 | r->overlap_ok = overlap_ok; | ||
| 193 | if (name) | ||
| 194 | strncpy(r->name, name, sizeof(r->name) - 1); | ||
| 195 | early_res_count++; | ||
| 196 | } | ||
| 197 | |||
| 198 | /* | ||
| 199 | * A few early reservations come here. | ||
| 200 | * | ||
| 201 | * The 'overlap_ok' in the name of this routine does -not- mean it | ||
| 202 | * is ok for these reservations to overlap an earlier reservation. | ||
| 203 | * Rather it means that it is ok for subsequent reservations to | ||
| 204 | * overlap this one. | ||
| 205 | * | ||
| 206 | * Use this entry point to reserve early ranges when you are doing | ||
| 207 | * so out of "Paranoia", reserving perhaps more memory than you need, | ||
| 208 | * just in case, and don't mind a subsequent overlapping reservation | ||
| 209 | * that is known to be needed. | ||
| 210 | * | ||
| 211 | * The drop_overlaps_that_are_ok() call here isn't really needed. | ||
| 212 | * It would be needed if we had two colliding 'overlap_ok' | ||
| 213 | * reservations, so that the second such would not panic on the | ||
| 214 | * overlap with the first. We don't have any such as of this | ||
| 215 | * writing, but might as well tolerate such if it happens in | ||
| 216 | * the future. | ||
| 217 | */ | ||
| 218 | void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) | ||
| 219 | { | ||
| 220 | drop_overlaps_that_are_ok(start, end); | ||
| 221 | __reserve_early(start, end, name, 1); | ||
| 222 | } | ||
| 223 | |||
| 224 | static void __init __check_and_double_early_res(u64 ex_start, u64 ex_end) | ||
| 225 | { | ||
| 226 | u64 start, end, size, mem; | ||
| 227 | struct early_res *new; | ||
| 228 | |||
| 229 | /* do we have enough slots left ? */ | ||
| 230 | if ((max_early_res - early_res_count) > max(max_early_res/8, 2)) | ||
| 231 | return; | ||
| 232 | |||
| 233 | /* double it */ | ||
| 234 | mem = -1ULL; | ||
| 235 | size = sizeof(struct early_res) * max_early_res * 2; | ||
| 236 | if (early_res == early_res_x) | ||
| 237 | start = 0; | ||
| 238 | else | ||
| 239 | start = early_res[0].end; | ||
| 240 | end = ex_start; | ||
| 241 | if (start + size < end) | ||
| 242 | mem = find_fw_memmap_area(start, end, size, | ||
| 243 | sizeof(struct early_res)); | ||
| 244 | if (mem == -1ULL) { | ||
| 245 | start = ex_end; | ||
| 246 | end = get_max_mapped(); | ||
| 247 | if (start + size < end) | ||
| 248 | mem = find_fw_memmap_area(start, end, size, | ||
| 249 | sizeof(struct early_res)); | ||
| 250 | } | ||
| 251 | if (mem == -1ULL) | ||
| 252 | panic("can not find more space for early_res array"); | ||
| 253 | |||
| 254 | new = __va(mem); | ||
| 255 | /* save the first one for own */ | ||
| 256 | new[0].start = mem; | ||
| 257 | new[0].end = mem + size; | ||
| 258 | new[0].overlap_ok = 0; | ||
| 259 | /* copy old to new */ | ||
| 260 | if (early_res == early_res_x) { | ||
| 261 | memcpy(&new[1], &early_res[0], | ||
| 262 | sizeof(struct early_res) * max_early_res); | ||
| 263 | memset(&new[max_early_res+1], 0, | ||
| 264 | sizeof(struct early_res) * (max_early_res - 1)); | ||
| 265 | early_res_count++; | ||
| 266 | } else { | ||
| 267 | memcpy(&new[1], &early_res[1], | ||
| 268 | sizeof(struct early_res) * (max_early_res - 1)); | ||
| 269 | memset(&new[max_early_res], 0, | ||
| 270 | sizeof(struct early_res) * max_early_res); | ||
| 271 | } | ||
| 272 | memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res); | ||
| 273 | early_res = new; | ||
| 274 | max_early_res *= 2; | ||
| 275 | printk(KERN_DEBUG "early_res array is doubled to %d at [%llx - %llx]\n", | ||
| 276 | max_early_res, mem, mem + size - 1); | ||
| 277 | } | ||
| 278 | |||
| 279 | /* | ||
| 280 | * Most early reservations come here. | ||
| 281 | * | ||
| 282 | * We first have drop_overlaps_that_are_ok() drop any pre-existing | ||
| 283 | * 'overlap_ok' ranges, so that we can then reserve this memory | ||
| 284 | * range without risk of panic'ing on an overlapping overlap_ok | ||
| 285 | * early reservation. | ||
| 286 | */ | ||
| 287 | void __init reserve_early(u64 start, u64 end, char *name) | ||
| 288 | { | ||
| 289 | if (start >= end) | ||
| 290 | return; | ||
| 291 | |||
| 292 | __check_and_double_early_res(start, end); | ||
| 293 | |||
| 294 | drop_overlaps_that_are_ok(start, end); | ||
| 295 | __reserve_early(start, end, name, 0); | ||
| 296 | } | ||
| 297 | |||
| 298 | void __init reserve_early_without_check(u64 start, u64 end, char *name) | ||
| 299 | { | ||
| 300 | struct early_res *r; | ||
| 301 | |||
| 302 | if (start >= end) | ||
| 303 | return; | ||
| 304 | |||
| 305 | __check_and_double_early_res(start, end); | ||
| 306 | |||
| 307 | r = &early_res[early_res_count]; | ||
| 308 | |||
| 309 | r->start = start; | ||
| 310 | r->end = end; | ||
| 311 | r->overlap_ok = 0; | ||
| 312 | if (name) | ||
| 313 | strncpy(r->name, name, sizeof(r->name) - 1); | ||
| 314 | early_res_count++; | ||
| 315 | } | ||
| 316 | |||
| 317 | void __init free_early(u64 start, u64 end) | ||
| 318 | { | ||
| 319 | struct early_res *r; | ||
| 320 | int i; | ||
| 321 | |||
| 322 | i = find_overlapped_early(start, end); | ||
| 323 | r = &early_res[i]; | ||
| 324 | if (i >= max_early_res || r->end != end || r->start != start) | ||
| 325 | panic("free_early on not reserved area: %llx-%llx!", | ||
| 326 | start, end - 1); | ||
| 327 | |||
| 328 | drop_range(i); | ||
| 329 | } | ||
| 330 | |||
| 331 | void __init free_early_partial(u64 start, u64 end) | ||
| 332 | { | ||
| 333 | struct early_res *r; | ||
| 334 | int i; | ||
| 335 | |||
| 336 | try_next: | ||
| 337 | i = find_overlapped_early(start, end); | ||
| 338 | if (i >= max_early_res) | ||
| 339 | return; | ||
| 340 | |||
| 341 | r = &early_res[i]; | ||
| 342 | /* hole ? */ | ||
| 343 | if (r->end >= end && r->start <= start) { | ||
| 344 | drop_range_partial(i, start, end); | ||
| 345 | return; | ||
| 346 | } | ||
| 347 | |||
| 348 | drop_range_partial(i, start, end); | ||
| 349 | goto try_next; | ||
| 350 | } | ||
| 351 | |||
| 352 | #ifdef CONFIG_NO_BOOTMEM | ||
| 353 | static void __init subtract_early_res(struct range *range, int az) | ||
| 354 | { | ||
| 355 | int i, count; | ||
| 356 | u64 final_start, final_end; | ||
| 357 | int idx = 0; | ||
| 358 | |||
| 359 | count = 0; | ||
| 360 | for (i = 0; i < max_early_res && early_res[i].end; i++) | ||
| 361 | count++; | ||
| 362 | |||
| 363 | /* need to skip first one? */ | ||
| 364 | if (early_res != early_res_x) | ||
| 365 | idx = 1; | ||
| 366 | |||
| 367 | #define DEBUG_PRINT_EARLY_RES 1 | ||
| 368 | |||
| 369 | #if DEBUG_PRINT_EARLY_RES | ||
| 370 | printk(KERN_INFO "Subtract (%d early reservations)\n", count); | ||
| 371 | #endif | ||
| 372 | for (i = idx; i < count; i++) { | ||
| 373 | struct early_res *r = &early_res[i]; | ||
| 374 | #if DEBUG_PRINT_EARLY_RES | ||
| 375 | printk(KERN_INFO " #%d [%010llx - %010llx] %15s\n", i, | ||
| 376 | r->start, r->end, r->name); | ||
| 377 | #endif | ||
| 378 | final_start = PFN_DOWN(r->start); | ||
| 379 | final_end = PFN_UP(r->end); | ||
| 380 | if (final_start >= final_end) | ||
| 381 | continue; | ||
| 382 | subtract_range(range, az, final_start, final_end); | ||
| 383 | } | ||
| 384 | |||
| 385 | } | ||
| 386 | |||
| 387 | int __init get_free_all_memory_range(struct range **rangep, int nodeid) | ||
| 388 | { | ||
| 389 | int i, count; | ||
| 390 | u64 start = 0, end; | ||
| 391 | u64 size; | ||
| 392 | u64 mem; | ||
| 393 | struct range *range; | ||
| 394 | int nr_range; | ||
| 395 | |||
| 396 | count = 0; | ||
| 397 | for (i = 0; i < max_early_res && early_res[i].end; i++) | ||
| 398 | count++; | ||
| 399 | |||
| 400 | count *= 2; | ||
| 401 | |||
| 402 | size = sizeof(struct range) * count; | ||
| 403 | end = get_max_mapped(); | ||
| 404 | #ifdef MAX_DMA32_PFN | ||
| 405 | if (end > (MAX_DMA32_PFN << PAGE_SHIFT)) | ||
| 406 | start = MAX_DMA32_PFN << PAGE_SHIFT; | ||
| 407 | #endif | ||
| 408 | mem = find_fw_memmap_area(start, end, size, sizeof(struct range)); | ||
| 409 | if (mem == -1ULL) | ||
| 410 | panic("can not find more space for range free"); | ||
| 411 | |||
| 412 | range = __va(mem); | ||
| 413 | /* use early_node_map[] and early_res to get range array at first */ | ||
| 414 | memset(range, 0, size); | ||
| 415 | nr_range = 0; | ||
| 416 | |||
| 417 | /* need to go over early_node_map to find out good range for node */ | ||
| 418 | nr_range = add_from_early_node_map(range, count, nr_range, nodeid); | ||
| 419 | #ifdef CONFIG_X86_32 | ||
| 420 | subtract_range(range, count, max_low_pfn, -1ULL); | ||
| 421 | #endif | ||
| 422 | subtract_early_res(range, count); | ||
| 423 | nr_range = clean_sort_range(range, count); | ||
| 424 | |||
| 425 | /* need to clear it ? */ | ||
| 426 | if (nodeid == MAX_NUMNODES) { | ||
| 427 | memset(&early_res[0], 0, | ||
| 428 | sizeof(struct early_res) * max_early_res); | ||
| 429 | early_res = NULL; | ||
| 430 | max_early_res = 0; | ||
| 431 | } | ||
| 432 | |||
| 433 | *rangep = range; | ||
| 434 | return nr_range; | ||
| 435 | } | ||
| 436 | #else | ||
| 437 | void __init early_res_to_bootmem(u64 start, u64 end) | ||
| 438 | { | ||
| 439 | int i, count; | ||
| 440 | u64 final_start, final_end; | ||
| 441 | int idx = 0; | ||
| 442 | |||
| 443 | count = 0; | ||
| 444 | for (i = 0; i < max_early_res && early_res[i].end; i++) | ||
| 445 | count++; | ||
| 446 | |||
| 447 | /* need to skip first one? */ | ||
| 448 | if (early_res != early_res_x) | ||
| 449 | idx = 1; | ||
| 450 | |||
| 451 | printk(KERN_INFO "(%d/%d early reservations) ==> bootmem [%010llx - %010llx]\n", | ||
| 452 | count - idx, max_early_res, start, end); | ||
| 453 | for (i = idx; i < count; i++) { | ||
| 454 | struct early_res *r = &early_res[i]; | ||
| 455 | printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, | ||
| 456 | r->start, r->end, r->name); | ||
| 457 | final_start = max(start, r->start); | ||
| 458 | final_end = min(end, r->end); | ||
| 459 | if (final_start >= final_end) { | ||
| 460 | printk(KERN_CONT "\n"); | ||
| 461 | continue; | ||
| 462 | } | ||
| 463 | printk(KERN_CONT " ==> [%010llx - %010llx]\n", | ||
| 464 | final_start, final_end); | ||
| 465 | reserve_bootmem_generic(final_start, final_end - final_start, | ||
| 466 | BOOTMEM_DEFAULT); | ||
| 467 | } | ||
| 468 | /* clear them */ | ||
| 469 | memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res); | ||
| 470 | early_res = NULL; | ||
| 471 | max_early_res = 0; | ||
| 472 | early_res_count = 0; | ||
| 473 | } | ||
| 474 | #endif | ||
| 475 | |||
| 476 | /* Check for already reserved areas */ | ||
| 477 | static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) | ||
| 478 | { | ||
| 479 | int i; | ||
| 480 | u64 addr = *addrp; | ||
| 481 | int changed = 0; | ||
| 482 | struct early_res *r; | ||
| 483 | again: | ||
| 484 | i = find_overlapped_early(addr, addr + size); | ||
| 485 | r = &early_res[i]; | ||
| 486 | if (i < max_early_res && r->end) { | ||
| 487 | *addrp = addr = round_up(r->end, align); | ||
| 488 | changed = 1; | ||
| 489 | goto again; | ||
| 490 | } | ||
| 491 | return changed; | ||
| 492 | } | ||
| 493 | |||
| 494 | /* Check for already reserved areas */ | ||
| 495 | static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) | ||
| 496 | { | ||
| 497 | int i; | ||
| 498 | u64 addr = *addrp, last; | ||
| 499 | u64 size = *sizep; | ||
| 500 | int changed = 0; | ||
| 501 | again: | ||
| 502 | last = addr + size; | ||
| 503 | for (i = 0; i < max_early_res && early_res[i].end; i++) { | ||
| 504 | struct early_res *r = &early_res[i]; | ||
| 505 | if (last > r->start && addr < r->start) { | ||
| 506 | size = r->start - addr; | ||
| 507 | changed = 1; | ||
| 508 | goto again; | ||
| 509 | } | ||
| 510 | if (last > r->end && addr < r->end) { | ||
| 511 | addr = round_up(r->end, align); | ||
| 512 | size = last - addr; | ||
| 513 | changed = 1; | ||
| 514 | goto again; | ||
| 515 | } | ||
| 516 | if (last <= r->end && addr >= r->start) { | ||
| 517 | (*sizep)++; | ||
| 518 | return 0; | ||
| 519 | } | ||
| 520 | } | ||
| 521 | if (changed) { | ||
| 522 | *addrp = addr; | ||
| 523 | *sizep = size; | ||
| 524 | } | ||
| 525 | return changed; | ||
| 526 | } | ||
| 527 | |||
| 528 | /* | ||
| 529 | * Find a free area with specified alignment in a specific range. | ||
| 530 | * Only the area between start and end that is covered by an active | ||
| 531 | * range from early_node_map is known to be usable RAM. | ||
| 532 | */ | ||
| 533 | u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, | ||
| 534 | u64 size, u64 align) | ||
| 535 | { | ||
| 536 | u64 addr, last; | ||
| 537 | |||
| 538 | addr = round_up(ei_start, align); | ||
| 539 | if (addr < start) | ||
| 540 | addr = round_up(start, align); | ||
| 541 | if (addr >= ei_last) | ||
| 542 | goto out; | ||
| 543 | while (bad_addr(&addr, size, align) && addr+size <= ei_last) | ||
| 544 | ; | ||
| 545 | last = addr + size; | ||
| 546 | if (last > ei_last) | ||
| 547 | goto out; | ||
| 548 | if (last > end) | ||
| 549 | goto out; | ||
| 550 | |||
| 551 | return addr; | ||
| 552 | |||
| 553 | out: | ||
| 554 | return -1ULL; | ||
| 555 | } | ||
| 556 | |||
| 557 | u64 __init find_early_area_size(u64 ei_start, u64 ei_last, u64 start, | ||
| 558 | u64 *sizep, u64 align) | ||
| 559 | { | ||
| 560 | u64 addr, last; | ||
| 561 | |||
| 562 | addr = round_up(ei_start, align); | ||
| 563 | if (addr < start) | ||
| 564 | addr = round_up(start, align); | ||
| 565 | if (addr >= ei_last) | ||
| 566 | goto out; | ||
| 567 | *sizep = ei_last - addr; | ||
| 568 | while (bad_addr_size(&addr, sizep, align) && addr + *sizep <= ei_last) | ||
| 569 | ; | ||
| 570 | last = addr + *sizep; | ||
| 571 | if (last > ei_last) | ||
| 572 | goto out; | ||
| 573 | |||
| 574 | return addr; | ||
| 575 | |||
| 576 | out: | ||
| 577 | return -1ULL; | ||
| 578 | } | ||
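
For reference, a minimal usage sketch (not part of the patch) of find_early_area(): carve an aligned allocation out of a known-RAM window, then record it with reserve_early() so later searches skip it. The function name and the "EXAMPLE" tag are illustrative.

	static u64 __init example_early_alloc(u64 ei_start, u64 ei_last,
					      u64 start, u64 end,
					      u64 size, u64 align)
	{
		u64 addr = find_early_area(ei_start, ei_last, start, end,
					   size, align);

		if (addr == -1ULL)
			return addr;	/* no fit inside this window */

		/* mark the area so subsequent searches step over it */
		reserve_early(addr, addr + size, "EXAMPLE");
		return addr;
	}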
diff --git a/kernel/range.c b/kernel/range.c new file mode 100644 index 000000000000..74e2e6114927 --- /dev/null +++ b/kernel/range.c | |||
| @@ -0,0 +1,163 @@ | |||
| 1 | /* | ||
| 2 | * Range add and subtract | ||
| 3 | */ | ||
| 4 | #include <linux/module.h> | ||
| 5 | #include <linux/init.h> | ||
| 6 | #include <linux/sort.h> | ||
| 7 | |||
| 8 | #include <linux/range.h> | ||
| 9 | |||
| 10 | #ifndef ARRAY_SIZE | ||
| 11 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) | ||
| 12 | #endif | ||
| 13 | |||
| 14 | int add_range(struct range *range, int az, int nr_range, u64 start, u64 end) | ||
| 15 | { | ||
| 16 | if (start >= end) | ||
| 17 | return nr_range; | ||
| 18 | |||
| 19 | /* Out of slots: */ | ||
| 20 | if (nr_range >= az) | ||
| 21 | return nr_range; | ||
| 22 | |||
| 23 | range[nr_range].start = start; | ||
| 24 | range[nr_range].end = end; | ||
| 25 | |||
| 26 | nr_range++; | ||
| 27 | |||
| 28 | return nr_range; | ||
| 29 | } | ||
| 30 | |||
| 31 | int add_range_with_merge(struct range *range, int az, int nr_range, | ||
| 32 | u64 start, u64 end) | ||
| 33 | { | ||
| 34 | int i; | ||
| 35 | |||
| 36 | if (start >= end) | ||
| 37 | return nr_range; | ||
| 38 | |||
| 39 | /* Try to merge it with an existing range: */ | ||
| 40 | for (i = 0; i < nr_range; i++) { | ||
| 41 | u64 final_start, final_end; | ||
| 42 | u64 common_start, common_end; | ||
| 43 | |||
| 44 | if (!range[i].end) | ||
| 45 | continue; | ||
| 46 | |||
| 47 | common_start = max(range[i].start, start); | ||
| 48 | common_end = min(range[i].end, end); | ||
| 49 | if (common_start > common_end) | ||
| 50 | continue; | ||
| 51 | |||
| 52 | final_start = min(range[i].start, start); | ||
| 53 | final_end = max(range[i].end, end); | ||
| 54 | |||
| 55 | range[i].start = final_start; | ||
| 56 | range[i].end = final_end; | ||
| 57 | return nr_range; | ||
| 58 | } | ||
| 59 | |||
| 60 | /* Need to add it: */ | ||
| 61 | return add_range(range, az, nr_range, start, end); | ||
| 62 | } | ||
| 63 | |||
| 64 | void subtract_range(struct range *range, int az, u64 start, u64 end) | ||
| 65 | { | ||
| 66 | int i, j; | ||
| 67 | |||
| 68 | if (start >= end) | ||
| 69 | return; | ||
| 70 | |||
| 71 | for (j = 0; j < az; j++) { | ||
| 72 | if (!range[j].end) | ||
| 73 | continue; | ||
| 74 | |||
| 75 | if (start <= range[j].start && end >= range[j].end) { | ||
| 76 | range[j].start = 0; | ||
| 77 | range[j].end = 0; | ||
| 78 | continue; | ||
| 79 | } | ||
| 80 | |||
| 81 | if (start <= range[j].start && end < range[j].end && | ||
| 82 | range[j].start < end) { | ||
| 83 | range[j].start = end; | ||
| 84 | continue; | ||
| 85 | } | ||
| 86 | |||
| 87 | |||
| 88 | if (start > range[j].start && end >= range[j].end && | ||
| 89 | range[j].end > start) { | ||
| 90 | range[j].end = start; | ||
| 91 | continue; | ||
| 92 | } | ||
| 93 | |||
| 94 | if (start > range[j].start && end < range[j].end) { | ||
| 95 | /* Find a new spare slot: */ | ||
| 96 | for (i = 0; i < az; i++) { | ||
| 97 | if (range[i].end == 0) | ||
| 98 | break; | ||
| 99 | } | ||
| 100 | if (i < az) { | ||
| 101 | range[i].end = range[j].end; | ||
| 102 | range[i].start = end; | ||
| 103 | } else { | ||
| 104 | printk(KERN_ERR "run of slot in ranges\n"); | ||
| 105 | } | ||
| 106 | range[j].end = start; | ||
| 107 | continue; | ||
| 108 | } | ||
| 109 | } | ||
| 110 | } | ||
| 111 | |||
| 112 | static int cmp_range(const void *x1, const void *x2) | ||
| 113 | { | ||
| 114 | const struct range *r1 = x1; | ||
| 115 | const struct range *r2 = x2; | ||
| 116 | s64 start1, start2; | ||
| 117 | |||
| 118 | start1 = r1->start; | ||
| 119 | start2 = r2->start; | ||
| 120 | |||
| 121 | return start1 < start2 ? -1 : (start1 > start2 ? 1 : 0); | ||
| 122 | } | ||
| 123 | |||
| 124 | int clean_sort_range(struct range *range, int az) | ||
| 125 | { | ||
| 126 | int i, j, k = az - 1, nr_range = 0; | ||
| 127 | |||
| 128 | for (i = 0; i < k; i++) { | ||
| 129 | if (range[i].end) | ||
| 130 | continue; | ||
| 131 | for (j = k; j > i; j--) { | ||
| 132 | if (range[j].end) { | ||
| 133 | k = j; | ||
| 134 | break; | ||
| 135 | } | ||
| 136 | } | ||
| 137 | if (j == i) | ||
| 138 | break; | ||
| 139 | range[i].start = range[k].start; | ||
| 140 | range[i].end = range[k].end; | ||
| 141 | range[k].start = 0; | ||
| 142 | range[k].end = 0; | ||
| 143 | k--; | ||
| 144 | } | ||
| 145 | /* count it */ | ||
| 146 | for (i = 0; i < az; i++) { | ||
| 147 | if (!range[i].end) { | ||
| 148 | nr_range = i; | ||
| 149 | break; | ||
| 150 | } | ||
| 151 | } | ||
| 152 | |||
| 153 | /* sort them */ | ||
| 154 | sort(range, nr_range, sizeof(struct range), cmp_range, NULL); | ||
| 155 | |||
| 156 | return nr_range; | ||
| 157 | } | ||
| 158 | |||
| 159 | void sort_range(struct range *range, int nr_range) | ||
| 160 | { | ||
| 161 | /* sort them */ | ||
| 162 | sort(range, nr_range, sizeof(struct range), cmp_range, NULL); | ||
| 163 | } | ||
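
A hedged walk-through of the new range helpers (the values are made up): add two overlapping ranges, which merge into one; punch a hole, which splits the merged range; then compact and sort. clean_sort_range() returns the number of live entries.

	static int __init range_demo(void)
	{
		struct range r[8] = { };	/* end == 0 marks a free slot */
		int nr = 0;

		nr = add_range_with_merge(r, 8, nr, 0x1000, 0x4000);
		nr = add_range_with_merge(r, 8, nr, 0x3000, 0x6000);
					/* merged: [0x1000, 0x6000) */
		subtract_range(r, 8, 0x2000, 0x2800);
					/* split into two pieces */
		return clean_sort_range(r, 8);
					/* 2: [0x1000, 0x2000) and
					 *    [0x2800, 0x6000) */
	}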
diff --git a/mm/Kconfig b/mm/Kconfig index d34c2b971032..9c61158308dc 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
| @@ -115,6 +115,10 @@ config SPARSEMEM_EXTREME | |||
| 115 | config SPARSEMEM_VMEMMAP_ENABLE | 115 | config SPARSEMEM_VMEMMAP_ENABLE |
| 116 | bool | 116 | bool |
| 117 | 117 | ||
| 118 | config SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
| 119 | def_bool y | ||
| 120 | depends on SPARSEMEM && X86_64 | ||
| 121 | |||
| 118 | config SPARSEMEM_VMEMMAP | 122 | config SPARSEMEM_VMEMMAP |
| 119 | bool "Sparse Memory virtual memmap" | 123 | bool "Sparse Memory virtual memmap" |
| 120 | depends on SPARSEMEM && SPARSEMEM_VMEMMAP_ENABLE | 124 | depends on SPARSEMEM && SPARSEMEM_VMEMMAP_ENABLE |
diff --git a/mm/bootmem.c b/mm/bootmem.c index 7d1486875e1c..d7c791ef0036 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/bootmem.h> | 13 | #include <linux/bootmem.h> |
| 14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
| 15 | #include <linux/kmemleak.h> | 15 | #include <linux/kmemleak.h> |
| 16 | #include <linux/range.h> | ||
| 16 | 17 | ||
| 17 | #include <asm/bug.h> | 18 | #include <asm/bug.h> |
| 18 | #include <asm/io.h> | 19 | #include <asm/io.h> |
| @@ -32,6 +33,7 @@ unsigned long max_pfn; | |||
| 32 | unsigned long saved_max_pfn; | 33 | unsigned long saved_max_pfn; |
| 33 | #endif | 34 | #endif |
| 34 | 35 | ||
| 36 | #ifndef CONFIG_NO_BOOTMEM | ||
| 35 | bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; | 37 | bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; |
| 36 | 38 | ||
| 37 | static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); | 39 | static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); |
| @@ -142,7 +144,7 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages) | |||
| 142 | min_low_pfn = start; | 144 | min_low_pfn = start; |
| 143 | return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); | 145 | return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); |
| 144 | } | 146 | } |
| 145 | 147 | #endif | |
| 146 | /* | 148 | /* |
| 147 | * free_bootmem_late - free bootmem pages directly to page allocator | 149 | * free_bootmem_late - free bootmem pages directly to page allocator |
| 148 | * @addr: starting address of the range | 150 | * @addr: starting address of the range |
| @@ -167,6 +169,60 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size) | |||
| 167 | } | 169 | } |
| 168 | } | 170 | } |
| 169 | 171 | ||
| 172 | #ifdef CONFIG_NO_BOOTMEM | ||
| 173 | static void __init __free_pages_memory(unsigned long start, unsigned long end) | ||
| 174 | { | ||
| 175 | unsigned long i; | ||
| 176 | unsigned long start_aligned, end_aligned; | ||
| 177 | int order = ilog2(BITS_PER_LONG); | ||
| 178 | |||
| 179 | start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1); | ||
| 180 | end_aligned = end & ~(BITS_PER_LONG - 1); | ||
| 181 | |||
| 182 | if (end_aligned <= start_aligned) { | ||
| 183 | #if 1 | ||
| 184 | printk(KERN_DEBUG " %lx - %lx\n", start, end); | ||
| 185 | #endif | ||
| 186 | for (i = start; i < end; i++) | ||
| 187 | __free_pages_bootmem(pfn_to_page(i), 0); | ||
| 188 | |||
| 189 | return; | ||
| 190 | } | ||
| 191 | |||
| 192 | #if 1 | ||
| 193 | printk(KERN_DEBUG " %lx %lx - %lx %lx\n", | ||
| 194 | start, start_aligned, end_aligned, end); | ||
| 195 | #endif | ||
| 196 | for (i = start; i < start_aligned; i++) | ||
| 197 | __free_pages_bootmem(pfn_to_page(i), 0); | ||
| 198 | |||
| 199 | for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG) | ||
| 200 | __free_pages_bootmem(pfn_to_page(i), order); | ||
| 201 | |||
| 202 | for (i = end_aligned; i < end; i++) | ||
| 203 | __free_pages_bootmem(pfn_to_page(i), 0); | ||
| 204 | } | ||
| 205 | |||
| 206 | unsigned long __init free_all_memory_core_early(int nodeid) | ||
| 207 | { | ||
| 208 | int i; | ||
| 209 | u64 start, end; | ||
| 210 | unsigned long count = 0; | ||
| 211 | struct range *range = NULL; | ||
| 212 | int nr_range; | ||
| 213 | |||
| 214 | nr_range = get_free_all_memory_range(&range, nodeid); | ||
| 215 | |||
| 216 | for (i = 0; i < nr_range; i++) { | ||
| 217 | start = range[i].start; | ||
| 218 | end = range[i].end; | ||
| 219 | count += end - start; | ||
| 220 | __free_pages_memory(start, end); | ||
| 221 | } | ||
| 222 | |||
| 223 | return count; | ||
| 224 | } | ||
| 225 | #else | ||
| 170 | static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) | 226 | static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) |
| 171 | { | 227 | { |
| 172 | int aligned; | 228 | int aligned; |
| @@ -227,6 +283,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) | |||
| 227 | 283 | ||
| 228 | return count; | 284 | return count; |
| 229 | } | 285 | } |
| 286 | #endif | ||
| 230 | 287 | ||
| 231 | /** | 288 | /** |
| 232 | * free_all_bootmem_node - release a node's free pages to the buddy allocator | 289 | * free_all_bootmem_node - release a node's free pages to the buddy allocator |
| @@ -237,7 +294,12 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) | |||
| 237 | unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) | 294 | unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) |
| 238 | { | 295 | { |
| 239 | register_page_bootmem_info_node(pgdat); | 296 | register_page_bootmem_info_node(pgdat); |
| 297 | #ifdef CONFIG_NO_BOOTMEM | ||
| 298 | /* free_all_memory_core_early(MAX_NUMNODES) will be called later */ | ||
| 299 | return 0; | ||
| 300 | #else | ||
| 240 | return free_all_bootmem_core(pgdat->bdata); | 301 | return free_all_bootmem_core(pgdat->bdata); |
| 302 | #endif | ||
| 241 | } | 303 | } |
| 242 | 304 | ||
| 243 | /** | 305 | /** |
| @@ -247,9 +309,14 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) | |||
| 247 | */ | 309 | */ |
| 248 | unsigned long __init free_all_bootmem(void) | 310 | unsigned long __init free_all_bootmem(void) |
| 249 | { | 311 | { |
| 312 | #ifdef CONFIG_NO_BOOTMEM | ||
| 313 | return free_all_memory_core_early(NODE_DATA(0)->node_id); | ||
| 314 | #else | ||
| 250 | return free_all_bootmem_core(NODE_DATA(0)->bdata); | 315 | return free_all_bootmem_core(NODE_DATA(0)->bdata); |
| 316 | #endif | ||
| 251 | } | 317 | } |
| 252 | 318 | ||
| 319 | #ifndef CONFIG_NO_BOOTMEM | ||
| 253 | static void __init __free(bootmem_data_t *bdata, | 320 | static void __init __free(bootmem_data_t *bdata, |
| 254 | unsigned long sidx, unsigned long eidx) | 321 | unsigned long sidx, unsigned long eidx) |
| 255 | { | 322 | { |
| @@ -344,6 +411,7 @@ static int __init mark_bootmem(unsigned long start, unsigned long end, | |||
| 344 | } | 411 | } |
| 345 | BUG(); | 412 | BUG(); |
| 346 | } | 413 | } |
| 414 | #endif | ||
| 347 | 415 | ||
| 348 | /** | 416 | /** |
| 349 | * free_bootmem_node - mark a page range as usable | 417 | * free_bootmem_node - mark a page range as usable |
| @@ -358,6 +426,12 @@ static int __init mark_bootmem(unsigned long start, unsigned long end, | |||
| 358 | void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | 426 | void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, |
| 359 | unsigned long size) | 427 | unsigned long size) |
| 360 | { | 428 | { |
| 429 | #ifdef CONFIG_NO_BOOTMEM | ||
| 430 | free_early(physaddr, physaddr + size); | ||
| 431 | #if 0 | ||
| 432 | printk(KERN_DEBUG "free %lx %lx\n", physaddr, size); | ||
| 433 | #endif | ||
| 434 | #else | ||
| 361 | unsigned long start, end; | 435 | unsigned long start, end; |
| 362 | 436 | ||
| 363 | kmemleak_free_part(__va(physaddr), size); | 437 | kmemleak_free_part(__va(physaddr), size); |
| @@ -366,6 +440,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | |||
| 366 | end = PFN_DOWN(physaddr + size); | 440 | end = PFN_DOWN(physaddr + size); |
| 367 | 441 | ||
| 368 | mark_bootmem_node(pgdat->bdata, start, end, 0, 0); | 442 | mark_bootmem_node(pgdat->bdata, start, end, 0, 0); |
| 443 | #endif | ||
| 369 | } | 444 | } |
| 370 | 445 | ||
| 371 | /** | 446 | /** |
| @@ -379,6 +454,12 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | |||
| 379 | */ | 454 | */ |
| 380 | void __init free_bootmem(unsigned long addr, unsigned long size) | 455 | void __init free_bootmem(unsigned long addr, unsigned long size) |
| 381 | { | 456 | { |
| 457 | #ifdef CONFIG_NO_BOOTMEM | ||
| 458 | free_early(addr, addr + size); | ||
| 459 | #if 0 | ||
| 460 | printk(KERN_DEBUG "free %lx %lx\n", addr, size); | ||
| 461 | #endif | ||
| 462 | #else | ||
| 382 | unsigned long start, end; | 463 | unsigned long start, end; |
| 383 | 464 | ||
| 384 | kmemleak_free_part(__va(addr), size); | 465 | kmemleak_free_part(__va(addr), size); |
| @@ -387,6 +468,7 @@ void __init free_bootmem(unsigned long addr, unsigned long size) | |||
| 387 | end = PFN_DOWN(addr + size); | 468 | end = PFN_DOWN(addr + size); |
| 388 | 469 | ||
| 389 | mark_bootmem(start, end, 0, 0); | 470 | mark_bootmem(start, end, 0, 0); |
| 471 | #endif | ||
| 390 | } | 472 | } |
| 391 | 473 | ||
| 392 | /** | 474 | /** |
| @@ -403,12 +485,17 @@ void __init free_bootmem(unsigned long addr, unsigned long size) | |||
| 403 | int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | 485 | int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, |
| 404 | unsigned long size, int flags) | 486 | unsigned long size, int flags) |
| 405 | { | 487 | { |
| 488 | #ifdef CONFIG_NO_BOOTMEM | ||
| 489 | panic("no bootmem"); | ||
| 490 | return 0; | ||
| 491 | #else | ||
| 406 | unsigned long start, end; | 492 | unsigned long start, end; |
| 407 | 493 | ||
| 408 | start = PFN_DOWN(physaddr); | 494 | start = PFN_DOWN(physaddr); |
| 409 | end = PFN_UP(physaddr + size); | 495 | end = PFN_UP(physaddr + size); |
| 410 | 496 | ||
| 411 | return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); | 497 | return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); |
| 498 | #endif | ||
| 412 | } | 499 | } |
| 413 | 500 | ||
| 414 | /** | 501 | /** |
| @@ -424,14 +511,20 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | |||
| 424 | int __init reserve_bootmem(unsigned long addr, unsigned long size, | 511 | int __init reserve_bootmem(unsigned long addr, unsigned long size, |
| 425 | int flags) | 512 | int flags) |
| 426 | { | 513 | { |
| 514 | #ifdef CONFIG_NO_BOOTMEM | ||
| 515 | panic("no bootmem"); | ||
| 516 | return 0; | ||
| 517 | #else | ||
| 427 | unsigned long start, end; | 518 | unsigned long start, end; |
| 428 | 519 | ||
| 429 | start = PFN_DOWN(addr); | 520 | start = PFN_DOWN(addr); |
| 430 | end = PFN_UP(addr + size); | 521 | end = PFN_UP(addr + size); |
| 431 | 522 | ||
| 432 | return mark_bootmem(start, end, 1, flags); | 523 | return mark_bootmem(start, end, 1, flags); |
| 524 | #endif | ||
| 433 | } | 525 | } |
| 434 | 526 | ||
| 527 | #ifndef CONFIG_NO_BOOTMEM | ||
| 435 | static unsigned long __init align_idx(struct bootmem_data *bdata, | 528 | static unsigned long __init align_idx(struct bootmem_data *bdata, |
| 436 | unsigned long idx, unsigned long step) | 529 | unsigned long idx, unsigned long step) |
| 437 | { | 530 | { |
| @@ -582,12 +675,33 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata, | |||
| 582 | #endif | 675 | #endif |
| 583 | return NULL; | 676 | return NULL; |
| 584 | } | 677 | } |
| 678 | #endif | ||
| 585 | 679 | ||
| 586 | static void * __init ___alloc_bootmem_nopanic(unsigned long size, | 680 | static void * __init ___alloc_bootmem_nopanic(unsigned long size, |
| 587 | unsigned long align, | 681 | unsigned long align, |
| 588 | unsigned long goal, | 682 | unsigned long goal, |
| 589 | unsigned long limit) | 683 | unsigned long limit) |
| 590 | { | 684 | { |
| 685 | #ifdef CONFIG_NO_BOOTMEM | ||
| 686 | void *ptr; | ||
| 687 | |||
| 688 | if (WARN_ON_ONCE(slab_is_available())) | ||
| 689 | return kzalloc(size, GFP_NOWAIT); | ||
| 690 | |||
| 691 | restart: | ||
| 692 | |||
| 693 | ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit); | ||
| 694 | |||
| 695 | if (ptr) | ||
| 696 | return ptr; | ||
| 697 | |||
| 698 | if (goal != 0) { | ||
| 699 | goal = 0; | ||
| 700 | goto restart; | ||
| 701 | } | ||
| 702 | |||
| 703 | return NULL; | ||
| 704 | #else | ||
| 591 | bootmem_data_t *bdata; | 705 | bootmem_data_t *bdata; |
| 592 | void *region; | 706 | void *region; |
| 593 | 707 | ||
| @@ -613,6 +727,7 @@ restart: | |||
| 613 | } | 727 | } |
| 614 | 728 | ||
| 615 | return NULL; | 729 | return NULL; |
| 730 | #endif | ||
| 616 | } | 731 | } |
| 617 | 732 | ||
| 618 | /** | 733 | /** |
| @@ -631,7 +746,13 @@ restart: | |||
| 631 | void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, | 746 | void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, |
| 632 | unsigned long goal) | 747 | unsigned long goal) |
| 633 | { | 748 | { |
| 634 | return ___alloc_bootmem_nopanic(size, align, goal, 0); | 749 | unsigned long limit = 0; |
| 750 | |||
| 751 | #ifdef CONFIG_NO_BOOTMEM | ||
| 752 | limit = -1UL; | ||
| 753 | #endif | ||
| 754 | |||
| 755 | return ___alloc_bootmem_nopanic(size, align, goal, limit); | ||
| 635 | } | 756 | } |
| 636 | 757 | ||
| 637 | static void * __init ___alloc_bootmem(unsigned long size, unsigned long align, | 758 | static void * __init ___alloc_bootmem(unsigned long size, unsigned long align, |
| @@ -665,9 +786,16 @@ static void * __init ___alloc_bootmem(unsigned long size, unsigned long align, | |||
| 665 | void * __init __alloc_bootmem(unsigned long size, unsigned long align, | 786 | void * __init __alloc_bootmem(unsigned long size, unsigned long align, |
| 666 | unsigned long goal) | 787 | unsigned long goal) |
| 667 | { | 788 | { |
| 668 | return ___alloc_bootmem(size, align, goal, 0); | 789 | unsigned long limit = 0; |
| 790 | |||
| 791 | #ifdef CONFIG_NO_BOOTMEM | ||
| 792 | limit = -1UL; | ||
| 793 | #endif | ||
| 794 | |||
| 795 | return ___alloc_bootmem(size, align, goal, limit); | ||
| 669 | } | 796 | } |
| 670 | 797 | ||
| 798 | #ifndef CONFIG_NO_BOOTMEM | ||
| 671 | static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, | 799 | static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, |
| 672 | unsigned long size, unsigned long align, | 800 | unsigned long size, unsigned long align, |
| 673 | unsigned long goal, unsigned long limit) | 801 | unsigned long goal, unsigned long limit) |
| @@ -684,6 +812,7 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, | |||
| 684 | 812 | ||
| 685 | return ___alloc_bootmem(size, align, goal, limit); | 813 | return ___alloc_bootmem(size, align, goal, limit); |
| 686 | } | 814 | } |
| 815 | #endif | ||
| 687 | 816 | ||
| 688 | /** | 817 | /** |
| 689 | * __alloc_bootmem_node - allocate boot memory from a specific node | 818 | * __alloc_bootmem_node - allocate boot memory from a specific node |
| @@ -706,7 +835,46 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, | |||
| 706 | if (WARN_ON_ONCE(slab_is_available())) | 835 | if (WARN_ON_ONCE(slab_is_available())) |
| 707 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); | 836 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); |
| 708 | 837 | ||
| 838 | #ifdef CONFIG_NO_BOOTMEM | ||
| 839 | return __alloc_memory_core_early(pgdat->node_id, size, align, | ||
| 840 | goal, -1ULL); | ||
| 841 | #else | ||
| 709 | return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); | 842 | return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); |
| 843 | #endif | ||
| 844 | } | ||
| 845 | |||
| 846 | void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, | ||
| 847 | unsigned long align, unsigned long goal) | ||
| 848 | { | ||
| 849 | #ifdef MAX_DMA32_PFN | ||
| 850 | unsigned long end_pfn; | ||
| 851 | |||
| 852 | if (WARN_ON_ONCE(slab_is_available())) | ||
| 853 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); | ||
| 854 | |||
| 855 | /* update the goal according to MAX_DMA32_PFN */ | ||
| 856 | end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages; | ||
| 857 | |||
| 858 | if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) && | ||
| 859 | (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) { | ||
| 860 | void *ptr; | ||
| 861 | unsigned long new_goal; | ||
| 862 | |||
| 863 | new_goal = MAX_DMA32_PFN << PAGE_SHIFT; | ||
| 864 | #ifdef CONFIG_NO_BOOTMEM | ||
| 865 | ptr = __alloc_memory_core_early(pgdat->node_id, size, align, | ||
| 866 | new_goal, -1ULL); | ||
| 867 | #else | ||
| 868 | ptr = alloc_bootmem_core(pgdat->bdata, size, align, | ||
| 869 | new_goal, 0); | ||
| 870 | #endif | ||
| 871 | if (ptr) | ||
| 872 | return ptr; | ||
| 873 | } | ||
| 874 | #endif | ||
| 875 | |||
| 876 | return __alloc_bootmem_node(pgdat, size, align, goal); | ||
| 877 | |||
| 710 | } | 878 | } |
| 711 | 879 | ||
| 712 | #ifdef CONFIG_SPARSEMEM | 880 | #ifdef CONFIG_SPARSEMEM |
| @@ -720,6 +888,16 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, | |||
| 720 | void * __init alloc_bootmem_section(unsigned long size, | 888 | void * __init alloc_bootmem_section(unsigned long size, |
| 721 | unsigned long section_nr) | 889 | unsigned long section_nr) |
| 722 | { | 890 | { |
| 891 | #ifdef CONFIG_NO_BOOTMEM | ||
| 892 | unsigned long pfn, goal, limit; | ||
| 893 | |||
| 894 | pfn = section_nr_to_pfn(section_nr); | ||
| 895 | goal = pfn << PAGE_SHIFT; | ||
| 896 | limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT; | ||
| 897 | |||
| 898 | return __alloc_memory_core_early(early_pfn_to_nid(pfn), size, | ||
| 899 | SMP_CACHE_BYTES, goal, limit); | ||
| 900 | #else | ||
| 723 | bootmem_data_t *bdata; | 901 | bootmem_data_t *bdata; |
| 724 | unsigned long pfn, goal, limit; | 902 | unsigned long pfn, goal, limit; |
| 725 | 903 | ||
| @@ -729,6 +907,7 @@ void * __init alloc_bootmem_section(unsigned long size, | |||
| 729 | bdata = &bootmem_node_data[early_pfn_to_nid(pfn)]; | 907 | bdata = &bootmem_node_data[early_pfn_to_nid(pfn)]; |
| 730 | 908 | ||
| 731 | return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit); | 909 | return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit); |
| 910 | #endif | ||
| 732 | } | 911 | } |
| 733 | #endif | 912 | #endif |
| 734 | 913 | ||
| @@ -740,11 +919,16 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, | |||
| 740 | if (WARN_ON_ONCE(slab_is_available())) | 919 | if (WARN_ON_ONCE(slab_is_available())) |
| 741 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); | 920 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); |
| 742 | 921 | ||
| 922 | #ifdef CONFIG_NO_BOOTMEM | ||
| 923 | ptr = __alloc_memory_core_early(pgdat->node_id, size, align, | ||
| 924 | goal, -1ULL); | ||
| 925 | #else | ||
| 743 | ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); | 926 | ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); |
| 744 | if (ptr) | 927 | if (ptr) |
| 745 | return ptr; | 928 | return ptr; |
| 746 | 929 | ||
| 747 | ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); | 930 | ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); |
| 931 | #endif | ||
| 748 | if (ptr) | 932 | if (ptr) |
| 749 | return ptr; | 933 | return ptr; |
| 750 | 934 | ||
| @@ -795,6 +979,11 @@ void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, | |||
| 795 | if (WARN_ON_ONCE(slab_is_available())) | 979 | if (WARN_ON_ONCE(slab_is_available())) |
| 796 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); | 980 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); |
| 797 | 981 | ||
| 982 | #ifdef CONFIG_NO_BOOTMEM | ||
| 983 | return __alloc_memory_core_early(pgdat->node_id, size, align, | ||
| 984 | goal, ARCH_LOW_ADDRESS_LIMIT); | ||
| 985 | #else | ||
| 798 | return ___alloc_bootmem_node(pgdat->bdata, size, align, | 986 | return ___alloc_bootmem_node(pgdat->bdata, size, align, |
| 799 | goal, ARCH_LOW_ADDRESS_LIMIT); | 987 | goal, ARCH_LOW_ADDRESS_LIMIT); |
| 988 | #endif | ||
| 800 | } | 989 | } |
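
free_all_memory_core_early() gets its ranges from get_free_all_memory_range(), which the x86 early_res side supplies elsewhere in this series. A hedged sketch of the idea, composing the kernel/range.c helpers (the function name and the direct early_res[] access below are illustrative only):

	static int __init example_free_ranges(struct range *range, int az, int nid)
	{
		int i, nr_range = 0;

		/* start from the node's usable RAM, in pfn units */
		nr_range = add_from_early_node_map(range, az, nr_range, nid);

		/* subtract every early reservation, widened to page bounds */
		for (i = 0; i < max_early_res && early_res[i].end; i++)
			subtract_range(range, az,
				       PFN_DOWN(early_res[i].start),
				       PFN_UP(early_res[i].end));

		/* compact the array and sort by start */
		return clean_sort_range(range, az);
	}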
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9a7aaae07ab4..a6b17aa4740b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
| @@ -3374,6 +3374,61 @@ void __init free_bootmem_with_active_regions(int nid, | |||
| 3374 | } | 3374 | } |
| 3375 | } | 3375 | } |
| 3376 | 3376 | ||
| 3377 | int __init add_from_early_node_map(struct range *range, int az, | ||
| 3378 | int nr_range, int nid) | ||
| 3379 | { | ||
| 3380 | int i; | ||
| 3381 | u64 start, end; | ||
| 3382 | |||
| 3383 | /* go over early_node_map to find good ranges for the node */ | ||
| 3384 | for_each_active_range_index_in_nid(i, nid) { | ||
| 3385 | start = early_node_map[i].start_pfn; | ||
| 3386 | end = early_node_map[i].end_pfn; | ||
| 3387 | nr_range = add_range(range, az, nr_range, start, end); | ||
| 3388 | } | ||
| 3389 | return nr_range; | ||
| 3390 | } | ||
| 3391 | |||
| 3392 | #ifdef CONFIG_NO_BOOTMEM | ||
| 3393 | void * __init __alloc_memory_core_early(int nid, u64 size, u64 align, | ||
| 3394 | u64 goal, u64 limit) | ||
| 3395 | { | ||
| 3396 | int i; | ||
| 3397 | void *ptr; | ||
| 3398 | |||
| 3399 | /* go over early_node_map to find a good range for the node */ | ||
| 3400 | for_each_active_range_index_in_nid(i, nid) { | ||
| 3401 | u64 addr; | ||
| 3402 | u64 ei_start, ei_last; | ||
| 3403 | |||
| 3404 | ei_last = early_node_map[i].end_pfn; | ||
| 3405 | ei_last <<= PAGE_SHIFT; | ||
| 3406 | ei_start = early_node_map[i].start_pfn; | ||
| 3407 | ei_start <<= PAGE_SHIFT; | ||
| 3408 | addr = find_early_area(ei_start, ei_last, | ||
| 3409 | goal, limit, size, align); | ||
| 3410 | |||
| 3411 | if (addr == -1ULL) | ||
| 3412 | continue; | ||
| 3413 | |||
| 3414 | #if 0 | ||
| 3415 | printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n", | ||
| 3416 | nid, | ||
| 3417 | ei_start, ei_last, goal, limit, size, | ||
| 3418 | align, addr); | ||
| 3419 | #endif | ||
| 3420 | |||
| 3421 | ptr = phys_to_virt(addr); | ||
| 3422 | memset(ptr, 0, size); | ||
| 3423 | reserve_early_without_check(addr, addr + size, "BOOTMEM"); | ||
| 3424 | return ptr; | ||
| 3425 | } | ||
| 3426 | |||
| 3427 | return NULL; | ||
| 3428 | } | ||
| 3429 | #endif | ||
| 3430 | |||
| 3431 | |||
| 3377 | void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) | 3432 | void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) |
| 3378 | { | 3433 | { |
| 3379 | int i; | 3434 | int i; |
| @@ -4406,7 +4461,11 @@ void __init set_dma_reserve(unsigned long new_dma_reserve) | |||
| 4406 | } | 4461 | } |
| 4407 | 4462 | ||
| 4408 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 4463 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
| 4409 | struct pglist_data __refdata contig_page_data = { .bdata = &bootmem_node_data[0] }; | 4464 | struct pglist_data __refdata contig_page_data = { |
| 4465 | #ifndef CONFIG_NO_BOOTMEM | ||
| 4466 | .bdata = &bootmem_node_data[0] | ||
| 4467 | #endif | ||
| 4468 | }; | ||
| 4410 | EXPORT_SYMBOL(contig_page_data); | 4469 | EXPORT_SYMBOL(contig_page_data); |
| 4411 | #endif | 4470 | #endif |
| 4412 | 4471 | ||
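
A hedged usage sketch of __alloc_memory_core_early() under CONFIG_NO_BOOTMEM (the size and goal are illustrative): the helper walks the node's active ranges, zeroes the memory it finds, and books it in early_res as "BOOTMEM" before returning.

	static void * __init example_node_buffer(int nid)
	{
		/* 1 MiB, cache-line aligned, preferably above 16 MiB,
		 * no upper limit */
		return __alloc_memory_core_early(nid, 1UL << 20,
						 SMP_CACHE_BYTES,
						 16UL << 20, -1ULL);
	}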
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index d9714bdcb4a3..392b9bb5bc01 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c | |||
| @@ -40,9 +40,11 @@ static void * __init_refok __earlyonly_bootmem_alloc(int node, | |||
| 40 | unsigned long align, | 40 | unsigned long align, |
| 41 | unsigned long goal) | 41 | unsigned long goal) |
| 42 | { | 42 | { |
| 43 | return __alloc_bootmem_node(NODE_DATA(node), size, align, goal); | 43 | return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal); |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | static void *vmemmap_buf; | ||
| 47 | static void *vmemmap_buf_end; | ||
| 46 | 48 | ||
| 47 | void * __meminit vmemmap_alloc_block(unsigned long size, int node) | 49 | void * __meminit vmemmap_alloc_block(unsigned long size, int node) |
| 48 | { | 50 | { |
| @@ -64,6 +66,24 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node) | |||
| 64 | __pa(MAX_DMA_ADDRESS)); | 66 | __pa(MAX_DMA_ADDRESS)); |
| 65 | } | 67 | } |
| 66 | 68 | ||
| 69 | /* callers must request the same size on every early-stage call */ | ||
| 70 | void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node) | ||
| 71 | { | ||
| 72 | void *ptr; | ||
| 73 | |||
| 74 | if (!vmemmap_buf) | ||
| 75 | return vmemmap_alloc_block(size, node); | ||
| 76 | |||
| 77 | /* take it from the buffer */ | ||
| 78 | ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size); | ||
| 79 | if (ptr + size > vmemmap_buf_end) | ||
| 80 | return vmemmap_alloc_block(size, node); | ||
| 81 | |||
| 82 | vmemmap_buf = ptr + size; | ||
| 83 | |||
| 84 | return ptr; | ||
| 85 | } | ||
| 86 | |||
| 67 | void __meminit vmemmap_verify(pte_t *pte, int node, | 87 | void __meminit vmemmap_verify(pte_t *pte, int node, |
| 68 | unsigned long start, unsigned long end) | 88 | unsigned long start, unsigned long end) |
| 69 | { | 89 | { |
| @@ -80,7 +100,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node) | |||
| 80 | pte_t *pte = pte_offset_kernel(pmd, addr); | 100 | pte_t *pte = pte_offset_kernel(pmd, addr); |
| 81 | if (pte_none(*pte)) { | 101 | if (pte_none(*pte)) { |
| 82 | pte_t entry; | 102 | pte_t entry; |
| 83 | void *p = vmemmap_alloc_block(PAGE_SIZE, node); | 103 | void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node); |
| 84 | if (!p) | 104 | if (!p) |
| 85 | return NULL; | 105 | return NULL; |
| 86 | entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); | 106 | entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); |
| @@ -163,3 +183,55 @@ struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid) | |||
| 163 | 183 | ||
| 164 | return map; | 184 | return map; |
| 165 | } | 185 | } |
| 186 | |||
| 187 | void __init sparse_mem_maps_populate_node(struct page **map_map, | ||
| 188 | unsigned long pnum_begin, | ||
| 189 | unsigned long pnum_end, | ||
| 190 | unsigned long map_count, int nodeid) | ||
| 191 | { | ||
| 192 | unsigned long pnum; | ||
| 193 | unsigned long size = sizeof(struct page) * PAGES_PER_SECTION; | ||
| 194 | void *vmemmap_buf_start; | ||
| 195 | |||
| 196 | size = ALIGN(size, PMD_SIZE); | ||
| 197 | vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count, | ||
| 198 | PMD_SIZE, __pa(MAX_DMA_ADDRESS)); | ||
| 199 | |||
| 200 | if (vmemmap_buf_start) { | ||
| 201 | vmemmap_buf = vmemmap_buf_start; | ||
| 202 | vmemmap_buf_end = vmemmap_buf_start + size * map_count; | ||
| 203 | } | ||
| 204 | |||
| 205 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { | ||
| 206 | struct mem_section *ms; | ||
| 207 | |||
| 208 | if (!present_section_nr(pnum)) | ||
| 209 | continue; | ||
| 210 | |||
| 211 | map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); | ||
| 212 | if (map_map[pnum]) | ||
| 213 | continue; | ||
| 214 | ms = __nr_to_section(pnum); | ||
| 215 | printk(KERN_ERR "%s: sparsemem memory map backing failed, " | ||
| 216 | "some memory will not be available.\n", __func__); | ||
| 217 | ms->section_mem_map = 0; | ||
| 218 | } | ||
| 219 | |||
| 220 | if (vmemmap_buf_start) { | ||
| 221 | /* free the leftover part of the buffer */ | ||
| 222 | #ifdef CONFIG_NO_BOOTMEM | ||
| 223 | free_early(__pa(vmemmap_buf_start), __pa(vmemmap_buf_end)); | ||
| 224 | if (vmemmap_buf_start < vmemmap_buf) { | ||
| 225 | char name[15]; | ||
| 226 | |||
| 227 | snprintf(name, sizeof(name), "MEMMAP %d", nodeid); | ||
| 228 | reserve_early_without_check(__pa(vmemmap_buf_start), | ||
| 229 | __pa(vmemmap_buf), name); | ||
| 230 | } | ||
| 231 | #else | ||
| 232 | free_bootmem(__pa(vmemmap_buf), vmemmap_buf_end - vmemmap_buf); | ||
| 233 | #endif | ||
| 234 | vmemmap_buf = NULL; | ||
| 235 | vmemmap_buf_end = NULL; | ||
| 236 | } | ||
| 237 | } | ||
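
The buffer logic in vmemmap_alloc_block_buf() is a plain bump allocator. A hedged standalone sketch of the pattern (names are illustrative; the kernel code keeps the cursor in the static vmemmap_buf/vmemmap_buf_end pair):

	static void *bump_alloc(void **cur, void *end, unsigned long size)
	{
		/* align the cursor to 'size', as the vmemmap code does */
		void *ptr = (void *)ALIGN((unsigned long)*cur, size);

		if (ptr + size > end)
			return NULL;	/* exhausted: caller falls back */

		*cur = ptr + size;	/* advance past the handed-out block */
		return ptr;
	}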
diff --git a/mm/sparse.c b/mm/sparse.c index 6ce4aab69e99..22896d589133 100644 --- a/mm/sparse.c +++ b/mm/sparse.c | |||
| @@ -271,7 +271,8 @@ static unsigned long *__kmalloc_section_usemap(void) | |||
| 271 | 271 | ||
| 272 | #ifdef CONFIG_MEMORY_HOTREMOVE | 272 | #ifdef CONFIG_MEMORY_HOTREMOVE |
| 273 | static unsigned long * __init | 273 | static unsigned long * __init |
| 274 | sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat) | 274 | sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, |
| 275 | unsigned long count) | ||
| 275 | { | 276 | { |
| 276 | unsigned long section_nr; | 277 | unsigned long section_nr; |
| 277 | 278 | ||
| @@ -286,7 +287,7 @@ sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat) | |||
| 286 | * this problem. | 287 | * this problem. |
| 287 | */ | 288 | */ |
| 288 | section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); | 289 | section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); |
| 289 | return alloc_bootmem_section(usemap_size(), section_nr); | 290 | return alloc_bootmem_section(usemap_size() * count, section_nr); |
| 290 | } | 291 | } |
| 291 | 292 | ||
| 292 | static void __init check_usemap_section_nr(int nid, unsigned long *usemap) | 293 | static void __init check_usemap_section_nr(int nid, unsigned long *usemap) |
| @@ -329,7 +330,8 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) | |||
| 329 | } | 330 | } |
| 330 | #else | 331 | #else |
| 331 | static unsigned long * __init | 332 | static unsigned long * __init |
| 332 | sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat) | 333 | sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, |
| 334 | unsigned long count) | ||
| 333 | { | 335 | { |
| 334 | return NULL; | 336 | return NULL; |
| 335 | } | 337 | } |
| @@ -339,27 +341,40 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) | |||
| 339 | } | 341 | } |
| 340 | #endif /* CONFIG_MEMORY_HOTREMOVE */ | 342 | #endif /* CONFIG_MEMORY_HOTREMOVE */ |
| 341 | 343 | ||
| 342 | static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum) | 344 | static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map, |
| 345 | unsigned long pnum_begin, | ||
| 346 | unsigned long pnum_end, | ||
| 347 | unsigned long usemap_count, int nodeid) | ||
| 343 | { | 348 | { |
| 344 | unsigned long *usemap; | 349 | void *usemap; |
| 345 | struct mem_section *ms = __nr_to_section(pnum); | 350 | unsigned long pnum; |
| 346 | int nid = sparse_early_nid(ms); | 351 | int size = usemap_size(); |
| 347 | |||
| 348 | usemap = sparse_early_usemap_alloc_pgdat_section(NODE_DATA(nid)); | ||
| 349 | if (usemap) | ||
| 350 | return usemap; | ||
| 351 | 352 | ||
| 352 | usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size()); | 353 | usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid), |
| 354 | usemap_count); | ||
| 353 | if (usemap) { | 355 | if (usemap) { |
| 354 | check_usemap_section_nr(nid, usemap); | 356 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { |
| 355 | return usemap; | 357 | if (!present_section_nr(pnum)) |
| 358 | continue; | ||
| 359 | usemap_map[pnum] = usemap; | ||
| 360 | usemap += size; | ||
| 361 | } | ||
| 362 | return; | ||
| 356 | } | 363 | } |
| 357 | 364 | ||
| 358 | /* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */ | 365 | usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count); |
| 359 | nid = 0; | 366 | if (usemap) { |
| 367 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { | ||
| 368 | if (!present_section_nr(pnum)) | ||
| 369 | continue; | ||
| 370 | usemap_map[pnum] = usemap; | ||
| 371 | usemap += size; | ||
| 372 | check_usemap_section_nr(nodeid, usemap_map[pnum]); | ||
| 373 | } | ||
| 374 | return; | ||
| 375 | } | ||
| 360 | 376 | ||
| 361 | printk(KERN_WARNING "%s: allocation failed\n", __func__); | 377 | printk(KERN_WARNING "%s: allocation failed\n", __func__); |
| 362 | return NULL; | ||
| 363 | } | 378 | } |
| 364 | 379 | ||
| 365 | #ifndef CONFIG_SPARSEMEM_VMEMMAP | 380 | #ifndef CONFIG_SPARSEMEM_VMEMMAP |
| @@ -375,8 +390,65 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid) | |||
| 375 | PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION)); | 390 | PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION)); |
| 376 | return map; | 391 | return map; |
| 377 | } | 392 | } |
| 393 | void __init sparse_mem_maps_populate_node(struct page **map_map, | ||
| 394 | unsigned long pnum_begin, | ||
| 395 | unsigned long pnum_end, | ||
| 396 | unsigned long map_count, int nodeid) | ||
| 397 | { | ||
| 398 | void *map; | ||
| 399 | unsigned long pnum; | ||
| 400 | unsigned long size = sizeof(struct page) * PAGES_PER_SECTION; | ||
| 401 | |||
| 402 | map = alloc_remap(nodeid, size * map_count); | ||
| 403 | if (map) { | ||
| 404 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { | ||
| 405 | if (!present_section_nr(pnum)) | ||
| 406 | continue; | ||
| 407 | map_map[pnum] = map; | ||
| 408 | map += size; | ||
| 409 | } | ||
| 410 | return; | ||
| 411 | } | ||
| 412 | |||
| 413 | size = PAGE_ALIGN(size); | ||
| 414 | map = alloc_bootmem_pages_node(NODE_DATA(nodeid), size * map_count); | ||
| 415 | if (map) { | ||
| 416 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { | ||
| 417 | if (!present_section_nr(pnum)) | ||
| 418 | continue; | ||
| 419 | map_map[pnum] = map; | ||
| 420 | map += size; | ||
| 421 | } | ||
| 422 | return; | ||
| 423 | } | ||
| 424 | |||
| 425 | /* fallback */ | ||
| 426 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { | ||
| 427 | struct mem_section *ms; | ||
| 428 | |||
| 429 | if (!present_section_nr(pnum)) | ||
| 430 | continue; | ||
| 431 | map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); | ||
| 432 | if (map_map[pnum]) | ||
| 433 | continue; | ||
| 434 | ms = __nr_to_section(pnum); | ||
| 435 | printk(KERN_ERR "%s: sparsemem memory map backing failed, " | ||
| 436 | "some memory will not be available.\n", __func__); | ||
| 437 | ms->section_mem_map = 0; | ||
| 438 | } | ||
| 439 | } | ||
| 378 | #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ | 440 | #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ |
| 379 | 441 | ||
| 442 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
| 443 | static void __init sparse_early_mem_maps_alloc_node(struct page **map_map, | ||
| 444 | unsigned long pnum_begin, | ||
| 445 | unsigned long pnum_end, | ||
| 446 | unsigned long map_count, int nodeid) | ||
| 447 | { | ||
| 448 | sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end, | ||
| 449 | map_count, nodeid); | ||
| 450 | } | ||
| 451 | #else | ||
| 380 | static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) | 452 | static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) |
| 381 | { | 453 | { |
| 382 | struct page *map; | 454 | struct page *map; |
| @@ -392,10 +464,12 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) | |||
| 392 | ms->section_mem_map = 0; | 464 | ms->section_mem_map = 0; |
| 393 | return NULL; | 465 | return NULL; |
| 394 | } | 466 | } |
| 467 | #endif | ||
| 395 | 468 | ||
| 396 | void __attribute__((weak)) __meminit vmemmap_populate_print_last(void) | 469 | void __attribute__((weak)) __meminit vmemmap_populate_print_last(void) |
| 397 | { | 470 | { |
| 398 | } | 471 | } |
| 472 | |||
| 399 | /* | 473 | /* |
| 400 | * Allocate the accumulated non-linear sections, allocate a mem_map | 474 | * Allocate the accumulated non-linear sections, allocate a mem_map |
| 401 | * for each and record the physical to section mapping. | 475 | * for each and record the physical to section mapping. |
| @@ -407,6 +481,14 @@ void __init sparse_init(void) | |||
| 407 | unsigned long *usemap; | 481 | unsigned long *usemap; |
| 408 | unsigned long **usemap_map; | 482 | unsigned long **usemap_map; |
| 409 | int size; | 483 | int size; |
| 484 | int nodeid_begin = 0; | ||
| 485 | unsigned long pnum_begin = 0; | ||
| 486 | unsigned long usemap_count; | ||
| 487 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
| 488 | unsigned long map_count; | ||
| 489 | int size2; | ||
| 490 | struct page **map_map; | ||
| 491 | #endif | ||
| 410 | 492 | ||
| 411 | /* | 493 | /* |
| 412 | * map is using big page (aka 2M in x86 64 bit) | 494 | * map is using big page (aka 2M in x86 64 bit) |
| @@ -425,10 +507,81 @@ void __init sparse_init(void) | |||
| 425 | panic("can not allocate usemap_map\n"); | 507 | panic("can not allocate usemap_map\n"); |
| 426 | 508 | ||
| 427 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { | 509 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { |
| 510 | struct mem_section *ms; | ||
| 511 | |||
| 428 | if (!present_section_nr(pnum)) | 512 | if (!present_section_nr(pnum)) |
| 429 | continue; | 513 | continue; |
| 430 | usemap_map[pnum] = sparse_early_usemap_alloc(pnum); | 514 | ms = __nr_to_section(pnum); |
| 515 | nodeid_begin = sparse_early_nid(ms); | ||
| 516 | pnum_begin = pnum; | ||
| 517 | break; | ||
| 431 | } | 518 | } |
| 519 | usemap_count = 1; | ||
| 520 | for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) { | ||
| 521 | struct mem_section *ms; | ||
| 522 | int nodeid; | ||
| 523 | |||
| 524 | if (!present_section_nr(pnum)) | ||
| 525 | continue; | ||
| 526 | ms = __nr_to_section(pnum); | ||
| 527 | nodeid = sparse_early_nid(ms); | ||
| 528 | if (nodeid == nodeid_begin) { | ||
| 529 | usemap_count++; | ||
| 530 | continue; | ||
| 531 | } | ||
| 532 | /* ok, we need to take care of pnum_begin .. pnum - 1 */ | ||
| 533 | sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, pnum, | ||
| 534 | usemap_count, nodeid_begin); | ||
| 535 | /* new start, update count etc. */ | ||
| 536 | nodeid_begin = nodeid; | ||
| 537 | pnum_begin = pnum; | ||
| 538 | usemap_count = 1; | ||
| 539 | } | ||
| 540 | /* ok, last chunk */ | ||
| 541 | sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS, | ||
| 542 | usemap_count, nodeid_begin); | ||
| 543 | |||
| 544 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
| 545 | size2 = sizeof(struct page *) * NR_MEM_SECTIONS; | ||
| 546 | map_map = alloc_bootmem(size2); | ||
| 547 | if (!map_map) | ||
| 548 | panic("can not allocate map_map\n"); | ||
| 549 | |||
| 550 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { | ||
| 551 | struct mem_section *ms; | ||
| 552 | |||
| 553 | if (!present_section_nr(pnum)) | ||
| 554 | continue; | ||
| 555 | ms = __nr_to_section(pnum); | ||
| 556 | nodeid_begin = sparse_early_nid(ms); | ||
| 557 | pnum_begin = pnum; | ||
| 558 | break; | ||
| 559 | } | ||
| 560 | map_count = 1; | ||
| 561 | for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) { | ||
| 562 | struct mem_section *ms; | ||
| 563 | int nodeid; | ||
| 564 | |||
| 565 | if (!present_section_nr(pnum)) | ||
| 566 | continue; | ||
| 567 | ms = __nr_to_section(pnum); | ||
| 568 | nodeid = sparse_early_nid(ms); | ||
| 569 | if (nodeid == nodeid_begin) { | ||
| 570 | map_count++; | ||
| 571 | continue; | ||
| 572 | } | ||
| 573 | /* ok, we need to take care of pnum_begin .. pnum - 1 */ | ||
| 574 | sparse_early_mem_maps_alloc_node(map_map, pnum_begin, pnum, | ||
| 575 | map_count, nodeid_begin); | ||
| 576 | /* new start, update count etc*/ | ||
| 577 | nodeid_begin = nodeid; | ||
| 578 | pnum_begin = pnum; | ||
| 579 | map_count = 1; | ||
| 580 | } | ||
| 581 | /* ok, last chunk */ | ||
| 582 | sparse_early_mem_maps_alloc_node(map_map, pnum_begin, NR_MEM_SECTIONS, | ||
| 583 | map_count, nodeid_begin); | ||
| 584 | #endif | ||
| 432 | 585 | ||
| 433 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { | 586 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { |
| 434 | if (!present_section_nr(pnum)) | 587 | if (!present_section_nr(pnum)) |
| @@ -438,7 +591,11 @@ void __init sparse_init(void) | |||
| 438 | if (!usemap) | 591 | if (!usemap) |
| 439 | continue; | 592 | continue; |
| 440 | 593 | ||
| 594 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
| 595 | map = map_map[pnum]; | ||
| 596 | #else | ||
| 441 | map = sparse_early_mem_map_alloc(pnum); | 597 | map = sparse_early_mem_map_alloc(pnum); |
| 598 | #endif | ||
| 442 | if (!map) | 599 | if (!map) |
| 443 | continue; | 600 | continue; |
| 444 | 601 | ||
| @@ -448,6 +605,9 @@ void __init sparse_init(void) | |||
| 448 | 605 | ||
| 449 | vmemmap_populate_print_last(); | 606 | vmemmap_populate_print_last(); |
| 450 | 607 | ||
| 608 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
| 609 | free_bootmem(__pa(map_map), size2); | ||
| 610 | #endif | ||
| 451 | free_bootmem(__pa(usemap_map), size); | 611 | free_bootmem(__pa(usemap_map), size); |
| 452 | } | 612 | } |
| 453 | 613 | ||
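
Both passes that sparse_init() now makes over NR_MEM_SECTIONS follow the same shape: find runs of present sections that share a node, and hand each run to a per-node batch allocator in one call. A hedged generic sketch of that walk (the callback-based helper is illustrative, not part of the patch):

	static void __init for_each_node_section_run(
		void (*batch)(unsigned long begin, unsigned long end,
			      unsigned long count, int nid))
	{
		unsigned long pnum, pnum_begin = 0, count = 0;
		int nid_begin = -1;

		for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
			int nid;

			if (!present_section_nr(pnum))
				continue;
			nid = sparse_early_nid(__nr_to_section(pnum));
			if (nid_begin < 0) {
				nid_begin = nid;	/* first run starts */
				pnum_begin = pnum;
			} else if (nid != nid_begin) {
				/* close the previous run */
				batch(pnum_begin, pnum, count, nid_begin);
				nid_begin = nid;
				pnum_begin = pnum;
				count = 0;
			}
			count++;
		}
		if (nid_begin >= 0)	/* flush the last run */
			batch(pnum_begin, NR_MEM_SECTIONS, count, nid_begin);
	}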
